1 """base classes and helper functions for css and stylesheets packages
2 """
3 __all__ = []
4 __docformat__ = 'restructuredtext'
5 __version__ = '$Id: util.py 1429 2008-08-11 19:01:52Z cthedot $'
6
7 import codecs
8 from itertools import ifilter
9 import types
10 import urllib2
11 import xml.dom
12
13 from helper import normalize
14 import tokenize2
15 import cssutils
16 import encutils
17
18 -class Base(object):
19 """
20 Base class for most CSS and StyleSheets classes
21
22 **Superceded by Base2 which is used for new seq handling class.**
23 See cssutils.util.Base2
24
25 Contains helper methods for inheriting classes helping parsing
26
27 ``_normalize`` is static as used by Preferences.
28 """
29 __tokenizer2 = tokenize2.Tokenizer()
30
31 _log = cssutils.log
32 _prods = tokenize2.CSSProductions
33
34
35
36
37 _SHORTHANDPROPERTIES = {
38 u'background': [],
39 u'background-position': [],
40 u'border': [],
41 u'border-left': [],
42 u'border-right': [],
43 u'border-top': [],
44 u'border-bottom': [],
45
46
47
48 u'cue': [],
49 u'font': [],
50 u'list-style': [],
51
52 u'outline': [],
53
54 u'pause': []
55 }
56
57 @staticmethod
59 """
60 normalizes x, namely:
61
62 - remove any \ before non unicode sequences (0-9a-zA-Z) so for
63 x=="c\olor\" return "color" (unicode escape sequences should have
64 been resolved by the tokenizer already)
65 - lowercase
66 """
67 return normalize(x)
68
70 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly"
71 if hasattr(self, '_readonly') and self._readonly:
72 raise xml.dom.NoModificationAllowedErr(
73 u'%s is readonly.' % self.__class__)
74 return True
75 return False
76
78 """
79 returns tuple (text, dict-of-namespaces) or if no namespaces are
80 in cssText returns (cssText, {})
81
82 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
83 CSSStyleSheet
84 """
85 if isinstance(text_namespaces_tuple, tuple):
86 return text_namespaces_tuple[0], _SimpleNamespaces(self._log,
87 text_namespaces_tuple[1])
88 else:
89 return text_namespaces_tuple, _SimpleNamespaces(log=self._log)
90
92 """
93 returns tokens of textortokens which may already be tokens in which
94 case simply returns input
95 """
96 if not textortokens:
97 return None
98 elif isinstance(textortokens, basestring):
99
100 return self.__tokenizer2.tokenize(
101 textortokens)
102 elif types.GeneratorType == type(textortokens):
103
104 return textortokens
105 elif isinstance(textortokens, tuple):
106
107 return [textortokens]
108 else:
109
110 return (x for x in textortokens)
111
113 "returns next token in generator tokenizer or the default value"
114 try:
115 return tokenizer.next()
116 except (StopIteration, AttributeError):
117 return default
118
120 "returns type of Tokenizer token"
121 if token:
122 return token[0]
123 else:
124 return None
125
127 "returns value of Tokenizer token"
128 if token and normalize:
129 return Base._normalize(token[1])
130 elif token:
131 return token[1]
132 else:
133 return None
134
136 """
137 for STRING returns the actual content without surrounding "" or ''
138 and without respective escapes, e.g.::
139
140 "with \" char" => with " char
141 """
142 if token:
143 value = token[1]
144 return value.replace('\\'+value[0], value[0])[1:-1]
145 else:
146 return None
147
149 """
150 for URI returns the actual content without surrounding url()
151 or url(""), url('') and without respective escapes, e.g.::
152
153 url("\"") => "
154 """
155 if token:
156 value = token[1][4:-1].strip()
157 if (value[0] in '\'"') and (value[0] == value[-1]):
158
159 value = value.replace('\\'+value[0], value[0])[1:-1]
160 return value
161 else:
162 return None
163
164 - def _tokensupto2(self,
165 tokenizer,
166 starttoken=None,
167 blockstartonly=False,
168 blockendonly=False,
169 mediaendonly=False,
170 importmediaqueryendonly=False,
171 mediaqueryendonly=False,
172 semicolon=False,
173 propertynameendonly=False,
174 propertyvalueendonly=False,
175 propertypriorityendonly=False,
176 selectorattendonly=False,
177 funcendonly=False,
178 listseponly=False,
179 separateEnd=False
180 ):
181 """
182 returns tokens upto end of atrule and end index
183 end is defined by parameters, might be ; } ) or other
184
185 default looks for ending "}" and ";"
186 """
187 ends = u';}'
188 endtypes = ()
189 brace = bracket = parant = 0
190
191 if blockstartonly:
192 ends = u'{'
193 brace = -1
194 elif blockendonly:
195 ends = u'}'
196 brace = 1
197 elif mediaendonly:
198 ends = u'}'
199 brace = 1
200 elif importmediaqueryendonly:
201
202 ends = u';'
203 endtypes = ('STRING',)
204 elif mediaqueryendonly:
205
206
207 ends = u'{'
208 brace = -1
209 endtypes = ('STRING',)
210 elif semicolon:
211 ends = u';'
212 elif propertynameendonly:
213 ends = u':;'
214 elif propertyvalueendonly:
215 ends = u';!'
216 elif propertypriorityendonly:
217 ends = u';'
218 elif selectorattendonly:
219 ends = u']'
220 if starttoken and self._tokenvalue(starttoken) == u'[':
221 bracket = 1
222 elif funcendonly:
223 ends = u')'
224 parant = 1
225 elif listseponly:
226 ends = u','
227
228 resulttokens = []
229 if starttoken:
230 resulttokens.append(starttoken)
231 if tokenizer:
232 for token in tokenizer:
233 typ, val, line, col = token
234 if 'EOF' == typ:
235 resulttokens.append(token)
236 break
237 if u'{' == val:
238 brace += 1
239 elif u'}' == val:
240 brace -= 1
241 elif u'[' == val:
242 bracket += 1
243 elif u']' == val:
244 bracket -= 1
245
246 elif u'(' == val or \
247 Base._prods.FUNCTION == typ:
248 parant += 1
249 elif u')' == val:
250 parant -= 1
251
252 resulttokens.append(token)
253
254 if (brace == bracket == parant == 0) and (
255 val in ends or typ in endtypes):
256 break
257 elif mediaqueryendonly and brace == -1 and (
258 bracket == parant == 0) and typ in endtypes:
259
260 break
261
262 if separateEnd:
263
264 if resulttokens:
265 return resulttokens[:-1], resulttokens[-1]
266 else:
267 return resulttokens, None
268 else:
269 return resulttokens
270
272 """
273 returns string value of t (t may be a string, a list of token tuples
274 or a single tuple in format (type, value, line, col).
275 Mainly used to get a string value of t for error messages.
276 """
277 if not t:
278 return u''
279 elif isinstance(t, basestring):
280 return t
281 else:
282 return u''.join([x[1] for x in t])
283
285 """
286 adds default productions if not already present, used by
287 _parse only
288
289 each production should return the next expected token
290 normaly a name like "uri" or "EOF"
291 some have no expectation like S or COMMENT, so simply return
292 the current value of self.__expected
293 """
294 def ATKEYWORD(expected, seq, token, tokenizer=None):
295 "default impl for unexpected @rule"
296 if expected != 'EOF':
297
298 rule = cssutils.css.CSSUnknownRule()
299 rule.cssText = self._tokensupto2(tokenizer, token)
300 if rule.wellformed:
301 seq.append(rule)
302 return expected
303 else:
304 new['wellformed'] = False
305 self._log.error(u'Expected EOF.', token=token)
306 return expected
307
308 def COMMENT(expected, seq, token, tokenizer=None):
309 "default implementation for COMMENT token adds CSSCommentRule"
310 seq.append(cssutils.css.CSSComment([token]))
311 return expected
312
313 def S(expected, seq, token, tokenizer=None):
314 "default implementation for S token, does nothing"
315 return expected
316
317 def EOF(expected=None, seq=None, token=None, tokenizer=None):
318 "default implementation for EOF token"
319 return 'EOF'
320
321 p = {'ATKEYWORD': ATKEYWORD,
322 'COMMENT': COMMENT,
323 'S': S,
324 'EOF': EOF
325 }
326 p.update(productions)
327 return p
328
329 - def _parse(self, expected, seq, tokenizer, productions, default=None,
330 new=None):
331 """
332 puts parsed tokens in seq by calling a production with
333 (seq, tokenizer, token)
334
335 expected
336 a name what token or value is expected next, e.g. 'uri'
337 seq
338 to add rules etc to
339 tokenizer
340 call tokenizer.next() to get next token
341 productions
342 callbacks {tokentype: callback}
343 default
344 default callback if tokentype not in productions
345 new
346 used to init default productions
347
348 returns (wellformed, expected) which the last prod might have set
349 """
350 wellformed = True
351 if tokenizer:
352 prods = self._adddefaultproductions(productions, new)
353 for token in tokenizer:
354 p = prods.get(token[0], default)
355 if p:
356 expected = p(expected, seq, token, tokenizer)
357 else:
358 wellformed = False
359 self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token)
360 return wellformed, expected
361
364 """
365 Base class for new seq handling, used by Selector for now only
366 """
369
371 """
372 sets newseq and makes it readonly
373 """
374 newseq._readonly = True
375 self._seq = newseq
376
377 seq = property(lambda self: self._seq, doc="seq for most classes")
378
380 "get a writeable Seq() which is added later"
381 return Seq(readonly=readonly)
382
384 """
385 adds default productions if not already present, used by
386 _parse only
387
388 each production should return the next expected token
389 normaly a name like "uri" or "EOF"
390 some have no expectation like S or COMMENT, so simply return
391 the current value of self.__expected
392 """
393 def ATKEYWORD(expected, seq, token, tokenizer=None):
394 "default impl for unexpected @rule"
395 if expected != 'EOF':
396
397 rule = cssutils.css.CSSUnknownRule()
398 rule.cssText = self._tokensupto2(tokenizer, token)
399 if rule.wellformed:
400 seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
401 line=token[2], col=token[3])
402 return expected
403 else:
404 new['wellformed'] = False
405 self._log.error(u'Expected EOF.', token=token)
406 return expected
407
408 def COMMENT(expected, seq, token, tokenizer=None):
409 "default impl, adds CSSCommentRule if not token == EOF"
410 if expected == 'EOF':
411 new['wellformed'] = False
412 self._log.error(u'Expected EOF but found comment.', token=token)
413 seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
414 return expected
415
416 def S(expected, seq, token, tokenizer=None):
417 "default impl, does nothing if not token == EOF"
418 if expected == 'EOF':
419 new['wellformed'] = False
420 self._log.error(u'Expected EOF but found whitespace.', token=token)
421 return expected
422
423 def EOF(expected=None, seq=None, token=None, tokenizer=None):
424 "default implementation for EOF token"
425 return 'EOF'
426
427 defaultproductions = {'ATKEYWORD': ATKEYWORD,
428 'COMMENT': COMMENT,
429 'S': S,
430 'EOF': EOF
431 }
432 defaultproductions.update(productions)
433 return defaultproductions
434
435
436 -class Seq(object):
437 """
438 property seq of Base2 inheriting classes, holds a list of Item objects.
439
440 used only by Selector for now
441
442 is normally readonly, only writable during parsing
443 """
445 """
446 only way to write to a Seq is to initialize it with new items
447 each itemtuple has (value, type, line) where line is optional
448 """
449 self._seq = []
450 self._readonly = readonly
451
454
457
460
462 return iter(self._seq)
463
465 return len(self._seq)
466
def append(self, val, typ, line=None, col=None):
    """
    wraps the given values in a new Item and adds it at the end

    raises AttributeError if this Seq is readonly
    """
    if self._readonly:
        raise AttributeError('Seq is readonly.')
    newitem = Item(val, typ, line, col)
    self._seq.append(newitem)
473
475 "if not readonly add item which must be an Item"
476 if self._readonly:
477 raise AttributeError('Seq is readonly.')
478 else:
479 self._seq.append(item)
480
def replace(self, index=-1, val=None, typ=None, line=None, col=None):
    """
    puts a new Item built from the given values at position ``index``
    (default: the last position), dropping the Item stored there

    raises AttributeError if this Seq is readonly
    """
    if self._readonly:
        raise AttributeError('Seq is readonly.')
    newitem = Item(val, typ, line, col)
    self._seq[index] = newitem
490
492 "returns a repr same as a list of tuples of (value, type)"
493 return u'cssutils.%s.%s([\n %s])' % (self.__module__,
494 self.__class__.__name__,
495 u',\n '.join([u'(%r, %r)' % (item.type, item.value)
496 for item in self._seq]
497 ))
499 return "<cssutils.%s.%s object length=%r at 0x%x>" % (
500 self.__module__, self.__class__.__name__, len(self), id(self))
501
503 """
504 an item in the seq list of classes (successor to tuple items in old seq)
505
506 each item has attributes:
507
508 type
509 a sematic type like "element", "attribute"
510 value
511 the actual value which may be a string, number etc or an instance
512 of e.g. a CSSComment
513 *line*
514 **NOT IMPLEMENTED YET, may contain the line in the source later**
515 """
def __init__(self, value, type, line=None, col=None):
    """
    stores the given values; they are kept in private (double
    underscore) attributes and read via the ``value``, ``type``,
    ``line`` and ``col`` properties

    value
        the actual value of the item
    type
        the type of the item
    line
        optional, defaults to None
    col
        optional, defaults to None
    """
    # NOTE: the parameter name ``type`` shadows the builtin inside this
    # method; kept as it is part of the signature
    self.__value = value
    self.__type = type
    self.__line = line
    self.__col = col
521
522 type = property(lambda self: self.__type)
523 value = property(lambda self: self.__value)
524 line = property(lambda self: self.__line)
525 col = property(lambda self: self.__col)
526
528 return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % (
529 self.__module__, self.__class__.__name__,
530 self.__value, self.__type, self.__line, self.__col)
531
534 """
535 (EXPERIMENTAL)
536 A base class used for list classes like css.SelectorList or
537 stylesheets.MediaList
538
539 adds list like behaviour running on inhering class' property ``seq``
540
541 - item in x => bool
542 - len(x) => integer
543 - get, set and del x[i]
544 - for item in x
545 - append(item)
546
547 some methods must be overwritten in inheriting class
548 """
551
554
557
559 return self.seq[index]
560
562 def gen():
563 for x in self.seq:
564 yield x
565 return gen()
566
569
571 "must be overwritten"
572 raise NotImplementedError
573
575 "must be overwritten"
576 raise NotImplementedError
577
580 """
581 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
582 Works on effective namespaces, so e.g. if::
583
584 @namespace p1 "uri";
585 @namespace p2 "uri";
586
587 only the second rule is effective and kept.
588
589 namespaces
590 a dictionary {prefix: namespaceURI} containing the effective namespaces
591 only. These are the latest set in the CSSStyleSheet.
592 parentStyleSheet
593 the parent CSSStyleSheet
594 """
595 - def __init__(self, parentStyleSheet, log=None, *args):
599
602
619
621 try:
622 return self.namespaces[prefix]
623 except KeyError, e:
624 self._log.error('Prefix %r not found.' % prefix,
625 error=xml.dom.NamespaceErr)
626
629
632
648
655
663
664 namespaces = property(__getNamespaces,
665 doc=u'Holds only effective @namespace rules in self.parentStyleSheets'
666 '@namespace rules.')
667
668 - def get(self, prefix, default):
670
673
676
679
681 """
682 returns effective prefix for given namespaceURI or raises IndexError
683 if this cannot be found"""
684 for prefix, uri in self.namespaces.items():
685 if uri == namespaceURI:
686 return prefix
687 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
688
690 return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % (
691 self.__class__.__name__, str(self.parentStyleSheet), id(self))
692
695 """
696 namespaces used in objects like Selector as long as they are not connected
697 to a CSSStyleSheet
698 """
703
706
707 namespaces = property(lambda self: self.__namespaces,
708 doc=u'Dict Wrapper for self.sheets @namespace rules.')
709
711 return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
712 self.__class__.__name__, self.namespaces, id(self))
713
715 return u"cssutils.util.%s(%r)" % (self.__class__.__name__,
716 self.namespaces)
717
720 """Retrieve data from ``url``. cssutils default implementation of fetch
721 URL function.
722
723 Returns ``(encoding, string)`` or ``None``
724 """
725 try:
726 res = urllib2.urlopen(url)
727 except OSError, e:
728
729 cssutils.log.warn(e, error=OSError)
730 except (OSError, ValueError), e:
731
732 cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError)
733 except urllib2.HTTPError, e:
734
735 cssutils.log.warn(u'HTTPError opening url=%r: %s %s' %
736 (url, e.code, e.msg), error=e)
737 except urllib2.URLError, e:
738
739 cssutils.log.warn(u'URLError, %s' % e.reason, error=e)
740 else:
741 if res:
742 mimeType, encoding = encutils.getHTTPInfo(res)
743 if mimeType != u'text/css':
744 cssutils.log.error(u'Expected "text/css" mime type for url=%s but found: %r' %
745 (url, mimeType), error=ValueError)
746 return encoding, res.read()
747
748 -def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
749 """
750 Read cssText from url and decode it using all relevant methods (HTTP
751 header, BOM, @charset). Returns
752
753 - encoding used to decode text (which is needed to set encoding of
754 stylesheet properly)
755 - type of encoding (how it was retrieved, see list below)
756 - decodedCssText
757
758 ``fetcher``
759 see cssutils.registerFetchUrl for details
760 ``overrideEncoding``
761 If given this encoding is used and all other encoding information is
762 ignored (HTTP, BOM etc)
763 ``parentEncoding``
764 Encoding of parent stylesheet (while e.g. reading @import references sheets)
765 or document if available.
766
767 Priority or encoding information
768 --------------------------------
769 **cssutils only**: 0. overrideEncoding
770
771 1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols)
772 2. BOM and/or @charset (see below)
773 3. <link charset=""> or other metadata from the linking mechanism (if any)
774 4. charset of referring style sheet or document (if any)
775 5. Assume UTF-8
776
777 """
778 enctype = None
779
780 if not fetcher:
781 fetcher = _defaultFetcher
782 r = fetcher(url)
783 if r and len(r) == 2 and r[1] is not None:
784 httpEncoding, content = r
785
786 if overrideEncoding:
787 enctype = 0
788 encoding = overrideEncoding
789 elif httpEncoding:
790 enctype = 1
791 encoding = httpEncoding
792 else:
793
794 contentEncoding, explicit = cssutils.codec.detectencoding_str(content)
795 if explicit:
796 enctype = 2
797 encoding = contentEncoding
798 elif parentEncoding:
799 enctype = 4
800
801 encoding = parentEncoding
802 else:
803 enctype = 5
804 encoding = 'utf-8'
805
806 try:
807
808 if content is not None:
809 decodedCssText = codecs.lookup("css")[1](content, encoding=encoding)[0]
810 else:
811 decodedCssText = None
812 except UnicodeDecodeError, e:
813 cssutils.log.warn(e, neverraise=True)
814 decodedCssText = None
815
816 return encoding, enctype, decodedCssText
817 else:
818 return None, None, None
819