1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 """Extension to libxml2 for XMPP stream and stanza processing"""
20
21 __revision__="$Id: xmlextra.py,v 1.15 2004/10/11 18:33:51 jajcus Exp $"
22 __docformat__="restructuredtext en"
23
24 import sys
25 import libxml2
26 import threading
27 import re
28
29 from pyxmpp.exceptions import StreamParseError
30
# Namespace URI used for the root element of the shared document below.
COMMON_NS = "http://pyxmpp.jajcus.net/xmlns/common"

# Shared module-level document holding a single <root/> element that is
# declared in, and bound to, COMMON_NS.
common_doc = libxml2.newDoc("1.0")
common_root = common_doc.newChild(None, "root", None)
common_ns = common_root.newNs(COMMON_NS, None)
common_root.setNs(common_ns)
common_doc.setRootElement(common_root)
37
39 """Base class for stream handler."""
42
47
49 """Process stream end."""
50 doc=libxml2.xmlDoc(_doc)
51 self.stream_end(doc)
52
54 """Process complete stanza."""
55 doc=libxml2.xmlDoc(_doc)
56 node=libxml2.xmlNode(_node)
57 self.stanza(doc,node)
58
60 """Called when the start tag of root element is encountered
61 in the stream.
62
63 :Parameters:
64 - `doc`: the document being parsed.
65 :Types:
66 - `doc`: `libxml2.xmlDoc`"""
67 print >>sys.stderr,"Unhandled stream start:",`doc.serialize()`
68
70 """Called when the end tag of root element is encountered
71 in the stream.
72
73 :Parameters:
74 - `doc`: the document being parsed.
75 :Types:
76 - `doc`: `libxml2.xmlDoc`"""
77 print >>sys.stderr,"Unhandled stream end",`doc.serialize()`
78
def stanza(self, _unused, node):
    """Report an unhandled stanza on standard error.

    Called when the end tag of a direct child of the root
    element is encountered in the stream.

    Please note that `node` will be removed from the document
    and freed after this method returns. If it is needed after
    that, a copy must be made before the method returns.

    :Parameters:
        - `_unused`: the document being parsed.
        - `node`: the (complete) element being processed
    :Types:
        - `_unused`: `libxml2.xmlDoc`
        - `node`: `libxml2.xmlNode`"""
    # Emits exactly what the former ``print >>sys.stderr`` statement did:
    # the label, one space, repr() of the serialized node and a newline.
    sys.stderr.write("Unhandled stanza %s\n" % (repr(node.serialize()),))
94
96 """Called when an error is encountered in the stream.
97
98 :Parameters:
99 - `descr`: description of the error
100 :Types:
101 - `descr`: `str`"""
102 raise StreamParseError,descr
103
105 """Called when a warning is encountered in the stream.
106
107 :Parameters:
108 - `descr`: description of the warning
109 :Types:
110 - `descr`: `str`"""
111
112 if not desc.startswith('xmlns: URI vcard-temp is not absolute'):
113 print "XML STREAM WARNING:",desc
114
115 try:
116
117
118
119 from pyxmpp import _xmlextra
120 from pyxmpp._xmlextra import error
121
122 _create_reader = _xmlextra.sax_reader_new
123
125 """Replace namespaces in a whole subtree.
126
127 The old namespace declaration will be removed if present on the `node`.
128
129 :Parameters:
130 - `node`: the root of the subtree where namespaces should be replaced.
131 - `old_ns`: the namespace to replace.
132 - `new_ns`: the namespace to be used instead of old_ns.
133 :Types:
134 - `node`: `libxml2.xmlNode`
135 - `old_ns`: `libxml2.xmlNs`
136 - `new_ns`: `libxml2.xmlNs`
137
138 Both old_ns and new_ns may be None meaning no namespace set."""
139 if old_ns is None:
140 old_ns__o = None
141 else:
142 old_ns__o = old_ns._o
143 if new_ns is None:
144 new_ns__o = None
145 else:
146 new_ns__o = new_ns._o
147 if node is None:
148 node__o = None
149 else:
150 node__o = node._o
151 _xmlextra.replace_ns(node__o, old_ns__o, new_ns__o)
152 if old_ns__o:
153 _xmlextra.remove_ns(node__o, old_ns__o)
154
155 pure_python = False
156
157 except ImportError:
158
159
160
162 """Exception raised on a stream parse error."""
163 pass
164
def _escape(data):
    """Escape data for XML.

    Replaces each of the five characters ``&``, ``<``, ``>``, ``'`` and
    ``"`` with the corresponding predefined XML entity reference.

    :Parameters:
        - `data`: the text to escape.
    :Types:
        - `data`: string

    :return: `data` with the special characters replaced.
    :returntype: string"""
    # "&" has to be replaced first: otherwise the ampersands introduced
    # by the later replacements would themselves be escaped again.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    data = data.replace("'", "&apos;")
    data = data.replace('"', "&quot;")
    return data
173
175 """SAX events handler for the python-only stream parser."""
177 """Initialize the SAX handler.
178
179 :Parameters:
180 - `handler`: Object to handle stream start, end and stanzas.
181 :Types:
182 - `handler`: `StreamHandler`
183 """
184 self._handler = handler
185 self._head = ""
186 self._tail = ""
187 self._current = ""
188 self._level = 0
189 self._doc = None
190 self._root = None
191
193 ""
194 if self._level>1:
195 self._current += _escape(data)
196
198 ""
199 if self._level>1:
200 self._current += _escape(data)
201
205
209
211 ""
212 self._current+="</%s>" % (tag,)
213 self._level -= 1
214 if self._level > 1:
215 return
216 if self._level==1:
217 xml=self._head+self._current+self._tail
218 doc=libxml2.parseDoc(xml)
219 try:
220 node = doc.getRootElement().children
221 try:
222 node1 = node.docCopyNode(self._doc, 1)
223 try:
224 self._root.addChild(node1)
225 self._handler.stanza(self._doc, node1)
226 except:
227 node1.unlinkNode()
228 node1.freeNode()
229 del node1
230 finally:
231 del node
232 finally:
233 doc.freeDoc()
234 else:
235 xml=self._head+self._tail
236 doc=libxml2.parseDoc(xml)
237 try:
238 self._handler.stream_end(self._doc)
239 self._doc.freeDoc()
240 self._doc = None
241 self._root = None
242 finally:
243 doc.freeDoc()
244
246 ""
247 self._handler.error(msg)
248
249 fatalError = error
250
251 ignorableWhitespace = characters
252
254 ""
255 self._current += "&" + name + ";"
256
260
262 ""
263 s = "<"+tag
264 if attrs:
265 for a,v in attrs.items():
266 s+=" %s='%s'" % (a,_escape(v))
267 s += ">"
268 if self._level == 0:
269 self._head = s
270 self._tail = "</%s>" % (tag,)
271 xml=self._head+self._tail
272 self._doc = libxml2.parseDoc(xml)
273 self._handler.stream_start(self._doc)
274 self._root = self._doc.getRootElement()
275 elif self._level == 1:
276 self._current = s
277 else:
278 self._current += s
279 self._level += 1
280
284
286 """Python-only stream reader."""
288 """Initialize the reader.
289
290 :Parameters:
291 - `handler`: Object to handle stream start, end and stanzas.
292 :Types:
293 - `handler`: `StreamHandler`
294 """
295 self.handler = handler
296 self.sax = _SAXCallback(handler)
297 self.parser = libxml2.createPushParser(self.sax, '', 0, 'stream')
298
def feed(self, data):
    """Feed the parser with a chunk of data.

    Appropriate methods of `self.handler` will be called whenever
    something interesting is found.

    :Parameters:
        - `data`: the chunk of data to parse.
    :Types:
        - `data`: `str`

    :return: the value returned by the push parser's `parseChunk` call."""
    # NOTE(review): the trailing 0 is presumably libxml2's "terminate"
    # flag, i.e. more chunks may follow -- confirm against the bindings.
    return self.parser.parseChunk(data, len(data), 0)
309
310 _create_reader = _PythonReader
311
313 """Get namespace of node.
314
315 :return: the namespace object or `None` if the node has no namespace
316 assigned.
317 :returntype: `libxml2.xmlNs`"""
318 try:
319 return node.ns()
320 except libxml2.treeError:
321 return None
322
324 """Replace namespaces in a whole subtree.
325
326 :Parameters:
327 - `node`: the root of the subtree where namespaces should be replaced.
328 - `old_ns`: the namespace to replace.
329 - `new_ns`: the namespace to be used instead of old_ns.
330 :Types:
331 - `node`: `libxml2.xmlNode`
332 - `old_ns`: `libxml2.xmlNs`
333 - `new_ns`: `libxml2.xmlNs`
334
335 Both old_ns and new_ns may be None meaning no namespace set."""
336
337 if old_ns is not None:
338 old_ns_uri = old_ns.content
339 old_ns_prefix = old_ns.name
340 else:
341 old_ns_uri = None
342 old_ns_prefix = None
343
344 ns = _get_ns(node)
345 if ns is None and old_ns is None:
346 node.setNs(new_ns)
347 elif ns and ns.content == old_ns_uri and ns.name == old_ns_prefix:
348 node.setNs(new_ns)
349
350 p = node.properties
351 while p:
352 ns = _get_ns(p)
353 if ns is None and old_ns is None:
354 p.setNs(new_ns)
355 if ns and ns.content == old_ns_uri and ns.name == old_ns_prefix:
356 p.setNs(new_ns)
357 p = p.next
358
359 n = node.children
360 while n:
361 if n.type == 'element':
362 skip_element = False
363 try:
364 nsd = n.nsDefs()
365 except libxml2.treeError:
366 nsd = None
367 while nsd:
368 if nsd.name == old_ns_prefix:
369 skip_element = True
370 break
371 nsd = nsd.next
372 if not skip_element:
373 replace_ns(n, old_ns, new_ns)
374 n = n.next
375
376 pure_python = True
377
378
379
380
381
383 """Namespace of an XML node.
384
385 :Parameters:
386 - `xmlnode`: the XML node to query.
387 :Types:
388 - `xmlnode`: `libxml2.xmlNode`
389
390 :return: namespace of the node or `None`
391 :returntype: `libxml2.xmlNs`"""
392 try:
393 return xmlnode.ns()
394 except libxml2.treeError:
395 return None
396
398 """Return namespace URI of an XML node.
399
400 :Parameters:
401 - `xmlnode`: the XML node to query.
402 :Types:
403 - `xmlnode`: `libxml2.xmlNode`
404
405 :return: namespace URI of the node or `None`
406 :returntype: `unicode`"""
407 ns=get_node_ns(xmlnode)
408 if ns:
409 return unicode(ns.getContent(),"utf-8")
410 else:
411 return None
412
414 """Iterate over sibling XML nodes. All types of nodes will be returned
415 (not only the elements).
416
417 Usually used to iterate over node's children like this::
418
419 xml_node_iter(node.children)
420
421 :Parameters:
422 - `nodelist`: start node of the list.
423 :Types:
424 - `nodelist`: `libxml2.xmlNode`
425 """
426 node = nodelist
427 while node:
428 yield node
429 node = node.next
430
432 """Iterate over sibling XML elements. Non-element nodes will be skipped.
433
434 Usually used to iterate over node's children like this::
435
436 xml_node_iter(node.children)
437
438 :Parameters:
439 - `nodelist`: start node of the list.
440 :Types:
441 - `nodelist`: `libxml2.xmlNode`
442 """
443 node = nodelist
444 while node:
445 if node.type == "element":
446 yield node
447 node = node.next
448
450 """Iterate over sibling XML elements. Only elements in the given namespace will be returned.
451
452 Usually used to iterate over node's children like this::
453
454 xml_node_iter(node.children)
455
456 :Parameters:
457 - `nodelist`: start node of the list.
458 :Types:
459 - `nodelist`: `libxml2.xmlNode`
460 """
461 node = nodelist
462 while node:
463 if node.type == "element" and get_node_ns_uri(node)==ns_uri:
464 yield node
465 node = node.next
466
467 evil_characters_re=re.compile(r"[\000-\010\013\014\016-\037]",re.UNICODE)
468 utf8_replacement_char=u"\ufffd".encode("utf-8")
469
476
477 bad_nsdef_replace_re=re.compile(r"^([^<]*\<[^><]*\s+)(xmlns=((\"[^\"]*\")|(\'[^\']*\')))")
478
480 """Serialize an XML element making sure the result is sane.
481
482 Remove control characters and invalid namespace declarations from the
483 result string.
484
485 :Parameters:
486 - `xmlnode`: the XML element to serialize.
487 :Types:
488 - `xmlnode`: `libxml2.xmlNode`
489
490 :return: UTF-8 encoded serialized and sanitized element.
491 :returntype: `string`"""
492 try:
493 ns = xmlnode.ns()
494 except libxml2.treeError:
495 ns = None
496 try:
497 nsdef = xmlnode.nsDefs()
498 except libxml2.treeError:
499 nsdef = None
500 s=xmlnode.serialize(encoding="UTF-8")
501 while nsdef:
502 if nsdef.name is None and (not ns or (nsdef.name, nsdef.content)!=(ns.name, ns.content)):
503 s = bad_nsdef_replace_re.sub("\\1",s,1)
504 break
505 nsdef = nsdef.next
506 s=remove_evil_characters(s)
507 return s
508
510 """A simple push-parser interface for XML streams."""
512 """Initialize `StreamReader` object.
513
514 :Parameters:
515 - `handler`: handler object for the stream content
516 :Types:
517 - `handler`: `StreamHandler` derived class
518 """
519 self.reader=_create_reader(handler)
520 self.lock=threading.RLock()
521 self.in_use=0
523 """Get the document being parsed.
524
525 :return: the document.
526 :returntype: `libxml2.xmlDoc`"""
527 ret=self.reader.doc()
528 if ret:
529 return libxml2.xmlDoc(ret)
530 else:
531 return None
533 """Pass a string to the stream parser.
534
535 :Parameters:
536 - `s`: string to parse.
537 :Types:
538 - `s`: `str`
539
540 :return: `None` on EOF, `False` when whole input was parsed and `True`
541 if there is something still left in the buffer."""
542 self.lock.acquire()
543 if self.in_use:
544 self.lock.release()
545 raise StreamParseError,"StreamReader.feed() is not reentrant!"
546 self.in_use=1
547 try:
548 return self.reader.feed(s)
549 finally:
550 self.in_use=0
551 self.lock.release()
552
553
554
555