Package CedarBackup2 :: Module xmlutil
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.xmlutil

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2006 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # Portions Copyright (c) 2000 Fourthought Inc, USA. 
 15  # All Rights Reserved. 
 16  # 
 17  # This program is free software; you can redistribute it and/or 
 18  # modify it under the terms of the GNU General Public License, 
 19  # Version 2, as published by the Free Software Foundation. 
 20  # 
 21  # This program is distributed in the hope that it will be useful, 
 22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 24  # 
 25  # Copies of the GNU General Public License are available from 
 26  # the Free Software Foundation website, http://www.gnu.org/. 
 27  # 
 28  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 29  # 
 30  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 31  # Language : Python (>= 2.3) 
 32  # Project  : Cedar Backup, release 2 
 33  # Revision : $Id: xmlutil.py 1181 2007-03-25 16:18:22Z pronovic $ 
 34  # Purpose  : Provides general XML-related functionality. 
 35  # 
 36  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 37   
 38  ######################################################################## 
 39  # Module documentation 
 40  ######################################################################## 
 41   
 42  """ 
 43  Provides general XML-related functionality. 
 44   
 45  What I'm trying to do here is abstract much of the functionality that directly 
 46  accesses the DOM tree.  This is not so much to "protect" the other code from 
 47  the DOM, but to standardize the way it's used.  It will also help extension 
  48  authors more easily write code that looks like the rest of Cedar Backup.
 49   
 50  @sort: createInputDom, createOutputDom, serializeDom, isElement, readChildren,  
  51         readFirstChild, readStringList, readString, readInteger, readFloat, readBoolean,
 52         addContainerNode, addStringNode, addIntegerNode, addBooleanNode, 
 53         TRUE_BOOLEAN_VALUES, FALSE_BOOLEAN_VALUES, VALID_BOOLEAN_VALUES 
 54   
 55  @var TRUE_BOOLEAN_VALUES: List of boolean values in XML representing C{True}. 
 56  @var FALSE_BOOLEAN_VALUES: List of boolean values in XML representing C{False}. 
 57  @var VALID_BOOLEAN_VALUES: List of valid boolean values in XML. 
 58   
 59  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 60  """ 
 61   
 62  ######################################################################## 
 63  # Imported modules 
 64  ######################################################################## 
 65   
 66  # System modules 
 67  import sys 
 68  import re 
 69  import logging 
 70  import codecs 
 71  from types import UnicodeType 
 72  from StringIO import StringIO 
 73   
 74  # XML-related modules 
 75  from xml.parsers.expat import ExpatError 
 76  from xml.dom.minidom import Node 
 77  from xml.dom.minidom import getDOMImplementation 
 78  from xml.dom.minidom import parseString 
 79   
 80   
 81  ######################################################################## 
 82  # Module-wide constants and variables 
 83  ######################################################################## 
 84   
 85  logger = logging.getLogger("CedarBackup2.log.xml") 
 86   
 87  TRUE_BOOLEAN_VALUES   = [ "Y", "y", ] 
 88  FALSE_BOOLEAN_VALUES  = [ "N", "n", ] 
 89  VALID_BOOLEAN_VALUES  = TRUE_BOOLEAN_VALUES + FALSE_BOOLEAN_VALUES 
 90   
 91   
 92  ######################################################################## 
 93  # Functions for creating and parsing DOM trees 
 94  ######################################################################## 
 95   
def createInputDom(xmlData, name="cb_config"):
   """
   Parses an XML string into a DOM tree and locates its root node.

   The root node is looked up by name using L{readFirstChild}, so the second
   element of the returned tuple is C{None} when the document has no
   top-level element called C{name}.

   @param xmlData: XML document to parse, as a string.
   @param name: Assumed base name of the document (root node name).

   @return: Tuple (xmlDom, parentNode) for the parsed document.
   @raise ValueError: If the document can't be parsed.
   """
   try:
      document = parseString(xmlData)
      rootNode = readFirstChild(document, name)
   except (IOError, ExpatError):
      # The active exception is fetched through sys.exc_info() instead of
      # being bound in the except clause; the resulting message is the same.
      raise ValueError("Unable to parse XML document: %s" % sys.exc_info()[1])
   return (document, rootNode)
109
def createOutputDom(name="cb_config"):
   """
   Builds a new DOM document to be filled in and written out as XML.

   @param name: Base name of the document (root node name).

   @return: Tuple (xmlDom, parentNode) for the new document, where
            C{parentNode} is the document's root element.
   """
   document = getDOMImplementation().createDocument(None, name, None)
   rootNode = document.documentElement
   return (document, rootNode)
119 120 121 ######################################################################## 122 # Functions for reading values out of XML documents 123 ######################################################################## 124
def isElement(node):
   """
   Indicates whether an XML node is an element node.

   @param node: DOM node to check.

   @return: C{True} if the node's type is C{Node.ELEMENT_NODE}, C{False} otherwise.
   """
   nodeType = node.nodeType
   return nodeType == Node.ELEMENT_NODE
130
def readChildren(parent, name):
   """
   Returns a list of nodes with a given name immediately beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered.

   The search itself is done with C{getElementsByTagName}, which returns
   every node with the given name below the parent, at any level.  That
   result is then pruned so that it includes only the nodes whose
   C{parentNode} is the passed-in parent.

   @param parent: Parent node to search beneath (may be C{None}).
   @param name: Name of nodes to search for.

   @return: List of child nodes with correct parent, or an empty list if
            no matching nodes are found or the parent is C{None}.
   """
   if parent is None:
      return []
   return [ node for node in parent.getElementsByTagName(name) if node.parentNode is parent ]
158
def readFirstChild(parent, name):
   """
   Returns the first child with a given name immediately beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered, as determined by L{readChildren}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: First properly-named child of parent, or C{None} if no matching nodes are found.
   """
   matches = readChildren(parent, name)
   if not matches:
      return None
   return matches[0]
175
def readStringList(parent, name):
   """
   Returns a list of the string contents associated with nodes with a given
   name immediately beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered.

   The matching nodes are found using L{readChildren}.  The string contents
   of a node are taken to be the value of its first C{TEXT_NODE} child.
   Nodes which have no C{TEXT_NODE} children contribute nothing to the
   returned list, so the list may be shorter than the number of matching
   nodes.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: List of strings as described above, or C{None} if no matching
            nodes are found or none of them contains any text.
   """
   values = []
   for entry in readChildren(parent, name):
      for child in entry.childNodes:
         if child.nodeType == Node.TEXT_NODE:
            # Only the first text child of each matching node counts
            values.append(child.nodeValue)
            break
   if not values:
      return None
   return values
207
def readString(parent, name):
   """
   Returns string contents of the first child with a given name immediately
   beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered.  The value is the first entry of the list
   built by L{readStringList}, which skips matching nodes that have no
   C{TEXT_NODE} child.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: String contents of node or C{None} if no matching nodes are found.
   """
   values = readStringList(parent, name)
   if values is not None:
      return values[0]
   return None
227
def readInteger(parent, name):
   """
   Returns integer contents of the first child with a given name immediately
   beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered.  The string is read using L{readString} and
   converted with C{int()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Integer contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to an integer.
   """
   value = readString(parent, name)
   if value is not None:
      return int(value)
   return None
247
def readFloat(parent, name):
   """
   Returns float contents of the first child with a given name immediately
   beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered.  The string is read using L{readString} and
   converted with C{float()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Float contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a
                      float value.
   """
   value = readString(parent, name)
   if value is not None:
      return float(value)
   return None
268
def readBoolean(parent, name):
   """
   Returns boolean contents of the first child with a given name immediately
   beneath the parent.

   "Immediately beneath" means that only direct children of the passed-in
   parent node are considered.  The string is read using L{readString}.

   The string value of the node must be one of the values in L{VALID_BOOLEAN_VALUES}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Boolean contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a boolean.
   """
   value = readString(parent, name)
   if value is None:
      return None
   if value in TRUE_BOOLEAN_VALUES:
      return True
   if value in FALSE_BOOLEAN_VALUES:
      return False
   raise ValueError("Boolean values must be one of %s." % VALID_BOOLEAN_VALUES)
295 296 297 ######################################################################## 298 # Functions for writing values into XML documents 299 ######################################################################## 300
def addContainerNode(xmlDom, parentNode, nodeName):
   """
   Creates a new, empty element and appends it as the last child of a parent node.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.

   @return: Reference to the newly-created node.
   """
   newNode = xmlDom.createElement(nodeName)
   parentNode.appendChild(newNode)
   return newNode
314
def addStringNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a node as the next child of a parent, to contain a string.

   The node itself is created using L{addContainerNode}.  If the
   C{nodeValue} is None, then the node will be created, but will be empty
   (i.e. will contain no text node child).

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   newNode = addContainerNode(xmlDom, parentNode, nodeName)
   if nodeValue is None:
      return newNode
   newNode.appendChild(xmlDom.createTextNode(nodeValue))
   return newNode
334
def addIntegerNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a node as the next child of a parent, to contain an integer.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   The integer is converted to a string using "%d", and the result is added
   to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   text = None
   if nodeValue is not None:
      text = "%d" % nodeValue
   return addStringNode(xmlDom, parentNode, nodeName, text)
356
def addBooleanNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a node as the next child of a parent, to contain a boolean.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   Boolean C{True}, or anything else interpreted as C{True} by Python, is
   written as the string "Y".  Anything else (other than C{None}) is written
   as the string "N".  The result is added to the document via
   L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   if nodeValue is None:
      text = None
   elif nodeValue:
      text = "Y"
   else:
      text = "N"
   return addStringNode(xmlDom, parentNode, nodeName, text)
382 383 384 ######################################################################## 385 # Functions for serializing DOM trees 386 ######################################################################## 387
def serializeDom(xmlDom, indent=3):
   """
   Serializes a DOM tree and returns the result in a string.

   The document is written into an in-memory buffer by a L{Serializer}
   configured for the "UTF-8" encoding.

   @param xmlDom: XML DOM tree to serialize
   @param indent: Number of spaces to indent, as an integer
   @return: String form of DOM tree, pretty-printed.
   """
   outputBuffer = StringIO()
   Serializer(outputBuffer, "UTF-8", indent=indent).serialize(xmlDom)
   serialized = outputBuffer.getvalue()
   outputBuffer.close()
   return serialized
401
class Serializer(object):

   """
   XML serializer class.

   This is a customized serializer based on the PrettyPrint code found in the
   PyXML distribution.  Around release 2.7.0, the PrettyPrint functionality
   was the only reason Cedar Backup still depended on PyXML, so that code was
   stripped out of PyXML and cut down until it did just what was needed and
   no more.

   The code started out being called PrintVisitor, but it makes more sense to
   call it a serializer.  Nearly all of the methods are private, and clients
   are expected to use the high-level L{serialize} method rather than calling
   C{visit()}.

   As a consequence of the cutting-down, this can't quite be called a
   complete XML serializer any more.  Support for HTML and XHTML has been
   removed, and there is no longer any support for namespaces (it dragged
   along a lot of extra code, and Cedar Backup doesn't use namespaces).
   However, everything else should pretty much work as expected.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """

   # Maps each supported DOM node type to the name of the method that writes it.
   _VISIT_METHODS = {
      Node.ELEMENT_NODE:                "_visitElement",
      Node.ATTRIBUTE_NODE:              "_visitAttr",
      Node.TEXT_NODE:                   "_visitText",
      Node.CDATA_SECTION_NODE:          "_visitCDATASection",
      Node.ENTITY_REFERENCE_NODE:       "_visitEntityReference",
      Node.ENTITY_NODE:                 "_visitEntity",
      Node.PROCESSING_INSTRUCTION_NODE: "_visitProcessingInstruction",
      Node.COMMENT_NODE:                "_visitComment",
      Node.DOCUMENT_NODE:               "_visitDocument",
      Node.DOCUMENT_TYPE_NODE:          "_visitDocumentType",
      Node.DOCUMENT_FRAGMENT_NODE:      "_visitDocumentFragment",
      Node.NOTATION_NODE:               "_visitNotation",
   }

   def __init__(self, stream=sys.stdout, encoding="UTF-8", indent=3):
      """
      Initialize a serializer.
      @param stream: Stream to write output to.
      @param encoding: Output encoding.
      @param indent: Number of spaces to indent, as an integer
      """
      self.stream = stream
      self.encoding = encoding
      self._indent = indent * " "   # whitespace written once per nesting level
      self._depth = 0               # current nesting level
      self._inText = 0              # non-zero while text output suppresses indenting

   def serialize(self, xmlDom):
      """
      Serialize the passed-in XML document, followed by a trailing newline.
      @param xmlDom: XML DOM tree to serialize
      @raise ValueError: If there's an unknown node type in the document.
      """
      self._visit(xmlDom)
      self.stream.write("\n")

   def _write(self, text):
      """Encodes text using the configured encoding and writes it to the stream."""
      self.stream.write(_encodeText(text, self.encoding))

   def _tryIndent(self):
      """Starts a new, indented line unless in text or indenting is disabled."""
      if self._inText or not self._indent:
         return
      self._write('\n' + self._indent*self._depth)

   def _visit(self, node):
      """
      Dispatches a node to the method that handles its node type.
      @return: Whatever the handling method returns.
      @raise ValueError: If there's an unknown node type in the document.
      """
      methodName = self._VISIT_METHODS.get(node.nodeType)
      if methodName is None:
         # It has a node type, but we don't know how to handle it
         raise ValueError("Unknown node type: %s" % repr(node))
      return getattr(self, methodName)(node)

   def _visitNodeList(self, node, exclude=None):
      """Visits every node in a list, except for the C{exclude} node, if any."""
      for curr in node:
         if curr is not exclude:
            self._visit(curr)

   def _visitNamedNodeMap(self, node):
      """Visits every value held in a named node map."""
      for item in node.values():
         self._visit(item)

   def _visitAttr(self, node):
      """Writes an attribute as C{ name=value}, quoting and escaping the value."""
      self._write(' ' + node.name)
      escaped = _translateCDATA(node.value, self.encoding)
      escaped, delimiter = _translateCDATAAttr(escaped)
      # The value has already been through _translateCDATA, so it goes
      # straight to the stream rather than through _write().
      self.stream.write("=%s%s%s" % (delimiter, escaped, delimiter))

   def _visitProlog(self):
      """Writes the XML declaration."""
      self._write("<?xml version='1.0' encoding='%s'?>" % (self.encoding or 'utf-8'))
      self._inText = 0

   def _visitDocument(self, node):
      """Writes the prolog, then the doctype (if any), then all other children."""
      self._visitProlog()
      doctype = node.doctype
      if doctype:
         self._visitDocumentType(doctype)
      self._visitNodeList(node.childNodes, exclude=doctype)

   def _visitDocumentFragment(self, node):
      """Writes each child of a document fragment."""
      self._visitNodeList(node.childNodes)

   def _visitElement(self, node):
      """Writes an element, its attributes, and (recursively) its children."""
      self._tryIndent()
      self._write('<%s' % node.tagName)
      for attr in node.attributes.values():
         self._visitAttr(attr)
      if len(node.childNodes) == 0:
         self._write('/>')
      else:
         self._write('>')
         self._depth += 1
         self._visitNodeList(node.childNodes)
         self._depth -= 1
         if not self._inText:
            self._tryIndent()
         self._write('</%s>' % node.tagName)
      self._inText = 0

   def _visitText(self, node):
      """
      Writes the escaped contents of a text node; empty text is skipped.

      NOTE(review): the text is not stripped, even when indenting is enabled,
      so text consisting only of whitespace is written as-is and suppresses
      indenting just like any other text.
      """
      text = node.data
      if not text:
         return
      self.stream.write(_translateCDATA(text, self.encoding))
      self._inText = 1

   def _visitDocumentType(self, doctype):
      """
      Writes a DOCTYPE declaration.

      Nothing at all is written unless the doctype has a system or public
      identifier.  The identifiers themselves are only written when a system
      identifier exists.
      """
      systemId = doctype.systemId
      publicId = doctype.publicId
      if not systemId and not publicId:
         return
      self._tryIndent()
      self._write('<!DOCTYPE %s' % doctype.name)
      if systemId:
         # Use single quotes only if the identifier contains a double quote
         if '"' in systemId:
            system = "'%s'" % systemId
         else:
            system = '"%s"' % systemId
         if publicId:
            if '"' in publicId:
               # We should probably throw an error
               # Valid characters:  <space> | <newline> | <linefeed> |
               #                    [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
               public = "'%s'" % publicId
            else:
               public = '"%s"' % publicId
            self._write(' PUBLIC %s %s' % (public, system))
         else:
            self._write(' SYSTEM %s' % system)
      if doctype.entities or doctype.notations:
         self._write(' [')
         self._depth += 1
         self._visitNamedNodeMap(doctype.entities)
         self._visitNamedNodeMap(doctype.notations)
         self._depth -= 1
         self._tryIndent()
         self._write(']>')
      else:
         self._write('>')
      self._inText = 0

   def _visitEntity(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      self._tryIndent()
      self._write('<!ENTITY %s' % (node.nodeName))
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      if node.notationName:
         self._write(' NDATA %s' % node.notationName)
      self._write('>')

   def _visitNotation(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      self._tryIndent()
      self._write('<!NOTATION %s' % node.nodeName)
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      self._write('>')

   def _visitCDATASection(self, node):
      """Writes a CDATA section; the data is written without escaping."""
      self._tryIndent()
      self._write('<![CDATA[%s]]>' % (node.data))
      self._inText = 0

   def _visitComment(self, node):
      """Writes a comment; the data is written without escaping."""
      self._tryIndent()
      self._write('<!--%s-->' % (node.data))
      self._inText = 0

   def _visitEntityReference(self, node):
      """Writes an entity reference, which counts as text for indenting purposes."""
      self._write('&%s;' % node.nodeName)
      self._inText = 1

   def _visitProcessingInstruction(self, node):
      """Writes a processing instruction."""
      self._tryIndent()
      self._write('<?%s %s?>' % (node.target, node.data))
      self._inText = 0
639
def _encodeText(text, encoding):
   """
   Encodes text using the named encoding.

   Anything that is not already a unicode object is first decoded, on the
   assumption that it holds UTF-8 data.

   @param text: Text to encode.
   @param encoding: Name of the encoding to use, as understood by C{codecs.lookup}.

   @return: Encoded form of the text.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was attributed to Martin v.
   Löwis and was Copyright (c) 2000 Fourthought Inc, USA; All Rights Reserved.
   """
   encode = codecs.lookup(encoding)[0]  # lookup yields encode,decode,reader,writer
   if type(text) is UnicodeType:
      unicodeText = text
   else:
      unicodeText = unicode(text, "utf-8")
   encoded = encode(unicodeText)[0]  # encoder yields result,size
   return encoded
651
652 -def _translateCDATAAttr(characters):
653 """ 654 Handles normalization and some intelligence about quoting. 655 656 @copyright: This code, prior to customization, was part of the PyXML 657 codebase, and before that was part of the 4DOM suite developed by 658 Fourthought, Inc. It its original form, it was Copyright (c) 2000 659 Fourthought Inc, USA; All Rights Reserved. 660 """ 661 if not characters: 662 return '', "'" 663 if "'" in characters: 664 delimiter = '"' 665 new_chars = re.sub('"', '&quot;', characters) 666 else: 667 delimiter = "'" 668 new_chars = re.sub("'", '&apos;', characters) 669 #FIXME: There's more to normalization 670 #Convert attribute new-lines to character entity 671 # characters is possibly shorter than new_chars (no entities) 672 if "\n" in characters: 673 new_chars = re.sub('\n', '&#10;', new_chars) 674 return new_chars, delimiter
675 676 #Note: Unicode object only for now
def _translateCDATA(characters, encoding='UTF-8', prev_chars='', markupSafe=0):
   """
   Escapes character data for inclusion in an XML document and encodes it.

   Unless C{markupSafe} is set, the characters C{&} and C{<} and the
   sequence C{]]>} are replaced with entities.  Characters matching the
   illegal-character pattern are replaced with decimal character references.
   The result is then encoded via L{_encodeText}.

   @param characters: Character data to translate.
   @param encoding: Encoding to use for the result.
   @param prev_chars: Character data preceding this data; if it ends with
                      C{]]} and this data starts with C{>}, then that
                      leading C{>} is escaped as well.
   @param markupSafe: If true, the markup characters are not escaped.

   @return: Translated and encoded data, or an empty string if there is no data.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """
   entities = { '&': '&amp;', '<': '&lt;', ']]>': ']]&gt;', }
   markupPattern = re.compile('[&<]|]]>')
   illegalLowChars = '[\x01-\x08\x0B-\x0C\x0E-\x1F]'
   illegalHighChars = '\xEF\xBF[\xBE\xBF]'
   illegalPattern = re.compile('%s|%s'%(illegalLowChars, illegalHighChars))

   def toEntity(match):
      return entities[match.group()]

   def toCharacterReference(match):
      #Note: use decimal char entity rep because some browsers are broken
      #FIXME: This will bomb for high characters.  Should, for instance, detect
      #The UTF-8 for 0xFFFE and put out &#xFFFE;
      return '&#%i;' % ord(match.group())

   if not characters:
      return ''
   translated = characters
   if not markupSafe:
      translated = markupPattern.sub(toEntity, characters)
      if prev_chars[-2:] == ']]' and characters[0] == '>':
         translated = '&gt;' + translated[1:]
   translated = illegalPattern.sub(toCharacterReference, translated)
   return _encodeText(translated, encoding)
707