1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """module that provides modified DOM functionality for our needs
24
25 Note that users of ourdom should ensure that no code might still use classes
26 directly from minidom, like minidom.Element, minidom.Document or methods such
27 as minidom.parseString, since the functionality provided here will not be in
28 those objects.
29 """
30
31 from xml.dom import minidom
32 from xml.dom import expatbuilder
33
34
35
37 """A replacement for writexml that formats it like typical XML files.
38 Nodes are indented, but text nodes, where whitespace can be significant, are not indented."""
39
40
41
42 writer.write(indent+"<" + self.tagName)
43
44 attrs = self._get_attributes()
45 a_names = attrs.keys()
46 a_names.sort()
47
48 for a_name in a_names:
49 writer.write(" %s=\"" % a_name)
50 minidom._write_data(writer, attrs[a_name].value)
51 writer.write("\"")
52 if self.childNodes:
53
54
55
56
57
58
59 haveText = False
60 for childNode in self.childNodes:
61 if childNode.nodeType == self.TEXT_NODE and childNode.data.strip():
62 haveText = True
63 break
64 if haveText:
65 writer.write(">")
66 for node in self.childNodes:
67 node.writexml(writer, "", "", "")
68 writer.write("</%s>%s" % (self.tagName, newl))
69 else:
70
71 writer.write(">%s"%(newl))
72 for node in self.childNodes:
73 if node.nodeType != self.TEXT_NODE:
74 node.writexml(writer, indent+addindent, addindent, newl)
75 writer.write("%s</%s>%s" % (indent, self.tagName, newl))
76 else:
77 writer.write("/>%s"%(newl))
78
80 """A reimplementation of getElementsByTagName as an iterator.
81
82 Note that this is not compatible with getElementsByTagName that returns a
83 list, therefore, the class below exposes this through yieldElementsByTagName"""
84
85 for node in parent.childNodes:
86 if node.nodeType == minidom.Node.ELEMENT_NODE and \
87 (name == "*" or node.tagName == name):
88 yield node
89 if node.hasChildNodes():
90 for othernode in node.getElementsByTagName(name):
91 yield othernode
92
94 """limits the search to within tags occurring in onlysearch"""
95 for node in parent.childNodes:
96 if node.nodeType == minidom.Node.ELEMENT_NODE and \
97 (name == "*" or node.tagName == name):
98 yield node
99 if node.nodeType == minidom.Node.ELEMENT_NODE and node.tagName in onlysearch:
100 for node in node.searchElementsByTagName(name, onlysearch):
101 yield node
102
104 results = node.yieldElementsByTagName(name)
105
106
107
108
109
110 try:
111 result = results.next()
112 return result
113 except StopIteration:
114 return None
115
def getnodetext(node):
    """Return the text held directly by *node*.

    The data of every immediate child that is a text node is concatenated
    in child order. Children of any other node type are ignored, and the
    function does not descend into them. Passing None yields the empty
    string.
    """
    if node is None:
        return ""
    pieces = []
    for child in node.childNodes:
        # Only immediate text children contribute to the result.
        if child.nodeType == child.TEXT_NODE:
            pieces.append(child.data)
    return "".join(pieces)
121
122
123
127
133 - def writexml(self, writer, indent, addindent, newl):
135
151
152 theDOMImplementation = DOMImplementation()
153
154
155
158 """Free all data structures used during DOM construction."""
159 self.document = theDOMImplementation.createDocument(
160 expatbuilder.EMPTY_NAMESPACE, None, None)
161 self.curNode = self.document
162 self._elem_info = self.document._elem_info
163 self._cdata = False
164 self._initNamespaces()
165
167
168
169 if ' ' in name:
170 uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
171 else:
172 uri = expatbuilder.EMPTY_NAMESPACE
173 qname = name
174 localname = None
175 prefix = expatbuilder.EMPTY_PREFIX
176 node = Element(qname, uri, prefix, localname)
177 node.ownerDocument = self.document
178 expatbuilder._append_child(self.curNode, node)
179 self.curNode = node
180
181 if self._ns_ordered_prefixes:
182 for prefix, uri in self._ns_ordered_prefixes:
183 if prefix:
184 a = minidom.Attr(expatbuilder._intern(self, 'xmlns:' + prefix),
185 expatbuilder.XMLNS_NAMESPACE, prefix, "xmlns")
186 else:
187 a = minidom.Attr("xmlns", expatbuilder.XMLNS_NAMESPACE,
188 "xmlns", expatbuilder.EMPTY_PREFIX)
189 d = a.childNodes[0].__dict__
190 d['data'] = d['nodeValue'] = uri
191 d = a.__dict__
192 d['value'] = d['nodeValue'] = uri
193 d['ownerDocument'] = self.document
194 expatbuilder._set_attribute_node(node, a)
195 del self._ns_ordered_prefixes[:]
196
197 if attributes:
198 _attrs = node._attrs
199 _attrsNS = node._attrsNS
200 for i in range(0, len(attributes), 2):
201 aname = attributes[i]
202 value = attributes[i+1]
203 if ' ' in aname:
204 uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, aname)
205 a = minidom.Attr(qname, uri, localname, prefix)
206 _attrs[qname] = a
207 _attrsNS[(uri, localname)] = a
208 else:
209 a = minidom.Attr(aname, expatbuilder.EMPTY_NAMESPACE,
210 aname, expatbuilder.EMPTY_PREFIX)
211 _attrs[aname] = a
212 _attrsNS[(expatbuilder.EMPTY_NAMESPACE, aname)] = a
213 d = a.childNodes[0].__dict__
214 d['data'] = d['nodeValue'] = value
215 d = a.__dict__
216 d['ownerDocument'] = self.document
217 d['value'] = d['nodeValue'] = value
218 d['ownerElement'] = node
219
220 if __debug__:
221
222
223
224
225
227 curNode = self.curNode
228 if ' ' in name:
229 uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
230 assert (curNode.namespaceURI == uri
231 and curNode.localName == localname
232 and curNode.prefix == prefix), \
233 "element stack messed up! (namespace)"
234 else:
235 assert curNode.nodeName == name, \
236 "element stack messed up - bad nodeName"
237 assert curNode.namespaceURI == expatbuilder.EMPTY_NAMESPACE, \
238 "element stack messed up - bad namespaceURI"
239 self.curNode = curNode.parentNode
240 self._finish_end_element(curNode)
241
242
243
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    file is either a path string or an already-open file object. A path is
    opened in binary mode and closed again once parsing has finished, even
    if parsing raises. A file object is handed to the builder unchanged and
    is left open for the caller to close.

    parser and bufsize are accepted but not used by this implementation.

    Returns the result of ExpatBuilderNS.parseFile for the given input.
    """
    try:
        string_types = basestring
    except NameError:
        # No basestring on this interpreter; fall back to str so the
        # filename check still works instead of raising NameError.
        string_types = str
    builder = ExpatBuilderNS()
    if isinstance(file, string_types):
        # The context manager guarantees the handle is released on any exit.
        with open(file, 'rb') as fp:
            return builder.parseFile(fp)
    return builder.parseFile(file)
256
261