1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
24
25 import re
26
27 from translate.storage import base
28 from translate.lang import data
# lxml is required by everything in this module; fail at import time with an
# explanatory message when it is missing.
try:
    from lxml import etree
except ImportError:
    # The old "except ImportError, e" spelling is a syntax error on Python 3
    # and the bound exception was never used, so the portable form is used.
    raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
33
def getText(node):
    """Return the text held by node as a single string.

    A truthy node is evaluated with the XPath string() function, which joins
    the text of the node and of everything below it.  A falsy node falls
    back to its own .text, coerced with data.forceunicode(), or u"" when
    that yields nothing.

    node must not be None: both branches access attributes on it.
    """
    # NOTE(review): for lxml elements truthiness presumably means "has child
    # elements" -- confirm before replacing this test with "is not None".
    if not node:
        return data.forceunicode(node.text) or u""
    return node.xpath("string()")
42
43
45 """generate match objects for all @re_obj matches in @text."""
46 start = 0
47 max = len(text)
48 while start < max:
49 m = re_obj.search(text, start)
50 if not m: break
51 yield m
52 start = m.end()
53
# Regular expressions recognising placeholder-like substrings.  Each pattern
# is wrapped in one capturing group and compiled once at import time.
placeholders = [
    r'(%[diouxXeEfFgGcrs])',     # printf-style conversion such as %s or %d
    r'(\\+.?)',                  # run of backslashes plus the following character
    # Positional printf arguments such as %1$s, including the two-letter
    # %1$lx form.  The "lx" case used to be a separate pattern written with
    # an unescaped "$"; that "$" acted as an end-of-string anchor, so the
    # pattern could never match.  It is folded in here (tried first) so that
    # "%1$lx" yields exactly one match instead of two overlapping ones.
    r'(%[0-9]\$(?:lx|[a-z]))',
    r'(<.+?>)',                  # shortest span enclosed in angle brackets
]
re_placeholders = [re.compile(ph) for ph in placeholders]
65
66 XML_NS = 'http://www.w3.org/XML/1998/namespace'
67
69 """Sets the xml:lang attribute on node"""
70 node.set("{%s}lang" % XML_NS, lang)
71
73 """Sets the xml:space attribute on node"""
74 node.set("{%s}space" % XML_NS, value)
75
def namespaced(namespace, name):
    """Returns name in Clark notation within the given namespace.

    For example namespaced("urn:oasis:names:tc:xliff:document:1.1", "source")
    returns {urn:oasis:names:tc:xliff:document:1.1}source
    This is needed throughout lxml.

    When namespace is None or empty, name is returned unchanged.
    """
    if not namespace:
        return name
    return "{%s}%s" % (namespace, name)
87
89 """A single unit in the file.
90 Provisional work is done to make several languages possible."""
91
92
93 rootNode = ""
94
95 languageNode = ""
96
97 textNode = ""
98
99 namespace = None
100
def __init__(self, source, empty=False):
    """Set up a unit holding the given source string.

    A new root element named after self.rootNode is created and the parent
    initialiser is then run with source.  When empty is true nothing at all
    is initialised; the caller is expected to attach xmlelement itself.
    """
    if not empty:
        self.xmlelement = etree.Element(self.rootNode)
        super(LISAunit, self).__init__(source)
109
111 """Compares two units"""
112 languageNodes = self.getlanguageNodes()
113 otherlanguageNodes = other.getlanguageNodes()
114 if len(languageNodes) != len(otherlanguageNodes):
115 return False
116 for i in range(len(languageNodes)):
117 mytext = self.getNodeText(languageNodes[i])
118 othertext = other.getNodeText(otherlanguageNodes[i])
119 if mytext != othertext:
120
121 return False
122 return True
123
def namespaced(self, name):
    """Returns name in Clark notation, using this unit's namespace.

    For example namespaced("source") in an XLIFF document might return
    {urn:oasis:names:tc:xliff:document:1.1}source
    This is needed throughout lxml.
    """
    # Delegates to the module-level helper of the same name.
    return namespaced(self.namespace, name)
132
133 - def setsource(self, source, sourcelang='en'):
141
144 source = property(getsource, setsource)
145
146 - def settarget(self, text, lang='xx', append=False):
163
172 target = property(gettarget, settarget)
173
175 """Returns a xml Element setup with given parameters to represent a
176 single language entry. Has to be overridden."""
177 return None
178
180 """Create the text node in parent containing all the ph tags"""
181 matches = _getPhMatches(text)
182 if not matches:
183 parent.text = text
184 return
185
186
187 start = matches[0].start()
188 pretext = text[:start]
189 if pretext:
190 parent.text = pretext
191 lasttag = parent
192 for i, m in enumerate(matches):
193
194 pretext = text[start:m.start()]
195
196 if pretext:
197 lasttag.tail = pretext
198
199 phnode = etree.SubElement(parent, "ph")
200 phnode.set("id", str(i+1))
201 phnode.text = m.group()
202 lasttag = phnode
203 start = m.end()
204
205 if text[start:]:
206 lasttag.tail = text[start:]
207
def getlanguageNodes(self):
    """Returns a list of all nodes that contain per language information.

    These are the elements found under self.xmlelement whose tag is
    self.languageNode qualified with the unit's namespace.
    """
    qualified_tag = self.namespaced(self.languageNode)
    return self.xmlelement.findall(qualified_tag)
211
213 """Retrieves a languageNode either by language or by index"""
214 if lang is None and index is None:
215 raise KeyError("No criterea for languageNode given")
216 languageNodes = self.getlanguageNodes()
217 if lang:
218 for set in languageNodes:
219 if set.get("{%s}lang" % XML_NS) == lang:
220 return set
221 else:
222 if index >= len(languageNodes):
223 return None
224 else:
225 return languageNodes[index]
226 return None
227
def getNodeText(self, languageNode):
    """Return the text stored for the given languageNode.

    When self.textNode is set, the text comes from the first descendant
    with that (namespaced) tag; otherwise it comes from languageNode
    itself.  Returns None when languageNode is None or when no such
    descendant exists.
    """
    if languageNode is None:
        return None
    if not self.textNode:
        return getText(languageNode)
    candidates = languageNode.findall('.//%s' % self.namespaced(self.textNode))
    if candidates:
        return getText(candidates[0])
    return None
239
241 return etree.tostring(self.xmlelement, pretty_print=True, encoding='utf-8')
242
@classmethod
def createfromxmlElement(cls, element):
    """Alternate constructor: wrap an existing XML element in a unit.

    The unit is created with empty=True, so no initialisation is run, and
    element is then attached as its xmlelement.
    """
    unit = cls(None, empty=True)
    unit.xmlelement = element
    return unit
248
250 """A class representing a file store for one of the LISA file formats."""
251 UnitClass = LISAunit
252
253 rootNode = ""
254
255 bodyNode = ""
256
257 XMLskeleton = ""
258
259 namespace = None
260
261 - def __init__(self, inputfile=None, sourcelanguage='en', targetlanguage=None, unitclass=None):
273
275 """Method to be overridden to initialise headers, etc."""
276 pass
277
def namespaced(self, name):
    """Returns name in Clark notation, using this store's namespace.

    For example namespaced("source") in an XLIFF document might return
    {urn:oasis:names:tc:xliff:document:1.1}source
    This is needed throughout lxml.
    """
    # Delegates to the module-level helper of the same name.
    return namespaced(self.namespace, name)
286
def initbody(self):
    """Cache the default namespace and the body element on the store.

    self.namespace is set to the root element's default (un-prefixed)
    namespace, or None if it has none.  self.body is then looked up once
    in the document so later code need not search the XML again.
    """
    root = self.document.getroot()
    # The namespace must be assigned first: self.namespaced() reads it.
    self.namespace = root.nsmap.get(None, None)
    body_path = '//%s' % self.namespaced(self.bodyNode)
    self.body = self.document.find(body_path)
291
293 """Sets the source language for this store"""
294 self.sourcelanguage = sourcelanguage
295
297 """Sets the target language for this store"""
298 self.targetlanguage = targetlanguage
299
301
302 """Adds and returns a new unit with the given string as first entry."""
303 newunit = self.UnitClass(source)
304 self.addunit(newunit)
305 return newunit
306
311
313 """Converts to a string containing the file's XML"""
314 return etree.tostring(self.document, pretty_print=True, xml_declaration=True, encoding='utf-8')
315
317 """Populates this object from the given xml string"""
318 if not hasattr(self, 'filename'):
319 self.filename = getattr(xml, 'name', '')
320 if hasattr(xml, "read"):
321 xml.seek(0)
322 posrc = xml.read()
323 xml = posrc
324 self.document = etree.fromstring(xml).getroottree()
325 self.encoding = self.document.docinfo.encoding
326 self.initbody()
327 assert self.document.getroot().tag == self.namespaced(self.rootNode)
328 termEntries = self.body.findall('.//%s' % self.namespaced(self.UnitClass.rootNode))
329 if termEntries is None:
330 return
331 for entry in termEntries:
332 term = self.UnitClass.createfromxmlElement(entry)
333 term.namespace = self.namespace
334 self.units.append(term)
335