1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """classes that hold units of .dtd files (dtdunit) or entire files (dtdfile)
23 these are specific .dtd files for localisation used by mozilla"""
24
25 from translate.storage import base
26 from translate.misc import quote
27
28 import re
29 import sys
30 import warnings
31
40
52
53 -class dtdunit(base.TranslationUnit):
54 """this class represents an entity definition from a dtd file (and possibly associated comments)"""
56 """construct the dtdunit, prepare it for parsing"""
57 super(dtdunit, self).__init__(source)
58 self.comments = []
59 self.unparsedlines = []
60 self.incomment = 0
61 self.inentity = 0
62 self.entity = "FakeEntityOnlyForInitialisationAndTesting"
63 self.source = source
64
65
67 """Sets the definition to the quoted value of source"""
68 self.definition = quotefordtd(source)
69
71 """gets the unquoted source string"""
72 return unquotefromdtd(self.definition)
73 source = property(getsource, setsource)
74
80
82 """gets the unquoted target string"""
83 return unquotefromdtd(self.definition)
84 target = property(gettarget, settarget)
85
87 """returns whether this dtdunit doesn't actually have an entity definition"""
88
89
90 return self.entity is None
91
93 """read the first dtd element from the source code into this object, return linesprocessed"""
94 self.comments = []
95
96 self.locfilenotes = self.comments
97 self.locgroupstarts = self.comments
98 self.locgroupends = self.comments
99 self.locnotes = self.comments
100
101
102
103
104
105 self.entity = None
106 self.definition = ''
107 if not dtdsrc:
108 return 0
109 lines = dtdsrc.split("\n")
110 linesprocessed = 0
111 comment = ""
112 for line in lines:
113 line += "\n"
114 linesprocessed += 1
115
116 if not self.incomment:
117 if (line.find('<!--') != -1):
118 self.incomment = 1
119 self.continuecomment = 0
120
121 (comment, dummy) = quote.extract(line,"<!--","-->",None,0)
122 if comment.find('LOCALIZATION NOTE') != -1:
123 l = quote.findend(comment,'LOCALIZATION NOTE')
124 while (comment[l] == ' '): l += 1
125 if comment.find('FILE',l) == l:
126 self.commenttype = "locfile"
127 elif comment.find('BEGIN',l) == l:
128 self.commenttype = "locgroupstart"
129 elif comment.find('END',l) == l:
130 self.commenttype = "locgroupend"
131 else:
132 self.commenttype = "locnote"
133 else:
134
135 self.commenttype = "comment"
136
137 if self.incomment:
138
139 (comment, self.incomment) = quote.extract(line,"<!--","-->",None,self.continuecomment)
140
141 self.continuecomment = self.incomment
142
143 line = line.replace(comment, "", 1)
144
145 if not self.incomment:
146 if line.isspace():
147 comment += line
148 line = ''
149 else:
150 comment += '\n'
151
152
153
154
155
156
157
158 commentpair = (self.commenttype,comment)
159 if self.commenttype == "locfile":
160 self.locfilenotes.append(commentpair)
161 elif self.commenttype == "locgroupstart":
162 self.locgroupstarts.append(commentpair)
163 elif self.commenttype == "locgroupend":
164 self.locgroupends.append(commentpair)
165 elif self.commenttype == "locnote":
166 self.locnotes.append(commentpair)
167 elif self.commenttype == "comment":
168 self.comments.append(commentpair)
169
170 if not self.inentity and not self.incomment:
171 entitypos = line.find('<!ENTITY')
172 if entitypos != -1:
173 self.inentity = 1
174 beforeentity = line[:entitypos].strip()
175 if beforeentity.startswith("#"):
176 self.hashprefix = beforeentity
177 self.entitypart = "start"
178 else:
179 self.unparsedlines.append(line)
180
181 if self.inentity:
182 if self.entitypart == "start":
183
184 e = quote.findend(line,'<!ENTITY')
185 line = line[e:]
186 self.entitypart = "name"
187 self.entitytype = "internal"
188 if self.entitypart == "name":
189 e = 0
190 while (e < len(line) and line[e].isspace()): e += 1
191 self.entity = ''
192 if (e < len(line) and line[e] == '%'):
193 self.entitytype = "external"
194 self.entityparameter = ""
195 e += 1
196 while (e < len(line) and line[e].isspace()): e += 1
197 while (e < len(line) and not line[e].isspace()):
198 self.entity += line[e]
199 e += 1
200 while (e < len(line) and line[e].isspace()): e += 1
201 if self.entity:
202 if self.entitytype == "external":
203 self.entitypart = "parameter"
204 else:
205 self.entitypart = "definition"
206
207 if e == len(line):
208 self.entityhelp = None
209 continue
210 elif self.entitypart == "definition":
211 self.entityhelp = (e,line[e])
212 self.instring = 0
213 if self.entitypart == "parameter":
214 paramstart = e
215 while (e < len(line) and line[e].isalnum()): e += 1
216 self.entityparameter += line[paramstart:e]
217 while (e < len(line) and line[e].isspace()): e += 1
218 line = line[e:]
219 e = 0
220 if not line:
221 continue
222 if line[0] in ('"', "'"):
223 self.entitypart = "definition"
224 self.entityhelp = (e,line[e])
225 self.instring = 0
226 if self.entitypart == "definition":
227 if self.entityhelp is None:
228 e = 0
229 while (e < len(line) and line[e].isspace()): e += 1
230 if e == len(line):
231 continue
232 self.entityhelp = (e,line[e])
233 self.instring = 0
234
235 e = self.entityhelp[0]
236 if (self.entityhelp[1] == "'"):
237 (defpart,self.instring) = quote.extract(line[e:],"'","'",startinstring=self.instring,allowreentry=False)
238 elif (self.entityhelp[1] == '"'):
239 (defpart,self.instring) = quote.extract(line[e:],'"','"',startinstring=self.instring,allowreentry=False)
240 else:
241 raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))
242
243 self.entityhelp = (0,self.entityhelp[1])
244 self.definition += defpart
245 if not self.instring:
246 self.inentity = 0
247 break
248
249
250 if 0:
251 for attr in dir(self):
252 r = repr(getattr(self,attr))
253 if len(r) > 60: r = r[:57]+"..."
254 self.comments.append(("comment","self.%s = %s" % (attr,r) ))
255 return linesprocessed
256
263
265 """convert the dtd entity back to string form"""
266 lines = []
267 lines.extend([comment for commenttype,comment in self.comments])
268 lines.extend(self.unparsedlines)
269 if self.isnull():
270 result = "".join(lines)
271 return result.rstrip() + "\n"
272
273
274
275
276 if len(self.entity) > 0:
277 if getattr(self, 'entitytype', None) == 'external':
278 entityline = '<!ENTITY % '+self.entity+' '+self.entityparameter+' '+self.definition+'>'
279 else:
280 entityline = '<!ENTITY '+self.entity+' '+self.definition+'>'
281 if getattr(self, 'hashprefix', None):
282 entityline = self.hashprefix + " " + entityline
283 if isinstance(entityline, unicode):
284 entityline = entityline.encode('UTF-8')
285 lines.append(entityline+'\n')
286 return "".join(lines)
287
288 -class dtdfile(base.TranslationStore):
289 """this class represents a .dtd file, made up of dtdunits"""
290 UnitClass = dtdunit
300
def parse(self, dtdsrc):
    """Read the text of a .dtd file and rebuild self.units from it.

    Any units already held are discarded.  The text is split on newlines
    and consumed in chunks of lines; each chunk is handed repeatedly to a
    fresh dtdunit, which reports how many lines it consumed, until the
    chunk yields nothing more.

    dtdsrc -- the source text to parse, as a single string

    Nothing is returned; parsed units are appended to self.units.  A unit
    is kept when it holds an entity or has collected unparsed lines.
    Errors raised while parsing a unit are reported through warnings.warn
    and parsing carries on with the following line.
    """
    self.units = []
    lines = dtdsrc.split("\n")
    # NOTE: match() anchors at the start of the line, so only a line that
    # *begins* with a closing quote followed by '>' ends a chunk early.
    # This only affects how the input is chunked; kept as in the original.
    entity_close = re.compile(r"[\"']\s*>")
    start = 0
    end = 0
    while end < len(lines):
        if start == end:
            end += 1
        # Grow the chunk until a closing line is seen after (or on) a line
        # that mentions an entity, or until the input runs out.
        foundentity = False
        while end < len(lines):
            if '<!ENTITY' in lines[end]:
                foundentity = True
            if foundentity and entity_close.match(lines[end]):
                end += 1
                break
            end += 1

        # Peel units off the front of the chunk until nothing is consumed.
        linesprocessed = 1
        while linesprocessed >= 1:
            newdtd = dtdunit()
            try:
                linesprocessed = newdtd.parse("\n".join(lines[start:end]))
                if linesprocessed >= 1 and (not newdtd.isnull() or newdtd.unparsedlines):
                    self.units.append(newdtd)
            except Exception as e:
                # Deliberately best-effort: report the problem and move on.
                warnings.warn("%s\nError occured between lines %d and %d:\n%s" % (e, start+1, end, "\n".join(lines[start:end])))
                # The failed call returned no count; step over exactly one
                # line rather than reusing the previous unit's count, which
                # could skip lines that were never examined.
                linesprocessed = 1
            start += linesprocessed
331
338
340 """convert the units back to source"""
341 sources = [str(dtd) for dtd in self.units]
342 return "".join(sources)
343
345 """makes self.index dictionary keyed on entities"""
346 self.index = {}
347 for dtd in self.units:
348 if not dtd.isnull():
349 self.index[dtd.entity] = dtd
350
352 for dtd in self.units:
353 lines = dtd.definition.split("\n")
354 if len(lines) > 1:
355 definition = lines[0]
356 for line in lines[1:]:
357 if definition[-1:].isspace() or line[:1].isspace():
358 definition += line
359 else:
360 definition += " " + line
361 dtd.definition = definition
362
if __name__ == "__main__":
    # Command-line filter: build a dtdfile from standard input, join each
    # multi-line entity definition onto one line via rewrap(), and write
    # the string form of the store to standard output.
    # NOTE(review): assumes the dtdfile constructor reads and parses the
    # file object it is given -- the constructor is not visible here.
    import sys
    store = dtdfile(sys.stdin)
    store.rewrap()
    rendered = str(store)
    sys.stdout.write(rendered)
368