1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 from translate.misc.multistring import multistring
26 from translate.misc import quote
27 from translate.misc import textwrap
28 from translate.lang import data
29 from translate.storage import pocommon, base
30 import re
31 import copy
32 import cStringIO
33 import poparser
34
35 lsep = "\n#: "
"""Separator for #: entries"""
37
38
39
40 po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
41 po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])
42
def escapeforpo(line):
    """Return C{line} with PO special characters turned into escape sequences.

    Each character that is a key of C{po_escape_map} is replaced by the
    escape sequence it maps to; every other character is copied unchanged.
    The caller is expected to pass text that contains no newline.

    @param line: unescaped text
    """
    # Joining on an empty slice of the input keeps the result the same
    # string type (str or unicode) as the text that was passed in.
    return line[:0].join([po_escape_map.get(char, char) for char in line])
61
65
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns with whitespace preserved.  A space
    that ends up at the start of a continuation line is moved to the end of
    the line before it, so no continuation line begins with a space.

    @param line: the text to wrap
    @return: list of wrapped lines
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)
    for position in range(1, len(wrapped)):
        current = wrapped[position]
        if current.startswith(' '):
            # Shift the leading space back onto the previous line.
            wrapped[position] = current[1:]
            wrapped[position - 1] += ' '
    return wrapped
80
def quoteforpo(text):
    """Quote the given text for a PO file, returning quoted and escaped lines.

    @param text: the unescaped text, or None
    @return: list of double-quoted PO lines (an empty list for None)
    """
    if text is None:
        return []
    quoted = []
    parts = text.split("\n")
    # str.split always yields at least one element, so parts[0] exists.
    if len(parts) > 1 or len(parts[0]) > 71:
        # Multi-line output opens with an empty string, except when the text
        # is just one line terminated by a newline.
        if not (len(parts) == 2 and not parts[1]):
            quoted.append('""')
        for part in parts[:-1]:
            segments = wrapline(part)
            if not segments:
                # Nothing to wrap: only the newline itself is emitted.
                quoted.append('"\\n"')
                continue
            for segment in segments[:-1]:
                quoted.append('"' + escapeforpo(segment) + '"')
            if segments[-1]:
                quoted.append('"' + escapeforpo(segments[-1]) + '\\n"')
    tail = parts[-1]
    if tail:
        for segment in wrapline(tail):
            quoted.append('"' + escapeforpo(segment) + '"')
    return quoted
103
105 """Remove quote and unescape line from po file.
106
107 @param line: a quoted line from a po file (msgid or msgstr)
108 """
109 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
110 return extracted
111
114
def encodingToUse(encoding):
    """Return the encoding name to use, falling back to utf-8.

    C{None} and the placeholder value C{"CHARSET"} are replaced by
    C{'utf-8'}.  Any other value is returned unchanged; it is not checked
    against the encodings known to the python runtime.

    @param encoding: an encoding name, C{"CHARSET"} or None
    @return: C{'utf-8'} for the two placeholder values, else C{encoding}
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
121
122
123
124
125
126
127
128
def is_null(lst):
    """Tell whether a list of quoted PO lines carries no text.

    @param lst: list of quoted lines
    @return: True for an empty list or a list holding only the empty quoted
        string C{'""'}, otherwise False
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
131
133 left = string.find('"')
134 right = string.rfind('"')
135 if right > -1:
136 return string[left:right+1]
137 else:
138 return string[left:] + '"'
139
140 -class pounit(pocommon.pounit):
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155 __shallow__ = ['_store']
156
def __init__(self, source=None, encoding="UTF-8"):
    """Create a unit with every message part and comment list empty.

    @param source: passed on unchanged to C{pocommon.pounit.__init__}
    @param encoding: encoding name, normalised through C{encodingToUse}
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)
    # Each message part starts out as its own fresh empty list of lines.
    for partname in (
        "prev_msgctxt", "prev_msgid", "prev_msgid_plural",
        "msgctxt", "msgid", "msgid_pluralcomments", "msgid_plural", "msgstr",
        "obsoletemsgctxt", "obsoletemsgid", "obsoletemsgid_pluralcomments",
        "obsoletemsgid_plural", "obsoletemsgstr",
    ):
        setattr(self, partname, [])
    pocommon.pounit.__init__(self, source)
175
185
193
194 allcomments = property(_get_all_comments)
195
204
222
226
228 """Sets the msgid to the given (unescaped) value.
229
230 @param source: an unescaped source string.
231 """
232 self.msgid, self.msgid_plural = self._set_source_vars(source)
233 source = property(getsource, setsource)
234
236 """Returns the unescaped msgid"""
237 return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
238
240 """Sets the msgid to the given (unescaped) value.
241
242 @param source: an unescaped source string.
243 """
244 self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
245 prev_source = property(_get_prev_source, _set_prev_source)
246
254
def settarget(self, target):
    """Sets the msgstr to the given (unescaped) value.

    @param target: the translation: a byte string (decoded with the unit's
        encoding), a unicode string, a multistring, or a list/dict of plural
        forms
    @raise ValueError: if the unit has no plural but more than one (or zero)
        target forms are given in a list or dict
    """
    self._rich_target = None
    if isinstance(target, str):
        target = target.decode(self._encoding)
    if self.hasplural():
        if isinstance(target, multistring):
            target = target.strings
        elif isinstance(target, basestring):
            target = [target]
    elif isinstance(target, (dict, list)):
        if len(target) == 1:
            # A dict's single entry is not necessarily stored under key 0.
            if isinstance(target, dict):
                target = target.values()[0]
            else:
                target = target[0]
        else:
            raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
    if isinstance(target, list):
        self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in enumerate(target)])
    elif isinstance(target, dict):
        self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
    else:
        self.msgstr = quoteforpo(target)
279 target = property(gettarget, settarget)
280
282 """Return comments based on origin value (programmer, developer, source code and translator)"""
283 if origin == None:
284 comments = u"".join([comment[2:] for comment in self.othercomments])
285 comments += u"".join([comment[3:] for comment in self.automaticcomments])
286 elif origin == "translator":
287 comments = u"".join ([comment[2:] for comment in self.othercomments])
288 elif origin in ["programmer", "developer", "source code"]:
289 comments = u"".join([comment[3:] for comment in self.automaticcomments])
290 else:
291 raise ValueError("Comment type not valid")
292
293 return comments[:-1]
294
def addnote(self, text, origin=None, position="append"):
    """Add a note to the unit; modelled on xliff.py::xliffunit.addnote.

    Notes whose origin is "programmer", "developer" or "source code" are
    stored as automatic comments ("#. "); all others as translator comments
    ("# ").  Empty or whitespace-only text is ignored.

    @param text: the note text; each line of it becomes one comment line
    @param origin: who the note comes from
    @param position: "append" adds after the existing notes, any other value
        puts the new note in front of them
    """
    if not text or not text.strip():
        return
    if origin in ["programmer", "developer", "source code"]:
        listname, linestart = "automaticcomments", "#. "
    else:
        listname, linestart = "othercomments", "# "
    notelines = [linestart + line + "\n" for line in data.forceunicode(text).split("\n")]
    existing = getattr(self, listname)
    if position == "append":
        # Extend in place so the existing list object is kept.
        existing.extend(notelines)
    else:
        setattr(self, listname, notelines + list(existing))
318
320 """Remove all the translator's notes (other comments)"""
321 self.othercomments = []
322
def __deepcopy__(self, memo=None):
    """Deep-copy the unit, sharing the attributes named in C{__shallow__}.

    @param memo: the memo dictionary of the running C{copy.deepcopy} call;
        a fresh one is created when it is omitted
    @return: a new instance of the same class
    """
    if memo is None:
        memo = {}

    # Make an instance to serve as the copy and register it straight away,
    # so that references back to this unit found while copying its
    # attributes resolve to the copy instead of recursing.
    new_unit = self.__class__()
    memo[id(self)] = new_unit

    shallow = set(self.__shallow__)

    # Deep-copy everything not listed as shallow.  Passing the memo on keeps
    # objects shared between attributes shared in the copy as well.
    for key, value in self.__dict__.items():
        if key not in shallow:
            setattr(new_unit, key, copy.deepcopy(value, memo))

    # Attributes listed as shallow are carried over by reference.
    for key in shallow:
        setattr(new_unit, key, getattr(self, key))

    return new_unit
341
343 return copy.deepcopy(self)
344
350
352 if isinstance(self.msgstr, dict):
353 combinedstr = "\n".join([unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()])
354 return len(combinedstr.strip())
355 else:
356 return len(unquotefrompo(self.msgstr).strip())
357
def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
    """Merges the otherpo (with the same msgid) into this one.

    Overwrite non-blank self.msgstr only if overwrite is True
    merge comments only if comments is True

    @param otherpo: the unit to merge from; if it is not a pounit the merge
        is delegated to the parent class
    @param overwrite: replace an existing translation with otherpo's
    @param comments: also merge the comment lists
    @param authoritative: when True, the automatic, msgid and source comments
        of otherpo are not merged in
    """

    def mergelists(list1, list2, split=False):
        """Merge the entries of list2 into list1, modifying list1 in place."""
        # If either list holds unicode, decode the byte strings of both lists
        # (in place, as UTF-8) so the comparisons below see one string type.
        if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
            for position, item in enumerate(list1):
                if isinstance(item, str):
                    list1[position] = item.decode("utf-8")
            for position, item in enumerate(list2):
                if isinstance(item, str):
                    list2[position] = item.decode("utf-8")

        # Determine the line ending used by the first entry of list1; the
        # last matching candidate wins.  An empty list1 defaults to "\n".
        lineend = ""
        if list1 and list1[0]:
            for candidate in ["\n", "\r", "\n\r"]:
                if list1[0].endswith(candidate):
                    lineend = candidate
            if not lineend:
                lineend = ""
        else:
            lineend = "\n"

        if split:
            # Split every entry on whitespace: the first word is the comment
            # prefix, the rest are individual items.  Items of list2 that
            # list1 lacks are appended, each as a line of its own.
            splitlist1 = []
            splitlist2 = []
            prefix = "#"
            for item in list1:
                splitlist1.extend(item.split()[1:])
                prefix = item.split()[0]
            for item in list2:
                splitlist2.extend(item.split()[1:])
                prefix = item.split()[0]
            list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
        else:
            # Normal merge, conforming to the line ending of list1.
            if list1 != list2:
                for item in list2:
                    if lineend:
                        item = item.rstrip() + lineend
                    # Skip lines list1 already has, but always append lines
                    # shorter than five characters.
                    if item not in list1 or len(item) < 5:
                        list1.append(item)
    if not isinstance(otherpo, pounit):
        super(pounit, self).merge(otherpo, overwrite, comments)
        return
    if comments:
        mergelists(self.othercomments, otherpo.othercomments)
        mergelists(self.typecomments, otherpo.typecomments)
        if not authoritative:
            # An authoritative unit keeps its own automatic, msgid and
            # source comments, so these are only merged otherwise.
            mergelists(self.automaticcomments, otherpo.automaticcomments)
            mergelists(self.msgidcomments, otherpo.msgidcomments)
            mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
    if not self.istranslated() or overwrite:
        # Strip a "_: ...\n" msgid comment from the incoming translation.
        # Note that this rewrites otherpo.target as well.
        if self._extract_msgidcomments(otherpo.target):
            otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
        self.target = otherpo.target
        if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
            self.markfuzzy()
        else:
            self.markfuzzy(otherpo.isfuzzy())
    elif not otherpo.istranslated():
        # Nothing to take over; only flag a source mismatch.
        if self.source != otherpo.source:
            self.markfuzzy()
    else:
        # Both are translated and overwrite is off: keep our own target,
        # but flag the disagreement.
        if self.target != otherpo.target:
            self.markfuzzy()
434
436
437
438 return (is_null(self.msgid)
439 and not is_null(self.msgstr)
440 and self.msgidcomments == []
441 and is_null(self.msgctxt)
442 )
443
445 if self.isheader() or len(self.msgidcomments):
446 return False
447 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
448 return True
449 return False
450
451
452
453
458
466
476
479
482
485
487 """Makes this unit obsolete"""
488 self.obsolete = True
489 if self.msgctxt:
490 self.obsoletemsgctxt = self.msgctxt
491 if self.msgid:
492 self.obsoletemsgid = self.msgid
493 self.msgid = []
494 if self.msgidcomments:
495 self.obsoletemsgidcomments = self.msgidcomments
496 self.msgidcomments = []
497 if self.msgid_plural:
498 self.obsoletemsgid_plural = self.msgid_plural
499 self.msgid_plural = []
500 if self.msgstr:
501 self.obsoletemsgstr = self.msgstr
502 self.msgstr = []
503 self.sourcecomments = []
504 self.automaticcomments = []
505
def resurrect(self):
    """Makes an obsolete unit normal.

    Every non-empty obsolete part is moved back to its live attribute and
    the obsolete attribute is reset to an empty list.
    """
    self.obsolete = False
    restorable = (
        ("msgctxt", "obsoletemsgctxt"),
        ("msgid", "obsoletemsgid"),
        ("msgidcomments", "obsoletemsgidcomments"),
        ("msgid_plural", "obsoletemsgid_plural"),
        ("msgstr", "obsoletemsgstr"),
    )
    for livename, obsoletename in restorable:
        parked = getattr(self, obsoletename)
        if parked:
            setattr(self, livename, parked)
            setattr(self, obsoletename, [])
524
526 """returns whether this pounit contains plural strings..."""
527 return len(self.msgid_plural) > 0
528
531
533 if isinstance(partlines, dict):
534 partkeys = partlines.keys()
535 partkeys.sort()
536 return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
537 partstr = partname + " "
538 partstartline = 0
539 if len(partlines) > 0 and len(partcomments) == 0:
540 partstr += partlines[0]
541 partstartline = 1
542 elif len(partcomments) > 0:
543 if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
544
545 partstr += partlines[0] + '\n'
546
547 if len(partlines) > 1:
548 partstartline += 1
549 else:
550
551 partstr += '""\n'
552
553 if len(partcomments) > 1:
554 combinedcomment = []
555 for comment in partcomments:
556 comment = unquotefrompo([comment])
557 if comment.startswith("_:"):
558 comment = comment[len("_:"):]
559 if comment.endswith("\\n"):
560 comment = comment[:-len("\\n")]
561
562 combinedcomment.append(comment)
563 partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
564
565 partstr += "\n".join(partcomments)
566 partstr = quote.rstripeol(partstr)
567 else:
568 partstr += '""'
569 partstr += '\n'
570
571 for partline in partlines[partstartline:]:
572 partstr += partline + '\n'
573 return partstr
574
def _encodeifneccessary(self, output):
    """Encodes unicode strings and returns other strings unchanged.

    The encoding is taken from the unit's C{_encoding} attribute, then from
    an C{encoding} attribute if one exists, and otherwise is UTF-8.

    @param output: the text to encode
    @return: a byte string if C{output} was unicode, else C{output} itself
    """
    if isinstance(output, unicode):
        encoding = getattr(self, "_encoding", None) or getattr(self, "encoding", "UTF-8")
        return output.encode(encodingToUse(encoding))
    return output
581
583 """convert to a string. double check that unicode is handled somehow here"""
584 output = self._getoutput()
585 return self._encodeifneccessary(output)
586
588 """return this po element as a string"""
589 def add_prev_msgid_lines(lines, header, var):
590 if len(var) > 0:
591 lines.append("#| %s %s\n" % (header, var[0]))
592 lines.extend("#| %s\n" % line for line in var[1:])
593
594 def add_prev_msgid_info(lines):
595 add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
596 add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
597 add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)
598
599 lines = []
600 lines.extend(self.othercomments)
601 if self.isobsolete():
602 lines.extend(self.typecomments)
603 obsoletelines = []
604 if self.obsoletemsgctxt:
605 obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
606 obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
607 if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
608 obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
609 obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
610 for index, obsoleteline in enumerate(obsoletelines):
611
612 obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
613 lines.extend(obsoletelines)
614 lines = [self._encodeifneccessary(line) for line in lines]
615 return "".join(lines)
616
617
618 if is_null(self.msgid):
619 if not (self.isheader() or self.getcontext() or self.sourcecomments):
620 return "".join(lines)
621 lines.extend(self.automaticcomments)
622 lines.extend(self.sourcecomments)
623 lines.extend(self.typecomments)
624 add_prev_msgid_info(lines)
625 if self.msgctxt:
626 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
627 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
628 if self.msgid_plural or self.msgid_pluralcomments:
629 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
630 lines.append(self._getmsgpartstr("msgstr", self.msgstr))
631 lines = [self._encodeifneccessary(line) for line in lines]
632 postr = "".join(lines)
633 return postr
634
636 """Get a list of locations from sourcecomments in the PO unit
637
638 rtype: List
639 return: A list of the locations with '#: ' stripped
640
641 """
642 locations = []
643 for sourcecomment in self.sourcecomments:
644 locations += quote.rstripeol(sourcecomment)[3:].split()
645 return locations
646
648 """Add a location to sourcecomments in the PO unit
649
650 @param location: Text location e.g. 'file.c:23' does not include #:
651 @type location: String
652
653 """
654 self.sourcecomments.append("#: %s\n" % location)
655
666
672
673 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
674
def getcontext(self):
    """Get the message context.

    @return: the unquoted msgctxt followed by whatever
        C{_extract_msgidcomments} returns for this unit
    """
    context = unquotefrompo(self.msgctxt)
    return context + self._extract_msgidcomments()
678
680 """Returns a unique identifier for this unit."""
681 context = self.getcontext()
682
683
684
685
686
687 id = self.source
688 if self.msgidcomments:
689 id = u"_: %s\n%s" % (context, id)
690 elif context:
691 id = u"%s\04%s" % (context, id)
692 return id
693
694 -class pofile(pocommon.pofile):
695 """A .po file containing various units"""
696 UnitClass = pounit
697
699 """Construct a pofile, optionally reading in from inputfile.
700 encoding can be specified but otherwise will be read from the PO header"""
701 self.UnitClass = unitclass
702 pocommon.pofile.__init__(self, unitclass=unitclass)
703 self.units = []
704 self.filename = ''
705 self._encoding = encodingToUse(encoding)
706 if inputfile is not None:
707 self.parse(inputfile)
708
710 """Deprecated: changes the encoding on the file."""
711
712
713
714 raise DeprecationWarning
715
716 self._encoding = encodingToUse(newencoding)
717 if not self.units:
718 return
719 header = self.header()
720 if not header or header.isblank():
721 return
722 charsetline = None
723 headerstr = unquotefrompo(header.msgstr)
724 for line in headerstr.split("\n"):
725 if not ":" in line:
726 continue
727 key, value = line.strip().split(":", 1)
728 if key.strip() != "Content-Type":
729 continue
730 charsetline = line
731 if charsetline is None:
732 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
733 else:
734 charset = re.search("charset=([^ ]*)", charsetline)
735 if charset is None:
736 newcharsetline = charsetline
737 if not newcharsetline.strip().endswith(";"):
738 newcharsetline += ";"
739 newcharsetline += " charset=%s" % self._encoding
740 else:
741 charset = charset.group(1)
742 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1)
743 headerstr = headerstr.replace(charsetline, newcharsetline, 1)
744 header.msgstr = quoteforpo(headerstr)
745
747 """Parses the given file or file source string."""
748 try:
749 if hasattr(input, 'name'):
750 self.filename = input.name
751 elif not getattr(self, 'filename', ''):
752 self.filename = ''
753 if isinstance(input, str):
754 input = cStringIO.StringIO(input)
755 poparser.parse_units(poparser.ParseState(input, pounit), self)
756 except Exception, e:
757 raise base.ParseError(e)
758
def removeduplicates(self, duplicatestyle="merge"):
    """Make sure each msgid is unique; merge comments etc from duplicates into original.

    @param duplicatestyle: "merge" merges a duplicate into the first unit
        with the same id; "msgctxt" keeps every unit and appends its
        locations to its msgctxt.  With any other value duplicates are
        dropped.
    """
    id_dict = {}
    uniqueunits = []
    # Units that have already been given a distinguishing comment/context.
    markedpos = []

    def addcomment(thepo):
        thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
        markedpos.append(thepo)

    def addcontext(thepo):
        thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))

    for thepo in self.units:
        unitid = thepo.getid()
        if thepo.isheader() and not thepo.getlocations():
            # A header without locations is always kept as it is.
            uniqueunits.append(thepo)
        elif unitid in id_dict:
            if duplicatestyle == "merge":
                if unitid:
                    id_dict[unitid].merge(thepo)
                else:
                    addcomment(thepo)
                    uniqueunits.append(thepo)
            elif duplicatestyle == "msgctxt":
                origpo = id_dict[unitid]
                if origpo not in markedpos:
                    addcontext(origpo)
                    # Record the original, so that further duplicates do not
                    # add its context a second time.
                    markedpos.append(origpo)
                addcontext(thepo)
                uniqueunits.append(thepo)
        else:
            if not unitid:
                if duplicatestyle == "merge":
                    addcomment(thepo)
                else:
                    addcontext(thepo)
            id_dict[unitid] = thepo
            uniqueunits.append(thepo)
    self.units = uniqueunits
799
801 """Convert to a string. double check that unicode is handled somehow here"""
802 output = self._getoutput()
803 if isinstance(output, unicode):
804 return output.encode(getattr(self, "encoding", "UTF-8"))
805 return output
806
808 """convert the units back to lines"""
809 lines = []
810 for unit in self.units:
811 unitsrc = str(unit) + "\n"
812 lines.append(unitsrc)
813 lines = "".join(self.encode(lines)).rstrip()
814
815 if lines:
816 lines += "\n"
817 return lines
818
820 """encode any unicode strings in lines in self._encoding"""
821 newlines = []
822 encoding = self._encoding
823 if encoding is None or encoding.lower() == "charset":
824 encoding = 'UTF-8'
825 for line in lines:
826 if isinstance(line, unicode):
827 line = line.encode(encoding)
828 newlines.append(line)
829 return newlines
830
832 """decode any non-unicode strings in lines with self._encoding"""
833 newlines = []
834 for line in lines:
835 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
836 try:
837 line = line.decode(self._encoding)
838 except UnicodeError, e:
839 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
840 newlines.append(line)
841 return newlines
842
844 for unit in self.units:
845 if not (unit.isheader() or unit.isobsolete()):
846 yield unit
847