1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """classes that hold units of .po files (pounit) or entire files (pofile)
23 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
24
25 from __future__ import generators
26 from translate.misc.multistring import multistring
27 from translate.misc import quote
28 from translate.misc import textwrap
29 from translate.lang import data
30 from translate.storage import pocommon
31 import re
32
33 lsep = "\n#: "
34 """Separator for #: entries"""
35
36
37
# Escape sequences recognised inside quoted PO strings, keyed by their escaped form.
po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
# Reverse lookup (raw character -> escaped form); values() and keys() of an
# unmodified dict iterate in matching order, so zipping them inverts the map.
po_escape_map = dict(zip(po_unescape_map.values(), po_unescape_map.keys()))
40
def escapeforpo(line):
    """Escapes a line for po format. Assumes no newline occurs in the line.

    Every character that has an entry in po_escape_map is replaced by its
    escape sequence; every other character is copied through unchanged.

    @param line: unescaped text
    @return: the text with special characters replaced by their PO escapes
    """
    # All keys of po_escape_map are single characters, so one lookup per
    # character yields the same text as locating and splicing each occurrence.
    # Joining on line[:0] keeps the result the same string type as the input.
    return line[:0].join([po_escape_map.get(char, char) for char in line])
59
63
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns with all whitespace preserved. Any
    continuation line that would begin with a space hands that space over to
    the end of the line before it.

    @param line: the text to wrap
    @return: list of wrapped lines
    """
    pieces = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)
    # Iterate over a snapshot of the continuation lines; position is the index
    # of the line *preceding* the piece being examined.
    for position, piece in enumerate(pieces[1:]):
        if not piece.startswith(' '):
            continue
        pieces[position] += ' '
        pieces[position + 1] = piece[1:]
    return pieces
78
def quoteforpo(text):
    """Quote and escape text for a PO file, returning the list of output lines.

    @param text: unescaped text, or None
    @return: list of double-quoted, escaped lines (empty when text is None)
    """
    quoted = []
    if text is None:
        return quoted
    parts = text.split("\n")
    # Multi-line text, or a single line longer than 71 characters, is led by an
    # empty "" entry -- except for exactly one line followed by a newline.
    if len(parts) > 1 or len(parts[0]) > 71:
        if len(parts) != 2 or parts[1]:
            quoted.append('""')
    # Every part but the last was followed by a newline in the input.
    for part in parts[:-1]:
        wrapped = wrapline(part)
        if len(wrapped) == 0:
            quoted.append('"\\n"')
            continue
        for piece in wrapped[:-1]:
            quoted.append('"' + escapeforpo(piece) + '"')
        if wrapped[-1]:
            quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')
    if parts[-1]:
        for piece in wrapline(parts[-1]):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
100
def extractpoline(line):
    """Remove quote and unescape line from po file.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of quote.extractwithoutquotes
    """
    result = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)
    return result[0]
108
def unquotefrompo(postr, joinwithlinebreak=False):
    """Unquote a list of quoted PO lines and join them into one string.

    @param postr: list of quoted lines
    @param joinwithlinebreak: when true, a leading '""' entry is dropped and
        the extracted lines are joined with newlines instead of being
        concatenated directly
    @return: the joined, unquoted text
    """
    joiner = u""
    if joinwithlinebreak:
        joiner = u"\n"
        if postr and postr[0] == '""':
            postr = postr[1:]
    extracted = [extractpoline(line) for line in postr]
    return joiner.join(extracted)
116
def encodingToUse(encoding):
    """Return the encoding to use, falling back to utf-8 when none is given.

    Both None and the placeholder value "CHARSET" are treated as "no encoding
    specified" and yield 'utf-8'. Any other value is returned unchanged; it is
    NOT checked against the codecs known to the Python runtime.

    @param encoding: encoding name, "CHARSET", or None
    @return: 'utf-8' for None/"CHARSET", otherwise the given encoding
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
122
123
124
125
126
127
128
129
130 """
131 From the GNU gettext manual:
132 WHITE-SPACE
133 # TRANSLATOR-COMMENTS
134 #. AUTOMATIC-COMMENTS
135 #| PREVIOUS MSGID (Gettext 0.16 - check if this is the correct position - not yet implemented)
136 #: REFERENCE...
137 #, FLAG...
138 msgctxt CONTEXT (Gettext 0.15)
139 msgid UNTRANSLATED-STRING
140 msgstr TRANSLATED-STRING
141 """
142
143 -class pounit(pocommon.pounit):
144
145
146
147
148
149
150
151
152
def __init__(self, source=None, encoding="UTF-8"):
    """Create an empty unit, optionally initialised from a source string.

    @param source: initial source text; passed to setsource when truthy and
        always forwarded to the superclass initialiser
    @param encoding: encoding name, normalised through encodingToUse
    """
    self._encoding = encodingToUse(encoding)
    self.obsolete = False
    self._initallcomments(blankall=True)
    # Every message part starts out as its own empty list, first the live
    # parts and then their obsolete counterparts.
    for prefix in ("", "obsolete"):
        for part in ("msgctxt", "msgid", "msgid_pluralcomments", "msgid_plural", "msgstr"):
            setattr(self, prefix + part, [])
    if source:
        self.setsource(source)
    super(pounit, self).__init__(source)
170
186
def getsource(self):
    """Returns the unescaped msgid

    The result is a multistring built from the unquoted msgid; when the unit
    has a plural, the unquoted msgid_plural is appended to its strings.
    """
    multi = multistring(unquotefrompo(self.msgid), self._encoding)
    if not self.hasplural():
        return multi
    plural = unquotefrompo(self.msgid_plural)
    if isinstance(plural, str):
        # byte strings are decoded with the unit's encoding before being stored
        plural = plural.decode(self._encoding)
    multi.strings.append(plural)
    return multi
196
212 source = property(getsource, setsource)
213
221
def settarget(self, target):
    """Sets the msgstr to the given (unescaped) value

    @param target: a string, multistring, list or dict of strings. For a
        unit with a plural, a multistring or a single string is turned into
        a list of plural forms. For a unit without a plural, a list/dict
        must hold exactly one element.
    @raise ValueError: if the unit has no plural but target is a list/dict
        whose length is not 1
    """
    if isinstance(target, str):
        target = target.decode(self._encoding)
    if target == self.target:
        # nothing to change
        return
    if self.hasplural():
        if isinstance(target, multistring):
            target = target.strings
        elif isinstance(target, basestring):
            target = [target]
    elif isinstance(target, (dict, list)):
        if len(target) == 1:
            target = target[0]
        else:
            raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
    # (the previously computed but never used "templates" local was removed)
    if isinstance(target, list):
        self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in enumerate(target)])
    elif isinstance(target, dict):
        self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
    else:
        self.msgstr = quoteforpo(target)
247 target = property(gettarget, settarget)
248
def getnotes(self, origin=None):
    """Return comments based on origin value (programmer, developer, source code and translator)

    @param origin: None for translator comments followed by automatic
        comments, "translator" for othercomments only, or one of
        "programmer", "developer", "source code" for automaticcomments only
    @return: the selected comments with their "# " / "#. " prefixes removed
        and the final character (the trailing newline) dropped
    @raise ValueError: for any other origin value
    """
    if origin is None:
        comments = u"".join([comment[2:] for comment in self.othercomments])
        comments += u"".join([comment[3:] for comment in self.automaticcomments])
    elif origin == "translator":
        comments = u"".join([comment[2:] for comment in self.othercomments])
    elif origin in ["programmer", "developer", "source code"]:
        comments = u"".join([comment[3:] for comment in self.automaticcomments])
    else:
        raise ValueError("Comment type not valid")
    # Let's drop the last newline
    return comments[:-1]
262
def addnote(self, text, origin=None, position="append"):
    """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

    @param text: the note text; nothing happens when it is empty
    @param origin: "programmer", "developer" or "source code" adds the note
        to automaticcomments ("#. " lines); anything else adds it to
        othercomments ("# " lines)
    @param position: "append" extends the existing comment list in place;
        any other value puts the new lines in front of the existing ones
    """
    # We don't want to put in an empty '#' without a real comment:
    if not text:
        return
    text = data.forceunicode(text)
    # Always bound, so prepending a translator note no longer fails with an
    # unbound local.
    autocomments = origin in ["programmer", "developer", "source code"]
    if autocomments:
        commentlist = self.automaticcomments
        linestart = "#. "
    else:
        commentlist = self.othercomments
        linestart = "# "
    newcomments = [linestart + line + "\n" for line in text.split("\n")]
    if position == "append":
        commentlist.extend(newcomments)
    elif autocomments:
        self.automaticcomments = newcomments + commentlist
    else:
        self.othercomments = newcomments + commentlist
285
def removenotes(self):
    """Remove all the translator's notes (other comments)"""
    # rebind to a fresh list rather than emptying the existing one
    self.othercomments = list()
289
def copy(self):
    """Return a new unit of the same class holding copies of this unit's data.

    List attributes are copied with a slice and dict-valued msgstr /
    obsoletemsgstr with dict.copy(), so the containers are not shared.
    """
    def duplicate(names):
        # shallow-copy each named list attribute onto the new unit
        for name in names:
            setattr(newpo, name, getattr(self, name)[:])

    def duplicatemsgstr(name):
        # msgstr-like attributes are either a dict (plural forms) or a list
        value = getattr(self, name)
        if isinstance(value, dict):
            setattr(newpo, name, value.copy())
        else:
            setattr(newpo, name, value[:])

    newpo = self.__class__()
    duplicate(("othercomments", "automaticcomments", "sourcecomments", "typecomments"))
    newpo.obsolete = self.obsolete
    duplicate(("msgidcomments",))
    newpo._initallcomments()
    duplicate(("msgctxt", "msgid", "msgid_pluralcomments", "msgid_plural"))
    duplicatemsgstr("msgstr")
    duplicate(("obsoletemsgctxt", "obsoletemsgid", "obsoletemsgid_pluralcomments", "obsoletemsgid_plural"))
    duplicatemsgstr("obsoletemsgstr")
    return newpo
317
323
def msgstrlen(self):
    """Return the length of the unquoted, whitespace-stripped msgstr.

    For a dict msgstr (plural forms) each form is unquoted and stripped, the
    forms are joined with newlines, and the stripped result is measured.
    """
    if not isinstance(self.msgstr, dict):
        return len(unquotefrompo(self.msgstr).strip())
    forms = [unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()]
    return len("\n".join(forms).strip())
330
# Merge the comments and, where allowed, the translation of another unit into this one.
# NOTE(review): indentation is not preserved in this listing, so the nesting
# described in the comments below is inferred from the statements themselves.
331 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
332 """Merges the otherpo (with the same msgid) into this one.
333
334 Overwrite non-blank self.msgstr only if overwrite is True
335 merge comments only if comments is True
336
337 """
338
# mergelists: helper that extends list1 in place with entries taken from list2.
339 def mergelists(list1, list2, split=False):
340
# If either list contains a unicode item, every str item in BOTH lists is
# decoded as UTF-8 in place (list2, which belongs to the other unit, is mutated too).
341 if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
342 for position, item in enumerate(list1):
343 if isinstance(item, str):
344 list1[position] = item.decode("utf-8")
345 for position, item in enumerate(list2):
346 if isinstance(item, str):
347 list2[position] = item.decode("utf-8")
348
349
# Work out which line ending list1 uses from its first item; when several
# candidates match, the last matching one in the candidate list is kept.
350 lineend = ""
351 if list1 and list1[0]:
352 for candidate in ["\n", "\r", "\n\r"]:
353 if list1[0].endswith(candidate):
354 lineend = candidate
355 if not lineend:
356 lineend = ""
# presumably this else pairs with "if list1 and list1[0]" (default to "\n" when
# list1 gives no hint) -- confirm against the indented original
357 else:
358 lineend = "\n"
359
360
# split mode: every item is split on whitespace, its first token is taken as the
# prefix and the remaining tokens as entries; entries of list2 missing from
# list1 are appended, each on its own line, using the last prefix seen.
361 if split:
362 splitlist1 = []
363 splitlist2 = []
364 prefix = "#"
365 for item in list1:
366 splitlist1.extend(item.split()[1:])
367 prefix = item.split()[0]
368 for item in list2:
369 splitlist2.extend(item.split()[1:])
370 prefix = item.split()[0]
371 list1.extend(["%s %s%s" % (prefix,item,lineend) for item in splitlist2 if not item in splitlist1])
372 else:
373
# plain mode: items of list2 are normalised to list1's line ending and appended
# unless already present; items shorter than 5 characters are always appended.
374 if list1 != list2:
375 for item in list2:
376 if lineend:
377 item = item.rstrip() + lineend
378
379 if item not in list1 or len(item) < 5:
380 list1.append(item)
# Units that are not pounit instances are handed to the superclass merge.
381 if not isinstance(otherpo, pounit):
382 super(pounit, self).merge(otherpo, overwrite, comments)
383 return
384 if comments:
385 mergelists(self.othercomments, otherpo.othercomments)
386 mergelists(self.typecomments, otherpo.typecomments)
387 if not authoritative:
388
389
390 mergelists(self.automaticcomments, otherpo.automaticcomments)
391 mergelists(self.msgidcomments, otherpo.msgidcomments)
392 mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
# Take over the other translation when this unit is untranslated or overwrite is set.
393 if not self.istranslated() or overwrite:
394
# A "_: comment\n" prefix found in the other target is removed from otherpo.target
# (this modifies otherpo) before the target is copied.
395 if self._extract_msgidcomments(otherpo.target):
396 otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
397 self.target = otherpo.target
# Differing sources mark the result fuzzy; otherwise the other unit's fuzzy state is copied.
398 if self.source != otherpo.source:
399 self.markfuzzy()
400 else:
401 self.markfuzzy(otherpo.isfuzzy())
402 elif not otherpo.istranslated():
403 if self.source != otherpo.source:
404 self.markfuzzy()
405 else:
# Both units are translated and nothing was overwritten: differing targets mark this unit fuzzy.
406 if self.target != otherpo.target:
407 self.markfuzzy()
408
def isheader(self):
    """Tell whether this unit looks like a PO header.

    True when the msgid is empty ([] or ['""']), the msgstr is not empty,
    there are no msgid comments, the msgctxt is empty and there are no source
    comments ([] or [""]).
    """
    if not (self.msgid == [] or self.msgid == ['""']):
        return False
    if self.msgstr == [] or self.msgstr == ['""']:
        return False
    if not (self.msgidcomments == []):
        return False
    if not (self.msgctxt == [] or self.msgctxt == ['""']):
        return False
    return self.sourcecomments == [] or self.sourcecomments == [""]
417
def isblank(self):
    """Tell whether the unit is blank.

    A header, or a unit carrying msgid comments, is never blank; otherwise the
    unit is blank when both msgidlen() and msgstrlen() are zero.
    """
    excluded = self.isheader() or len(self.msgidcomments)
    return (not excluded) and (self.msgidlen() == 0) and (self.msgstrlen() == 0)
424
425
426
427
432
440
450
453
456
459
462
465
468
def makeobsolete(self):
    """Makes this unit obsolete

    Non-empty msgid, msgidcomments, msgid_plural and msgstr are moved to their
    obsolete counterparts and reset to []. A non-empty msgctxt is recorded as
    obsoletemsgctxt but is left in place. Source and automatic comments are
    cleared.
    """
    self.obsolete = True
    if self.msgctxt:
        self.obsoletemsgctxt = self.msgctxt
    for part in ("msgid", "msgidcomments", "msgid_plural", "msgstr"):
        current = getattr(self, part)
        if current:
            setattr(self, "obsolete" + part, current)
            setattr(self, part, [])
    self.sourcecomments = []
    self.automaticcomments = []
488
def resurrect(self):
    """Makes an obsolete unit normal

    Each non-empty obsolete part (msgctxt, msgid, msgidcomments, msgid_plural,
    msgstr) is moved back to its live attribute and the obsolete attribute is
    reset to [].
    """
    self.obsolete = False
    # The obsolete context is restored into msgctxt (not msgid), and the
    # obsolete msgstr is really cleared; the original assigned to the
    # misspelled attribute "obsoletemgstr".
    for part in ("msgctxt", "msgid", "msgidcomments", "msgid_plural", "msgstr"):
        saved = getattr(self, "obsolete" + part)
        if saved:
            setattr(self, part, saved)
            setattr(self, "obsolete" + part, [])
507
def hasplural(self):
    """returns whether this pounit contains plural strings..."""
    pluralparts = len(self.msgid_plural)
    return pluralparts > 0
511
# Body of the unit-level parse routine: reads the text in `src` line by line,
# files comment lines into the matching comment lists and quoted strings into
# msgctxt / msgid / msgid_plural / msgstr, and returns the number of lines consumed.
# NOTE(review): the `def` header and all indentation are absent from this
# listing; nesting mentioned below is inferred from the statements -- confirm.
513 if isinstance(src, str):
514
# byte strings are decoded with the unit's current encoding first
515 src = src.decode(self._encoding)
# State flags: which message part the following quoted strings belong to.
516 inmsgctxt = 0
517 inmsgid = 0
518 inmsgid_comment = 0
519 inmsgid_plural = 0
520 inmsgstr = 0
521 msgstr_pluralid = None
522 linesprocessed = 0
523 for line in src.split("\n"):
# "\n" is appended to every line, so the len(line) == 0 test below can never be true
524 line = line + "\n"
525 linesprocessed += 1
526 if len(line) == 0:
527 continue
528 elif line[0] == '#':
# a comment other than "#~" after msgstr has started ends this unit
529 if inmsgstr and not line[1] == '~':
530
531 break
# The second character selects the comment list: "." automatic, ":" source,
# "," type; anything else is an ordinary (translator) comment.
532 if line[1] == '.':
533 self.automaticcomments.append(line)
534 elif line[1] == ':':
535 self.sourcecomments.append(line)
536 elif line[1] == ',':
537 self.typecomments.append(line)
538 elif line[1] == '~':
# "#~" lines: the first three characters are dropped and the unit is flagged
# obsolete; presumably the stripped line then goes through the keyword checks below
539 line = line[3:]
540 self.obsolete = True
541 else:
542 self.othercomments.append(line)
# Keyword lines switch the current part. msgid_plural is tested before msgid
# because both start with "msgid".
543 if line.startswith('msgid_plural'):
544 inmsgctxt = 0
545 inmsgid = 0
546 inmsgid_plural = 1
547 inmsgstr = 0
548 inmsgid_comment = 0
549 elif line.startswith('msgctxt'):
550 inmsgctxt = 1
551 inmsgid = 0
552 inmsgid_plural = 0
553 inmsgstr = 0
554 inmsgid_comment = 0
555 elif line.startswith('msgid'):
556
557
# a msgid seen after msgstr or msgid_plural has started ends this unit
558 if inmsgstr or inmsgid_plural:
559 break
560 inmsgctxt = 0
561 inmsgid = 1
562 inmsgid_plural = 0
563 inmsgstr = 0
564 inmsgid_comment = 0
565 elif line.startswith('msgstr'):
566 inmsgctxt = 0
567 inmsgid = 0
568 inmsgid_plural = 0
569 inmsgstr = 1
# "msgstr[N]" records the plural index N; a plain "msgstr" resets it to None
570 if line.startswith('msgstr['):
571 msgstr_pluralid = int(line[len('msgstr['):line.find(']')].strip())
572 else:
573 msgstr_pluralid = None
# The string extracted from the line (if any) is stored in the part currently active.
574 extracted = quote.extractstr(line)
575 if not extracted is None:
576 if inmsgctxt:
577 self.msgctxt.append(extracted)
578 elif inmsgid:
579
# a string containing "_:" starts a msgid comment; strings go to msgidcomments
# until one containing a literal backslash-n sequence has been stored
580 if extracted.find("_:") != -1:
581 inmsgid_comment = 1
582 if inmsgid_comment:
583 self.msgidcomments.append(extracted)
584 else:
585 self.msgid.append(extracted)
586 if inmsgid_comment and extracted.find("\\n") != -1:
587 inmsgid_comment = 0
588 elif inmsgid_plural:
# same "_:" comment handling as above, for the plural msgid
589 if extracted.find("_:") != -1:
590 inmsgid_comment = 1
591 if inmsgid_comment:
592 self.msgid_pluralcomments.append(extracted)
593 else:
594 self.msgid_plural.append(extracted)
595 if inmsgid_comment and extracted.find("\\n") != -1:
596 inmsgid_comment = 0
597 elif inmsgstr:
598 if msgstr_pluralid is None:
599 self.msgstr.append(extracted)
600 else:
# plural forms: msgstr becomes a dict mapping plural index -> list of strings;
# lines collected so far are kept under index 0
601 if type(self.msgstr) == list:
602 self.msgstr = {0: self.msgstr}
603 if msgstr_pluralid not in self.msgstr:
604 self.msgstr[msgstr_pluralid] = []
605 self.msgstr[msgstr_pluralid].append(extracted)
606 if self.obsolete:
607 self.makeobsolete()
608
609
# For a header unit, the charset named in the msgstr becomes this unit's encoding.
610 if self.isheader():
611 charset = re.search("charset=([^\\s]+)", unquotefrompo(self.msgstr))
612 if charset:
613 self._encoding = encodingToUse(charset.group(1))
614 return linesprocessed
615
# Body of the routine that renders one message part (it is called as
# self._getmsgpartstr(partname, partlines[, partcomments]) elsewhere in this file)
# into the text written to the PO file.
# NOTE(review): the `def` header and all indentation are absent from this
# listing; nesting mentioned below is inferred -- confirm.
# A dict of part lines (plural forms) is rendered one key at a time, in sorted
# key order, as "partname[key]".
617 if isinstance(partlines, dict):
618 partkeys = partlines.keys()
619 partkeys.sort()
620 return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
621 partstr = partname + " "
# index of the first part line still to be written by the final loop
622 partstartline = 0
# Without comments the first part line goes on the same line as the keyword.
623 if len(partlines) > 0 and len(partcomments) == 0:
624 partstr += partlines[0]
625 partstartline = 1
626 elif len(partcomments) > 0:
# With comments: when the first part line unquotes to an empty string it is
# written before the comments ...
627 if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
628
629 partstr += partlines[0] + '\n'
630
# ... and is skipped by the final loop only when more part lines follow it
631 if len(partlines) > 1:
632 partstartline += 1
633 else:
634
# otherwise an empty "" line is written before the comments
635 partstr += '""\n'
636
# Several comment strings are merged into one "_:" comment: each is unquoted,
# stripped of a leading "_:" and a trailing backslash-n sequence, and the
# concatenation is quoted again.
637 if len(partcomments) > 1:
638 combinedcomment = []
639 for comment in partcomments:
640 comment = unquotefrompo([comment])
641 if comment.startswith("_:"):
642 comment = comment[len("_:"):]
643 if comment.endswith("\\n"):
644 comment = comment[:-len("\\n")]
645
646 combinedcomment.append(comment)
647 partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
648
649 partstr += "\n".join(partcomments)
650 partstr = quote.rstripeol(partstr)
651 else:
# neither part lines nor comments: the part is written as an empty string
652 partstr += '""'
653 partstr += '\n'
654
# the remaining part lines follow, one per output line
655 for partline in partlines[partstartline:]:
656 partstr += partline + '\n'
657 return partstr
658
def _encodeifneccessary(self, output):
    """encodes unicode strings and returns other strings unchanged"""
    if not isinstance(output, unicode):
        return output
    # NOTE(review): this reads an attribute called "encoding", while __init__
    # in this class stores the encoding as "_encoding"; unless "encoding" is
    # set elsewhere the "UTF-8" default is what gets used -- confirm.
    configured = getattr(self, "encoding", "UTF-8")
    return output.encode(encodingToUse(configured))
665
def __str__(self):
    """convert to a string. double check that unicode is handled somehow here"""
    return self._encodeifneccessary(self._getoutput())
670
def _getoutput(self):
    """return this po element as a string"""
    parts = list(self.othercomments)
    if self.isobsolete():
        parts.extend(self.typecomments)
        obsoleteparts = []
        if self.obsoletemsgctxt:
            obsoleteparts.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
        obsoleteparts.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
        if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
            obsoleteparts.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
        obsoleteparts.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
        # continuation lines of a multi-line part must carry the "#~ " marker too
        parts.extend([part.replace('\n"', '\n#~ "') for part in obsoleteparts])
        return "".join([self._encodeifneccessary(part) for part in parts])
    # A unit without a msgid gets no msgid/msgstr output (only its plain
    # comments) unless it is the header or has msgid or source comments.
    msgidmissing = (len(self.msgid) == 0) or ((len(self.msgid) == 1) and (self.msgid[0] == '""'))
    if msgidmissing:
        if not (self.isheader() or self.msgidcomments or self.sourcecomments):
            return "".join(parts)
    parts.extend(self.automaticcomments)
    parts.extend(self.sourcecomments)
    parts.extend(self.typecomments)
    if self.msgctxt:
        parts.append(self._getmsgpartstr("msgctxt", self.msgctxt))
    parts.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
    if self.msgid_plural or self.msgid_pluralcomments:
        parts.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
    parts.append(self._getmsgpartstr("msgstr", self.msgstr))
    return "".join([self._encodeifneccessary(part) for part in parts])
707
def getlocations(self):
    """Get a list of locations from sourcecomments in the PO unit

    rtype: List
    return: A list of the locations with '#: ' stripped

    """
    found = []
    for sourcecomment in self.sourcecomments:
        # drop the end of line and the 3-character marker, then split on whitespace
        stripped = quote.rstripeol(sourcecomment)[3:]
        found.extend(stripped.split())
    return found
719
def addlocation(self, location):
    """Add a location to sourcecomments in the PO unit

    @param location: Text location e.g. 'file.c:23' does not include #:
    @type location: String

    """
    sourcecomment = "#: %s\n" % location
    self.sourcecomments.append(sourcecomment)
728
740
def getcontext(self):
    """Get the message context."""
    # the unquoted msgctxt, followed by whatever _extract_msgidcomments returns
    msgctxt = unquotefrompo(self.msgctxt)
    return msgctxt + self._extract_msgidcomments()
744
def getid(self):
    """Returns a unique identifier for this unit."""
    context = self.getcontext()
    source = self.source
    if self.msgidcomments:
        # units with msgid comments are identified by "_: context", a newline, then the source
        return "_: %s\n%s" % (context, source)
    if context:
        # context and source separated by the \04 character
        return "%s\04%s" % (context, source)
    return source
759
760 -class pofile(pocommon.pofile):
761 """this represents a .po file containing various units"""
762 UnitClass = pounit
def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
    """construct a pofile, optionally reading in from inputfile.
    encoding can be specified but otherwise will be read from the PO header"""
    self.UnitClass = unitclass
    pocommon.pofile.__init__(self, unitclass=unitclass)
    self.units = []
    self.filename = ''
    self._encoding = encodingToUse(encoding)
    if inputfile is None:
        return
    self.parse(inputfile)
773
# Body of the routine that switches the file to a new encoding and rewrites the
# charset in the header's Content-Type line to match.
# NOTE(review): the `def` header and all indentation are absent from this
# listing; nesting mentioned below is inferred -- confirm.
775 """changes the encoding on the file"""
776 self._encoding = encodingToUse(newencoding)
# nothing more to do when there are no units or no usable header
777 if not self.units:
778 return
779 header = self.header()
780 if not header or header.isblank():
781 return
782 charsetline = None
783 headerstr = unquotefrompo(header.msgstr, True)
# NOTE(review): the header text is split on the two-character sequence
# backslash + "n", not on real newlines -- whether unquotefrompo leaves that
# sequence in place cannot be seen from here; confirm.
784 for line in headerstr.split("\\n"):
785 if not ":" in line: continue
786 key, value = line.strip().split(":", 1)
787 if key.strip() != "Content-Type": continue
# the last Content-Type line found is the one that gets rewritten
788 charsetline = line
789 if charsetline is None:
# no Content-Type line at all: one is appended to the header text
790 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
791 else:
792 charset = re.search("charset=([^ ]*)", charsetline)
793 if charset is None:
# Content-Type present but without charset: append "; charset=..." to it
794 newcharsetline = charsetline
795 if not newcharsetline.strip().endswith(";"):
796 newcharsetline += ";"
797 newcharsetline += " charset=%s" % self._encoding
798 else:
# replace the first occurrence of the existing charset value with the new encoding
799 charset = charset.group(1)
800 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1)
801 headerstr = headerstr.replace(charsetline, newcharsetline, 1)
802 header.msgstr = quoteforpo(headerstr)
803
# Body of the file-level parse routine: splits the input into blank-line
# separated chunks and lets a new unit parse each chunk.
# NOTE(review): the `def` header and all indentation are absent from this
# listing; nesting mentioned below is inferred -- confirm.
805 """parses the given file or file source string"""
# remember the file name when the input object has one
806 if hasattr(input, 'name'):
807 self.filename = input.name
808 elif not getattr(self, 'filename', ''):
809 self.filename = ''
# file-like input is read completely and then closed
810 if hasattr(input, "read"):
811 posrc = input.read()
812 input.close()
813 input = posrc
814
815 lines = input.split("\n")
816 start = 0
817 end = 0
818
819 linesprocessed = 0
# `end` scans forward one line per iteration; a blank line or the end of the
# input hands lines[start:end] to a new unit.
820 while end <= len(lines):
821 if (end == len(lines)) or (not lines[end].strip()):
822 newpe = self.UnitClass(encoding=self._encoding)
823 linesprocessed = newpe.parse("\n".join(lines[start:end]))
# advance by what the unit actually consumed (it may stop before the end of the chunk)
824 start += linesprocessed
825
# only units that consumed at least one line and produce output are kept
826 if linesprocessed >= 1 and newpe._getoutput():
827 self.units.append(newpe)
# A header unit with a Content-Type entry supplies the file encoding; all lines
# are then decoded and parsing starts over from the first line.
828 if newpe.isheader():
829 if "Content-Type" in self.parseheader():
830 self._encoding = newpe._encoding
831
832 if self._encoding is not None and self._encoding.lower() != 'charset':
833 lines = self.decode(lines)
834 if self._encoding is None:
835
836 self._encoding = 'utf-8'
837 lines = self.decode(lines)
838 self.units = []
839 start = 0
840 end = 0
841 end = end+1
842
def removeduplicates(self, duplicatestyle="merge"):
    """make sure each msgid is unique ; merge comments etc from duplicates into original

    @param duplicatestyle: how units with an already-seen msgid are handled:
        "merge" merges the duplicate into the first unit with that msgid
        (duplicates with an empty msgid are kept and given a msgid comment);
        "keep" keeps every unit unchanged;
        "msgid_comment" keeps duplicates and adds a "_: locations" msgid
        comment to the original and to each duplicate;
        "msgid_comment_all" adds such a comment to every non-header unit;
        "msgctxt" keeps duplicates and appends the locations to the msgctxt
        of the original and of each duplicate.
    """
    msgiddict = {}
    uniqueunits = []
    # units that were already disambiguated; kept in a list and searched with
    # "in", as the code does not rely on units being hashable
    markedpos = []

    def addcomment(thepo):
        thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
        markedpos.append(thepo)

    for thepo in self.units:
        if duplicatestyle.startswith("msgid_comment"):
            msgid = unquotefrompo(thepo.msgidcomments) + unquotefrompo(thepo.msgid)
        else:
            msgid = unquotefrompo(thepo.msgid)
        if thepo.isheader():
            # header units are always kept and never recorded as a msgid
            uniqueunits.append(thepo)
        elif duplicatestyle == "msgid_comment_all":
            addcomment(thepo)
            uniqueunits.append(thepo)
        elif msgid in msgiddict:
            if duplicatestyle == "merge":
                if msgid:
                    msgiddict[msgid].merge(thepo)
                else:
                    addcomment(thepo)
                    uniqueunits.append(thepo)
            elif duplicatestyle == "keep":
                uniqueunits.append(thepo)
            elif duplicatestyle == "msgid_comment":
                origpo = msgiddict[msgid]
                if origpo not in markedpos:
                    addcomment(origpo)
                addcomment(thepo)
                uniqueunits.append(thepo)
            elif duplicatestyle == "msgctxt":
                origpo = msgiddict[msgid]
                if origpo not in markedpos:
                    origpo.msgctxt.append('"%s"' % " ".join(origpo.getlocations()))
                    # Record the ORIGINAL unit as marked; recording the
                    # duplicate instead made the original receive its context
                    # again for every further duplicate.
                    markedpos.append(origpo)
                thepo.msgctxt.append('"%s"' % " ".join(thepo.getlocations()))
                uniqueunits.append(thepo)
        else:
            if not msgid and duplicatestyle != "keep":
                addcomment(thepo)
            msgiddict[msgid] = thepo
            uniqueunits.append(thepo)
    self.units = uniqueunits
892
def __str__(self):
    """convert to a string. double check that unicode is handled somehow here"""
    output = self._getoutput()
    if not isinstance(output, unicode):
        return output
    # NOTE(review): reads an attribute called "encoding" although __init__ in
    # this class stores "_encoding"; unless "encoding" is set elsewhere the
    # "UTF-8" default applies -- confirm.
    return output.encode(getattr(self, "encoding", "UTF-8"))
899
def _getoutput(self):
    """convert the units back to lines"""
    unitsources = [str(unit) + "\n" for unit in self.units]
    output = "".join(self.encode(unitsources)).rstrip()
    # non-empty output ends with exactly one newline
    if output:
        output += "\n"
    return output
910
def encode(self, lines):
    """encode any unicode strings in lines in self._encoding"""
    encoding = self._encoding
    if encoding is None or encoding.lower() == "charset":
        # no usable encoding recorded: fall back to UTF-8
        encoding = 'UTF-8'

    def encodeline(line):
        if isinstance(line, unicode):
            return line.encode(encoding)
        return line

    return [encodeline(line) for line in lines]
922
924 """decode any non-unicode strings in lines with self._encoding"""
925 newlines = []
926 for line in lines:
927 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
928 try:
929 line = line.decode(self._encoding)
930 except UnicodeError, e:
931 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
932 newlines.append(line)
933 return newlines
934
939
if __name__ == '__main__':
    # Run as a script: parse a PO file from standard input and write its
    # string form to standard output.
    import sys
    sys.stdout.write(str(pofile(sys.stdin)))
944