Package translate :: Package filters :: Module checks
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  #  
   4  # Copyright 2004-2007 Zuza Software Foundation 
   5  #  
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  #  
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation  
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check  
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the  
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in  
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
  46  import re 
  47   
  48  # These are some regular expressions that are compiled for use in some tests 
  49   
  50  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't cover everything we leave \w instead of specifying the exact letters as 
  51  # this should capture printf types defined in other platforms. 
  52  printf_pat = re.compile('%((?:(?P<ord>\d+)\$)*(?P<fullvar>[+#-]*(?:\d+)*(?:\.\d+)*(hh\|h\|l\|ll)*(?P<type>[\w%])))') 
  53   
  54  # The name of the XML tag 
  55  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  56   
  57  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  58  #TODO: remove escaped strings once usage is audited 
  59  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  60   
  61  # The whole tag 
  62  tag_re = re.compile("<[^>]+>") 
  63   
def tagname(string):
    """Returns the name of the XML/HTML tag in string

    The name is whatever tagname_re captures in its first group. If the
    pattern does not match at the start of string, the attribute access on
    the missing match object raises AttributeError.
    """
    match = tagname_re.match(string)
    return match.group(1)
67
def intuplelist(pair, list):
    """Looks up the (tag, attribute, value) triple pair in list.

    Entries of list are triples too; None in the tag or value position of an
    entry acts as a wildcard. The value is only considered once the tag and
    attribute have matched. Returns the first matching entry of list, or pair
    itself when nothing matches or when pair is a bare tag name (attribute
    and value both None).
    """
    tag, attribute, value = pair
    if (attribute, value) == (None, None):
        # This is a tagname
        return pair
    for candidate in list:
        cand_tag, cand_attribute, cand_value = candidate
        tag_matches = cand_tag == tag or cand_tag is None
        if not (tag_matches and cand_attribute == attribute):
            continue
        if cand_value is None or cand_value == value:
            return candidate
    return pair
82
def tagproperties(strings, ignore):
    """Returns all the properties in the XML/HTML tag strings as
    (tagname, propertyname, propertyvalue), but ignore those combinations
    specified in ignore.

    Every tag in strings first contributes a (tagname, None, None) entry,
    followed by one entry per attribute that is not covered by ignore.
    ignore is a list of (tag, attribute, value) triples in which None is a
    wildcard (see intuplelist).
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties += [(tag, None, None)]
        # Now we isolate the attribute pairs.
        pairs = property_re.findall(string)
        for name, value, a, b in pairs:
            # Strip the quotes:
            value = value[1:-1]
            candidate = (tag, name, value)
            if candidate in ignore or intuplelist(candidate, ignore) != candidate:
                # Only this attribute is ignorable. Leaving the loop here
                # would silently drop every later attribute of the same tag.
                continue
            properties += [candidate]
    return properties
105 106
107 -class FilterFailure(Exception):
108 """This exception signals that a Filter didn't pass, and gives an explanation 109 or a comment"""
110 - def __init__(self, messages):
111 if not isinstance(messages, list): 112 messages = [messages] 113 strmessages = [] 114 for message in messages: 115 if isinstance(message, unicode): 116 message = message.encode("utf-8") 117 strmessages.append(message) 118 messages = ", ".join(strmessages) 119 Exception.__init__(self, messages)
120
class SeriousFilterFailure(FilterFailure):
    """This exception signals that a Filter didn't pass, and the bad translation
    might break an application (so the string will be marked fuzzy)

    It adds nothing to FilterFailure; the distinct type is what lets callers
    tell the two kinds of failure apart.
    """
# (tag, attribute, value) specifies a certain attribute which can be changed/
# ignored if it exists inside tag. In the case where there is a third element
# in the tuple, it indicates a property value that can be ignored if present
# (like defaults, for example).
# If a certain item is None, it indicates that it is relevant for all values of
# the property/tag that is specified as None. A non-None value of "value"
# indicates that the value of the attribute must be taken into account.
# These two lists are the defaults CheckerConfig falls back to when no
# ignoretags / canchangetags are given.
common_ignoretags = [(None, "xml-lang", None)]
common_canchangetags = [("img", "alt", None)]
class CheckerConfig(object):
    """object representing the configuration of a checker

    Holds the target language object (self.lang), the English source language
    object (self.sourcelang), accelerator and variable markers, word lists,
    valid characters, punctuation sets, tag rules, critical tests and credit
    sources.
    """

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None, validchars=None,
                 punctuation=None, endpunctuation=None, ignoretags=None,
                 canchangetags=None, criticaltests=None, credit_sources=None):
        """Stores the given settings, replacing every None by its default.

        punctuation and endpunctuation default to the values of the target
        language object; ignoretags and canchangetags default to the
        module-level common_ignoretags and common_canchangetags lists.
        """
        # we have to initialise empty lists properly (default arguments get reused)
        if accelmarkers is None:
            accelmarkers = []
        if varmatches is None:
            varmatches = []
        if musttranslatewords is None:
            musttranslatewords = []
        if notranslatewords is None:
            notranslatewords = []

        # The language object must exist before the punctuation defaults below.
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')
        self.accelmarkers = accelmarkers
        self.varmatches = varmatches

        # TODO: allow user configuration of untranslatable words
        # The word lists are kept as dictionaries keyed on the unicode word.
        self.notranslatewords = dict.fromkeys([data.forceunicode(word) for word in notranslatewords])
        self.musttranslatewords = dict.fromkeys([data.forceunicode(word) for word in musttranslatewords])

        validchars = data.forceunicode(validchars)
        self.validcharsmap = {}
        self.updatevalidchars(validchars)

        punctuation = data.forceunicode(punctuation)
        if punctuation is None:
            punctuation = self.lang.punctuation
        self.punctuation = punctuation

        endpunctuation = data.forceunicode(endpunctuation)
        if endpunctuation is None:
            endpunctuation = self.lang.sentenceend
        self.endpunctuation = endpunctuation

        if ignoretags is None:
            ignoretags = common_ignoretags
        self.ignoretags = ignoretags

        if canchangetags is None:
            canchangetags = common_canchangetags
        self.canchangetags = canchangetags

        if criticaltests is None:
            criticaltests = []
        self.criticaltests = criticaltests

        if credit_sources is None:
            credit_sources = []
        self.credit_sources = credit_sources

    def update(self, otherconfig):
        """combines the info in otherconfig into this config object

        Markers, word lists, valid characters, punctuation and critical tests
        are merged; ignoretags, canchangetags and credit_sources are replaced
        by the values of otherconfig.
        """
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        # Only accelerator markers we do not know yet are added.
        newmarkers = [marker for marker in otherconfig.accelmarkers if not marker in self.accelmarkers]
        self.accelmarkers.extend(newmarkers)
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """updates the map that eliminates valid characters

        Every character of validchars is added to self.validcharsmap as a
        mapping from its ordinal to None. Returns True without doing anything
        when validchars is None, and None otherwise.
        """
        if validchars is None:
            return True
        ordinals = [ord(validchar) for validchar in data.forceunicode(validchars)]
        self.validcharsmap.update(dict.fromkeys(ordinals))

    def updatetargetlanguage(self, langcode):
        """Updates the target language in the config to the given target language"""
        self.lang = factory.getlanguage(langcode)
212
213 -class UnitChecker(object):
214 """Parent Checker class which does the checking based on functions available 215 in derived classes.""" 216 preconditions = {} 217
218 - def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
219 self.errorhandler = errorhandler 220 if checkerconfig is None: 221 self.setconfig(CheckerConfig()) 222 else: 223 self.setconfig(checkerconfig) 224 # exclude functions defined in UnitChecker from being treated as tests... 225 self.helperfunctions = {} 226 for functionname in dir(UnitChecker): 227 function = getattr(self, functionname) 228 if callable(function): 229 self.helperfunctions[functionname] = function 230 self.defaultfilters = self.getfilters(excludefilters, limitfilters)
231
232 - def getfilters(self, excludefilters=None, limitfilters=None):
233 """returns dictionary of available filters, including/excluding those in 234 the given lists""" 235 filters = {} 236 if limitfilters is None: 237 # use everything available unless instructed 238 limitfilters = dir(self) 239 if excludefilters is None: 240 excludefilters = {} 241 for functionname in limitfilters: 242 if functionname in excludefilters: continue 243 if functionname in self.helperfunctions: continue 244 if functionname == "errorhandler": continue 245 filterfunction = getattr(self, functionname, None) 246 if not callable(filterfunction): continue 247 filters[functionname] = filterfunction 248 return filters
249
250 - def setconfig(self, config):
251 """sets the accelerator list""" 252 self.config = config 253 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers] 254 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname) 255 for startmatch, endmatch in self.config.varmatches] 256 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone) 257 for startmatch, endmatch in self.config.varmatches]
258
259 - def setsuggestionstore(self, store):
260 """Sets the filename that a checker should use for evaluating suggestions.""" 261 self.suggestion_store = store
262
263 - def filtervariables(self, str1):
264 """filter out variables from str1""" 265 return helpers.multifilter(str1, self.varfilters)
266
267 - def removevariables(self, str1):
268 """remove variables from str1""" 269 return helpers.multifilter(str1, self.removevarfilter)
270
271 - def filteraccelerators(self, str1):
272 """filter out accelerators from str1""" 273 return helpers.multifilter(str1, self.accfilters)
274
275 - def filterwordswithpunctuation(self, str1):
276 """replaces words with punctuation with their unpunctuated equivalents""" 277 return prefilters.filterwordswithpunctuation(str1)
278
279 - def filterxml(self, str1):
280 """filter out XML from the string so only text remains""" 281 return tag_re.sub("", str1)
282
283 - def run_test(self, test, unit):
284 """Runs the given test on the given unit. 285 286 Note that this can raise a FilterFailure as part of normal operation""" 287 return test(unit)
288
289 - def run_filters(self, unit):
290 """run all the tests in this suite, return failures as testname, message_or_exception""" 291 failures = {} 292 ignores = self.config.lang.ignoretests[:] 293 functionnames = self.defaultfilters.keys() 294 priorityfunctionnames = self.preconditions.keys() 295 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames) 296 for functionname in priorityfunctionnames + otherfunctionnames: 297 if functionname in ignores: 298 continue 299 filterfunction = getattr(self, functionname, None) 300 # this filterfunction may only be defined on another checker if using TeeChecker 301 if filterfunction is None: 302 continue 303 filtermessage = filterfunction.__doc__ 304 try: 305 filterresult = self.run_test(filterfunction, unit) 306 except FilterFailure, e: 307 filterresult = False 308 filtermessage = str(e).decode('utf-8') 309 except Exception, e: 310 if self.errorhandler is None: 311 raise ValueError("error in filter %s: %r, %r, %s" % \ 312 (functionname, unit.source, unit.target, e)) 313 else: 314 filterresult = self.errorhandler(functionname, unit.source, unit.target, e) 315 if not filterresult: 316 # we test some preconditions that aren't actually a cause for failure 317 if functionname in self.defaultfilters: 318 failures[functionname] = filtermessage 319 if functionname in self.preconditions: 320 for ignoredfunctionname in self.preconditions[functionname]: 321 ignores.append(ignoredfunctionname) 322 return failures
323
class TranslationChecker(UnitChecker):
    """A checker that passes source and target strings to the checks, not the
    whole unit.

    This provides some speedup and simplifies testing."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
        """Hands all arguments through to UnitChecker unchanged."""
        super(TranslationChecker, self).__init__(checkerconfig, excludefilters, limitfilters, errorhandler)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        For a unit with plural forms the test is applied to the source and
        each target form in turn, and fails on the first form that fails.
        Note that this can raise a FilterFailure as part of normal operation."""
        # NOTE(review): reconstructed from a flattened listing, reading the
        # original "else: return True" as the else of the for loop - confirm.
        if not self.hasplural:
            return test(self.str1, self.str2)
        for pluralform in unit.target.strings:
            if not test(self.str1, pluralform):
                return False
        return True

    def run_filters(self, unit):
        """Do some optimisation by caching some data of the unit for the benefit
        of run_test()."""
        self.str1 = data.forceunicode(unit.source)
        self.str2 = data.forceunicode(unit.target)
        self.hasplural = unit.hasplural()
        return super(TranslationChecker, self).run_filters(unit)
352
class TeeChecker:
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
                 checkerclasses=None, errorhandler=None, languagecode=None):
        """construct a TeeChecker from the given checkers

        One checker is instantiated per class in checkerclasses
        (StandardChecker alone by default). When languagecode is given, every
        checker is switched to that target language and the checker provided
        by the language object, if any, is added to the list.
        """
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        self.checkers = []
        for checkerclass in checkerclasses:
            self.checkers.append(checkerclass(checkerconfig=checkerconfig,
                                              excludefilters=excludefilters,
                                              limitfilters=limitfilters,
                                              errorhandler=errorhandler))
        # NOTE(review): reconstructed from a flattened listing; the language
        # checker hookup is assumed to belong to the languagecode branch.
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists

        The result is also stored in self.combinedfilters. A warning is
        written to stderr for every name in limitfilters that none of the
        checkers provides.
        """
        if excludefilters is None:
            excludefilters = {}
        self.combinedfilters = {}
        for checker in self.checkers:
            self.combinedfilters.update(checker.getfilters(excludefilters, limitfilters))
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            for filtername in limitfilters:
                if filtername in self.combinedfilters:
                    continue
                import sys
                print >> sys.stderr, "warning: could not find filter %s" % filtername
        return self.combinedfilters

    def run_filters(self, unit):
        """run all the tests in the checker's suites"""
        failures = {}
        for checker in self.checkers:
            failures.update(checker.run_filters(unit))
        return failures

    def setsuggestionstore(self, store):
        """Passes the suggestion store on to every controlled checker."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
401 402
403 -class StandardChecker(TranslationChecker):
404 """The basic test suite for source -> target translations."""
405 - def untranslated(self, str1, str2):
406 """checks whether a string has been translated at all""" 407 str2 = prefilters.removekdecomments(str2) 408 return not (len(str1.strip()) > 0 and len(str2) == 0)
409
410 - def unchanged(self, str1, str2):
411 """checks whether a translation is basically identical to the original string""" 412 str1 = self.filteraccelerators(str1) 413 str2 = self.filteraccelerators(str2) 414 if len(str1.strip()) == 0: 415 return True 416 if str1.isupper() and str1 == str2: 417 return True 418 if self.config.notranslatewords: 419 words1 = str1.split() 420 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]: 421 return True 422 str1 = self.removevariables(str1) 423 str2 = self.removevariables(str2) 424 if not (str1.strip().isdigit() or len(str1) < 2 or decoration.ispurepunctuation(str1.strip())) and (str1.strip().lower() == str2.strip().lower()): 425 raise FilterFailure("please translate") 426 return True
427
428 - def blank(self, str1, str2):
429 """checks whether a translation only contains spaces""" 430 len1 = len(str1.strip()) 431 len2 = len(str2.strip()) 432 return not (len1 > 0 and len(str2) != 0 and len2 == 0)
433
434 - def short(self, str1, str2):
435 """checks whether a translation is much shorter than the original string""" 436 len1 = len(str1.strip()) 437 len2 = len(str2.strip()) 438 return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
439
440 - def long(self, str1, str2):
441 """checks whether a translation is much longer than the original string""" 442 len1 = len(str1.strip()) 443 len2 = len(str2.strip()) 444 return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
445
446 - def escapes(self, str1, str2):
447 """checks whether escaping is consistent between the two strings""" 448 if not helpers.countsmatch(str1, str2, ("\\", "\\\\")): 449 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if "\\" in word]) 450 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if "\\" in word]) 451 raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2)) 452 else: 453 return True
454
455 - def newlines(self, str1, str2):
456 """checks whether newlines are consistent between the two strings""" 457 if not helpers.countsmatch(str1, str2, ("\n", "\r")): 458 raise FilterFailure("line endings in original don't match line endings in translation") 459 else: 460 return True
461
462 - def tabs(self, str1, str2):
463 """checks whether tabs are consistent between the two strings""" 464 if not helpers.countmatch(str1, str2, "\t"): 465 raise SeriousFilterFailure("tabs in original don't match tabs in translation") 466 else: 467 return True
468
469 - def singlequoting(self, str1, str2):
470 """checks whether singlequoting is consistent between the two strings""" 471 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1))) 472 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2))) 473 return helpers.countsmatch(str1, str2, ("'", "''", "\\'"))
474
475 - def doublequoting(self, str1, str2):
476 """checks whether doublequoting is consistent between the two strings""" 477 str1 = self.filteraccelerators(self.filtervariables(str1)) 478 str1 = self.filterxml(str1) 479 str1 = self.config.lang.punctranslate(str1) 480 str2 = self.filteraccelerators(self.filtervariables(str2)) 481 str2 = self.filterxml(str2) 482 return helpers.countsmatch(str1, str2, ('"', '""', '\\"', u"«", u"»"))
483
484 - def doublespacing(self, str1, str2):
485 """checks for bad double-spaces by comparing to original""" 486 str1 = self.filteraccelerators(str1) 487 str2 = self.filteraccelerators(str2) 488 return helpers.countmatch(str1, str2, " ")
489
490 - def puncspacing(self, str1, str2):
491 """checks for bad spacing after punctuation""" 492 if str1.find(u" ") == -1: 493 return True 494 str1 = self.filteraccelerators(self.filtervariables(str1)) 495 str1 = self.config.lang.punctranslate(str1) 496 str2 = self.filteraccelerators(self.filtervariables(str2)) 497 for puncchar in self.config.punctuation: 498 plaincount1 = str1.count(puncchar) 499 plaincount2 = str2.count(puncchar) 500 if not plaincount1 or plaincount1 != plaincount2: 501 continue 502 spacecount1 = str1.count(puncchar+" ") 503 spacecount2 = str2.count(puncchar+" ") 504 if spacecount1 != spacecount2: 505 # handle extra spaces that are because of transposed punctuation 506 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1-spacecount2) == 1: 507 continue 508 return False 509 return True
510
511 - def printf(self, str1, str2):
512 """checks whether printf format strings match""" 513 count1 = count2 = None 514 for var_num2, match2 in enumerate(printf_pat.finditer(str2)): 515 count2 = var_num2 + 1 516 if match2.group('ord'): 517 for var_num1, match1 in enumerate(printf_pat.finditer(str1)): 518 count1 = var_num1 + 1 519 if int(match2.group('ord')) == var_num1 + 1: 520 if match2.group('fullvar') != match1.group('fullvar'): 521 return 0 522 else: 523 for var_num1, match1 in enumerate(printf_pat.finditer(str1)): 524 count1 = var_num1 + 1 525 if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')): 526 return 0 527 528 if count2 is None: 529 if list(printf_pat.finditer(str1)): 530 return 0 531 532 if (count1 or count2) and (count1 != count2): 533 return 0 534 return 1
535
536 - def accelerators(self, str1, str2):
537 """checks whether accelerators are consistent between the two strings""" 538 str1 = self.filtervariables(str1) 539 str2 = self.filtervariables(str2) 540 messages = [] 541 for accelmarker in self.config.accelmarkers: 542 counter = decoration.countaccelerators(accelmarker) 543 count1, countbad1 = counter(str1) 544 count2, countbad2 = counter(str2) 545 getaccel = decoration.getaccelerators(accelmarker) 546 accel2, bad2 = getaccel(str2) 547 if count1 == count2: 548 continue 549 if count1 == 1 and count2 == 0: 550 if countbad2 == 1: 551 messages.append("accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0])) 552 else: 553 messages.append("accelerator %s is missing from translation" % accelmarker) 554 elif count1 == 0: 555 messages.append("accelerator %s does not occur in original and should not be in translation" % accelmarker) 556 elif count1 == 1 and count2 > count1: 557 messages.append("accelerator %s is repeated in translation" % accelmarker) 558 else: 559 messages.append("accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2)) 560 if messages: 561 if "accelerators" in self.config.criticaltests: 562 raise SeriousFilterFailure(messages) 563 else: 564 raise FilterFailure(messages) 565 return True
566 567 # def acceleratedvariables(self, str1, str2): 568 # """checks that no variables are accelerated""" 569 # messages = [] 570 # for accelerator in self.config.accelmarkers: 571 # for variablestart, variableend in self.config.varmatches: 572 # error = accelerator + variablestart 573 # if str1.find(error) >= 0: 574 # messages.append("original has an accelerated variable") 575 # if str2.find(error) >= 0: 576 # messages.append("translation has an accelerated variable") 577 # if messages: 578 # raise FilterFailure(messages) 579 # return True 580
    def variables(self, str1, str2):
        """checks whether variables of various forms are consistent between the two strings

        Each (startmarker, endmarker) pair of self.config.varmatches is used
        to extract the variables of both strings. Variables that occur more
        often in one string than in the other are collected, with their
        markers put back, for the message.

        Raises SeriousFilterFailure when variables of the original are
        missing from the translation, FilterFailure when the translation only
        has extra variables, and returns True otherwise.
        """
        # NOTE(review): indentation reconstructed from a flattened listing;
        # the reporting is assumed to follow the loop - confirm.
        messages = []
        mismatch1, mismatch2 = [], []
        varnames1, varnames2 = [], []
        for startmarker, endmarker in self.config.varmatches:
            varchecker = decoration.getvariables(startmarker, endmarker)
            # redecorate puts the markers back around a bare variable name.
            # An integer endmarker is not text, so only the start is restored.
            if startmarker and endmarker:
                if isinstance(endmarker, int):
                    redecorate = lambda var: startmarker + var
                else:
                    redecorate = lambda var: startmarker + var + endmarker
            elif startmarker:
                redecorate = lambda var: startmarker + var
            else:
                redecorate = lambda var: var
            vars1 = varchecker(str1)
            vars2 = varchecker(str2)
            if vars1 != vars2:
                # we use counts to compare so we can handle multiple variables
                vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
                # filter variable names we've already seen, so they aren't matched by more than one filter...
                vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
                varnames1.extend(vars1)
                varnames2.extend(vars2)
                vars1 = map(redecorate, vars1)
                vars2 = map(redecorate, vars2)
                mismatch1.extend(vars1)
                mismatch2.extend(vars2)
        # Missing variables take precedence: extra ones are only reported
        # when nothing is missing.
        if mismatch1:
            messages.append("do not translate: %s" % ", ".join(mismatch1))
        elif mismatch2:
            messages.append("translation contains variables not in original: %s" % ", ".join(mismatch2))
        if messages and mismatch1:
            raise SeriousFilterFailure(messages)
        elif messages:
            raise FilterFailure(messages)
        return True
619
620 - def functions(self, str1, str2):
621 """checks that function names are not translated""" 622 return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
623
624 - def emails(self, str1, str2):
625 """checks that emails are not translated""" 626 return helpers.funcmatch(str1, str2, decoration.getemails)
627
628 - def urls(self, str1, str2):
629 """checks that URLs are not translated""" 630 return helpers.funcmatch(str1, str2, decoration.geturls)
631
632 - def numbers(self, str1, str2):
633 """checks whether numbers of various forms are consistent between the two strings""" 634 return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
635
636 - def startwhitespace(self, str1, str2):
637 """checks whether whitespace at the beginning of the strings matches""" 638 str1 = self.filteraccelerators(self.filtervariables(str1)) 639 str2 = self.filteraccelerators(self.filtervariables(str2)) 640 return helpers.funcmatch(str1, str2, decoration.spacestart)
641
642 - def endwhitespace(self, str1, str2):
643 """checks whether whitespace at the end of the strings matches""" 644 str1 = self.filteraccelerators(self.filtervariables(str1)) 645 str2 = self.filteraccelerators(self.filtervariables(str2)) 646 return helpers.funcmatch(str1, str2, decoration.spaceend)
647
648 - def startpunc(self, str1, str2):
649 """checks whether punctuation at the beginning of the strings match""" 650 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1))) 651 str1 = self.config.lang.punctranslate(str1) 652 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2))) 653 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
654
655 - def endpunc(self, str1, str2):
656 """checks whether punctuation at the end of the strings match""" 657 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1))) 658 str1 = self.config.lang.punctranslate(str1) 659 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2))) 660 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation)
661
662 - def purepunc(self, str1, str2):
663 """checks that strings that are purely punctuation are not changed""" 664 # this test is a subset of startandend 665 if (decoration.ispurepunctuation(str1)): 666 return str1 == str2 667 else: 668 return not decoration.ispurepunctuation(str2)
669
670 - def brackets(self, str1, str2):
671 """checks that the number of brackets in both strings match""" 672 str1 = self.filtervariables(str1) 673 str2 = self.filtervariables(str2) 674 messages = [] 675 missing = [] 676 extra = [] 677 for bracket in ("[", "]", "{", "}", "(", ")"): 678 count1 = str1.count(bracket) 679 count2 = str2.count(bracket) 680 if count2 < count1: 681 missing.append("'%s'" % bracket) 682 elif count2 > count1: 683 extra.append("'%s'" % bracket) 684 if missing: 685 messages.append("translation is missing %s" % ", ".join(missing)) 686 if extra: 687 messages.append("translation has extra %s" % ", ".join(extra)) 688 if messages: 689 raise FilterFailure(messages) 690 return True
691
692 - def sentencecount(self, str1, str2):
693 """checks that the number of sentences in both strings match""" 694 sentences1 = len(self.config.sourcelang.sentences(str1)) 695 sentences2 = len(self.config.lang.sentences(str2)) 696 if not sentences1 == sentences2: 697 raise FilterFailure("The number of sentences differ: %d versus %d" % (sentences1, sentences2)) 698 return True
699
700 - def options(self, str1, str2):
701 """checks that options are not translated""" 702 str1 = self.filtervariables(str1) 703 for word1 in str1.split(): 704 if word1 != "--" and word1.startswith("--") and word1[-1].isalnum(): 705 parts = word1.split("=") 706 if not parts[0] in str2: 707 raise FilterFailure("The option %s does not occur or is translated in the translation." % parts[0]) 708 if len(parts) > 1 and parts[1] in str2: 709 raise FilterFailure("The parameter %(param)s in option %(option)s is not translated." % {"param": parts[0], "option": parts[1]}) 710 return True
711
712 - def startcaps(self, str1, str2):
713 """checks that the message starts with the correct capitalisation""" 714 str1 = self.filteraccelerators(str1) 715 str2 = self.filteraccelerators(str2) 716 if len(str1) > 1 and len(str2) > 1: 717 return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2) 718 if len(str1) == 0 and len(str2) == 0: 719 return True 720 if len(str1) == 0 or len(str2) == 0: 721 return False 722 return True
723
724 - def simplecaps(self, str1, str2):
725 """checks the capitalisation of two strings isn't wildly different""" 726 str1 = self.removevariables(str1) 727 str2 = self.removevariables(str2) 728 # TODO: review this. The 'I' is specific to English, so it probably serves 729 # no purpose to get sourcelang.sentenceend 730 str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, " i ", str1) 731 capitals1 = helpers.filtercount(str1, type(str1).isupper) 732 capitals2 = helpers.filtercount(str2, type(str2).isupper) 733 alpha1 = helpers.filtercount(str1, type(str1).isalpha) 734 alpha2 = helpers.filtercount(str2, type(str2).isalpha) 735 # Capture the all caps case 736 if capitals1 == alpha1: 737 return capitals2 == alpha2 738 # some heuristic tests to try and see that the style of capitals is vaguely the same 739 if capitals1 == 0 or capitals1 == 1: 740 return capitals2 == capitals1 741 elif capitals1 < len(str1) / 10: 742 return capitals2 < len(str2) / 8 743 elif len(str1) < 10: 744 return abs(capitals1 - capitals2) < 3 745 elif capitals1 > len(str1) * 6 / 10: 746 return capitals2 > len(str2) * 6 / 10 747 else: 748 return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6
749
def acronyms(self, str1, str2):
    """Check that acronyms appearing in the source are unchanged.

    A source word is treated as an acronym when ``isupper()`` is true for it
    and it is longer than one character.  Variables found in the source (for
    every configured variable style) and the configured must-translate words
    are exempt.

    Returns True when every acronym also occurs in the translation.
    Raises FilterFailure listing the acronyms that are missing.
    """
    exempt = []
    for startmatch, endmatch in self.config.varmatches:
        exempt += decoration.getvariables(startmatch, endmatch)(str1)
    exempt += self.config.musttranslatewords.keys()
    str1 = self.filteraccelerators(self.filtervariables(str1))
    source_words = self.config.lang.word_iter(str1)
    str2 = self.filteraccelerators(self.filtervariables(str2))
    missing = []
    for word in source_words:
        if not (word.isupper() and len(word) > 1):
            continue
        if word in exempt:
            continue
        if word not in str2:
            missing.append(word)
    if missing:
        raise FilterFailure("acronyms should not be translated: " + ", ".join(missing))
    return True
767
def doublewords(self, str1, str2):
    """Check for repeated words in the translation.

    Variables and accelerators are stripped from the translation, full stops
    are removed and the text is lower-cased before it is split on
    whitespace.  Only the translation (``str2``) is examined.

    Returns True when no word is immediately followed by itself.
    Raises FilterFailure naming the first repeated word.
    """
    # The former '"\n".join(str2.split("\n"))' step returned its input
    # unchanged, so it has been dropped; split() below already treats
    # newlines as word separators.
    cleaned = self.filteraccelerators(self.removevariables(str2))
    words = cleaned.replace(".", "").lower().split()
    lastword = ""
    for word in words:
        if word == lastword:
            raise FilterFailure("The word '%s' is repeated" % word)
        lastword = word
    return True
778
def notranslatewords(self, str1, str2):
    """Check that words configured as untranslatable survive translation.

    Variables are filtered and every configured punctuation character is
    replaced by a space in both strings before they are split into words.

    Returns True when nothing is configured or every configured word found
    in the source is also a word of the translation.
    Raises FilterFailure listing the words that went missing.
    """
    untranslatable = self.config.notranslatewords
    if not untranslatable:
        return True

    def blank_out(text, separator):
        # The punctuation entries are encoded to UTF-8 before being used
        # on anything that is not a unicode string.
        if isinstance(text, unicode):
            return text.replace(separator, u" ")
        return text.replace(separator.encode("utf-8"), " ")

    str1 = self.filtervariables(str1)
    str2 = self.filtervariables(str2)
    # The punctuation list is full of strange quotes and things in utf-8
    # encoding.  The single apostrophe is perhaps problematic in words like
    # "doesn't".
    for separator in self.config.punctuation:
        str1 = blank_out(str1, separator)
        str2 = blank_out(str2, separator)
    source_words = self.filteraccelerators(str1).split()
    target_words = self.filteraccelerators(str2).split()
    stopwords = []
    for word in source_words:
        if word in untranslatable and word not in target_words:
            stopwords.append(word)
    if stopwords:
        raise FilterFailure("do not translate: %s" % (", ".join(stopwords)))
    return True
802
def musttranslatewords(self, str1, str2):
    """Check that words configured as definitely translatable were translated.

    Variables are removed and every configured punctuation character is
    replaced by a space in both strings before they are split into words.

    Returns True when nothing is configured or none of the configured words
    found in the source is still a word of the translation.
    Raises FilterFailure listing the words that were left untranslated.
    """
    if not self.config.musttranslatewords:
        return True
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    # The punctuation list is full of strange quotes and things in utf-8
    # encoding.  The single apostrophe is perhaps problematic in words like
    # "doesn't".
    for separator in self.config.punctuation:
        # Same str/unicode handling as notranslatewords: replacing a unicode
        # separator directly in a byte string makes Python 2 decode that
        # string as ASCII, which fails on any non-ASCII byte.
        if isinstance(str1, unicode):
            str1 = str1.replace(separator, u" ")
        else:
            str1 = str1.replace(separator.encode("utf-8"), " ")
        if isinstance(str2, unicode):
            str2 = str2.replace(separator, u" ")
        else:
            str2 = str2.replace(separator.encode("utf-8"), " ")
    words1 = self.filteraccelerators(str1).split()
    words2 = self.filteraccelerators(str2).split()
    stopwords = [word for word in words1
                 if word in self.config.musttranslatewords and word in words2]
    if stopwords:
        raise FilterFailure("please translate: %s" % (", ".join(stopwords)))
    return True
821
def validchars(self, str1, str2):
    """Check that only characters specified as valid appear in the translation.

    Both strings are passed through ``translate`` with the configured
    ``validcharsmap``; whatever remains of the translation and does not also
    remain of the source is reported.

    Returns True when no map is configured or nothing is left to report.
    Raises FilterFailure listing each offending character and its code point.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    invalidchars = []
    for char in target_leftover:
        # Characters that the source contains as well are tolerated.
        if char in source_leftover:
            continue
        invalidchars.append("'%s' (\\u%04x)" % (char.encode('utf-8'), ord(char)))
    if invalidchars:
        raise FilterFailure("invalid chars: %s" % (", ".join(invalidchars)))
    return True
832
def filepaths(self, str1, str2):
    """Check that file paths have not been translated.

    Every whitespace-separated word of the accelerator-filtered source that
    starts with "/" is handed to ``helpers.countsmatch`` together with the
    unfiltered source and the translation.

    Returns False as soon as one such word fails that comparison, True
    otherwise.
    """
    for candidate in self.filteraccelerators(str1).split():
        is_path = candidate.startswith("/")
        if is_path and not helpers.countsmatch(str1, str2, (candidate,)):
            return False
    return True
840
def xmltags(self, str1, str2):
    """Check that XML/HTML tags have not been translated.

    Returns True when the tags of source and translation agree, False
    otherwise.
    """
    source_tags = tag_re.findall(str1)
    if len(source_tags) == 0:
        # No tags in str1, let's just check that none were added in str2.
        # This might be useful for fuzzy strings wrongly unfuzzied, for
        # example.
        return not len(tag_re.findall(str2)) > 0

    first_tag = source_tags[0]
    # A source whose first tag is as long as the whole string, and which
    # carries no "=", always passes.
    if len(first_tag) == len(str1) and "=" not in first_tag:
        return True

    target_tags = tag_re.findall(str2)
    source_properties = tagproperties(source_tags, self.config.ignoretags)
    target_properties = tagproperties(target_tags, self.config.ignoretags)
    source_filtered = []
    target_filtered = []
    for tag_property in source_properties:
        source_filtered.append(intuplelist(tag_property, self.config.canchangetags))
    for tag_property in target_properties:
        target_filtered.append(intuplelist(tag_property, self.config.canchangetags))

    #TODO: consider the consequences of different ordering of attributes/tags
    return not source_filtered != target_filtered
867
def kdecomments(self, str1, str2):
    """Check that no KDE style comment appears in the translation.

    The translation fails when it starts with "_:" or contains "_:" directly
    after a newline.  The source string is not examined.
    """
    at_start = str2.startswith("_:")
    after_newline = "\n_:" in str2
    return not (at_start or after_newline)
871
def compendiumconflicts(self, str1, str2):
    """Check for Gettext compendium conflicts (#-#-#-#-#).

    Returns True when the conflict marker does not occur in the translation.
    The source string is not examined.
    """
    conflict_marker = "#-#-#-#-#"
    return conflict_marker not in str2
875
def simpleplurals(self, str1, str2):
    """Check for English style plural(s) for you to review.

    Counts the occurrences of "(s)" in the source and in the translation.
    When the target language has exactly one plural form, any "(s)" in the
    translation fails the check; otherwise the two counts must be equal.

    Returns True when the check passes, False otherwise.
    """
    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape, so the non-raw form
    # only worked by accident and triggers warnings on newer interpreters.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
891
def spellcheck(self, str1, str2):
    """Check the translation for words that don't pass a spell check.

    Variables, accelerators and XML are filtered out of both strings.  The
    source is checked with language "en"; every word flagged there is
    ignored when it is flagged in the translation too.

    Returns True when no target language is configured or nothing is
    reported.
    Raises FilterFailure carrying the list of messages otherwise.
    """
    if not self.config.targetlanguage:
        return True
    str1 = self.filterxml(self.filteraccelerators(self.filtervariables(str1)))
    str2 = self.filterxml(self.filteraccelerators(self.filtervariables(str2)))
    source_flagged = []
    for word, index, suggestions in spelling.check(str1, lang="en"):
        source_flagged.append(word)
    messages = []
    for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage):
        # The second condition is a hack to ignore hyphenisation rules.
        if word in source_flagged or word in suggestions:
            continue
        if isinstance(str2, unicode) or isinstance(str1, unicode):
            alternatives = u" / ".join(suggestions)
            message = u"check spelling of %s (could be %s)" % (word, alternatives)
        else:
            alternatives = " / ".join(suggestions)
            message = "check spelling of %s (could be %s)" % (word, alternatives)
        messages.append(message)
    if messages:
        raise FilterFailure(messages)
    return True
915
def credits(self, str1, str2):
    """Check for messages holding translation credits instead of a normal
    translation.

    Returns False when the source string is one of the configured credit
    sources, True otherwise.  The translation is not examined.
    """
    credit_sources = self.config.credit_sources
    return str1 not in credit_sources
# If the precondition filter is run and fails then the other tests listed are
# ignored.
# NOTE(review): this table sits between the checker methods and the
# module-level configs, so it is presumably a class attribute of the checker
# class above - confirm the indentation when splicing it back into the file.
preconditions = {
    "untranslated": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "endwhitespace", "startwhitespace",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "purepunc", "doublespacing",
        "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
        "blank", "nplurals",
    ),
    "blank": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "endwhitespace", "startwhitespace",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "purepunc", "doublespacing",
        "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
    ),
    "credits": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "doublespacing",
        "sentencecount", "numbers",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
    ),
    "purepunc": ("startcaps", "options"),
    "startcaps": ("simplecaps",),
    "endwhitespace": ("endpunc",),
    "startwhitespace": ("startpunc",),
    "unchanged": ("doublewords",),
    "compendiumconflicts": (
        "accelerators", "brackets", "escapes",
        "numbers", "startpunc", "long", "variables",
        "startcaps", "sentencecount", "simplecaps",
        "doublespacing", "endpunc", "xmltags",
        "startwhitespace", "endwhitespace",
        "singlequoting", "doublequoting",
        "filepaths", "purepunc", "doublewords", "printf",
    ),
}

# code to actually run the tests (use unittest?)

# Checker settings that OpenOfficeChecker merges into its configuration.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", None), ("%", 0),
        ("$(", ")"), ("$", "$"), ("${", "}"),
        ("#", "#"), ("#", 1), ("#", 0),
        ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None),
    ],
    ignoretags=[
        ("alt", "xml-lang", None),
        ("ahelp", "visibility", "visible"),
        ("img", "width", None),
        ("img", "height", None),
    ],
    canchangetags=[("link", "name", None)],
)
class OpenOfficeChecker(StandardChecker):
    """StandardChecker set up with the OpenOffice.org settings."""

    def __init__(self, **kwargs):
        """Merge ``openofficeconfig`` into the checker config and initialise.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a fresh CheckerConfig is created for it.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
# Checker settings that MozillaChecker merges into its configuration.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1),
        ("$", "$"), ("$", None), ("#", 1),
        ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"],
)
class MozillaChecker(StandardChecker):
    """StandardChecker set up with the Mozilla settings."""

    def __init__(self, **kwargs):
        """Merge ``mozillaconfig`` into the checker config and initialise.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a fresh CheckerConfig is created for it.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)
# Checker settings that GnomeChecker merges into its configuration.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
)
class GnomeChecker(StandardChecker):
    """StandardChecker set up with the GNOME settings."""

    def __init__(self, **kwargs):
        """Merge ``gnomeconfig`` into the checker config and initialise.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a fresh CheckerConfig is created for it.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)
# Checker settings that KdeChecker merges into its configuration.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[
        u"Your names",
        u"Your emails",
        u"ROLES_OF_TRANSLATORS",
    ],
)
class KdeChecker(StandardChecker):
    """StandardChecker set up with the KDE settings."""

    def __init__(self, **kwargs):
        """Merge ``kdeconfig`` into the checker config and initialise.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a fresh CheckerConfig is created for it.
        """
        # TODO allow setup of KDE plural and translator comments so that they
        # do not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
# Checker settings that CCLicenseChecker merges into its configuration.
cclicenseconfig = CheckerConfig(
    varmatches=[("@", "@")],
)
class CCLicenseChecker(StandardChecker):
    """StandardChecker set up with the Creative Commons licence settings."""

    def __init__(self, **kwargs):
        """Merge ``cclicenseconfig`` into the checker config and initialise.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a fresh CheckerConfig is created for it.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
# Maps a project name to the checker class that carries its settings.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
}
class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy.

        Returns False for a fuzzy unit, True otherwise.
        """
        return not unit.isfuzzy()

    def isreview(self, unit):
        """Check if the unit has been marked review.

        Returns False for a unit marked for review, True otherwise.
        """
        return not unit.isreview()

    def nplurals(self, unit):
        """Check for the correct number of noun forms for plural translations.

        Units without plurals always pass.  For plural units the number of
        target strings must equal the ``nplurals`` value of the target
        language.
        """
        if not unit.hasplural():
            return True
        expected = self.config.lang.nplurals
        # if we don't have a valid nplurals value, don't run the test
        if expected > 0:
            return len(unit.target.strings) == expected
        return True

    def hassuggestion(self, unit):
        """Check if there is at least one suggested translation for this unit.

        Suggestions come from ``self.suggestion_store`` when that attribute
        is set, matching on the unit source, and otherwise from the
        alternative translations of an XLIFF unit.

        Returns False when a suggestion exists, True otherwise.
        """
        # Ensure the attribute exists even if nobody has set it.
        self.suggestion_store = getattr(self, 'suggestion_store', None)
        suggestions = []
        if self.suggestion_store:
            source = unit.source
            suggestions = [candidate for candidate in self.suggestion_store.units
                           if candidate.source == source]
        elif xliff and isinstance(unit, xliff.xliffunit):
            # The truth test on xliff guards against a failed import of the
            # xliff module.
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        return not suggestions
1078 1079
def runtests(str1, str2, ignorelist=()):
    """Run the standard checks on a pair of strings and print each failure.

    Both strings are forced to unicode and wrapped in a translation unit
    before a StandardChecker (excluding the filters in ``ignorelist``) is
    run on it.

    Returns the failures reported by the checker.
    """
    from translate.storage import base
    source = data.forceunicode(str1)
    target = data.forceunicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for testname, message in failures:
        report = "failure: %s: %s\n %r\n %r" % (testname, message, source, target)
        print(report)
    return failures
1092
def batchruntests(pairs):
    """Run the tests on a batch of string pairs and print a summary.

    ``pairs`` is a sequence of (source, target) string pairs.  Each pair is
    passed to runtests; the number of pairs without any failure is printed
    against the total.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures, so a pair has passed only when that
        # result is empty.
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
if __name__ == '__main__':
    # Sample (source, translation) pairs exercised when the module is run as
    # a script.
    testset = [
        (r"simple", r"somple"),
        (r"\this equals \that", r"does \this equal \that?"),
        (r"this \'equals\' that", r"this 'equals' that"),
        (r" start and end! they must match.", r"start and end! they must match."),
        (r"check for matching %variables marked like %this", r"%this %variable is marked"),
        (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
        (r"check for mismatching %variables% too", r"how many %variable% are marked"),
        (r"%% %%", r"%%"),
        (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
        (r"simple lowercase", r"it is all lowercase"),
        (r"simple lowercase", r"It Is All Lowercase"),
        (r"Simple First Letter Capitals", r"First Letters"),
        (r"SIMPLE CAPITALS", r"First Letters"),
        (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
        (r"forgot to translate", r" "),
    ]
    batchruntests(testset)