Package translate :: Package filters :: Module checks
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  #  
   4  # Copyright 2004-2008 Zuza Software Foundation 
   5  #  
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  #  
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation  
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check  
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the  
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in  
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
  46  # The import of xliff fail silently in the absence of lxml if another module 
  47  # already tried to import it unsuccessfully, so let's make 100% sure: 
  48  if not hasattr(xliff, "xliffunit"): 
  49      xliff = None 
  50  import re 
  51   
  52  # These are some regular expressions that are compiled for use in some tests 
  53   
  54  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't cover everything we leave \w instead of specifying the exact letters as 
  55  # this should capture printf types defined in other platforms. 
  56  # extended to support Python named format specifiers 
  57  printf_pat = re.compile('%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh\|h\|l\|ll)?(?P<type>[\w%])))') 
  58   
# The name of the XML tag: matches the opening "<", optional whitespace, and
# captures the run of word characters and "/" that follows (so a closing tag
# keeps its slash in the captured name).
tagname_re = re.compile("<[\s]*([\w\/]*)")

# One attribute inside a tag: a leading space, the attribute name, "=" and a
# single- or double-quoted value. findall() yields four groups per attribute:
# name, quoted value, and the double-/single-quoted alternatives.
# We allow escaped quotes, probably for old escaping style of OOo helpcontent
#TODO: remove escaped strings once usage is audited
property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))")

# The whole tag
tag_re = re.compile("<[^>]+>")

# A double-quoted run of lowercase letters and underscores (presumably a GConf
# attribute name, going by the variable name -- confirm against its users).
gconf_attribute_re = re.compile('"[a-z_]+?"')
  70   
def tagname(string):
    """Return the tag name at the start of the XML/HTML tag in string.

    @param string: text beginning with a tag, e.g. u'<a href="x">'
    @return: the name captured by tagname_re (may be empty)
    @raise AttributeError: if string does not start with "<"
    """
    match = tagname_re.match(string)
    # The capturing group can match the empty string, so it always takes part
    # in a successful match.
    return match.group(1)
74
def intuplelist(pair, list):
    """Look up the (tag, attribute, value) triple pair in list.

    Entries in list may use None as a wildcard for the tag and for the value;
    the attribute must always match exactly.

    @param pair: a (tag, attribute, value) tuple
    @param list: a sequence of (tag, attribute, value) patterns
    @return: the first pattern in list that matches pair, or pair itself when
    nothing matches or when pair describes a bare tag (attribute and value
    both None)
    """
    tag, attribute, value = pair
    if attribute is None and value is None:
        # This is a tagname
        return pair
    for candidate in list:
        cand_tag, cand_attribute, cand_value = candidate
        if cand_attribute != attribute:
            continue
        tag_matches = cand_tag in (tag, None)
        # The value is only considered once tag and attribute have matched.
        if tag_matches and cand_value in (None, value):
            return candidate
    return pair
89
def tagproperties(strings, ignore):
    """Collect the properties of each XML/HTML tag in strings.

    @param strings: an iterable of tag strings (each starting with "<")
    @param ignore: (tag, attribute, value) patterns to leave out; None acts
    as a wildcard as described for intuplelist()
    @return: a list of (tagname, propertyname, propertyvalue) tuples; every
    tag contributes (tagname, None, None) followed by its attributes
    """
    found = []
    for string in strings:
        tag = tagname(string)
        found.append((tag, None, None))
        # Now we isolate the attribute pairs.
        for attribute, quoted, _double, _single in property_re.findall(string):
            # Strip the quotes:
            entry = (tag, attribute, quoted[1:-1])
            if entry in ignore or intuplelist(entry, ignore) != entry:
                # NOTE(review): this stops at the first ignorable attribute,
                # so any later attributes of the same tag are dropped as
                # well -- confirm whether skipping only this one was intended.
                break
            found.append(entry)
    return found
112 113
class FilterFailure(Exception):
    """Raised by a filter that did not pass; carries the explanation or
    comment as its message."""

    def __init__(self, messages):
        """Build the exception from one message or a list of messages.

        @param messages: a unicode string, or a non-empty list of unicode
        strings which are joined with ", "
        """
        if isinstance(messages, list):
            parts = messages
        else:
            parts = [messages]
        assert isinstance(parts[0], unicode) # Assumption: all of same type
        text = u", ".join(parts)
        Exception.__init__(self, text)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            self.args = text
126
class SeriousFilterFailure(FilterFailure):
    """Raised by a filter that did not pass when the bad translation might
    break an application (so the string will be marked fuzzy)."""
# Each entry is a (tag, attribute, value) tuple naming an attribute that may
# be changed/ignored when it occurs inside tag. The third element, when given,
# is an attribute value that can be ignored if present (like defaults, for
# example).
# A None item is a wildcard: the entry applies to every tag/value in that
# position. A non-None "value" means the attribute's value must be taken into
# account.
common_ignoretags = [(None, "xml-lang", None)]
common_canchangetags = [("img", "alt", None)]

class CheckerConfig(object):
    """Holds the settings that control how a checker behaves."""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None, validchars=None,
                 punctuation=None, endpunctuation=None, ignoretags=None,
                 canchangetags=None, criticaltests=None, credit_sources=None):
        """Store the given options, substituting defaults for None.

        List options default to an empty list, punctuation options to the
        target language's values, and the tag options to the module-level
        common_ignoretags / common_canchangetags.
        """
        # List-valued options
        for name, supplied in (("accelmarkers", accelmarkers),
                               ("varmatches", varmatches),
                               ("criticaltests", criticaltests),
                               ("credit_sources", credit_sources)):
            setattr(self, name, self._init_list(supplied))
        # Language objects; the source language is always English
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')
        # Options that fall back to language or module defaults
        user_punctuation = data.normalized_unicode(punctuation)
        self.punctuation = self._init_default(user_punctuation, self.lang.punctuation)
        user_endpunctuation = data.normalized_unicode(endpunctuation)
        self.endpunctuation = self._init_default(user_endpunctuation, self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags, common_canchangetags)
        # Word lists are kept as dictionaries keyed on the normalised word
        # TODO: allow user configuration of untranslatable words
        fixedwords = [data.normalized_unicode(word) for word in self._init_list(notranslatewords)]
        self.notranslatewords = dict.fromkeys(fixedwords)
        requiredwords = [data.normalized_unicode(word) for word in self._init_list(musttranslatewords)]
        self.musttranslatewords = dict.fromkeys(requiredwords)
        self.validcharsmap = {}
        self.updatevalidchars(data.normalized_unicode(validchars))

    def _init_list(self, list):
        """Return list unchanged, or a new empty list when it is None.

        @type list: List
        @param list: None or a list parameter
        @rtype: List
        """
        if list is not None:
            return list
        return []

    def _init_default(self, param, default):
        """Return the user's value for an option, or its default.

        @param param: the user supplied parameter value
        @param default: value to use when param is None
        @return: param when it is not None, otherwise default
        """
        if param is not None:
            return param
        return default

    def update(self, otherconfig):
        """Merge the settings of otherconfig into this config object."""
        if otherconfig.targetlanguage:
            self.targetlanguage = otherconfig.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        # Only accelerator markers we do not have yet are added
        newmarkers = [marker for marker in otherconfig.accelmarkers if not marker in self.accelmarkers]
        self.accelmarkers.extend(newmarkers)
        self.varmatches.extend(otherconfig.varmatches)
        for mine, theirs in ((self.notranslatewords, otherconfig.notranslatewords),
                             (self.musttranslatewords, otherconfig.musttranslatewords),
                             (self.validcharsmap, otherconfig.validcharsmap)):
            mine.update(theirs)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        # (these are replaced outright by the other config's values)
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """Add validchars to the map that eliminates valid characters.

        @param validchars: a string of characters, or None to do nothing
        @return: True when validchars is None, otherwise None
        """
        if validchars is None:
            return True
        ordinals = [ord(validchar) for validchar in data.normalized_unicode(validchars)]
        # Every ordinal maps to None
        self.validcharsmap.update(dict.fromkeys(ordinals))

    def updatetargetlanguage(self, langcode):
        """Set self.lang to the language object for the given language code."""
        self.lang = factory.getlanguage(langcode)

219
def cache_results(f):
    """Wrap the one-argument method f so its results are cached per instance.

    Results are stored in self.results_cache, a dictionary the instance must
    provide, under the key (f.__name__, param1). param1 must therefore be
    hashable.

    @param f: a method taking (self, param1)
    @return: a method with the same call signature that consults the cache
    before calling f; it carries f's docstring and, where the interpreter
    allows it, f's name
    """
    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key in res_cache:
            return res_cache[key]
        else:
            value = f(self, param1)
            res_cache[key] = value
            return value
    # Keep the wrapped method's identity so documentation tools and
    # introspection do not just see an anonymous "cached_f".
    cached_f.__doc__ = f.__doc__
    try:
        cached_f.__name__ = f.__name__
    except (TypeError, AttributeError):
        # Function names are read-only on very old interpreters; the cache
        # key above uses f.__name__ directly, so nothing depends on this.
        pass
    return cached_f

232 -class UnitChecker(object):
233 """Parent Checker class which does the checking based on functions available 234 in derived classes.""" 235 preconditions = {} 236
237 - def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
238 self.errorhandler = errorhandler 239 if checkerconfig is None: 240 self.setconfig(CheckerConfig()) 241 else: 242 self.setconfig(checkerconfig) 243 # exclude functions defined in UnitChecker from being treated as tests... 244 self.helperfunctions = {} 245 for functionname in dir(UnitChecker): 246 function = getattr(self, functionname) 247 if callable(function): 248 self.helperfunctions[functionname] = function 249 self.defaultfilters = self.getfilters(excludefilters, limitfilters) 250 251 self.results_cache = {}
252
253 - def getfilters(self, excludefilters=None, limitfilters=None):
254 """returns dictionary of available filters, including/excluding those in 255 the given lists""" 256 filters = {} 257 if limitfilters is None: 258 # use everything available unless instructed 259 limitfilters = dir(self) 260 if excludefilters is None: 261 excludefilters = {} 262 for functionname in limitfilters: 263 if functionname in excludefilters: continue 264 if functionname in self.helperfunctions: continue 265 if functionname == "errorhandler": continue 266 filterfunction = getattr(self, functionname, None) 267 if not callable(filterfunction): continue 268 filters[functionname] = filterfunction 269 return filters
270
271 - def setconfig(self, config):
272 """sets the accelerator list""" 273 self.config = config 274 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers] 275 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname) 276 for startmatch, endmatch in self.config.varmatches] 277 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone) 278 for startmatch, endmatch in self.config.varmatches]
279
280 - def setsuggestionstore(self, store):
281 """Sets the filename that a checker should use for evaluating suggestions.""" 282 self.suggestion_store = store
283
284 - def filtervariables(self, str1):
285 """filter out variables from str1""" 286 return helpers.multifilter(str1, self.varfilters)
287 filtervariables = cache_results(filtervariables) 288
289 - def removevariables(self, str1):
290 """remove variables from str1""" 291 return helpers.multifilter(str1, self.removevarfilter)
292 removevariables = cache_results(removevariables) 293
294 - def filteraccelerators(self, str1):
295 """filter out accelerators from str1""" 296 return helpers.multifilter(str1, self.accfilters, None)
297 filteraccelerators = cache_results(filteraccelerators) 298
299 - def filteraccelerators_by_list(self, str1, acceptlist=None):
300 """filter out accelerators from str1""" 301 return helpers.multifilter(str1, self.accfilters, acceptlist)
302
303 - def filterwordswithpunctuation(self, str1):
304 """replaces words with punctuation with their unpunctuated equivalents""" 305 return prefilters.filterwordswithpunctuation(str1)
306 filterwordswithpunctuation = cache_results(filterwordswithpunctuation) 307
308 - def filterxml(self, str1):
309 """filter out XML from the string so only text remains""" 310 return tag_re.sub("", str1)
311 filterxml = cache_results(filterxml) 312
313 - def run_test(self, test, unit):
314 """Runs the given test on the given unit. 315 316 Note that this can raise a FilterFailure as part of normal operation""" 317 return test(unit)
318
319 - def run_filters(self, unit):
320 """run all the tests in this suite, return failures as testname, message_or_exception""" 321 self.results_cache = {} 322 failures = {} 323 ignores = self.config.lang.ignoretests[:] 324 functionnames = self.defaultfilters.keys() 325 priorityfunctionnames = self.preconditions.keys() 326 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames) 327 for functionname in priorityfunctionnames + otherfunctionnames: 328 if functionname in ignores: 329 continue 330 filterfunction = getattr(self, functionname, None) 331 # this filterfunction may only be defined on another checker if using TeeChecker 332 if filterfunction is None: 333 continue 334 filtermessage = filterfunction.__doc__ 335 try: 336 filterresult = self.run_test(filterfunction, unit) 337 except FilterFailure, e: 338 filterresult = False 339 filtermessage = e.args[0] 340 except Exception, e: 341 if self.errorhandler is None: 342 raise ValueError("error in filter %s: %r, %r, %s" % \ 343 (functionname, unit.source, unit.target, e)) 344 else: 345 filterresult = self.errorhandler(functionname, unit.source, unit.target, e) 346 if not filterresult: 347 # we test some preconditions that aren't actually a cause for failure 348 if functionname in self.defaultfilters: 349 failures[functionname] = filtermessage 350 if functionname in self.preconditions: 351 for ignoredfunctionname in self.preconditions[functionname]: 352 ignores.append(ignoredfunctionname) 353 self.results_cache = {} 354 return failures
355
356 -class TranslationChecker(UnitChecker):
357 """A checker that passes source and target strings to the checks, not the 358 whole unit. 359 360 This provides some speedup and simplifies testing."""
361 - def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
362 super(TranslationChecker, self).__init__(checkerconfig, excludefilters, limitfilters, errorhandler)
363
364 - def run_test(self, test, unit):
365 """Runs the given test on the given unit. 366 367 Note that this can raise a FilterFailure as part of normal operation.""" 368 if self.hasplural: 369 filtermessages = [] 370 filterresult = True 371 for pluralform in unit.target.strings: 372 try: 373 if not test(self.str1, unicode(pluralform)): 374 filterresult = False 375 except FilterFailure, e: 376 filterresult = False 377 filtermessages.append( unicode(e.args) ) 378 if not filterresult and filtermessages: 379 raise FilterFailure(filtermessages) 380 else: 381 return filterresult 382 else: 383 return test(self.str1, self.str2)
384
385 - def run_filters(self, unit):
386 """Do some optimisation by caching some data of the unit for the benefit 387 of run_test().""" 388 self.str1 = data.normalized_unicode(unit.source) 389 self.str2 = data.normalized_unicode(unit.target) 390 self.hasplural = unit.hasplural() 391 self.locations = unit.getlocations() 392 return super(TranslationChecker, self).run_filters(unit)
393
class TeeChecker:
    """Drives several checkers at once and merges their results."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
                 checkerclasses=None, errorhandler=None, languagecode=None):
        """Instantiate each class in checkerclasses with the shared options.

        @param checkerclasses: checker classes to use (default:
        [StandardChecker])
        @param languagecode: if given, every checker's config is switched to
        this target language and the language's own checker, if it has one,
        is added to the list
        """
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        checkers = []
        for checkerclass in checkerclasses:
            checkers.append(checkerclass(checkerconfig=checkerconfig, excludefilters=excludefilters,
                                         limitfilters=limitfilters, errorhandler=errorhandler))
        self.checkers = checkers
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """Return the union of the filters offered by all the checkers.

        The result is also stored in self.combinedfilters. A warning is
        printed to stderr for each name in limitfilters that no checker
        provides.
        """
        if excludefilters is None:
            excludefilters = {}
        filterslist = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers]
        combined = {}
        for filters in filterslist:
            combined.update(filters)
        self.combinedfilters = combined
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            unknown = [filtername for filtername in limitfilters if not filtername in combined]
            for filtername in unknown:
                import sys
                print >> sys.stderr, "warning: could not find filter %s" % filtername
        return self.combinedfilters

    def run_filters(self, unit):
        """Run every checker on unit and return the merged failures."""
        failures = {}
        for checker in self.checkers:
            checker_failures = checker.run_filters(unit)
            failures.update(checker_failures)
        return failures

    def setsuggestionstore(self, store):
        """Pass the suggestion store on to every checker."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)

442 443
444 -class StandardChecker(TranslationChecker):
445 """The basic test suite for source -> target translations."""
def untranslated(self, str1, str2):
    """checks whether a string has been translated at all"""
    # Fails (returns False) when the source has non-whitespace text but the
    # target, once KDE comments are removed, is empty.
    target = prefilters.removekdecomments(str2)
    source_has_text = len(str1.strip()) > 0
    if source_has_text and len(target) == 0:
        return False
    return True
450
def unchanged(self, str1, str2):
    """checks whether a translation is basically identical to the original string"""
    # Both strings are compared with variables and accelerators removed.
    # Returns True when the translation is acceptable; raises FilterFailure
    # when source and target are the same apart from case.
    source = self.filteraccelerators(self.removevariables(str1)).strip()
    target = self.filteraccelerators(self.removevariables(str2)).strip()
    if len(source) < 2:
        return True
    # If the whole string is uppercase, or nothing in the string can go
    # towards uppercase, let's assume there is nothing translatable
    # TODO: reconsider
    nothing_translatable = source.isupper() or source.upper() == source
    if nothing_translatable and source == target:
        return True
    if self.config.notranslatewords:
        words = source.split()
        #why do we only test for one notranslate word?
        if len(words) == 1 and words[0] in self.config.notranslatewords:
            return True
    # we could also check for things like source.isnumeric(), but the test
    # above (source.upper() == source) makes this unnecessary
    if source.lower() == target.lower():
        raise FilterFailure(u"please translate")
    return True
474
def blank(self, str1, str2):
    """checks whether a translation only contains spaces"""
    # Fails only when the source has text and the target is non-empty but
    # consists entirely of whitespace. An empty target passes here.
    source_has_text = len(str1.strip()) > 0
    target_only_whitespace = len(str2) != 0 and len(str2.strip()) == 0
    return not (source_has_text and target_only_whitespace)
480
def short(self, str1, str2):
    """checks whether a translation is much shorter than the original string"""
    # Lengths are measured on the stripped strings. The test fails when the
    # target is non-empty but under a tenth of the source length, or when a
    # source of more than one character is translated by a single character.
    source_len = len(str1.strip())
    target_len = len(str2.strip())
    under_a_tenth = (source_len > 0) and (0 < target_len < (source_len * 0.1))
    single_char = (source_len > 1) and (target_len == 1)
    if under_a_tenth or single_char:
        return False
    return True
486
def long(self, str1, str2):
    """checks whether a translation is much longer than the original string"""
    # Lengths are measured on the stripped strings. The test fails when the
    # non-empty source is under a tenth of the target length, or when a
    # single-character source is translated by more than one character.
    source_len = len(str1.strip())
    target_len = len(str2.strip())
    under_a_tenth = (source_len > 0) and (0 < source_len < (target_len * 0.1))
    single_char = (source_len == 1) and (target_len > 1)
    if under_a_tenth or single_char:
        return False
    return True
492
def escapes(self, str1, str2):
    """checks whether escaping is consistent between the two strings"""
    # Returns True when helpers.countsmatch accepts the backslash sequences of
    # both strings; otherwise raises SeriousFilterFailure listing, for each
    # string, the whitespace-separated words that contain a backslash.
    if helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
        return True
    listed = []
    for text in (str1, str2):
        words = [u"'%s'" % word for word in text.split() if u"\\" in word]
        listed.append(u", ".join(words))
    raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % tuple(listed))
501
def newlines(self, str1, str2):
    """checks whether newlines are consistent between the two strings"""
    # Returns True when helpers.countsmatch accepts the "\n" and "\r"
    # characters of both strings; raises FilterFailure otherwise.
    if helpers.countsmatch(str1, str2, (u"\n", u"\r")):
        return True
    raise FilterFailure(u"line endings in original don't match line endings in translation")
508
def tabs(self, str1, str2):
    """checks whether tabs are consistent between the two strings"""
    # Returns True when helpers.countmatch accepts the tab characters of both
    # strings; raises SeriousFilterFailure otherwise.
    if helpers.countmatch(str1, str2, "\t"):
        return True
    raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
515
def singlequoting(self, str1, str2):
    """checks whether singlequoting is consistent between the two strings"""
    # Variables, accelerators and words with punctuation are filtered out of
    # both strings (in that order) before the single-quote sequences are
    # compared with helpers.countsmatch.
    source = self.filtervariables(str1)
    source = self.filteraccelerators(source)
    source = self.filterwordswithpunctuation(source)
    target = self.filtervariables(str2)
    target = self.filteraccelerators(target)
    target = self.filterwordswithpunctuation(target)
    quotes = (u"'", u"''", u"\\'")
    return helpers.countsmatch(source, target, quotes)
521
def doublequoting(self, str1, str2):
    """checks whether doublequoting is consistent between the two strings"""
    # Variables, accelerators and XML tags are filtered out of both strings;
    # the source additionally goes through the target language's
    # punctranslate() before the quote characters are compared.
    source = self.filterxml(self.filteraccelerators(self.filtervariables(str1)))
    source = self.config.lang.punctranslate(source)
    target = self.filterxml(self.filteraccelerators(self.filtervariables(str2)))
    quotes = (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”")
    return helpers.countsmatch(source, target, quotes)
530
def doublespacing(self, str1, str2):
    """checks for bad double-spaces by comparing to original"""
    # Accelerator markers are filtered out of both strings first.
    str1 = self.filteraccelerators(str1)
    str2 = self.filteraccelerators(str2)
    # Compare occurrences of two consecutive spaces, as the docstring
    # describes; a single space here would compare ordinary word spacing,
    # which differs between source and translation as a matter of course.
    # (presumably helpers.countmatch compares the substring counts -- confirm)
    return helpers.countmatch(str1, str2, u"  ")
536
def puncspacing(self, str1, str2):
    """checks for bad spacing after punctuation"""
    # Only punctuation marks that occur equally often in both strings are
    # examined; for those, the number of times the mark is followed by a
    # space must agree as well. Returns True on success, False on failure.
    if u" " not in str1:
        return True
    source = self.filteraccelerators(self.filtervariables(str1))
    source = self.config.lang.punctranslate(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    for mark in self.config.punctuation:
        occurrences = source.count(mark)
        if occurrences == 0 or occurrences != target.count(mark):
            continue
        spaced_source = source.count(mark + u" ")
        spaced_target = target.count(mark + u" ")
        if spaced_source == spaced_target:
            continue
        # handle extra spaces that are because of transposed punctuation
        transposed = source.endswith(mark) != target.endswith(mark)
        if transposed and abs(spaced_source - spaced_target) == 1:
            continue
        return False
    return True
557
def printf(self, str1, str2):
    """checks whether printf format strings match"""
    # Returns 1 when the format specifiers found by printf_pat in the
    # translation (str2) agree with those in the original (str1), 0 otherwise.
    # count1/count2 end up as the number of specifiers seen in str1/str2, or
    # stay None when the corresponding loop never ran.
    count1 = count2 = plural = None
    # self.hasplural only set by run_filters, not always available
    if 'hasplural' in self.__dict__:
        plural = self.hasplural
    for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
        count2 = var_num2 + 1
        str2key = match2.group('key')
        if match2.group('ord'):
            # Positional specifier such as %1$s: compare it with the source
            # specifier at that (1-based) position.
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                if int(match2.group('ord')) == var_num1 + 1:
                    if match2.group('fullvar') != match1.group('fullvar'):
                        return 0
        elif str2key:
            # Named specifier such as %(name)s: the same key must occur in
            # the source with the same format.
            str1key = None
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                if match1.group('key') and str2key == match1.group('key'):
                    str1key = match1.group('key')
                    # '%.0s' "placeholder" in plural will match anything
                    if plural and match2.group('fullvar') == '.0s':
                        continue
                    if match1.group('fullvar') != match2.group('fullvar'):
                        return 0
            if str1key == None:
                # The key used in the translation is not in the source.
                return 0
        else:
            # Plain specifier: compare with the source specifier at the same
            # index.
            for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                count1 = var_num1 + 1
                # '%.0s' "placeholder" in plural will match anything
                if plural and match2.group('fullvar') == '.0s':
                    continue
                if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
                    return 0

    if count2 is None:
        # No specifiers in the translation: fail if the source has any.
        if list(printf_pat.finditer(str1)):
            return 0

    # Finally the number of specifiers must be the same on both sides.
    if (count1 or count2) and (count1 != count2):
        return 0
    return 1
602
def accelerators(self, str1, str2):
    """checks whether accelerators are consistent between the two strings"""
    # For every configured accelerator marker the number of accelerators in
    # the source (counted with the source language's valid characters) must
    # equal the number in the target (counted with the target language's).
    # Returns True on success; otherwise raises SeriousFilterFailure when
    # "accelerators" is listed in config.criticaltests, else FilterFailure.
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    problems = []
    for marker in self.config.accelmarkers:
        count_source = decoration.countaccelerators(marker, self.config.sourcelang.validaccel)
        count_target = decoration.countaccelerators(marker, self.config.lang.validaccel)
        good1, _badcount1 = count_source(source)
        good2, badcount2 = count_target(target)
        get_target = decoration.getaccelerators(marker, self.config.lang.validaccel)
        _accels2, badaccels2 = get_target(target)
        if good1 == good2:
            continue
        if good1 == 1 and good2 == 0:
            if badcount2 == 1:
                problem = u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (marker, badaccels2[0])
            else:
                problem = u"accelerator %s is missing from translation" % marker
        elif good1 == 0:
            problem = u"accelerator %s does not occur in original and should not be in translation" % marker
        elif good1 == 1 and good2 > good1:
            problem = u"accelerator %s is repeated in translation" % marker
        else:
            problem = u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (marker, good1, good2)
        problems.append(problem)
    if not problems:
        return True
    if "accelerators" in self.config.criticaltests:
        raise SeriousFilterFailure(problems)
    raise FilterFailure(problems)
634 635 # def acceleratedvariables(self, str1, str2): 636 # """checks that no variables are accelerated""" 637 # messages = [] 638 # for accelerator in self.config.accelmarkers: 639 # for variablestart, variableend in self.config.varmatches: 640 # error = accelerator + variablestart 641 # if str1.find(error) >= 0: 642 # messages.append(u"original has an accelerated variable") 643 # if str2.find(error) >= 0: 644 # messages.append(u"translation has an accelerated variable") 645 # if messages: 646 # raise FilterFailure(messages) 647 # return True 648
def variables(self, str1, str2):
    """checks whether variables of various forms are consistent between the two strings"""
    # Returns True when, for every (startmarker, endmarker) style in
    # config.varmatches, both strings contain the same variables. Raises
    # SeriousFilterFailure when variables of the original are missing from
    # the translation, and FilterFailure when the translation only has extra
    # ones.
    messages = []
    # Decorated variables that are missing from / extra in the translation.
    mismatch1, mismatch2 = [], []
    # Bare variable names already reported, per string.
    varnames1, varnames2 = [], []
    for startmarker, endmarker in self.config.varmatches:
        varchecker = decoration.getvariables(startmarker, endmarker)
        # redecorate() puts the markers back around a bare name for the
        # message. An integer endmarker is not appended.
        if startmarker and endmarker:
            if isinstance(endmarker, int):
                redecorate = lambda var: startmarker + var
            else:
                redecorate = lambda var: startmarker + var + endmarker
        elif startmarker:
            redecorate = lambda var: startmarker + var
        else:
            redecorate = lambda var: var
        vars1 = varchecker(str1)
        vars2 = varchecker(str2)
        if vars1 != vars2:
            # we use counts to compare so we can handle multiple variables
            vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
            # filter variable names we've already seen, so they aren't matched by more than one filter...
            vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
            varnames1.extend(vars1)
            varnames2.extend(vars2)
            vars1 = map(redecorate, vars1)
            vars2 = map(redecorate, vars2)
            mismatch1.extend(vars1)
            mismatch2.extend(vars2)
    # Missing variables take precedence: extra ones are only reported when
    # nothing is missing.
    if mismatch1:
        messages.append(u"do not translate: %s" % u", ".join(mismatch1))
    elif mismatch2:
        messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
    if messages and mismatch1:
        raise SeriousFilterFailure(messages)
    elif messages:
        raise FilterFailure(messages)
    return True
687
def functions(self, str1, str2):
    """checks that function names are not translated"""
    # Delegates to helpers.funcmatch with decoration.getfunctions as the
    # extractor, passing the configured punctuation through to it.
    punctuation = self.config.punctuation
    result = helpers.funcmatch(str1, str2, decoration.getfunctions, punctuation)
    return result
691
def emails(self, str1, str2):
    """checks that emails are not translated"""
    # Delegates to helpers.funcmatch with decoration.getemails as the
    # extractor applied to both strings.
    result = helpers.funcmatch(str1, str2, decoration.getemails)
    return result
695
def urls(self, str1, str2):
    """checks that URLs are not translated"""
    # Delegates to helpers.funcmatch with decoration.geturls as the extractor
    # applied to both strings.
    result = helpers.funcmatch(str1, str2, decoration.geturls)
    return result
699
def numbers(self, str1, str2):
    """checks whether numbers of various forms are consistent between the two strings"""
    # The numbers are extracted from the source only; helpers.countsmatch
    # then compares both strings against that list.
    source_numbers = decoration.getnumbers(str1)
    return helpers.countsmatch(str1, str2, source_numbers)
703
def startwhitespace(self, str1, str2):
    """checks whether whitespace at the beginning of the strings matches"""
    # Delegates to helpers.funcmatch with decoration.spacestart as the
    # extractor applied to both strings.
    result = helpers.funcmatch(str1, str2, decoration.spacestart)
    return result
707
def endwhitespace(self, str1, str2):
    """checks whether whitespace at the end of the strings matches"""
    # The source first goes through the target language's punctranslate();
    # the comparison itself is done by helpers.funcmatch with
    # decoration.spaceend.
    source = self.config.lang.punctranslate(str1)
    result = helpers.funcmatch(source, str2, decoration.spaceend)
    return result
712
def startpunc(self, str1, str2):
    """checks whether punctuation at the beginning of the strings match"""
    # Words with punctuation, variables and accelerators are filtered out of
    # both strings (in that order); the source also goes through the target
    # language's punctranslate() before the comparison.
    source = self.filterwordswithpunctuation(str1)
    source = self.filteraccelerators(self.filtervariables(source))
    source = self.config.lang.punctranslate(source)
    target = self.filterwordswithpunctuation(str2)
    target = self.filteraccelerators(self.filtervariables(target))
    return helpers.funcmatch(source, target, decoration.puncstart, self.config.punctuation)
719
def endpunc(self, str1, str2):
    """checks whether punctuation at the end of the strings match"""
    # Same filtering as for the start of the string, after which trailing
    # whitespace is stripped from both. The characters compared are the
    # configured end punctuation plus ":".
    source = self.filterwordswithpunctuation(str1)
    source = self.filteraccelerators(self.filtervariables(source))
    source = self.config.lang.punctranslate(source)
    target = self.filterwordswithpunctuation(str2)
    target = self.filteraccelerators(self.filtervariables(target))
    source = source.rstrip()
    target = target.rstrip()
    endmarks = self.config.endpunctuation + u":"
    return helpers.funcmatch(source, target, decoration.puncend, endmarks)
728
def purepunc(self, str1, str2):
    """checks that strings that are purely punctuation are not changed"""
    # this test is a subset of startandend
    # A source that is not pure punctuation only fails when the target is.
    if not decoration.ispurepunctuation(str1):
        return not decoration.ispurepunctuation(str2)
    return str1 == str2
736
def brackets(self, str1, str2):
    """checks that the number of brackets in both strings match"""
    # Each of [ ] { } ( ) is counted separately after variables have been
    # filtered out. Returns True when all counts agree; otherwise raises
    # FilterFailure naming the brackets that are missing and/or extra.
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    missing = []
    extra = []
    for bracket in (u"[", u"]", u"{", u"}", u"(", u")"):
        difference = target.count(bracket) - source.count(bracket)
        if difference < 0:
            missing.append(u"'%s'" % bracket)
        elif difference > 0:
            extra.append(u"'%s'" % bracket)
    messages = []
    if missing:
        messages.append(u"translation is missing %s" % u", ".join(missing))
    if extra:
        messages.append(u"translation has extra %s" % u", ".join(extra))
    if not messages:
        return True
    raise FilterFailure(messages)
758
def sentencecount(self, str1, str2):
    """checks that the number of sentences in both strings match

    Accelerators are filtered from both strings. The first string is then
    split with ``self.config.sourcelang.sentences`` and the second with
    ``self.config.lang.sentences``.

    Raises ``FilterFailure`` when the two counts differ, otherwise returns
    ``True``.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    source_total = len(self.config.sourcelang.sentences(source))
    target_total = len(self.config.lang.sentences(target))
    if source_total != target_total:
        raise FilterFailure(u"The number of sentences differ: %d versus %d" % (source_total, target_total))
    return True
768
def options(self, str1, str2):
    """checks that options are not translated

    A whitespace-separated word of the first string (after
    ``filtervariables``) is treated as an option when it starts with
    ``--``, is not just ``--`` and ends in an alphanumeric character.
    The part before the first ``=`` must occur in the second string, and
    the part after it (if any) must not.

    Raises ``FilterFailure`` on the first violation, otherwise returns
    ``True``.
    """
    for token in self.filtervariables(str1).split():
        is_option = (token.startswith(u"--") and token != u"--"
                     and token[-1].isalnum())
        if not is_option:
            continue
        parts = token.split(u"=")
        option = parts[0]
        if option not in str2:
            raise FilterFailure(u"The option %s does not occur or is translated in the translation." % option)
        if len(parts) > 1 and parts[1] in str2:
            raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": option})
    return True
780
def startcaps(self, str1, str2):
    """checks that the message starts with the correct capitalisation

    Accelerators are filtered from both strings first. Two empty strings
    pass; exactly one empty string fails. When both strings are longer
    than one character, the result of
    ``self.config.sourcelang.capsstart`` on the first must equal that of
    ``self.config.lang.capsstart`` on the second. Any other combination
    passes.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    if not source or not target:
        # Only acceptable when both of them are empty.
        return not source and not target
    if len(source) > 1 and len(target) > 1:
        return self.config.sourcelang.capsstart(source) == self.config.lang.capsstart(target)
    return True
792
def simplecaps(self, str1, str2):
    """checks the capitalisation of two strings isn't wildly different

    Variables are removed from both strings, then capitals and alphabetic
    characters are counted with ``helpers.filtercount``. An all-capitals
    first string requires an all-capitals second string; the remaining
    cases are heuristics on the number of capitals relative to the string
    lengths.
    """
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably serves
    # no purpose to get sourcelang.sentenceend
    source = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, u" i ", source)
    source_caps = helpers.filtercount(source, unicode.isupper)
    target_caps = helpers.filtercount(target, unicode.isupper)
    source_alpha = helpers.filtercount(source, unicode.isalpha)
    target_alpha = helpers.filtercount(target, unicode.isalpha)
    # Capture the all caps case
    if source_caps == source_alpha:
        return target_caps == target_alpha
    # Some heuristic tests to try and see that the style of capitals is
    # vaguely the same. The order of these tests matters.
    # NOTE: the divisions below are on ints; under Python 2 without
    # "from __future__ import division" that is floor division.
    if source_caps in (0, 1):
        return target_caps == source_caps
    source_len = len(source)
    target_len = len(target)
    if source_caps < source_len / 10:
        return target_caps <= target_len / 8
    if source_len < 10:
        return abs(source_caps - target_caps) < 3
    if source_caps > source_len * 6 / 10:
        return target_caps > target_len * 6 / 10
    return abs(source_caps - target_caps) < (source_len + target_len) / 6
818
def acronyms(self, str1, str2):
    """checks that acronyms that appear are unchanged

    A word from ``self.config.lang.word_iter`` over the filtered first
    string counts as an acronym when it is all upper case and longer than
    one character. Variables found in the first string and the keys of
    ``self.config.musttranslatewords`` are exempt.

    Raises ``FilterFailure`` listing the acronyms that do not occur in the
    filtered second string, otherwise returns ``True``.
    """
    allowed = []
    for startmatch, endmatch in self.config.varmatches:
        allowed += decoration.getvariables(startmatch, endmatch)(str1)
    allowed += self.config.musttranslatewords.keys()
    source = self.filteraccelerators(self.filtervariables(str1))
    words = self.config.lang.word_iter(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    changed = [word for word in words
               if word.isupper() and len(word) > 1
               and word not in allowed and word not in target]
    if changed:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(changed))
    return True
840
def doublewords(self, str1, str2):
    """checks for repeated words in the translation

    Only the second string is inspected: variables and accelerators are
    removed, full stops are deleted and the text is lower-cased before it
    is split on whitespace.

    Raises ``FilterFailure`` for the first word that directly repeats the
    previous one, unless it is listed in
    ``self.config.lang.validdoublewords``. Returns ``True`` otherwise.
    """
    # NOTE(review): splitting on "\n" and joining with "\n" again leaves
    # the text unchanged; kept as-is to preserve behaviour -- confirm
    # whether replacing the newlines was intended.
    text = "\n".join(str2.split("\n"))
    text = self.filteraccelerators(self.removevariables(text))
    previous = ""
    for current in text.replace(u".", u"").lower().split():
        if current == previous and current not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % current)
        previous = current
    return True
851
def notranslatewords(self, str1, str2):
    """checks that words configured as untranslatable appear in the translation too

    Returns ``True`` straight away when ``self.config.notranslatewords``
    is empty. Otherwise variables are filtered from both strings, every
    character of ``self.config.punctuation`` is replaced by a space,
    accelerators are filtered and the strings are split into words.

    Raises ``FilterFailure`` listing the configured words that occur in
    the first string but not in the second.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    # The punctuation set is full of strange quotes and things in utf-8
    # encoding; the single apostrophe is perhaps problematic in words like
    # "doesn't".
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    stopwords = [word for word in source_words
                 if word in protected and word not in target_words]
    if stopwords:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(stopwords)))
    return True
869
def musttranslatewords(self, str1, str2):
    """checks that words configured as definitely translatable don't appear in
    the translation

    Returns ``True`` straight away when ``self.config.musttranslatewords``
    is empty. Otherwise variables are removed from both strings, every
    character of ``self.config.punctuation`` is replaced by a space,
    accelerators are filtered and the strings are split into words.

    Raises ``FilterFailure`` listing the configured words that occur in
    both the first and the second string.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # The punctuation set is full of strange quotes and things in utf-8
    # encoding; the single apostrophe is perhaps problematic in words like
    # "doesn't".
    for mark in self.config.punctuation:
        source = source.replace(mark, u" ")
        target = target.replace(mark, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    stopwords = [word for word in source_words
                 if word in required and word in target_words]
    if stopwords:
        raise FilterFailure(u"please translate: %s" % (u", ".join(stopwords)))
    return True
888
def validchars(self, str1, str2):
    """checks that only characters specified as valid appear in the translation

    Returns ``True`` straight away when ``self.config.validcharsmap`` is
    empty. Otherwise both strings are run through ``translate`` with that
    map, and every character left over in the second string that is not
    also left over in the first is reported.

    Raises ``FilterFailure`` listing those characters with their code
    points.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    invalidchars = []
    for char in target_leftover:
        if char not in source_leftover:
            invalidchars.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if invalidchars:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(invalidchars)))
    return True
899
def filepaths(self, str1, str2):
    """checks that file paths have not been translated

    Every whitespace-separated word of the accelerator-filtered first
    string that starts with ``/`` is passed to ``helpers.countsmatch``
    together with the two unfiltered strings; the check fails on the first
    word for which that returns a false value.
    """
    for word in self.filteraccelerators(str1).split():
        if word.startswith(u"/") and not helpers.countsmatch(str1, str2, (word,)):
            return False
    return True
907
def xmltags(self, str1, str2):
    """checks that XML/HTML tags have not been translated

    Tags are found with ``tag_re``. When the first string has none, the
    second must have none either. When the first string consists of a
    single tag without ``=``, the check passes. Otherwise the tag
    properties of both strings (from ``tagproperties`` with
    ``self.config.ignoretags``, each passed through ``intuplelist`` with
    ``self.config.canchangetags``) must be equal.
    """
    source_tags = tag_re.findall(str1)
    if not source_tags:
        # No tags in str1, let's just check that none were added in str2.
        # This might be useful for fuzzy strings wrongly unfuzzied, for
        # example.
        return not tag_re.findall(str2)
    first_tag = source_tags[0]
    if len(first_tag) == len(str1) and u"=" not in first_tag:
        return True
    target_tags = tag_re.findall(str2)
    source_properties = tagproperties(source_tags, self.config.ignoretags)
    target_properties = tagproperties(target_tags, self.config.ignoretags)
    changeable = self.config.canchangetags
    source_filtered = [intuplelist(item, changeable) for item in source_properties]
    target_filtered = [intuplelist(item, changeable) for item in target_properties]
    #TODO: consider the consequences of different ordering of attributes/tags
    return source_filtered == target_filtered
934
def kdecomments(self, str1, str2):
    """checks to ensure that no KDE style comments appear in the translation

    Fails when the second string starts with ``_:`` or contains ``_:``
    directly after a newline. The first string is not used.
    """
    has_comment = str2.startswith(u"_:") or u"\n_:" in str2
    return not has_comment
938
def compendiumconflicts(self, str1, str2):
    """checks for Gettext compendium conflicts (#-#-#-#-#)

    Fails when the marker ``#-#-#-#-#`` occurs anywhere in the second
    string. The first string is not used.
    """
    return u"#-#-#-#-#" not in str2
942
def simpleplurals(self, str1, str2):
    """checks for English style plural(s) for you to review

    Counts occurrences of the literal text ``(s)`` in both strings. When
    ``self.config.lang.nplurals`` is 1 the second string must not contain
    it at all; otherwise both strings must contain it equally often.
    """
    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape, so the non-raw form
    # only worked by accident and warns on newer Python versions.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
958
def spellcheck(self, str1, str2):
    """checks words that don't pass a spell check

    Passes without checking when ``self.config.targetlanguage`` is not
    set or ``spelling.available`` is false. Otherwise both strings are
    stripped of variables and accelerators and run through
    ``spelling.check`` (the first with ``lang="en"``, the second with the
    configured target language).

    Raises ``FilterFailure`` with one message per reported word of the
    second string, skipping words that are in
    ``self.config.notranslatewords``, that were also reported for the
    first string, or that appear among their own suggestions.
    """
    config = self.config
    if not config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1), config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2), config.lang.validaccel)
    source_reported = [word for word, index, suggestions in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(target, lang=config.targetlanguage):
        # The last condition is a hack to ignore hyphenisation rules.
        if (word in config.notranslatewords or word in source_reported
                or word in suggestions):
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
984
def credits(self, str1, str2):
    """checks for messages containing translation credits instead of normal translations.

    Fails when the first string is one of
    ``self.config.credit_sources``. The second string is not used.
    """
    return str1 not in self.config.credit_sources
# If the precondition filter is run and fails then the other tests listed are
# ignored.
# NOTE(review): indentation was lost in this listing; `preconditions`
# presumably belongs to the checker class whose tests precede it -- confirm
# and indent accordingly when applying.
preconditions = {
    "untranslated": (
        "simplecaps", "variables", "startcaps", "accelerators",
        "brackets", "endpunc", "acronyms", "xmltags",
        "startpunc", "endwhitespace", "startwhitespace", "escapes",
        "doublequoting", "singlequoting", "filepaths", "purepunc",
        "doublespacing", "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords", "emails",
        "simpleplurals", "urls", "printf", "tabs",
        "newlines", "functions", "options", "blank",
        "nplurals", "gconf"),
    "blank": (
        "simplecaps", "variables", "startcaps", "accelerators",
        "brackets", "endpunc", "acronyms", "xmltags",
        "startpunc", "endwhitespace", "startwhitespace", "escapes",
        "doublequoting", "singlequoting", "filepaths", "purepunc",
        "doublespacing", "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords", "emails",
        "simpleplurals", "urls", "printf", "tabs",
        "newlines", "functions", "options", "gconf"),
    "credits": (
        "simplecaps", "variables", "startcaps", "accelerators",
        "brackets", "endpunc", "acronyms", "xmltags",
        "startpunc", "escapes", "doublequoting", "singlequoting",
        "filepaths", "doublespacing", "sentencecount", "numbers",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options"),
    "purepunc": ("startcaps", "options"),
    # The entry below is deliberately disabled. It has caused problems
    # since Python 2.6, as startcaps is now seen as an important one to
    # always execute and could now be done before it is blocked by a
    # failing "untranslated" or "blank" test. This is probably happening
    # due to a slightly different implementation of the internal dict
    # handling since Python 2.6. We should never have relied on this
    # ordering anyway.
    #"startcaps": ("simplecaps",),
    "endwhitespace": ("endpunc",),
    "startwhitespace": ("startpunc",),
    "unchanged": ("doublewords",),
    "compendiumconflicts": (
        "accelerators", "brackets", "escapes", "numbers",
        "startpunc", "long", "variables", "startcaps",
        "sentencecount", "simplecaps", "doublespacing", "endpunc",
        "xmltags", "startwhitespace", "endwhitespace", "singlequoting",
        "doublequoting", "filepaths", "purepunc", "doublewords",
        "printf"),
}

# code to actually run the tests (use unittest?)

# Settings that OpenOfficeChecker merges into its checker configuration.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", None), ("%", 0),
        ("$(", ")"), ("$", "$"), ("${", "}"), ("#", "#"),
        ("#", 1), ("#", 0), ("($", ")"), ("$[", "]"),
        ("[", "]"), ("$", None),
    ],
    ignoretags=[
        ("alt", "xml-lang", None),
        ("ahelp", "visibility", "visible"),
        ("img", "width", None),
        ("img", "height", None),
    ],
    canchangetags=[("link", "name", None)],
)
class OpenOfficeChecker(StandardChecker):
    """StandardChecker with ``openofficeconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge ``openofficeconfig`` into the ``checkerconfig`` keyword.

        A ``checkerconfig`` supplied by the caller is updated in place; when
        it is missing or ``None`` a new ``CheckerConfig`` is created and
        stored in ``kwargs`` before ``StandardChecker.__init__`` runs.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
# Settings that MozillaChecker merges into its checker configuration.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1), ("$", "$"),
        ("$", None), ("#", 1), ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"],
)
class MozillaChecker(StandardChecker):
    """StandardChecker with ``mozillaconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge ``mozillaconfig`` into the ``checkerconfig`` keyword.

        A ``checkerconfig`` supplied by the caller is updated in place; when
        it is missing or ``None`` a new ``CheckerConfig`` is created and
        stored in ``kwargs`` before ``StandardChecker.__init__`` runs.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

    def credits(self, str1, str2):
        """checks for messages containing translation credits instead of normal translations.

        Fails when any entry of ``self.locations`` is one of the known
        credit locations. Neither string is inspected.
        """
        credit_locations = ('MOZ_LANGPACK_CONTRIBUTORS', 'credit.translation')
        return not any(location in credit_locations
                       for location in self.locations)
# Settings that DrupalChecker merges into its checker configuration.
drupalconfig = CheckerConfig(
    varmatches=[
        ("%", None),
        ("@", None),
    ],
)
class DrupalChecker(StandardChecker):
    """StandardChecker with ``drupalconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge ``drupalconfig`` into the ``checkerconfig`` keyword.

        A ``checkerconfig`` supplied by the caller is updated in place; when
        it is missing or ``None`` a new ``CheckerConfig`` is created and
        stored in ``kwargs`` before ``StandardChecker.__init__`` runs.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
# Settings that GnomeChecker merges into its checker configuration.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[
        ("%", 1),
        ("$(", ")"),
    ],
    credit_sources=[u"translator-credits"],
)
class GnomeChecker(StandardChecker):
    """StandardChecker with ``gnomeconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge ``gnomeconfig`` into the ``checkerconfig`` keyword.

        A ``checkerconfig`` supplied by the caller is updated in place; when
        it is missing or ``None`` a new ``CheckerConfig`` is created and
        stored in ``kwargs`` before ``StandardChecker.__init__`` runs.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only applies when an entry of ``self.locations`` contains
        ``schemas.in``. Each match of ``gconf_attribute_re`` in the first
        string must then occur in the second string, compared without its
        first and last character.

        Raises ``FilterFailure`` listing the attributes that do not,
        otherwise returns ``True``.
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            stopwords = [attribute for attribute in attributes
                         if attribute[1:-1] not in str2]
            if stopwords:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(stopwords)))
        return True
# Settings that KdeChecker merges into its checker configuration.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[
        u"Your names",
        u"Your emails",
        u"ROLES_OF_TRANSLATORS",
    ],
)
class KdeChecker(StandardChecker):
    """StandardChecker with ``kdeconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge ``kdeconfig`` into the ``checkerconfig`` keyword.

        A ``checkerconfig`` supplied by the caller is updated in place; when
        it is missing or ``None`` a new ``CheckerConfig`` is created and
        stored in ``kwargs`` before ``StandardChecker.__init__`` runs.
        """
        # TODO allow setup of KDE plural and translator comments so that they
        # do not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
# Settings that CCLicenseChecker merges into its checker configuration.
cclicenseconfig = CheckerConfig(
    varmatches=[("@", "@")],
)
class CCLicenseChecker(StandardChecker):
    """StandardChecker with ``cclicenseconfig`` merged into its configuration."""

    def __init__(self, **kwargs):
        """Merge ``cclicenseconfig`` into the ``checkerconfig`` keyword.

        A ``checkerconfig`` supplied by the caller is updated in place; when
        it is missing or ``None`` a new ``CheckerConfig`` is created and
        stored in ``kwargs`` before ``StandardChecker.__init__`` runs.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
# Maps a project name to the checker class used for it. Note that "wx"
# shares the KDE checker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
}
class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy.

        Passes (returns a true value) when ``unit.isfuzzy()`` is false.
        """
        return not unit.isfuzzy()

    def isreview(self, unit):
        """Check if the unit has been marked review.

        Passes (returns a true value) when ``unit.isreview()`` is false.
        """
        return not unit.isreview()

    def nplurals(self, unit):
        """Checks for the correct number of noun forms for plural translations.

        Units without plurals always pass. For plural units the number of
        ``unit.target.strings`` must equal ``self.config.lang.nplurals``.
        """
        if not unit.hasplural():
            return True
        expected = self.config.lang.nplurals
        # if we don't have a valid nplurals value, don't run the test
        if expected > 0:
            return len(unit.target.strings) == expected
        return True

    def hassuggestion(self, unit):
        """Checks if there is at least one suggested translation for this unit.

        Suggestions are the units of ``self.suggestion_store`` with the same
        source, when such a store is set; otherwise, for XLIFF units, the
        result of ``unit.getalttrans()``. The check passes when there are
        none. As a side effect ``self.suggestion_store`` is set to ``None``
        if the attribute did not exist yet.
        """
        store = getattr(self, 'suggestion_store', None)
        self.suggestion_store = store
        if store:
            wanted = unit.source
            suggestions = [candidate for candidate in store.units
                           if candidate.source == wanted]
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        else:
            suggestions = []
        return not suggestions
1187 1188
def runtests(str1, str2, ignorelist=()):
    """verifies that the tests pass for a pair of strings

    Both strings are normalised with ``data.normalized_unicode`` and
    wrapped in a ``TranslationUnit`` (``str1`` as source, ``str2`` as
    target). A ``StandardChecker`` that excludes the filters in
    ``ignorelist`` is run over the unit, and one line is printed per
    failing test.

    Returns the failures as produced by ``run_filters``, so the result is
    empty when every test passed.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        # A single parenthesised argument prints the same text whether
        # print is a statement or a function.
        print("failure: %s: %s\n %r\n %r" % (test, failures[test], source, target))
    return failures
1201
def batchruntests(pairs):
    """runs test on a batch of string pairs

    ``pairs`` is a sequence of ``(source, target)`` string tuples. Every
    pair is passed to ``runtests``, which prints its own failures; a
    summary line with the number of passing pairs is printed at the end.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures it found, so a pair has passed
        # only when that result is empty.
        if not runtests(str1, str2):
            passed += 1
    # A single parenthesised argument prints the same text whether print
    # is a statement or a function.
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
if __name__ == '__main__':
    # Sample (source, target) pairs run through batchruntests when this
    # module is executed as a script.
    testset = [
        (r"simple",
         r"somple"),
        (r"\this equals \that",
         r"does \this equal \that?"),
        (r"this \'equals\' that",
         r"this 'equals' that"),
        (r" start and end! they must match.",
         r"start and end! they must match."),
        (r"check for matching %variables marked like %this",
         r"%this %variable is marked"),
        (r"check for mismatching %variables marked like %this",
         r"%that %variable is marked"),
        (r"check for mismatching %variables% too",
         r"how many %variable% are marked"),
        (r"%% %%",
         r"%%"),
        (r"Row: %1, Column: %2",
         r"Mothalo: %1, Kholomo: %2"),
        (r"simple lowercase",
         r"it is all lowercase"),
        (r"simple lowercase",
         r"It Is All Lowercase"),
        (r"Simple First Letter Capitals",
         r"First Letters"),
        (r"SIMPLE CAPITALS",
         r"First Letters"),
        (r"SIMPLE CAPITALS",
         r"ALL CAPITALS"),
        (r"forgot to translate",
         r" "),
    ]
    batchruntests(testset)