Package translate :: Package tools :: Module pretranslate
[hide private]
[frames] | no frames]

Source Code for Module translate.tools.pretranslate

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Fill localization files with suggested translations based on 
 22  translation memory and existing translations. 
 23  """ 
 24   
 25  from translate.storage import factory 
 26  from translate.storage import xliff 
 27  from translate.search import match 
 28   
# Module-level cache for the translation-memory matcher. memory() builds the
# matcher on its first call and stores it here, so the TM is not
# reinitialised on every subsequent call.
tmmatcher = None
 31   
def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
    """Return the shared translation-memory matcher, creating it if needed.

    The matcher is built only on the first call and cached in the
    module-level ``tmmatcher``; once a matcher is cached, later calls return
    it as-is and their arguments are not used.

    :param tmfiles: a single TM file, or a list of TM files, handed to
        ``factory.getobject``
    :param max_candidates: forwarded to ``match.matcher``
    :param min_similarity: forwarded to ``match.matcher``
    :param max_length: forwarded to ``match.matcher``
    :return: the cached matcher
    """
    global tmmatcher

    # Fast path: a matcher already exists, so reuse it untouched.
    if tmmatcher is not None:
        return tmmatcher

    if isinstance(tmfiles, list):
        tmstore = []
        for tmfile in tmfiles:
            tmstore.append(factory.getobject(tmfile))
    else:
        tmstore = factory.getobject(tmfiles)

    tmmatcher = match.matcher(
        tmstore,
        max_candidates=max_candidates,
        min_similarity=min_similarity,
        max_length=max_length,
    )
    return tmmatcher
43 44
def pretranslate_file(input_file, output_file, template_file, tm=None, min_similarity=75, fuzzymatching=True):
    """Pretranslate a factory-supported file using old translations and TM.

    :param input_file: file to pretranslate, loaded with ``factory.getobject``
    :param output_file: object whose ``write`` receives the serialised result
    :param template_file: file holding existing translations, or ``None``
    :param tm: translation memory, forwarded to ``pretranslate_store``
    :param min_similarity: forwarded to ``pretranslate_store``
    :param fuzzymatching: forwarded to ``pretranslate_store``
    :return: always ``1``
    """
    input_store = factory.getobject(input_file)

    # The template is optional; only load it when one was supplied.
    if template_file is not None:
        template_store = factory.getobject(template_file)
    else:
        template_store = None

    result = pretranslate_store(input_store, template_store, tm, min_similarity, fuzzymatching)
    output_file.write(str(result))
    return 1
55 56
def match_template_id(input_unit, template_store):
    """Return the unit in ``template_store`` that matches ``input_unit``.

    Two strategies are used. PO files produced by moz2po and oo2po use the
    location as a unique identifier, and are detected (cheaply) by the first
    location NOT containing a ':'. Everything else is matched the normal
    gettext way, on source text plus context.

    :return: the matching template unit, or ``None`` when nothing matches
    """
    locations = input_unit.getlocations()

    if locations and ":" not in locations[0]:
        # Location-keyed matching: look each location up in the template
        # index, then insist on the same source and a non-empty target.
        # This makes no sense for normal gettext files.
        index = template_store.locationindex
        for location in locations:
            candidate = index.get(location)
            # Do we really want to discard units with matching locations
            # but no matching source?
            if (candidate is not None
                    and candidate.source == input_unit.source
                    and candidate.gettargetlen() > 0):
                return candidate
        return None

    # Normal gettext-like matching: same source text and same context.
    candidates = template_store.findunits(input_unit.source)
    for candidate in candidates or ():
        if candidate.getcontext() == input_unit.getcontext():
            return candidate
    return None
80 81
def match_fuzzy(input_unit, matchers):
    """Return the best fuzzy match from the first matcher that yields one.

    Matchers are consulted in order; the first candidate of the first
    matcher returning a non-empty result wins.

    :return: the first candidate found, or ``None`` if no matcher has any
    """
    for engine in matchers:
        hits = engine.matches(input_unit.source)
        if not hits:
            # Nothing from this matcher; fall through to the next one.
            continue
        return hits[0]
    return None
88 89
def pretranslate_unit(input_unit, template_store, matchers=None, mark_reused=False):
    """Pretranslate one unit; return it unchanged if no translation is found.

    An exact match from the template (via ``match_template_id``) is tried
    first. If that gives no translated unit and ``matchers`` is non-empty, a
    fuzzy match (via ``match_fuzzy``) is tried instead.

    :param input_unit: the unit to fill in; it is modified in place
    :param template_store: store with existing translations, may be falsy
    :param matchers: fuzzy matchers to consult, in order
    :param mark_reused: when true, flag the template unit that supplied the
        translation by setting its ``reused`` attribute
    :return: ``input_unit``
    """
    # Template matching.
    found = match_template_id(input_unit, template_store) if template_store else None

    if found and found.gettargetlen() > 0:
        input_unit.merge(found, authoritative=True)
    elif matchers:
        # Fuzzy matching.
        found = match_fuzzy(input_unit, matchers)
        if found and found.gettargetlen() > 0:
            # FIXME: should we dispatch here instead of this crude type check
            if not isinstance(input_unit, xliff.xliffunit):
                input_unit.merge(found, authoritative=True)
            else:
                # FIXME: what about origin, lang and matchquality
                input_unit.addalttrans(found.target, origin="fish", sourcetxt=found.source)

    # FIXME: ugly hack required by pot2po to mark old translations that
    # were reused for the new file.
    if mark_reused and found and template_store:
        source_unit = template_store.findunit(found.source)
        if source_unit is not None:
            source_unit.reused = True

    return input_unit
119
def prepare_template_pofile(template_store):
    """PO-specific template preparation: resurrect every obsolete unit.

    :param template_store: store whose ``units`` are inspected and, when
        obsolete, resurrected in place
    """
    # Do we want to consider obsolete translations?
    # The generator is lazy, so each unit is still checked and resurrected
    # one at a time, in store order.
    obsolete_units = (unit for unit in template_store.units if unit.isobsolete())
    for unit in obsolete_units:
        unit.resurrect()
126
def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True):
    """Do the actual pretranslation of a whole store.

    :param input_store: store whose translatable units are pretranslated in
        place
    :param template_store: store with existing translations, or ``None``
    :param tm: translation memory file(s) handed to ``memory()``; only used
        when ``fuzzymatching`` is true
    :param min_similarity: minimum similarity passed to the fuzzy matchers
    :param fuzzymatching: when false, no fuzzy matchers are created and only
        template matching is performed
    :return: ``input_store``
    """
    # Preparation
    matchers = []

    # Prepare template
    if template_store is not None:
        template_store.makeindex()
        # Template preparation based on type: an optional module-level hook
        # named after the store's class, e.g. prepare_template_pofile.
        prepare_template = "prepare_template_%s" % template_store.__class__.__name__
        # Use the ``in`` operator rather than dict.has_key(), which does not
        # exist on Python 3 dictionaries.
        if prepare_template in globals():
            globals()[prepare_template](template_store)

        if fuzzymatching:
            # Create template matcher
            # FIXME: max_length hardcoded
            matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True)
            matcher.addpercentage = False
            matchers.append(matcher)

    # Prepare tm: create tm matcher
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        matcher.addpercentage = False
        matchers.append(matcher)

    # Main loop: pretranslate_unit modifies and returns the same unit, so
    # its return value does not need to be kept.
    for input_unit in input_store.units:
        if input_unit.istranslatable():
            pretranslate_unit(input_unit, template_store, matchers)

    return input_store
160 161
def main(argv=None):
    """Command-line entry point: build the option parser and run it.

    :param argv: argument list passed to the parser's ``run``
    """
    from translate.convert import convert

    # Every supported input extension is registered twice: on its own and
    # paired with the template extension. All map to pretranslate_file.
    formats = {}
    for input_ext, output_ext in (("pot", "po"), ("po", "po"), ("xlf", "xlf")):
        handler = (output_ext, pretranslate_file)
        formats[input_ext] = handler
        formats[(input_ext, output_ext)] = handler

    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__,
    )

    defaultsimilarity = 75
    parser.add_option(
        "", "--tm", dest="tm", default=None,
        help="The file to use as translation memory when fuzzy matching")
    parser.add_option(
        "-s", "--similarity", dest="min_similarity",
        default=defaultsimilarity, type="float",
        help="The minimum similarity for inclusion (default: %d%%)" % defaultsimilarity)
    parser.add_option(
        "--nofuzzymatching", dest="fuzzymatching", action="store_false",
        default=True, help="Disable fuzzy matching")

    # Option values that must be handed through to pretranslate_file.
    for option_name in ("tm", "min_similarity", "fuzzymatching"):
        parser.passthrough.append(option_name)

    parser.run(argv)
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()