Package translate :: Package convert :: Module xliff2oo
[hide private]
[frames | no frames]

Source Code for Module translate.convert.xliff2oo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2004-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  """convert XLIFF localization files to an OpenOffice.org (SDF) localization file""" 
 24   
 25  import sys 
 26  import os 
 27  from translate.storage import oo 
 28  from translate.storage import factory 
 29  from translate.filters import pofilter 
 30  from translate.filters import checks 
 31  from translate.filters import autocorrect  
 32  import time 
 33   
class reoo:
    """Applies the translations of a translation store to an oo file.

    The oo file is parsed from a template when the converter is constructed;
    convertstore() then copies each accepted unit's target text onto the
    matching oo entry and returns the modified oo file object.
    """

    def __init__(self, templatefile, languages=None, timestamp=None, includefuzzy=False, long_keys=False, filteraction="exclude"):
        """construct a reoo converter for the specified languages (timestamp=0 means leave unchanged)

        templatefile is a file-like object whose read() returns the oo source.
        languages is a pair of language ids (source, target), or None to work
        on the first and second line of every oo entry.
        timestamp is a time tuple; None selects the fixed default
        2002-02-02 02:02:02, and any false value leaves timestamps untouched.
        includefuzzy says whether fuzzy units are applied as well.
        long_keys is passed on to oo.makekey when the index is built.
        filteraction is handed to the module-level filter in convertstore().
        """
        # long_keys must be set before readoo(), because makeindex() reads it
        self.long_keys = long_keys
        self.readoo(templatefile)
        self.languages = languages
        self.filteraction = filteraction
        if timestamp is None:
            self.timestamp = time.strptime("2002-02-02 02:02:02", "%Y-%m-%d %H:%M:%S")
        else:
            self.timestamp = timestamp
        if self.timestamp:
            self.timestamp_str = time.strftime("%Y-%m-%d %H:%M:%S", self.timestamp)
        else:
            self.timestamp_str = None
        self.includefuzzy = includefuzzy

    def makeindex(self):
        """makes an index of the oo keys that are used in the source file"""
        self.index = {}
        for ookey, theoo in self.o.ookeys.iteritems():
            sourcekey = oo.makekey(ookey, self.long_keys)
            self.index[sourcekey] = theoo

    def readoo(self, of):
        """read in the oo from the file"""
        oosrc = of.read()
        self.o = oo.oofile()
        self.o.parse(oosrc)
        self.makeindex()

    def handleunit(self, unit):
        """applies one unit to every oo entry named by the unit's locations

        Each location is split at its last '.' into the oo key and the subkey
        (the attribute to set). Locations whose key is not in the index are
        reported on stderr and skipped.
        """
        # TODO: make this work for multiple columns in oo...
        locations = unit.getlocations()
        # technically our formats should just have one location for each entry...
        # but we handle multiple ones just to be safe...
        for location in locations:
            subkeypos = location.rfind('.')
            subkey = location[subkeypos+1:]
            key = location[:subkeypos]
            # this is just to handle our old system of using %s/%s:%s instead of %s/%s#%s
            key = key.replace(':', '#')
            # this is to handle using / instead of \ in the sourcefile...
            key = key.replace('\\', '/')
            key = oo.normalizefilename(key)
            if key in self.index:
                # now we need to replace the definition of entity with msgstr
                theoo = self.index[key] # find the oo
                self.applytranslation(key, subkey, theoo, unit)
            else:
                print >> sys.stderr, "couldn't find key %s from po in %d keys" % (key, len(self.index))
                try:
                    sourceunitlines = str(unit)
                    if isinstance(sourceunitlines, unicode):
                        sourceunitlines = sourceunitlines.encode("utf-8")
                    print >> sys.stderr, sourceunitlines
                except Exception:
                    # str(unit) may be the call that failed, so it must not be
                    # repeated here; fall back to the unit's repr instead
                    print >> sys.stderr, "error outputting source unit %r" % (unit,)

    def applytranslation(self, key, subkey, theoo, unit):
        """applies the translation from the source unit to the oo unit

        Nothing is changed when the unit is fuzzy (unless includefuzzy is set)
        or when the unit has no target text (None, empty or only whitespace).
        Otherwise the target is stored under the attribute named by subkey on
        the target-language line, which is created as a copy of the source
        line and added to theoo when it does not exist yet.
        """
        if not self.includefuzzy and unit.isfuzzy():
            return
        makecopy = False
        if self.languages is None:
            part1 = theoo.lines[0]
            if len(theoo.lines) > 1:
                part2 = theoo.lines[1]
            else:
                makecopy = True
        else:
            part1 = theoo.languages[self.languages[0]]
            if self.languages[1] in theoo.languages:
                part2 = theoo.languages[self.languages[1]]
            else:
                makecopy = True
        if makecopy:
            part2 = oo.ooline(part1.getparts())
        unquotedstr = unit.target
        # If there is no translation, we don't want to add a line.
        # A missing target (None) is treated the same as an empty one.
        if unquotedstr is None or len(unquotedstr.strip()) == 0:
            return
        if isinstance(unquotedstr, unicode):
            unquotedstr = unquotedstr.encode("UTF-8")
        # finally set the new definition in the oo
        subkey = subkey.strip()
        setattr(part2, subkey, unquotedstr)
        # set the modified time
        if self.timestamp_str:
            part2.timestamp = self.timestamp_str
        if self.languages:
            part2.languageid = self.languages[1]
        if makecopy:
            theoo.addline(part2)

    def convertstore(self, sourcestore):
        """applies every accepted unit of sourcestore and returns the oo file object

        Units are accepted or rejected by the module-level filter object,
        using the filteraction given to the constructor.
        """
        self.p = sourcestore
        # translate the strings
        for unit in self.p.units:
            # there may be more than one element due to msguniq merge
            if filter.validelement(unit, self.p.filename, self.filteraction):
                self.handleunit(unit)
        # return the modified oo file object
        return self.o
141
def getmtime(filename):
    """Returns the modification time of filename as a local time tuple."""
    import stat
    file_status = os.stat(filename)
    modified_seconds = file_status[stat.ST_MTIME]
    return time.localtime(modified_seconds)
145
class oocheckfilter(pofilter.pocheckfilter):
    """Check filter that decides whether a unit may be used in the conversion."""

    def validelement(self, unit, filename, filteraction):
        """Returns whether or not to use unit in conversion. (filename is just for error reporting)

        filteraction "none" accepts every unit without running any check.
        Otherwise the unit is run through filterunit(); the first failure
        listed in options.error is reported as an error and rejects the unit
        for "exclude-serious" and "exclude-all". If no failure is an error,
        the first failure that counts as a warning is reported and rejects
        the unit only for "exclude-all". One message is written to stderr.
        """
        if filteraction == "none":
            return True
        filterresult = self.filterunit(unit)
        if not filterresult:
            return True
        # NOTE(review): comparing against the autocorrect module suggests that
        # filterunit returns that module as a marker when it corrected the
        # unit itself - confirm against pofilter.
        if filterresult == autocorrect:
            return True
        failures = list(filterresult.iteritems())
        # a unit without locations must not abort the whole conversion
        locations = unit.getlocations()
        if locations:
            location = locations[0]
        else:
            location = ""
        # errors take precedence over warnings, whatever order the failures
        # happen to be iterated in
        for filtername, filtermessage in failures:
            if filtername in self.options.error:
                print >> sys.stderr, "Error at %s::%s: %s" % (filename, location, filtermessage)
                return filteraction not in ["exclude-all", "exclude-serious"]
        for filtername, filtermessage in failures:
            if filtername in self.options.warning or self.options.alwayswarn:
                print >> sys.stderr, "Warning at %s::%s: %s" % (filename, location, filtermessage)
                return filteraction not in ["exclude-all"]
        return True
162
class oofilteroptions:
    """Option values used to configure the module-level oo check filter."""
    # failures of these tests are reported as errors
    error = ["variables", "xmltags", "escapes"]
    # failures of these tests are reported as warnings
    warning = ["blank"]
    #To only issue warnings for tests listed in warning, change the following to False:
    alwayswarn = True
    limitfilters = error + warning
    #To use all available tests, uncomment the following:
    #limitfilters = []
    #To exclude certain tests, list them in here:
    excludefilters = {}
    includefuzzy = False
    includereview = False
    includeheader = False
    autocorrect = False

# Module-level filter objects: convertstore() calls filter.validelement()
# to decide which units are applied.
options = oofilteroptions()
filter = oocheckfilter(
    options,
    [checks.OpenOfficeChecker, checks.StandardUnitChecker],
    checks.openofficeconfig,
)

def convertoo(inputfile, outputfile, templatefile, sourcelanguage=None, targetlanguage=None, timestamp=None, includefuzzy=False, multifilestyle="single", filteraction=None):
    """Merges the translations in inputfile into templatefile and writes the result to outputfile.

    Raises ValueError when no target language or no template file is given.
    When no source language is given it is "01" for an all-digit target
    language and "en-US" otherwise. Returns True after writing the output.
    """
    inputstore = factory.getobject(inputfile)
    inputstore.filename = getattr(inputfile, 'name', '')
    if not targetlanguage:
        raise ValueError("You must specify the target language")
    if not sourcelanguage:
        if targetlanguage.isdigit():
            sourcelanguage = "01"
        else:
            sourcelanguage = "en-US"
    if templatefile is None:
        raise ValueError("must have template file for oo files")
    # long keys are used for every multifile style other than "single"
    use_long_keys = multifilestyle != "single"
    convertor = reoo(
        templatefile,
        languages=(sourcelanguage, targetlanguage),
        timestamp=timestamp,
        includefuzzy=includefuzzy,
        long_keys=use_long_keys,
        filteraction=filteraction,
    )
    mergedstore = convertor.convertstore(inputstore)
    # TODO: check if we need to manually delete missing items
    outputfile.write(str(mergedstore))
    return True
200
def main(argv=None):
    """Builds the command line option parser for this converter and runs it on argv."""
    from translate.convert import convert
    formats = {
        ("po", "oo"): ("oo", convertoo),
        ("xlf", "oo"): ("oo", convertoo),
        ("xlf", "sdf"): ("sdf", convertoo),
    }
    # always treat the input as an archive unless it is a directory
    archiveformats = {
        (None, "output"): oo.oomultifile,
        (None, "template"): oo.oomultifile,
    }
    parser = convert.ArchiveConvertOptionParser(
        formats,
        usetemplates=True,
        description=__doc__,
        archiveformats=archiveformats,
    )
    parser.add_option(
        "-l", "--language", dest="targetlanguage", default=None,
        help="set target language code (e.g. af-ZA) [required]", metavar="LANG")
    parser.add_option(
        "", "--source-language", dest="sourcelanguage", default=None,
        help="set source language code (default en-US)", metavar="LANG")
    parser.add_option(
        "-T", "--keeptimestamp", dest="timestamp", default=None,
        action="store_const", const=0,
        help="don't change the timestamps of the strings")
    parser.add_option(
        "", "--nonrecursiveoutput", dest="allowrecursiveoutput", default=True,
        action="store_false",
        help="don't treat the output oo as a recursive store")
    parser.add_option(
        "", "--nonrecursivetemplate", dest="allowrecursivetemplate", default=True,
        action="store_false",
        help="don't treat the template oo as a recursive store")
    parser.add_option(
        "", "--filteraction", dest="filteraction", default="none", metavar="ACTION",
        help="action on pofilter failure: none (default), warn, exclude-serious, exclude-all")
    parser.add_fuzzy_option()
    parser.add_multifile_option()
    # these option values are appended to the parser's passthrough list
    for optionname in ("sourcelanguage", "targetlanguage", "timestamp", "filteraction"):
        parser.passthrough.append(optionname)
    parser.run(argv)

# Run the command line converter only when this module is executed as a script.
if __name__ == '__main__':
    main()