Package translate :: Package tools :: Module posegment
[hide private]
[frames | no frames]

Source Code for Module translate.tools.posegment

 1  #!/usr/bin/env python 
 2  # -*- coding: utf-8 -*- 
 3  # 
 4  # Copyright 2008-2009 Zuza Software Foundation 
 5  # 
 6  # This file is part of the Translate Toolkit. 
 7  # 
 8  # This program is free software; you can redistribute it and/or modify 
 9  # it under the terms of the GNU General Public License as published by 
10  # the Free Software Foundation; either version 2 of the License, or 
11  # (at your option) any later version. 
12  # 
13  # This program is distributed in the hope that it will be useful, 
14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
16  # GNU General Public License for more details. 
17  # 
18  # You should have received a copy of the GNU General Public License 
19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
20   
21  """Segment Gettext PO, XLIFF and TMX localization files at the sentence level. 
22   
23  See: http://translate.sourceforge.net/wiki/toolkit/posegment for examples and 
24  usage instructions 
25  """ 
26   
27  from translate.storage import factory 
28  from translate.lang import factory as lang_factory 
29   
class segment:
    """Split translation units into one unit per sentence.

    The source text of a unit is segmented with ``sourcelang`` and the target
    text with ``targetlang``.  Both objects must provide a
    ``sentences(text, strip=...)`` method returning a list of strings.
    """

    def __init__(self, sourcelang, targetlang, stripspaces=True):
        """Remember the language objects and the whitespace-stripping flag.

        :param sourcelang: object used to segment ``unit.source``
        :param targetlang: object used to segment ``unit.target``
        :param stripspaces: forwarded as ``strip`` to ``sentences()``
        """
        self.sourcelang = sourcelang
        self.targetlang = targetlang
        self.stripspaces = stripspaces

    def segmentunit(self, unit):
        """Return the list of units that should replace ``unit``.

        The original unit is returned unchanged (as a one-element list) when
        it is a header, has plural forms, has a source that yields no
        sentences, or is translated but its source and target split into a
        different number of sentences.  Otherwise one copy of the unit is
        produced per source sentence; untranslated units get an empty target
        on every copy.

        :param unit: a translation unit providing ``isheader()``,
            ``hasplural()``, ``istranslated()``, ``copy()``, ``source`` and
            ``target``
        :return: list of units
        """
        if unit.isheader() or unit.hasplural():
            return [unit]

        sourcesegments = self.sourcelang.sentences(unit.source, strip=self.stripspaces)
        if not sourcesegments:
            # Nothing to split: keep the unit instead of silently dropping it
            # from the output store.
            return [unit]

        if unit.istranslated():
            targetsegments = self.targetlang.sentences(unit.target, strip=self.stripspaces)
            if len(sourcesegments) != len(targetsegments):
                # Sentences cannot be paired up one-to-one, so leave the unit
                # alone rather than misalign source and target.
                return [unit]
        else:
            # The target of an untranslated unit is never segmented (it may
            # be empty or None); every new unit simply gets an empty target.
            targetsegments = [""] * len(sourcesegments)

        # We could do more here to check if the lengths correspond more or
        # less, certain quality checks are passed, etc.  But for now this is
        # a good start.
        units = []
        for sourcesegment, targetsegment in zip(sourcesegments, targetsegments):
            newunit = unit.copy()
            newunit.source = sourcesegment
            newunit.target = targetsegment
            units.append(newunit)
        return units

    def convertstore(self, fromstore):
        """Build a new store of the same type holding the segmented units.

        :param fromstore: store whose ``units`` are segmented; its type must
            be constructible without arguments and provide ``addunit()``
        :return: the newly created store
        """
        tostore = type(fromstore)()
        for unit in fromstore.units:
            for newunit in self.segmentunit(unit):
                tostore.addunit(newunit)
        return tostore
65
def segmentfile(inputfile, outputfile, templatefile, sourcelanguage="en", targetlanguage=None, stripspaces=True):
    """Segment the store read from ``inputfile`` and write it to ``outputfile``.

    ``templatefile`` is accepted only because the converter framework passes
    it; it is not used.

    :return: 0 if the input store is empty (nothing is written), else 1
    """
    store = factory.getobject(inputfile)
    if store.isempty():
        return 0

    splitter = segment(
        lang_factory.getlanguage(sourcelanguage),
        lang_factory.getlanguage(targetlanguage),
        stripspaces=stripspaces
    )
    outputfile.write(str(splitter.convertstore(store)))
    return 1
78
def main():
    """Command-line entry point: register the options and run the converter.

    PO, XLIFF and TMX inputs are each converted to the same format using
    ``segmentfile``.
    """
    from translate.convert import convert

    formats = {
        "po": ("po", segmentfile),
        "xlf": ("xlf", segmentfile),
        "tmx": ("tmx", segmentfile),
    }
    parser = convert.ConvertOptionParser(formats, usepots=True, description=__doc__)
    parser.add_option("-l", "--language", dest="targetlanguage", default=None,
                      help="the target language code", metavar="LANG")
    # The help text promises 'en' as the default and segmentfile() itself
    # defaults to "en", so the option default is set to match.
    # NOTE(review): presumably a passthrough option is forwarded verbatim, in
    # which case the former default of None replaced segmentfile's "en" --
    # confirm against ConvertOptionParser.
    parser.add_option("", "--source-language", dest="sourcelanguage", default="en",
                      help="the source language code (default 'en')", metavar="LANG")
    parser.passthrough.append("sourcelanguage")
    parser.passthrough.append("targetlanguage")
    parser.add_option("", "--keepspaces", dest="stripspaces", action="store_false",
                      default=True, help="Disable automatic stripping of whitespace")
    parser.passthrough.append("stripspaces")
    parser.run()


# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()