Package translate :: Package storage :: Module xliff
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.xliff

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2005-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Module for handling XLIFF files for translation. 
 22   
The official recommendation is to use the extension .xlf for XLIFF files.
 24  """ 
 25   
 26  from lxml import etree 
 27   
 28  from translate.misc.multistring import multistring 
 29  from translate.misc.xml_helpers import * 
 30  from translate.storage import base, lisa 
 31  from translate.storage.lisa import getXMLspace 
 32  from translate.storage.placeables.lisa import xml_to_strelem, strelem_to_xml 
 33   
 34  # TODO: handle translation types 
 35   
class xliffunit(lisa.LISAunit):
    """A single term in the xliff file."""

    # Tag name of the element that represents one unit.
    rootNode = "trans-unit"
    # Tag name that getlanguageNodes() looks up for the source text.
    languageNode = "source"
    # NOTE(review): presumably the tag wrapping the text inside a language
    # node (none for XLIFF) -- confirm against lisa.LISAunit.
    textNode = ""
    # XML namespace of XLIFF 1.1 documents.
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'

    # Fallback handed to getXMLspace() when text is read from this unit.
    _default_xml_space = "default"

    #TODO: id and all the trans-unit level stuff
48 - def __init__(self, source, empty=False, **kwargs):
49 """Override the constructor to set xml:space="preserve".""" 50 if empty: 51 return 52 super(xliffunit, self).__init__(source, empty, **kwargs) 53 lisa.setXMLspace(self.xmlelement, "preserve")
54
55 - def createlanguageNode(self, lang, text, purpose):
56 """Returns an xml Element setup with given parameters.""" 57 58 #TODO: for now we do source, but we have to test if it is target, perhaps 59 # with parameter. Alternatively, we can use lang, if supplied, since an xliff 60 #file has to conform to the bilingual nature promised by the header. 61 assert purpose 62 langset = etree.Element(self.namespaced(purpose)) 63 #TODO: check language 64 # lisa.setXMLlang(langset, lang) 65 66 # self.createPHnodes(langset, text) 67 langset.text = text 68 return langset
69
70 - def getlanguageNodes(self):
71 """We override this to get source and target nodes.""" 72 source = None 73 target = None 74 nodes = [] 75 try: 76 source = self.xmlelement.iterchildren(self.namespaced(self.languageNode)).next() 77 target = self.xmlelement.iterchildren(self.namespaced('target')).next() 78 nodes = [source, target] 79 except StopIteration: 80 if source is not None: 81 nodes.append(source) 82 if not target is None: 83 nodes.append(target) 84 return nodes
85
86 - def set_rich_source(self, value, sourcelang='en'):
87 sourcelanguageNode = self.get_source_dom() 88 if sourcelanguageNode is None: 89 sourcelanguageNode = self.createlanguageNode(sourcelang, u'', "source") 90 self.set_source_dom(sourcelanguageNode) 91 92 # Clear sourcelanguageNode first 93 for i in range(len(sourcelanguageNode)): 94 del sourcelanguageNode[0] 95 sourcelanguageNode.text = None 96 97 strelem_to_xml(sourcelanguageNode, value[0])
98
99 - def get_rich_source(self):
100 #rsrc = xml_to_strelem(self.source_dom) 101 #logging.debug('rich source: %s' % (repr(rsrc))) 102 #from dubulib.debug.misc import print_stack_funcs 103 #print_stack_funcs() 104 return [xml_to_strelem(self.source_dom, getXMLspace(self.xmlelement, self._default_xml_space))]
105 rich_source = property(get_rich_source, set_rich_source) 106
107 - def set_rich_target(self, value, lang='xx', append=False):
108 if value is None: 109 self.set_target_dom(self.createlanguageNode(lang, u'', "target")) 110 return 111 112 languageNode = self.get_target_dom() 113 if languageNode is None: 114 languageNode = self.createlanguageNode(lang, u'', "target") 115 self.set_target_dom(languageNode, append) 116 117 # Clear languageNode first 118 for i in range(len(languageNode)): 119 del languageNode[0] 120 languageNode.text = None 121 122 strelem_to_xml(languageNode, value[0])
123
124 - def get_rich_target(self, lang=None):
125 """retrieves the "target" text (second entry), or the entry in the 126 specified language, if it exists""" 127 return [xml_to_strelem(self.get_target_dom(lang), getXMLspace(self.xmlelement, self._default_xml_space))]
128 rich_target = property(get_rich_target, set_rich_target) 129
130 - def addalttrans(self, txt, origin=None, lang=None, sourcetxt=None, matchquality=None):
131 """Adds an alt-trans tag and alt-trans components to the unit. 132 133 @type txt: String 134 @param txt: Alternative translation of the source text. 135 """ 136 137 #TODO: support adding a source tag ad match quality attribute. At 138 # the source tag is needed to inject fuzzy matches from a TM. 139 if isinstance(txt, str): 140 txt = txt.decode("utf-8") 141 alttrans = etree.SubElement(self.xmlelement, self.namespaced("alt-trans")) 142 lisa.setXMLspace(alttrans, "preserve") 143 if sourcetxt: 144 if isinstance(sourcetxt, str): 145 sourcetxt = sourcetxt.decode("utf-8") 146 altsource = etree.SubElement(alttrans, self.namespaced("source")) 147 altsource.text = sourcetxt 148 alttarget = etree.SubElement(alttrans, self.namespaced("target")) 149 alttarget.text = txt 150 if matchquality: 151 alttrans.set("match-quality", matchquality) 152 if origin: 153 alttrans.set("origin", origin) 154 if lang: 155 lisa.setXMLlang(alttrans, lang)
156
157 - def getalttrans(self, origin=None):
158 """Returns <alt-trans> for the given origin as a list of units. No 159 origin means all alternatives.""" 160 translist = [] 161 for node in self.xmlelement.iterdescendants(self.namespaced("alt-trans")): 162 if self.correctorigin(node, origin): 163 # We build some mini units that keep the xmlelement. This 164 # makes it easier to delete it if it is passed back to us. 165 newunit = base.TranslationUnit(self.source) 166 167 # the source tag is optional 168 sourcenode = node.iterdescendants(self.namespaced("source")) 169 try: 170 newunit.source = lisa.getText(sourcenode.next(), getXMLspace(node, self._default_xml_space)) 171 except StopIteration: 172 pass 173 174 # must have one or more targets 175 targetnode = node.iterdescendants(self.namespaced("target")) 176 newunit.target = lisa.getText(targetnode.next(), getXMLspace(node, self._default_xml_space)) 177 #TODO: support multiple targets better 178 #TODO: support notes in alt-trans 179 newunit.xmlelement = node 180 181 translist.append(newunit) 182 return translist
183
184 - def delalttrans(self, alternative):
185 """Removes the supplied alternative from the list of alt-trans tags""" 186 self.xmlelement.remove(alternative.xmlelement)
187
188 - def addnote(self, text, origin=None):
189 """Add a note specifically in a "note" tag""" 190 if isinstance(text, str): 191 text = text.decode("utf-8") 192 note = etree.SubElement(self.xmlelement, self.namespaced("note")) 193 note.text = text.strip() 194 if origin: 195 note.set("from", origin)
196
197 - def getnotelist(self, origin=None):
198 """Private method that returns the text from notes matching 'origin' or all notes.""" 199 notenodes = self.xmlelement.iterdescendants(self.namespaced("note")) 200 # TODO: consider using xpath to construct initial_list directly 201 # or to simply get the correct text from the outset (just remember to 202 # check for duplication. 203 initial_list = [lisa.getText(note, getXMLspace(self.xmlelement, self._default_xml_space)) for note in notenodes if self.correctorigin(note, origin)] 204 205 # Remove duplicate entries from list: 206 dictset = {} 207 notelist = [dictset.setdefault(note, note) for note in initial_list if note not in dictset] 208 209 return notelist
210
211 - def getnotes(self, origin=None):
212 return '\n'.join(self.getnotelist(origin=origin))
213
214 - def removenotes(self, origin="translator"):
215 """Remove all the translator notes.""" 216 notes = self.xmlelement.iterdescendants(self.namespaced("note")) 217 for note in notes: 218 if self.correctorigin(note, origin=origin): 219 self.xmlelement.remove(note)
220
221 - def adderror(self, errorname, errortext):
222 """Adds an error message to this unit.""" 223 #TODO: consider factoring out: some duplication between XLIFF and TMX 224 text = errorname + ': ' + errortext 225 self.addnote(text, origin="pofilter")
226
227 - def geterrors(self):
228 """Get all error messages.""" 229 #TODO: consider factoring out: some duplication between XLIFF and TMX 230 notelist = self.getnotelist(origin="pofilter") 231 errordict = {} 232 for note in notelist: 233 errorname, errortext = note.split(': ') 234 errordict[errorname] = errortext 235 return errordict
236
237 - def isapproved(self):
238 """States whether this unit is approved.""" 239 return self.xmlelement.get("approved") == "yes"
240
241 - def markapproved(self, value=True):
242 """Mark this unit as approved.""" 243 if value: 244 self.xmlelement.set("approved", "yes") 245 elif self.isapproved(): 246 self.xmlelement.set("approved", "no")
247
248 - def isreview(self):
249 """States whether this unit needs to be reviewed""" 250 targetnode = self.getlanguageNode(lang=None, index=1) 251 return not targetnode is None and \ 252 "needs-review" in targetnode.get("state", "")
253
254 - def markreviewneeded(self, needsreview=True, explanation=None):
255 """Marks the unit to indicate whether it needs review. Adds an optional explanation as a note.""" 256 targetnode = self.getlanguageNode(lang=None, index=1) 257 if not targetnode is None: 258 if needsreview: 259 targetnode.set("state", "needs-review-translation") 260 if explanation: 261 self.addnote(explanation, origin="translator") 262 else: 263 del targetnode.attrib["state"]
264
265 - def isfuzzy(self):
266 # targetnode = self.getlanguageNode(lang=None, index=1) 267 # return not targetnode is None and \ 268 # (targetnode.get("state-qualifier") == "fuzzy-match" or \ 269 # targetnode.get("state") == "needs-review-translation") 270 return not self.isapproved()
271
272 - def markfuzzy(self, value=True):
273 if value: 274 self.markapproved(False) 275 else: 276 self.markapproved(True) 277 targetnode = self.getlanguageNode(lang=None, index=1) 278 if not targetnode is None: 279 if value: 280 targetnode.set("state", "needs-review-translation") 281 else: 282 for attribute in ["state", "state-qualifier"]: 283 if attribute in targetnode.attrib: 284 del targetnode.attrib[attribute]
285
286 - def settarget(self, text, lang='xx', append=False):
287 """Sets the target string to the given value.""" 288 super(xliffunit, self).settarget(text, lang, append) 289 if text: 290 self.marktranslated()

    # This code is commented out because it would almost always return False.
    # Leaving it out means pocount, etc. work well.
    #    def istranslated(self):
    #        targetnode = self.getlanguageNode(lang=None, index=1)
    #        return not targetnode is None and \
    #                (targetnode.get("state") == "translated")

299 - def istranslatable(self):
300 value = self.xmlelement.get("translate") 301 if value and value.lower() == 'no': 302 return False 303 return True
304
305 - def marktranslated(self):
306 targetnode = self.getlanguageNode(lang=None, index=1) 307 if targetnode is None: 308 return 309 if self.isfuzzy() and "state-qualifier" in targetnode.attrib: 310 #TODO: consider 311 del targetnode.attrib["state-qualifier"] 312 targetnode.set("state", "translated")
313
314 - def setid(self, id):
315 self.xmlelement.set("id", id)
316
317 - def getid(self):
318 return self.xmlelement.get("id") or ""
319
320 - def addlocation(self, location):
321 self.setid(location)
322
323 - def getlocations(self):
324 return [self.getid()]
325
326 - def createcontextgroup(self, name, contexts=None, purpose=None):
327 """Add the context group to the trans-unit with contexts a list with 328 (type, text) tuples describing each context.""" 329 assert contexts 330 group = etree.Element(self.namespaced("context-group")) 331 # context-group tags must appear at the start within <group> 332 # tags. Otherwise it must be appended to the end of a group 333 # of tags. 334 if self.xmlelement.tag == self.namespaced("group"): 335 self.xmlelement.insert(0, group) 336 else: 337 self.xmlelement.append(group) 338 group.set("name", name) 339 if purpose: 340 group.set("purpose", purpose) 341 for type, text in contexts: 342 if isinstance(text, str): 343 text = text.decode("utf-8") 344 context = etree.SubElement(group, self.namespaced("context")) 345 context.text = text 346 context.set("context-type", type)
347
348 - def getcontextgroups(self, name):
349 """Returns the contexts in the context groups with the specified name""" 350 groups = [] 351 grouptags = self.xmlelement.iterdescendants(self.namespaced("context-group")) 352 #TODO: conbine name in query 353 for group in grouptags: 354 if group.get("name") == name: 355 contexts = group.iterdescendants(self.namespaced("context")) 356 pairs = [] 357 for context in contexts: 358 pairs.append((context.get("context-type"), lisa.getText(context, getXMLspace(self.xmlelement, self._default_xml_space)))) 359 groups.append(pairs) #not extend 360 return groups
361
362 - def getrestype(self):
363 """returns the restype attribute in the trans-unit tag""" 364 return self.xmlelement.get("restype")
365
366 - def merge(self, otherunit, overwrite=False, comments=True, authoritative=False):
367 #TODO: consider other attributes like "approved" 368 super(xliffunit, self).merge(otherunit, overwrite, comments) 369 if self.target: 370 self.marktranslated() 371 if otherunit.isfuzzy(): 372 self.markfuzzy() 373 elif otherunit.source == self.source: 374 self.markfuzzy(False) 375 if comments: 376 self.addnote(otherunit.getnotes())
377
378 - def correctorigin(self, node, origin):
379 """Check against node tag's origin (e.g note or alt-trans)""" 380 if origin == None: 381 return True 382 elif origin in node.get("from", ""): 383 return True 384 elif origin in node.get("origin", ""): 385 return True 386 else: 387 return False
388
389 - def multistring_to_rich(self, mstr):
390 """Override L{TranslationUnit.multistring_to_rich} which is used by the 391 C{rich_source} and C{rich_target} properties.""" 392 strings = mstr 393 if isinstance(mstr, multistring): 394 strings = mstr.strings 395 elif isinstance(mstr, basestring): 396 strings = [mstr] 397 398 return [xml_to_strelem(s) for s in strings]
399 multistring_to_rich = classmethod(multistring_to_rich) 400
401 - def rich_to_multistring(self, elem_list):
402 """Override L{TranslationUnit.rich_to_multistring} which is used by the 403 C{rich_source} and C{rich_target} properties.""" 404 return multistring([unicode(elem) for elem in elem_list])
405 rich_to_multistring = classmethod(rich_to_multistring)
406 407
class xlifffile(lisa.LISAfile):
    """Class representing a XLIFF file store."""
    # Class used for the units of this store.
    UnitClass = xliffunit
    Name = _("XLIFF Translation File")
    Mimetypes = ["application/x-xliff", "application/x-xliff+xml"]
    Extensions = ["xlf", "xliff"]
    # Tag names of the document root and of the element holding the units.
    rootNode = "xliff"
    bodyNode = "body"
    # Minimal document with one default <file> named 'NoName'; that default
    # file is what removedefaultfile() looks for.
    XMLskeleton = '''<?xml version="1.0" ?>
<xliff version='1.1' xmlns='urn:oasis:names:tc:xliff:document:1.1'>
<file original='NoName' source-language='en' datatype='plaintext'>
<body>
</body>
</file>
</xliff>'''
    # XML namespace of XLIFF 1.1 documents; initbody() replaces it with the
    # default namespace of the parsed document.
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'
    suggestions_in_format = True
    """xliff units have alttrans tags which can be used to store suggestions"""
    def __init__(self, *args, **kwargs):
        """Create the store; all arguments are passed on to
        C{lisa.LISAfile.__init__}."""
        # Name of the <file> currently being filled (None: none selected yet).
        # NOTE(review): set before the base constructor runs, presumably
        # because that constructor ends up calling initbody(), which reads
        # it -- confirm against lisa.LISAfile.
        self._filename = None
        lisa.LISAfile.__init__(self, *args, **kwargs)
        # Counter used by addsourceunit() to number the units it adds.
        self._messagenum = 0
431 432
433 - def initbody(self):
434 self.namespace = self.document.getroot().nsmap.get(None, None) 435 436 if self._filename: 437 self.body = self.getcontextnode(self._filename) 438 else: 439 self.body = self.document.getroot() 440 441 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next() 442 sourcelanguage = filenode.get('source-language') 443 if sourcelanguage: 444 self.setsourcelanguage(sourcelanguage) 445 targetlanguage = filenode.get('target-language') 446 if targetlanguage: 447 self.settargetlanguage(targetlanguage)
448
449 - def addheader(self):
450 """Initialise the file header.""" 451 filenode = self.document.getroot().iterchildren(self.namespaced("file")).next() 452 filenode.set("source-language", self.sourcelanguage) 453 if self.targetlanguage: 454 filenode.set("target-language", self.targetlanguage)
455
456 - def createfilenode(self, filename, sourcelanguage=None, targetlanguage=None, datatype='plaintext'):
457 """creates a filenode with the given filename. All parameters are needed 458 for XLIFF compliance.""" 459 self.removedefaultfile() 460 if sourcelanguage is None: 461 sourcelanguage = self.sourcelanguage 462 if targetlanguage is None: 463 targetlanguage = self.targetlanguage 464 filenode = etree.Element(self.namespaced("file")) 465 filenode.set("original", filename) 466 filenode.set("source-language", sourcelanguage) 467 if targetlanguage: 468 filenode.set("target-language", targetlanguage) 469 filenode.set("datatype", datatype) 470 bodyNode = etree.SubElement(filenode, self.namespaced(self.bodyNode)) 471 return filenode
472
473 - def getfilename(self, filenode):
474 """returns the name of the given file""" 475 return filenode.get("original")
476
477 - def setfilename(self, filenode, filename):
478 """set the name of the given file""" 479 return filenode.set("original", filename)
480
481 - def getfilenames(self):
482 """returns all filenames in this XLIFF file""" 483 filenodes = self.document.getroot().iterchildren(self.namespaced("file")) 484 filenames = [self.getfilename(filenode) for filenode in filenodes] 485 filenames = filter(None, filenames) 486 if len(filenames) == 1 and filenames[0] == '': 487 filenames = [] 488 return filenames
489
490 - def getfilenode(self, filename):
491 """finds the filenode with the given name""" 492 filenodes = self.document.getroot().iterchildren(self.namespaced("file")) 493 for filenode in filenodes: 494 if self.getfilename(filenode) == filename: 495 return filenode 496 return None
497
498 - def getdatatype(self, filename=None):
499 """Returns the datatype of the stored file. If no filename is given, 500 the datatype of the first file is given.""" 501 if filename: 502 node = self.getfilenode(filename) 503 if not node is None: 504 return node.get("datatype") 505 else: 506 filenames = self.getfilenames() 507 if len(filenames) > 0 and filenames[0] != "NoName": 508 return self.getdatatype(filenames[0]) 509 return ""
510
511 - def getdate(self, filename=None):
512 """Returns the date attribute for the file. If no filename is given, 513 the date of the first file is given. If the date attribute is not 514 specified, None is returned.""" 515 if filename: 516 node = self.getfilenode(filename) 517 if not node is None: 518 return node.get("date") 519 else: 520 filenames = self.getfilenames() 521 if len(filenames) > 0 and filenames[0] != "NoName": 522 return self.getdate(filenames[0]) 523 return None
524
    def removedefaultfile(self):
        """We want to remove the default file-tag as soon as possible if we
        know if still present and empty."""
        filenodes = list(self.document.getroot().iterchildren(self.namespaced("file")))
        # With a single file node there is nothing to clean up: the default
        # file is only dropped once another file exists next to it.
        if len(filenodes) > 1:
            for filenode in filenodes:
                # "Empty" means: no unit element anywhere below the file node.
                if filenode.get("original") == "NoName" and \
                        not list(filenode.iterdescendants(self.namespaced(self.UnitClass.rootNode))):
                    self.document.getroot().remove(filenode)
                # NOTE(review): with the break at loop level only the first
                # file node is ever examined -- confirm that this placement
                # is intended.
                break
535
536 - def getheadernode(self, filenode, createifmissing=False):
537 """finds the header node for the given filenode""" 538 # TODO: Deprecated? 539 headernode = filenode.iterchildren(self.namespaced("header")) 540 try: 541 return headernode.next() 542 except StopIteration: 543 pass 544 if not createifmissing: 545 return None 546 headernode = etree.SubElement(filenode, self.namespaced("header")) 547 return headernode
548
549 - def getbodynode(self, filenode, createifmissing=False):
550 """finds the body node for the given filenode""" 551 bodynode = filenode.iterchildren(self.namespaced("body")) 552 try: 553 return bodynode.next() 554 except StopIteration: 555 pass 556 if not createifmissing: 557 return None 558 bodynode = etree.SubElement(filenode, self.namespaced("body")) 559 return bodynode
560
561 - def addsourceunit(self, source, filename="NoName", createifmissing=False):
562 """adds the given trans-unit to the last used body node if the filename has changed it uses the slow method instead (will create the nodes required if asked). Returns success""" 563 if self._filename != filename: 564 if not self.switchfile(filename, createifmissing): 565 return None 566 unit = super(xlifffile, self).addsourceunit(source) 567 self._messagenum += 1 568 unit.setid("%d" % self._messagenum) 569 return unit
570
571 - def switchfile(self, filename, createifmissing=False):
572 """adds the given trans-unit (will create the nodes required if asked). Returns success""" 573 self._filename = filename 574 filenode = self.getfilenode(filename) 575 if filenode is None: 576 if not createifmissing: 577 return False 578 filenode = self.createfilenode(filename) 579 self.document.getroot().append(filenode) 580 581 self.body = self.getbodynode(filenode, createifmissing=createifmissing) 582 if self.body is None: 583 return False 584 self._messagenum = len(list(self.body.iterdescendants(self.namespaced("trans-unit")))) 585 #TODO: was 0 based before - consider 586 # messagenum = len(self.units) 587 #TODO: we want to number them consecutively inside a body/file tag 588 #instead of globally in the whole XLIFF file, but using len(self.units) 589 #will be much faster 590 return True
591
592 - def creategroup(self, filename="NoName", createifmissing=False, restype=None):
593 """adds a group tag into the specified file""" 594 if self._filename != filename: 595 if not self.switchfile(filename, createifmissing): 596 return None 597 group = etree.SubElement(self.body, self.namespaced("group")) 598 if restype: 599 group.set("restype", restype) 600 return group
601
602 - def __str__(self):
603 self.removedefaultfile() 604 return super(xlifffile, self).__str__()
605
606 - def parsestring(cls, storestring):
607 """Parses the string to return the correct file object""" 608 xliff = super(xlifffile, cls).parsestring(storestring) 609 if xliff.units: 610 header = xliff.units[0] 611 if ("gettext-domain-header" in (header.getrestype() or "") \ 612 or xliff.getdatatype() == "po") \ 613 and cls.__name__.lower() != "poxlifffile": 614 import poxliff 615 xliff = poxliff.PoXliffFile.parsestring(storestring) 616 return xliff
617 parsestring = classmethod(parsestring)
618