1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 from translate.storage import base
26 from translate.lang import data
27 try:
28 from lxml import etree
29 from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, \
30 getXMLspace, setXMLspace, namespaced
31 except ImportError, e:
32 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
33
34
36 """generate match objects for all L{re_obj} matches in L{text}."""
37 start = 0
38 max = len(text)
39 while start < max:
40 m = re_obj.search(text, start)
41 if not m:
42 break
43 yield m
44 start = m.end()
45
46
# Regular expressions used to recognise inline placeholders in unit text:
# printf-style conversions, runs of backslashes with an optional following
# character, positional printf arguments, and anything shaped like a tag.
# Every literal is a raw string so backslashes reach the regex engine
# untouched and ``\$`` is not treated as an (invalid) string escape.
placeholders = [
    r'(%[diouxXeEfFgGcrs])',
    r'(\\+.?)',
    # NOTE(review): the "$" here is unescaped, so it acts as an end-of-string
    # anchor and this pattern can never match.  It is deliberately left
    # unchanged: escaping it would make it overlap with the next pattern
    # (both would match the start of "%1$lx") -- confirm intent before fixing.
    r'(%[0-9]$lx)',
    r'(%[0-9]\$[a-z])',
    r'(<.+?>)',
]
# Compiled once at import time; kept in the same order as ``placeholders``.
re_placeholders = [re.compile(ph) for ph in placeholders]
60
61
63 """
64 A single unit in the file. Provisional work is done to make several
65 languages possible.
66 """
67
68
69 rootNode = ""
70
71
72 languageNode = ""
73
74 textNode = ""
75
76 namespace = None
77 _default_xml_space = "preserve"
78 """The default handling of spacing in the absence of an xml:space
79 attribute.
80
81 This is mostly for correcting XLIFF behaviour."""
82
def __init__(self, source, empty=False, **kwargs):
    """Create a unit for ``source``, or a bare shell when ``empty`` is true.

    With ``empty`` set, no XML element is created and the base-class
    initialiser is not called; only ``_rich_source``, ``_rich_target`` and
    ``_state_n`` are initialised.  Extra keyword arguments are accepted
    and ignored.
    """
    self._rich_source = None
    self._rich_target = None
    if not empty:
        # Normal construction: create the root element for this unit type
        # before handing ``source`` to the base-class initialiser.
        root_tag = self.namespaced(self.rootNode)
        self.xmlelement = etree.Element(root_tag)
        super(LISAunit, self).__init__(source)
    else:
        self._state_n = 0
93
113
115 """Returns name in Clark notation.
116
117 For example namespaced("source") in an XLIFF document might return::
118 {urn:oasis:names:tc:xliff:document:1.1}source
119 This is needed throughout lxml.
120 """
121 return namespaced(self.namespace, name)
122
124 languageNodes = self.getlanguageNodes()
125 if len(languageNodes) > 0:
126 self.xmlelement.replace(languageNodes[0], dom_node)
127 else:
128 self.xmlelement.append(dom_node)
129
132 source_dom = property(get_source_dom, set_source_dom)
133
139
144 source = property(getsource, setsource)
145
147 languageNodes = self.getlanguageNodes()
148 assert len(languageNodes) > 0
149 if dom_node is not None:
150 if append or len(languageNodes) == 0:
151 self.xmlelement.append(dom_node)
152 else:
153 self.xmlelement.insert(1, dom_node)
154 if not append and len(languageNodes) > 1:
155 self.xmlelement.remove(languageNodes[1])
156
162 target_dom = property(get_target_dom)
163
164 - def settarget(self, text, lang='xx', append=False):
191
198 target = property(gettarget, settarget)
199
201 """Returns a xml Element setup with given parameters to represent a
202 single language entry. Has to be overridden."""
203 return None
204
233
235 """Returns a list of all nodes that contain per language information.
236 """
237 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
238
240 """Retrieves a languageNode either by language or by index"""
241 if lang is None and index is None:
242 raise KeyError("No criterea for languageNode given")
243 languageNodes = self.getlanguageNodes()
244 if lang:
245 for set in languageNodes:
246 if getXMLlang(set) == lang:
247 return set
248 else:
249 if index >= len(languageNodes):
250 return None
251 else:
252 return languageNodes[index]
253 return None
254
def getNodeText(self, languageNode, xml_space="preserve"):
    """Retrieves the term from the given languageNode.

    When ``self.textNode`` is set, the text is taken from the first
    descendant of ``languageNode`` with that (namespaced) tag; otherwise it
    is taken from ``languageNode`` itself.

    @param languageNode: the element to read from, or None.
    @param xml_space: whitespace mode passed through to getText().
    @return: the extracted text, or None when ``languageNode`` is None or
        no matching text node exists.
    """
    if languageNode is None:
        return None
    if not self.textNode:
        return getText(languageNode, xml_space)
    # iterdescendants() always returns an iterator (never None), so a missing
    # text node shows up as an empty iteration.  Returning from the first
    # pass of the loop avoids both the StopIteration that a bare .next()
    # raised in that case and the Python-2-only .next() call itself.
    for term in languageNode.iterdescendants(self.namespaced(self.textNode)):
        return getText(term, xml_space)
    return None
267
269 return etree.tostring(self.xmlelement, pretty_print=True,
270 encoding='utf-8')
271
274
# ``xid`` and ``rid`` expose the namespaced attributes of the same name on
# the unit's XML element.  Reading uses item access on the element's
# ``attrib`` mapping; writing is delegated to ``_set_property()``.
xid = property(
    fget=lambda self: self.xmlelement.attrib[self.namespaced('xid')],
    fset=lambda self, value: self._set_property(self.namespaced('xid'), value))

rid = property(
    fget=lambda self: self.xmlelement.attrib[self.namespaced('rid')],
    fset=lambda self, value: self._set_property(self.namespaced('rid'), value))
280
282 term = cls(None, empty=True)
283 term.xmlelement = element
284 return term
285 createfromxmlElement = classmethod(createfromxmlElement)
286
287
289 """A class representing a file store for one of the LISA file formats."""
290 UnitClass = LISAunit
291
292 rootNode = ""
293
294 bodyNode = ""
295
296 XMLskeleton = ""
297
298 namespace = None
299
300 - def __init__(self, inputfile=None, sourcelanguage='en',
301 targetlanguage=None, unitclass=None):
314
316 """Method to be overridden to initialise headers, etc."""
317 pass
318
320 """Returns name in Clark notation.
321
322 For example namespaced("source") in an XLIFF document might return::
323 {urn:oasis:names:tc:xliff:document:1.1}source
324 This is needed throughout lxml.
325 """
326 return namespaced(self.namespace, name)
327
def initbody(self):
    """Look up the body element once and keep it on ``self.body``.

    Also stores the root element's default namespace (the ``None`` entry of
    its ``nsmap``, or None when there is no such entry) on
    ``self.namespace``.
    """
    root = self.document.getroot()
    # The namespace has to be stored first: namespaced() reads self.namespace
    # when it builds the qualified tag name used in the search below.
    self.namespace = root.nsmap.get(None)
    body_path = '//%s' % self.namespaced(self.bodyNode)
    self.body = self.document.find(body_path)
333
335
336 """Adds and returns a new unit with the given string as first entry."""
337 newunit = self.UnitClass(source)
338 self.addunit(newunit)
339 return newunit
340
341 - def addunit(self, unit, new=True):
346
348 """Converts to a string containing the file's XML"""
349 return etree.tostring(self.document, pretty_print=True,
350 xml_declaration=True, encoding='utf-8')
351
353 """Populates this object from the given xml string"""
354 if not hasattr(self, 'filename'):
355 self.filename = getattr(xml, 'name', '')
356 if hasattr(xml, "read"):
357 xml.seek(0)
358 posrc = xml.read()
359 xml = posrc
360 if etree.LXML_VERSION >= (2, 1, 0):
361
362
363 parser = etree.XMLParser(strip_cdata=False)
364 else:
365 parser = etree.XMLParser()
366 self.document = etree.fromstring(xml, parser).getroottree()
367 self._encoding = self.document.docinfo.encoding
368 self.initbody()
369 assert self.document.getroot().tag == self.namespaced(self.rootNode)
370 for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
371 term = self.UnitClass.createfromxmlElement(entry)
372 self.addunit(term, new=False)
373