Package translate :: Package storage :: Package xml_extract :: Module generate
[hide private]
[frames | no frames]

Source Code for Module translate.storage.xml_extract.generate

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  import lxml.etree as etree 
 24   
 25  from translate.storage import base 
 26   
 27  from translate.misc.typecheck import accepts, IsCallable, Any 
 28  from translate.storage.xml_extract import misc 
 29  from translate.storage.xml_extract import extract 
 30  from translate.storage.xml_extract import unit_tree 
 31  from translate.storage.xml_name import XmlNamer 
@accepts(etree._Element)
def _get_tag_arrays(dom_node):
    """Group the direct children of dom_node by their tag name.

    The result is a dictionary keyed by child tag. Each value is the list of
    children of dom_node carrying that tag, in the order in which they appear
    under dom_node.

    For the element parsed from::

        <a><b></b><c></c><b></b><d/></a>

    the returned dictionary has the keys 'b', 'c' and 'd', holding two, one
    and one child elements respectively.
    """
    children_by_tag = {}
    for child in dom_node:
        # setdefault creates the list the first time a tag is seen.
        children_by_tag.setdefault(child.tag, []).append(child)
    return children_by_tag
# NOTE(review): four type specifications are given here for a function that
# takes three parameters; 'extract.Translatable' looks like the odd one out.
# Left untouched because the behaviour of 'accepts' is defined elsewhere.
@accepts(etree._Element, unit_tree.XPathTree, extract.Translatable, IsCallable())
def apply_translations(dom_node, unit_node, do_translate):
    """Walk dom_node and unit_node in parallel and apply translations.

    The keys of unit_node.children are (tag, index) pairs. For each pair, the
    index-th child of dom_node with the (namespace-qualified) tag is looked up
    and the function recurses into that child together with the matching unit
    child. After the children have been handled, do_translate is invoked on
    dom_node itself if unit_node carries a translation unit.

    @param dom_node: the DOM element to update.
    @param unit_node: the tree node whose 'children' and 'unit' attributes
        drive the traversal.
    @param do_translate: a callable taking (dom_node, unit); it is expected
        to replace the text in dom_node with the text from the unit.
    """
    tag_array = _get_tag_arrays(dom_node)
    # items() is available on both Python 2 and Python 3 dictionaries,
    # whereas iteritems() only exists on Python 2.
    for (tag, index), unit_child in unit_node.children.items():
        try:
            dom_child = tag_array[XmlNamer(dom_node).name(tag)][index]
            apply_translations(dom_child, unit_child, do_translate)
        except (KeyError, IndexError):
            # KeyError: the tag is not in tag_array.
            # IndexError: the index is not in tag_array[tag].
            # We might want to complain to the user in the future.
            # NOTE(review): because the recursive call sits inside the try,
            # the same exceptions raised further down are swallowed here too;
            # kept as-is to preserve the existing best-effort behaviour.
            pass
    # If there is a translation unit associated with this unit_node, invoke
    # do_translate on the dom_node and the unit. Identity comparison is used
    # so that the unit's own comparison methods are never involved.
    if unit_node.unit is not None:
        do_translate(dom_node, unit_node.unit)
72
@accepts(IsCallable(), etree._Element, vargs=[Any()])
def reduce_dom_tree(f, dom_node, *state):
    """Fold f over the DOM tree rooted at dom_node.

    This delegates to misc.reduce_tree, passing dom_node both as the parent
    and as the starting node. The child accessor is the identity function,
    since iterating over an element yields its children. The callbacks used
    with this function in this module take (parent_node, node, state) and
    return the updated state.
    """

    def children_of(node):
        return node

    return misc.reduce_tree(f, dom_node, dom_node, children_of, *state)
76
@accepts(etree._Element, etree._Element)
def find_dom_root(parent_dom_node, dom_node):
    """Return the ancestor-or-self of dom_node whose parent is parent_dom_node.

    The search climbs from dom_node towards the top of the tree. None is
    returned if either argument is None, or if the top of the tree is reached
    without meeting parent_dom_node.

    @see: L{find_placeable_dom_tree_roots}
    """
    if parent_dom_node is None or dom_node is None:
        return None
    current = dom_node
    while True:
        parent = current.getparent()
        if parent == parent_dom_node:
            return current
        if parent is None:
            # Ran out of ancestors without finding parent_dom_node.
            return None
        current = parent
@accepts(extract.Translatable)
def find_placeable_dom_tree_roots(unit_node):
    r"""For each inline placeable, find the root DOM node of the placeable
    within its parent.

    Consider the diagram. In this pseudo-ODF example, there is an inline span
    element. However, the span is contained in other tags (which we never
    process). When splicing the template DOM tree (that is, the DOM which
    comes from the XML document we're using to generate a translated XML
    document), we'll need to move DOM sub-trees around and we need the roots
    of these sub-trees::

        <p> This is text \/     <- Paragraph containing an inline placeable
           <blah>               <- Inline placeable's root (which we want to find)
             ...                <- Any number of intermediate DOM nodes
               <span> bold text <- The inline placeable's Translatable
                                   holds a reference to this DOM node

    The result is a dictionary mapping every node visited by
    extract.reduce_unit_tree to the value of L{find_dom_root} for that node's
    DOM node relative to its parent's DOM node.
    """

    def record_root(parent_translatable, translatable, roots):
        roots[translatable] = find_dom_root(parent_translatable.dom_node,
                                            translatable.dom_node)
        return roots

    return extract.reduce_unit_tree(record_root, unit_node, {})


@accepts(extract.Translatable, etree._Element)
def _map_source_dom_to_doc_dom(unit_node, source_dom_node):
    r"""Create a mapping from the DOM nodes in source_dom_node which
    correspond to placeables, to DOM nodes in the XML document template (this
    information is obtained from unit_node). We are interested in DOM nodes in
    the XML document template which are the roots of placeables. See the
    diagram below, as well as L{find_placeable_dom_tree_roots}.

    XLIFF Source (below)::

        <source>This is text <g> bold text</g> and a footnote<x/></source>
                             /                               \________
                            /                                         \
        <p>This is text<blah>...<span> bold text</span>...</blah> and <note>...</note></p>

    Input XML document used as a template (above)

    In the above diagram, the XLIFF source DOM node <g> is associated with the
    XML document DOM node <blah>, whereas the XLIFF source DOM node <x> is
    associated with the XML document DOM node <note>.
    """
    placeable_roots = find_placeable_dom_tree_roots(unit_node)
    source_to_doc = {}

    def pair_up(translatable, xliff_node):
        # Placeables and the children of the XLIFF node are matched by
        # position; descend into each pair after recording it.
        for child_translatable, child_xliff_node in zip(translatable.placeables, xliff_node):
            source_to_doc[child_xliff_node] = placeable_roots[child_translatable]
            pair_up(child_translatable, child_xliff_node)

    pair_up(unit_node, source_dom_node)
    return source_to_doc
@accepts(etree._Element, etree._Element)
def _map_target_dom_to_source_dom(source_dom_node, target_dom_node):
    """Associate placeables in source_dom_node and target_dom_node which
    have the same 'id' attributes.

    We're using XLIFF placeables. The XLIFF standard requires that
    placeables have unique ids. The id of a placeable is never modified,
    which means that even if placeables are moved around in a translation,
    we can easily associate placeables from the source text with placeables
    in the target text.

    The returned dictionary maps each target DOM node carrying an id to the
    source DOM node with the same id.
    """

    def index_by_id(parent_node, node, nodes_by_id):
        # Record an id -> node entry for every node that has an 'id'.
        attributes = node.attrib
        if u'id' in attributes:
            nodes_by_id[attributes[u'id']] = node
        return nodes_by_id

    # First pass: id attribute -> target DOM node carrying that id.
    target_nodes_by_id = reduce_dom_tree(index_by_id, target_dom_node, {})

    def link_to_target(parent_node, node, target_to_source):
        attributes = node.attrib
        if u'id' not in attributes:
            return target_to_source
        placeable_id = attributes[u'id']
        # Only ids that also occur in the target tree produce an entry.
        if placeable_id in target_nodes_by_id:
            target_to_source[target_nodes_by_id[placeable_id]] = node
        return target_to_source

    # Second pass: for every source node with an id known to the target
    # tree, associate the target node with this source node.
    return reduce_dom_tree(link_to_target, source_dom_node, {})
def _build_target_dom_to_doc_dom(unit_node, source_dom, target_dom):
    """Build a mapping from target DOM nodes to document DOM nodes.

    Two intermediate mappings are built (source -> document, then
    target -> source) and combined with misc.compose_mappings.
    """
    source_to_doc = _map_source_dom_to_doc_dom(unit_node, source_dom)
    target_to_source = _map_target_dom_to_source_dom(source_dom, target_dom)
    return misc.compose_mappings(target_to_source, source_to_doc)
183
@accepts(etree._Element, {etree._Element: etree._Element})
def _get_translated_node(target_node, target_dom_to_doc_dom):
    """Look up the node that 'target_node' maps to in 'target_dom_to_doc_dom',
    copy the tail text of 'target_node' onto it, and return it."""
    doc_node = target_dom_to_doc_dom[target_node]
    doc_node.tail = target_node.tail
    return doc_node
191
@accepts(etree._Element, etree._Element, {etree._Element: etree._Element})
def _build_translated_dom(dom_node, target_node, target_dom_to_doc_dom):
    """Use the "shape" of 'target_node' (which is a DOM tree) to insert nodes
    into the DOM tree rooted at 'dom_node'.

    The mapping 'target_dom_to_doc_dom' is used to map nodes from
    'target_node' to the nodes which must be inserted into 'dom_node'.
    """
    dom_node.text = target_node.text

    def mapped_children():
        # Yield, lazily and in order, the mapped node for every child of
        # target_node whose mapping is not None. _get_translated_node also
        # copies the child's tail text onto the mapped node.
        for target_child in target_node:
            if target_dom_to_doc_dom[target_child] is not None:
                yield _get_translated_node(target_child, target_dom_to_doc_dom)

    dom_node.extend(mapped_children())
    # Recurse on the children of dom_node and target_node, paired by position.
    # NOTE(review): if a child was filtered out above, or dom_node already had
    # children before the extend, the positional pairing may not line up with
    # the mapping - confirm whether that is intended.
    for doc_child, target_child in zip(dom_node, target_node):
        _build_translated_dom(doc_child, target_child, target_dom_to_doc_dom)
213
@accepts(IsCallable())
def replace_dom_text(make_parse_state):
    """Return a function::

        action: etree._Element x base.TranslationUnit -> None

    which takes a dom_node and a translation unit. The dom_node is rearranged
    according to the rearrangement of placeables in unit.target (relative to
    their positions in unit.source).

    'make_parse_state' is called with no arguments on every invocation of the
    returned function, so each invocation works with a fresh parse state.
    """

    @accepts(etree._Element, base.TranslationUnit)
    def action(dom_node, unit):
        """Use the unit's target (or its source when there is no translation)
        to update the text in dom_node and at the tails of its children."""
        source_dom = unit.source_dom
        # Fall back to the source DOM when the unit has no target DOM.
        target_dom = unit.target_dom if unit.target_dom is not None else unit.source_dom
        # Build a tree of (non-DOM) nodes corresponding to the translatable
        # DOM nodes in 'dom_node'; only the first result is used.
        translatable_root = extract.find_translatable_dom_nodes(dom_node, make_parse_state())[0]
        doc_nodes_by_target = _build_target_dom_to_doc_dom(translatable_root, source_dom, target_dom)
        # The children of dom_node must be cleared out before the sub-tree
        # rooted at dom_node is reconstructed.
        dom_node[:] = []
        _build_translated_dom(dom_node, target_dom, doc_nodes_by_target)

    return action