Package translate :: Package storage :: Module catkeys
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.catkeys

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2010 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Manage the Haiku catkeys translation format 
 22   
 23     The Haiku catkeys format is the translation format used for localisation of 
 24     the U{Haiku<http://www.haiku-os.org/>} operating system. 
 25   
 26     It is a bilingual base class derived format with L{CatkeysFile} and 
 27     L{CatkeysUnit} providing file and unit level access.  The file format is 
 28     described here: 
 29     http://www.haiku-os.org/blog/pulkomandy/2009-09-24_haiku_locale_kit_translator_handbook 
 30   
 31     Implementation 
 32     ============== 
 33     The implementation covers the full requirements of a catkeys file. The 
 34     files are simple Tab Separated Value (TSV) files that can be read 
 35     by Microsoft Excel and other spreadsheet programs. They use the .txt  
 36     extension which does make it more difficult to automatically identify  
 37     such files. 
 38   
 39     The dialect of the TSV files is specified by L{CatkeysDialect}. 
 40   
 41     Encoding 
 42     -------- 
 43     The files are UTF-8 encoded. 
 44   
 45     Header 
 46     ------ 
 47     L{CatkeysHeader} provides header management support. 
 48   
 49     Escaping 
 50     -------- 
 51     catkeys seem to escape things like in C++ (strings are just extracted from 
 52     the source code unchanged, it seems). 
 53   
 54     Functions allow for L{escaping<_escape>} and L{unescaping<_unescape>}. 
 55  """ 
 56   
 57  import csv 
 58  import sys 
 59  from translate.storage import base 
 60  from translate.lang.data import tr_lang 
 61   
 62  FIELDNAMES_HEADER = ["version", "language", "mimetype", "checksum"] 
 63  """Field names for the catkeys header""" 
 64   
 65  FIELDNAMES = ["source", "context", "comment", "target"] 
 66  """Field names for a catkeys TU""" 
 67   
 68  FIELDNAMES_HEADER_DEFAULTS = { 
 69  "version": "1", 
 70  "language": "", 
 71  "mimetype": "", 
 72  "checksum": "", 
 73  } 
 74  """Default or minimum header entries for a catkeys file""" 
 75   
 76  _unescape_map = {"\\r": "\r", "\\t": "\t", '\\n': '\n', '\\\\': '\\'} 
 77  _escape_map = dict([(value, key) for (key, value) in _unescape_map.items()]) 
 78  # We don't yet do escaping correctly, just for lack of time to do it.  The 
 79  # current implementation is just based on something simple that will work with 
 80  # investaged files.  The only escapes found were "\n", "\t", "\\n" 
 81   
def _escape(string):
    """Escape a string for storage in a catkeys field.

    A literal backslash-n gains an extra backslash, and real newline and tab
    characters are written as the two-character sequences backslash-n and
    backslash-t.  Empty or C{None} input is returned unchanged.

    @param string: the text to escape
    @return: the escaped text
    """
    if not string:
        return string
    # Protect an existing literal backslash-n first so that it cannot be
    # confused with the newline escapes produced on the next line.
    escaped = string.replace(r"\n", r"\\n")
    escaped = escaped.replace("\n", "\\n")
    return escaped.replace("\t", "\\t")

def _unescape(string):
    """Reverse the escaping applied to a catkeys field.

    Three sequences are recognised, scanning from left to right:
      - backslash backslash n becomes a literal backslash followed by "n",
      - backslash n becomes a newline character,
      - backslash t becomes a tab character.
    Everything else, including other backslashes, is passed through
    unchanged.  Empty or C{None} input is returned unchanged.

    @param string: the escaped text as stored in the file
    @return: the unescaped text
    """
    if not string:
        return string
    # The doubled-backslash form has to be taken out before the plain
    # backslash-n replacement runs, otherwise its trailing "\n" would be
    # turned into a real newline.  Splitting on it keeps the pieces apart
    # while the single escapes are decoded.
    pieces = string.split("\\\\n")
    decoded = [piece.replace("\\n", "\n").replace("\\t", "\t")
               for piece in pieces]
    return "\\n".join(decoded)

class CatkeysDialect(csv.Dialect):
    """CSV dialect for the TAB-delimited rows of a catkeys file.

    Fields are separated by a tab, rows end with a newline and no quoting is
    applied.
    """
    lineterminator = "\n"
    delimiter = "\t"
    quoting = csv.QUOTE_NONE
    if sys.version_info < (2, 5, 0):
        # The csv module in Python < 2.5 needs the remaining dialect
        # attributes to be defined as well.
        escapechar = None
        quotechar = '"'
        skipinitialspace = False
        doublequote = False
        # catkeys does not quote anything; since _escape rewrites \t anyway,
        # minimal quoting should not be a problem here.
        quoting = csv.QUOTE_MINIMAL


csv.register_dialect("catkeys", CatkeysDialect)
class CatkeysHeader(object):
    """A catkeys translation memory header.

    The values given at construction time are kept in C{header}; values set
    later through L{targetlang} are kept in C{_header_dict}.
    """

    def __init__(self, header=None):
        """Create a header.

        @param header: header values keyed by field name.  When omitted or
            empty, a copy of the default header is used.  A non-empty value
            that is not a dictionary is ignored and leaves C{header} unset.
        @type header: Dict
        """
        self._header_dict = {}
        if not header:
            self.header = self._create_default_header()
        elif isinstance(header, dict):
            self.header = header

    def _create_default_header(self):
        """Create a default catkeys header

        @return: a new dictionary holding the L{FIELDNAMES_HEADER_DEFAULTS}
            entries
        @rtype: Dict
        """
        # Hand out a copy: returning the module-level dictionary itself would
        # let a change to one header alter the defaults of every other one.
        return FIELDNAMES_HEADER_DEFAULTS.copy()

    def settargetlang(self, newlang):
        """Set a human readable target language

        @param newlang: the language value, passed through C{tr_lang('en')}
            before being stored under the C{language} key
        """
        self._header_dict['language'] = tr_lang('en')(newlang)
    # Write-only property: there is no getter.
    targetlang = property(None, settargetlang)

class CatkeysUnit(base.TranslationUnit):
    """A catkeys translation memory unit.

    One row of a catkeys file is held in a dictionary (see L{dict}).  The
    methods of this class read and write the C{source}, C{context},
    C{comment} and C{target} keys of that dictionary.
    """

    def __init__(self, source=None):
        """Create a unit, optionally initialised with a source string.

        @param source: the source text of the unit
        """
        self._dict = {}
        if source:
            self.source = source
        super(CatkeysUnit, self).__init__(source)

    def getdict(self):
        """Get the dictionary of values for a catkeys line"""
        return self._dict

    def setdict(self, newdict):
        """Set the dictionary of values for a catkeys line

        @param newdict: a new dictionary with catkeys line elements
        @type newdict: Dict
        """
        # TODO First check that the values are OK
        self._dict = newdict
    dict = property(getdict, setdict)

    def _get_text(self, key):
        """Return the UTF-8 decoded value stored under C{key}.

        A key that is missing, or whose value is C{None} or empty, yields an
        empty unicode string instead of raising.
        """
        value = self._dict.get(key)
        if not value:
            return u""
        return value.decode('utf-8')

    def _get_source_or_target(self, key):
        """Return the unescaped, decoded value of C{key}.

        @return: C{None} when the key is missing or holds C{None}, an empty
            string when the stored value is empty, the decoded text otherwise
        """
        stored = self._dict.get(key, None)
        if stored is None:
            return None
        if not stored:
            return ""
        return _unescape(stored).decode('utf-8')

    def _set_source_or_target(self, key, newvalue):
        """Store C{newvalue} under C{key}, UTF-8 encoded and escaped.

        @param newvalue: the new text, or C{None} to store C{None}
        """
        if newvalue is None:
            self._dict[key] = None
            return
        if isinstance(newvalue, unicode):
            newvalue = newvalue.encode('utf-8')
        newvalue = _escape(newvalue)
        if key not in self._dict or newvalue != self._dict[key]:
            self._dict[key] = newvalue

    def getsource(self):
        """Return the source text of the unit."""
        return self._get_source_or_target('source')

    def setsource(self, newsource):
        """Set the source text and reset C{_rich_source}."""
        self._rich_source = None
        return self._set_source_or_target('source', newsource)
    source = property(getsource, setsource)

    def gettarget(self):
        """Return the target text of the unit."""
        return self._get_source_or_target('target')

    def settarget(self, newtarget):
        """Set the target text and reset C{_rich_target}."""
        self._rich_target = None
        return self._set_source_or_target('target', newtarget)
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return the comment of the unit.

        @param origin: when given, the comment is only returned for the
            origins "programmer", "developer" and "source code"
        @return: the comment, or an empty string for any other origin or
            when the unit has no comment
        """
        if not origin or origin in ["programmer", "developer", "source code"]:
            return self._get_text("comment")
        return u""

    def getcontext(self):
        """Return the context of the unit, or an empty string if it has none."""
        return self._get_text("context")

    def getid(self):
        """Return an identifier built from context, comment and source.

        Non-empty parts are joined with the character C{\\04}, in the order
        context, comment, source.
        """
        context = self.getcontext()
        notes = self.getnotes()
        unit_id = self.source
        if notes:
            unit_id = u"%s\04%s" % (notes, unit_id)
        if context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id

    def markfuzzy(self, present=True):
        """Clear the target when the unit is marked fuzzy.

        @param present: when true the target is set to an empty string;
            otherwise nothing happens
        """
        if present:
            self.target = u""

    def settargetlang(self, newlang):
        """Store C{newlang} under the C{target-lang} key of the line dictionary."""
        self._dict['target-lang'] = newlang
    # Write-only property: there is no getter.
    targetlang = property(None, settargetlang)

    def __str__(self):
        """Return the string form of the line dictionary."""
        return str(self._dict)

    def istranslated(self):
        """Return whether both source and target hold a non-empty value."""
        if not self._dict.get('source', None):
            return False
        return bool(self._dict.get('target', None))

    def merge(self, otherunit, overwrite=False, comments=True,
              authoritative=False):
        """Do basic format agnostic merging.

        The target of C{otherunit} is taken over when source and context
        match, C{otherunit} is not fuzzy, and this unit is either
        untranslated or C{overwrite} is set.

        @param otherunit: the unit to merge from
        @param overwrite: replace an existing translation
        @param comments: accepted but not used by this implementation
        @param authoritative: accepted but not used by this implementation
        """
        # We can't go fuzzy, so just do nothing
        if (self.source != otherunit.source
                or self.getcontext() != otherunit.getcontext()
                or otherunit.isfuzzy()):
            return
        if not self.istranslated() or overwrite:
            self.rich_target = otherunit.rich_target


class CatkeysFile(base.TranslationStore):
    """A catkeys translation memory file"""
    Name = _("Haiku catkeys file")
    Mimetypes = ["application/x-catkeys"]
    Extensions = ["catkeys"]

    def __init__(self, inputfile=None, unitclass=CatkeysUnit):
        """Construct a catkeys store, optionally reading in from inputfile.

        @param inputfile: a file object or a string holding catkeys data
        @param unitclass: the class used for the units of this store
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.filename = ''
        self.header = CatkeysHeader()
        self._encoding = 'utf-8'
        if inputfile is not None:
            self.parse(inputfile)

    def parse(self, input):
        """Parse the given file or file source string.

        The first line is read as the header (L{FIELDNAMES_HEADER}); every
        following line becomes one unit (L{FIELDNAMES}).

        @param input: an object with a C{read} method, which is read and then
            closed, or a string holding the file content
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            # Close the file even when reading fails.
            try:
                tmsrc = input.read()
            finally:
                input.close()
            input = tmsrc
        rows = input.split("\n")
        for header in csv.DictReader(rows[:1], fieldnames=FIELDNAMES_HEADER,
                                     dialect="catkeys"):
            self.header = CatkeysHeader(header)
        lines = csv.DictReader(rows[1:], fieldnames=FIELDNAMES,
                               dialect="catkeys")
        for line in lines:
            # Honour the unit class this store was configured with.
            newunit = self.UnitClass()
            newunit.dict = line
            self.addunit(newunit)

    def __str__(self):
        """Serialise the header and all units as catkeys text.

        The header line is written with the L{FIELDNAMES_HEADER} columns and
        the unit lines with the L{FIELDNAMES} columns.  Dictionary keys that
        are not one of these columns are left out of the output.
        """
        output = csv.StringIO()
        # The header object keeps its initial values in ``header`` and later
        # overrides (such as the language) in ``_header_dict``; combine both,
        # letting the overrides win.
        header_row = {}
        header_row.update(getattr(self.header, "header", None) or {})
        header_row.update(self.header._header_dict)
        header_writer = csv.DictWriter(output, fieldnames=FIELDNAMES_HEADER,
                                       dialect="catkeys",
                                       extrasaction="ignore")
        header_writer.writerow(header_row)
        # Unit dictionaries may carry keys that are not file columns, e.g.
        # 'target-lang' set by CatkeysUnit.settargetlang; skip those instead
        # of failing.
        unit_writer = csv.DictWriter(output, fieldnames=FIELDNAMES,
                                     dialect="catkeys",
                                     extrasaction="ignore")
        for unit in self.units:
            unit_writer.writerow(unit.dict)
        return output.getvalue()
