Package translate :: Package storage :: Module pypo
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """classes that hold units of .po files (pounit) or entire files (pofile) 
 22  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 23   
 24  from __future__ import generators 
 25  from translate.misc.multistring import multistring 
 26  from translate.misc import quote 
 27  from translate.misc import textwrap 
 28  from translate.lang import data 
 29  from translate.storage import pocommon, base 
 30  import re 
 31  import copy 
 32  import cStringIO 
 33  import poparser 
 34   
 35  lsep = "\n#: " 
 36  """Seperator for #: entries""" 
 37   
 38  # general functions for quoting / unquoting po strings 
 39   
 40  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 41  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 42   
def escapeforpo(line):
    """Escape a single line of text for use inside a quoted PO string.

    Every character that has an entry in po_escape_map is replaced by its
    escape sequence; all other characters are copied through unchanged.
    Callers are expected to pass text without embedded newlines.

    @param line: unescaped text
    @return: the escaped text
    """
    # All keys of po_escape_map are single characters, so one pass with a
    # per-character lookup is sufficient.
    escaped = "".join([po_escape_map.get(char, char) for char in line])
    # Prepending an empty slice of the input keeps the result the same string
    # type as the input, even when the input is empty.
    return line[:0] + escaped
61
def unescapehandler(escape):
    """Translate one PO escape sequence into the character it represents.

    Sequences that are not listed in po_unescape_map are returned unchanged.
    """
    try:
        return po_unescape_map[escape]
    except KeyError:
        return escape
65
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns without altering any whitespace, and
    the pieces are then adjusted so that no continuation piece starts with a
    space.

    @param line: the text to wrap
    @return: list of wrapped pieces
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space: move a leading space to the end of
    # the piece that precedes it.
    for position in range(1, len(wrapped)):
        current = wrapped[position]
        if current.startswith(' '):
            wrapped[position] = current[1:]
            wrapped[position - 1] += ' '
    return wrapped
80
def quoteforpo(text):
    """Quote the given text for a PO file.

    @param text: unescaped text, or None
    @return: list of quoted and escaped lines (empty when text is None)
    """
    quoted = []
    if text is None:
        return quoted
    parts = text.split("\n")
    if len(parts) > 1 or len(parts[0]) > 71:
        # Multi-line output opens with an empty "" line, except when the text
        # is exactly one line terminated by a newline.
        if not (len(parts) == 2 and not parts[1]):
            quoted.append('""')
        for part in parts[:-1]:
            #TODO: We should only wrap after escaping
            pieces = wrapline(part)
            if not pieces:
                # An empty source line still contributes its newline.
                quoted.append('"\\n"')
                continue
            for piece in pieces[:-1]:
                quoted.append('"' + escapeforpo(piece) + '"')
            if pieces[-1]:
                quoted.append('"' + escapeforpo(pieces[-1]) + '\\n"')
    final = parts[-1]
    if final:
        # The text after the last newline is emitted without a trailing \n.
        for piece in wrapline(final):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
103
def extractpoline(line):
    """Remove quotes and unescape a line from a po file.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of quote.extractwithoutquotes
    """
    result = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)
    return result[0]
111
def unquotefrompo(postr):
    """Join a sequence of quoted PO lines into one unquoted unicode string.

    @param postr: iterable of quoted lines
    """
    extracted = []
    for quotedline in postr:
        extracted.append(extractpoline(quotedline))
    return u"".join(extracted)
114
def encodingToUse(encoding):
    """Return the encoding to use, falling back to 'utf-8'.

    The placeholder value "CHARSET" and None are both replaced by 'utf-8';
    any other value is returned unchanged.  Note that no check is made that
    the returned encoding is actually known to the python runtime.

    @param encoding: encoding name, "CHARSET" or None
    @return: the encoding name to use
    """
    if encoding is None or encoding == "CHARSET":
        return 'utf-8'
    return encoding
def is_null(lst):
    """Return True for an empty list or a list holding only the line '""'."""
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
131
def extractstr(string):
    """Return the part of string from its first to its last double quote.

    When string contains no double quote at all, the slice starting at the
    (failed) find position is returned with a closing quote appended.
    """
    first = string.find('"')
    last = string.rfind('"')
    if last < 0:
        return string[first:] + '"'
    return string[first:last + 1]
139
class pounit(pocommon.pounit):
    """A single unit (entry) of a .po file.

    The parts of the unit are stored as lists of lines exactly as they appear
    in the file (quoted and escaped); msgstr is a dict of such lists, keyed by
    plural index, when the target was set with several plural forms.
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create an empty unit and hand source to the base class.

        @param source: passed on to pocommon.pounit.__init__
        @param encoding: encoding used to decode byte strings given to this
            unit; normalised with encodingToUse
        """
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self.prev_msgctxt = []
        self.prev_msgid = []
        self.prev_msgid_plural = []
        self.msgctxt = []
        self.msgid = []
        self.msgid_pluralcomments = []
        self.msgid_plural = []
        self.msgstr = []
        self.obsoletemsgctxt = []
        self.obsoletemsgid = []
        self.obsoletemsgid_pluralcomments = []
        self.obsoletemsgid_plural = []
        self.obsoletemsgstr = []
        pocommon.pounit.__init__(self, source)

    def _initallcomments(self, blankall=False):
        """Initialises allcomments"""
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomments = []
            self.obsoletemsgidcomments = []

    def _get_all_comments(self):
        """Return the six comment lists of this unit (the lists themselves, not copies)."""
        return [self.othercomments,
                self.automaticcomments,
                self.sourcecomments,
                self.typecomments,
                self.msgidcomments,
                self.obsoletemsgidcomments]

    allcomments = property(_get_all_comments)

    def _get_source_vars(self, msgid, msgid_plural):
        """Build a multistring from quoted msgid and msgid_plural lines.

        The plural form is only included when this unit has a plural.
        """
        multi = multistring(unquotefrompo(msgid), self._encoding)
        if self.hasplural():
            pluralform = unquotefrompo(msgid_plural)
            if isinstance(pluralform, str):
                pluralform = pluralform.decode(self._encoding)
            multi.strings.append(pluralform)
        return multi

    def _set_source_vars(self, source):
        """Quote source for storage.

        @param source: a string, a multistring or a list of strings
        @return: tuple (msgid lines, msgid_plural lines)
        """
        msgid = None
        msgid_plural = None
        if isinstance(source, str):
            source = source.decode(self._encoding)
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            msgid = quoteforpo(source[0])
            if len(source) > 1:
                msgid_plural = quoteforpo(source[1])
            else:
                msgid_plural = []
        else:
            msgid = quoteforpo(source)
            msgid_plural = []
        return msgid, msgid_plural

    def getsource(self):
        """Returns the unescaped msgid"""
        return self._get_source_vars(self.msgid, self.msgid_plural)

    def setsource(self, source):
        """Sets the msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.msgid, self.msgid_plural = self._set_source_vars(source)
    source = property(getsource, setsource)

    def _get_prev_source(self):
        """Returns the unescaped previous msgid"""
        return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)

    def _set_prev_source(self, source):
        """Sets the previous msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
    prev_source = property(_get_prev_source, _set_prev_source)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        if isinstance(self.msgstr, dict):
            # Visit the plural forms in key order so that the result does not
            # depend on dict iteration order.
            forms = [unquotefrompo(self.msgstr[key]) for key in sorted(self.msgstr)]
            multi = multistring(forms, self._encoding)
        else:
            multi = multistring(unquotefrompo(self.msgstr), self._encoding)
        return multi

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        @param target: a string, a multistring, a list or a dict of strings
        @raise ValueError: when the unit has no plural but target is a list
            or dict that does not hold exactly one element
        """
        self._rich_target = None
        if isinstance(target, str):
            target = target.decode(self._encoding)
        if self.hasplural():
            # multistring must be tested before basestring
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, basestring):
                target = [target]
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        if isinstance(target, list):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in enumerate(target)])
        elif isinstance(target, dict):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
        else:
            self.msgstr = quoteforpo(target)
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @raise ValueError: for any other origin value
        """
        if origin is None:
            comments = u"".join([comment[2:] for comment in self.othercomments])
            comments += u"".join([comment[3:] for comment in self.automaticcomments])
        elif origin == "translator":
            comments = u"".join([comment[2:] for comment in self.othercomments])
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"".join([comment[3:] for comment in self.automaticcomments])
        else:
            raise ValueError("Comment type not valid")
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: the note; ignored when empty or whitespace only
        @param origin: "programmer", "developer" or "source code" adds an
            automatic ("#. ") comment, anything else a translator ("# ") comment
        @param position: "append" adds after the existing comments, any other
            value puts the new note before them
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        # Default to translator comments; must be initialised here so that
        # the prepend branch below also works for translator notes.
        autocomments = False
        commentlist = self.othercomments
        linestart = "# "
        if origin in ["programmer", "developer", "source code"]:
            autocomments = True
            commentlist = self.automaticcomments
            linestart = "#. "
        newcomments = [linestart + line + "\n" for line in text.split("\n")]
        if position == "append":
            commentlist += newcomments
        else:
            newcomments += list(commentlist)
            if autocomments:
                self.automaticcomments = newcomments
            else:
                self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Deep copy this unit, copying the members named in __shallow__ by reference."""
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before descending, so that anything referring
        # back to this unit resolves to the copy and is not copied again.
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.iteritems():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see __deepcopy__)."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Return the length of the stripped msgid, plus that of the stripped plural if any."""
        if self.hasplural():
            return len(unquotefrompo(self.msgid).strip()) + len(unquotefrompo(self.msgid_plural).strip())
        else:
            return len(unquotefrompo(self.msgid).strip())

    def _msgstrlen(self):
        """Return the length of the stripped msgstr (plural forms joined by newlines)."""
        if isinstance(self.msgstr, dict):
            combinedstr = "\n".join([unquotefrompo(msgstr).strip() for msgstr in self.msgstr.itervalues()])
            return len(combinedstr.strip())
        else:
            return len(unquotefrompo(self.msgstr).strip())

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        When otherpo is not a pounit the merge is delegated to the base class.
        Note that a KDE style comment found in otherpo's target is removed
        from otherpo itself before the target is taken over.
        """

        def mergelists(list1, list2, split=False):
            """Merge the comment lines of list2 into list1, in place."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list1
            lineend = ""
            if list1 and list1[0]:
                # the last matching candidate wins
                for candidate in ["\n", "\r", "\n\r"]:
                    if list1[0].endswith(candidate):
                        lineend = candidate
            else:
                lineend = "\n"

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                for item in list1:
                    splitlist1.extend(item.split()[1:])
                    prefix = item.split()[0]
                for item in list2:
                    splitlist2.extend(item.split()[1:])
                    prefix = item.split()[0]
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            item = item.rstrip() + lineend
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Return whether this unit is a header: blank msgid and msgctxt, no msgid comments, non-blank msgstr."""
        #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0)
        #rewritten here for performance:
        return (is_null(self.msgid)
                and not is_null(self.msgstr)
                and self.msgidcomments == []
                and is_null(self.msgctxt)
               )

    def isblank(self):
        """Return whether msgid, msgstr and msgctxt are all blank (never True for a header or a unit with msgid comments)."""
        if self.isheader() or len(self.msgidcomments):
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
            return True
        return False
        # TODO: remove:
        # Before, the equivalent of the following was the final return statement:
        # return len(self.source.strip()) == 0

    def hastypecomment(self, typecomment):
        """check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        for tcline in self.typecomments:
            if re.search(pattern, tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.replace("#", "", 1).strip().startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) != present:
            if present:
                self.typecomments.append("#, %s\n" % typecomment)
            else:
                # this should handle word boundaries properly ...
                pattern = "\\b%s\\b[ \t,]*" % typecomment
                typecomments = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
                # drop lines that are left with nothing but the "#," marker
                self.typecomments = [tcline for tcline in typecomments if tcline.strip() != "#,"]

    def isfuzzy(self):
        """Return whether the unit carries the "fuzzy" type comment."""
        return self.hastypecomment("fuzzy")

    def markfuzzy(self, present=True):
        """Add (or, with present=False, remove) the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def isobsolete(self):
        """Return the obsolete flag of this unit."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        if self.msgctxt:
            self.obsoletemsgctxt = self.msgctxt
        if self.msgid:
            self.obsoletemsgid = self.msgid
            self.msgid = []
        if self.msgidcomments:
            self.obsoletemsgidcomments = self.msgidcomments
            self.msgidcomments = []
        if self.msgid_plural:
            self.obsoletemsgid_plural = self.msgid_plural
            self.msgid_plural = []
        if self.msgstr:
            self.obsoletemsgstr = self.msgstr
            self.msgstr = []
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False
        if self.obsoletemsgctxt:
            # the context belongs in msgctxt, not in msgid
            self.msgctxt = self.obsoletemsgctxt
            self.obsoletemsgctxt = []
        if self.obsoletemsgid:
            self.msgid = self.obsoletemsgid
            self.obsoletemsgid = []
        if self.obsoletemsgidcomments:
            self.msgidcomments = self.obsoletemsgidcomments
            self.obsoletemsgidcomments = []
        if self.obsoletemsgid_plural:
            self.msgid_plural = self.obsoletemsgid_plural
            self.obsoletemsgid_plural = []
        if self.obsoletemsgstr:
            self.msgstr = self.obsoletemsgstr
            self.obsoletemsgstr = []

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        return len(self.msgid_plural) > 0

    def parse(self, src):
        """Parse the unit text src into this unit using poparser."""
        return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)

    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Return the text of one part (msgctxt, msgid, msgstr...) of the unit.

        @param partname: the keyword that starts the part
        @param partlines: list of quoted lines, or a dict of such lists keyed
            by plural index (each entry is then written as partname[index])
        @param partcomments: quoted KDE style comment lines to write at the
            start of the part
        """
        if isinstance(partlines, dict):
            partkeys = partlines.keys()
            partkeys.sort()
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
        partstr = partname + " "
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr

    def _encodeifneccessary(self, output):
        """encodes unicode strings and returns other strings unchanged"""
        if isinstance(output, unicode):
            # The unit keeps its encoding in _encoding (set in __init__).
            encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
            return output.encode(encoding)
        return output

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        return self._encodeifneccessary(output)

    def _getoutput(self):
        """return this po element as a string"""
        def add_prev_msgid_lines(lines, header, var):
            """Append the lines of var to lines as "#| " comments, the first one led by header."""
            if len(var) > 0:
                lines.append("#| %s %s\n" % (header, var[0]))
                lines.extend("#| %s\n" % line for line in var[1:])

        def add_prev_msgid_info(lines):
            """Append the previous msgctxt, msgid and msgid_plural to lines."""
            add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            if self.obsoletemsgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
            if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            lines = [self._encodeifneccessary(line) for line in lines]
            return "".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return "".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines)
        if self.msgctxt:
            lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr("msgstr", self.msgstr))
        lines = [self._encodeifneccessary(line) for line in lines]
        postr = "".join(lines)
        return postr

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        locations = []
        for sourcecomment in self.sourcecomments:
            locations += quote.rstripeol(sourcecomment)[3:].split()
        return locations

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        self.sourcecomments.append("#: %s\n" % location)

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: text to extract the comment from; when empty or None the
            unit's own msgidcomments are used
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """

        if not text:
            text = unquotefrompo(self.msgidcomments)
        return text.split('\n')[0].replace('_: ', '', 1)

    def getcontext(self):
        """Get the message context."""
        return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        id = '\0'.join(self.source.strings)
        unitid = self.source
        if self.msgidcomments:
            unitid = "_: %s\n%s" % (context, unitid)
        elif context:
            unitid = "%s\04%s" % (context, unitid)
        return unitid
684
685 -class pofile(pocommon.pofile):
686 """this represents a .po file containing various units""" 687 UnitClass = pounit
688 - def __init__(self, inputfile=None, encoding=None, unitclass=pounit):
689 """construct a pofile, optionally reading in from inputfile. 690 encoding can be specified but otherwise will be read from the PO header""" 691 self.UnitClass = unitclass 692 pocommon.pofile.__init__(self, unitclass=unitclass) 693 self.units = [] 694 self.filename = '' 695 self._encoding = encodingToUse(encoding) 696 if inputfile is not None: 697 self.parse(inputfile)
698
699 - def changeencoding(self, newencoding):
700 """Deprecated: changes the encoding on the file.""" 701 # This should not be here but in poheader. It also shouldn't mangle the 702 # header itself, but use poheader methods. All users are removed, so 703 # we can deprecate after one release. 704 raise DeprecationWarning 705 706 self._encoding = encodingToUse(newencoding) 707 if not self.units: 708 return 709 header = self.header() 710 if not header or header.isblank(): 711 return 712 charsetline = None 713 headerstr = unquotefrompo(header.msgstr) 714 for line in headerstr.split("\n"): 715 if not ":" in line: 716 continue 717 key, value = line.strip().split(":", 1) 718 if key.strip() != "Content-Type": 719 continue 720 charsetline = line 721 if charsetline is None: 722 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 723 else: 724 charset = re.search("charset=([^ ]*)", charsetline) 725 if charset is None: 726 newcharsetline = charsetline 727 if not newcharsetline.strip().endswith(";"): 728 newcharsetline += ";" 729 newcharsetline += " charset=%s" % self._encoding 730 else: 731 charset = charset.group(1) 732 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 733 headerstr = headerstr.replace(charsetline, newcharsetline, 1) 734 header.msgstr = quoteforpo(headerstr)
735
736 - def parse(self, input):
737 """parses the given file or file source string""" 738 try: 739 if hasattr(input, 'name'): 740 self.filename = input.name 741 elif not getattr(self, 'filename', ''): 742 self.filename = '' 743 if isinstance(input, str): 744 input = cStringIO.StringIO(input) 745 poparser.parse_units(poparser.ParseState(input, pounit), self) 746 except Exception, e: 747 raise base.ParseError(e)
748
749 - def removeduplicates(self, duplicatestyle="merge"):
750 """make sure each msgid is unique ; merge comments etc from duplicates into original""" 751 # TODO: can we handle consecutive calls to removeduplicates()? What 752 # about files already containing msgctxt? - test 753 id_dict = {} 754 uniqueunits = [] 755 # TODO: this is using a list as the pos aren't hashable, but this is slow. 756 # probably not used frequently enough to worry about it, though. 757 markedpos = [] 758 def addcomment(thepo): 759 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 760 markedpos.append(thepo)
761 for thepo in self.units: 762 id = thepo.getid() 763 if thepo.isheader() and not thepo.getlocations(): 764 # header msgids shouldn't be merged... 765 uniqueunits.append(thepo) 766 elif id in id_dict: 767 if duplicatestyle == "merge": 768 if id: 769 id_dict[id].merge(thepo) 770 else: 771 addcomment(thepo) 772 uniqueunits.append(thepo) 773 elif duplicatestyle == "msgctxt": 774 origpo = id_dict[id] 775 if origpo not in markedpos: 776 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations()))) 777 markedpos.append(thepo) 778 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 779 uniqueunits.append(thepo) 780 else: 781 if not id: 782 if duplicatestyle == "merge": 783 addcomment(thepo) 784 else: 785 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 786 id_dict[id] = thepo 787 uniqueunits.append(thepo) 788 self.units = uniqueunits
789
790 - def __str__(self):
791 """convert to a string. double check that unicode is handled somehow here""" 792 output = self._getoutput() 793 if isinstance(output, unicode): 794 return output.encode(getattr(self, "encoding", "UTF-8")) 795 return output
796
797 - def _getoutput(self):
798 """convert the units back to lines""" 799 lines = [] 800 for unit in self.units: 801 unitsrc = str(unit) + "\n" 802 lines.append(unitsrc) 803 lines = "".join(self.encode(lines)).rstrip() 804 #After the last pounit we will have \n\n and we only want to end in \n: 805 if lines: 806 lines += "\n" 807 return lines
808
809 - def encode(self, lines):
810 """encode any unicode strings in lines in self._encoding""" 811 newlines = [] 812 encoding = self._encoding 813 if encoding is None or encoding.lower() == "charset": 814 encoding = 'UTF-8' 815 for line in lines: 816 if isinstance(line, unicode): 817 line = line.encode(encoding) 818 newlines.append(line) 819 return newlines
820
821 - def decode(self, lines):
822 """decode any non-unicode strings in lines with self._encoding""" 823 newlines = [] 824 for line in lines: 825 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 826 try: 827 line = line.decode(self._encoding) 828 except UnicodeError, e: 829 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 830 newlines.append(line) 831 return newlines
832
833 - def unit_iter(self):
834 for unit in self.units: 835 if not (unit.isheader() or unit.isobsolete()): 836 yield unit
837