Package translate :: Package storage :: Module csvl10n
[hide private]
[frames | no frames]

Source Code for Module translate.storage.csvl10n

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """classes that hold units of comma-separated values (.csv) files (csvunit) 
 23  or entire files (csvfile) for use with localisation 
 24  """ 
 25   
 26  import csv 
 27   
 28  from translate.misc import sparse 
 29  from translate.storage import base 
 30   
 31   
class SimpleDictReader:
    """Iterator that yields one dict per line of a comma-separated file.

    The whole of ``fileobj`` is read and tokenized up front with
    ``sparse.SimpleParser`` (``","`` and ``"\\n"`` as tokens, ``"\\r"`` as
    whitespace, ``'"'`` as the quote character).  Each call to ``next()``
    consumes the tokens of one non-empty line and maps them onto
    ``fieldnames`` by position.
    """

    def __init__(self, fileobj, fieldnames):
        """Read and tokenize ``fileobj``.

        ``fieldnames`` is the ordered list of column names used as the keys
        of every dict this reader returns.
        """
        self.fieldnames = fieldnames
        self.contents = fileobj.read()
        self.parser = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r")
        self.parser.stringescaping = 0
        self.parser.quotechars = '"'
        self.tokens = self.parser.tokenize(self.contents)
        self.tokenpos = 0

    def __iter__(self):
        """The reader is its own iterator."""
        return self

    def getvalue(self, value):
        """returns a value, evaluating strings as necessary

        A value counts as a quoted string only if it is at least two
        characters long and starts and ends with the same quote character.
        A lone ``'`` or ``"`` satisfies both startswith() and endswith() but
        is not a quoted string, so it is returned untouched instead of being
        handed to ``sparse.stringeval``.
        """
        if len(value) >= 2:
            singlequoted = value.startswith("'") and value.endswith("'")
            doublequoted = value.startswith('"') and value.endswith('"')
            if singlequoted or doublequoted:
                return sparse.stringeval(value)
        return value

    def _skipnewlines(self):
        """Advance ``tokenpos`` past any run of newline tokens."""
        lentokens = len(self.tokens)
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
            self.tokenpos += 1

    def next(self):
        """Return the next line as a dict mapping field name to value.

        Blank lines are skipped.  Columns missing from the line are filled
        with ``""``; columns beyond ``len(self.fieldnames)`` are dropped.
        Raises StopIteration once all tokens have been consumed.
        """
        lentokens = len(self.tokens)
        self._skipnewlines()
        if self.tokenpos >= lentokens:
            raise StopIteration()
        # collect the tokens that make up this line
        thistokens = []
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n":
            thistokens.append(self.tokens[self.tokenpos])
            self.tokenpos += 1
        self._skipnewlines()
        fields = []
        # patch together fields since we can have quotes inside a field
        currentfield = ''
        fieldparts = 0
        for token in thistokens:
            if token == ',':
                # a field is only quoted if the whole thing is quoted
                if fieldparts == 1:
                    currentfield = self.getvalue(currentfield)
                fields.append(currentfield)
                currentfield = ''
                fieldparts = 0
            else:
                currentfield += token
                fieldparts += 1
        # things after the last comma...
        if fieldparts:
            if fieldparts == 1:
                currentfield = self.getvalue(currentfield)
            fields.append(currentfield)
        values = {}
        for fieldnum, fieldname in enumerate(self.fieldnames):
            if fieldnum < len(fields):
                values[fieldname] = fields[fieldnum]
            else:
                values[fieldname] = ""
        return values

    # Python 3 spelling of the iterator protocol; next() is kept for
    # existing callers.
    __next__ = next
91 92
class csvunit(base.TranslationUnit):
    """A single row of a .csv localisation file.

    Holds a location ``comment``, a ``source`` string and a ``target``
    string, and converts to and from the dict form used by the csv
    reader/writer (keys ``location``, ``source`` and ``target``).
    """

    # (unescaped, escaped) pairs applied to the first character of a string.
    # NOTE(review): presumably these are the leading characters a spreadsheet
    # would interpret specially - confirm against the format documentation.
    spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]

    def __init__(self, source=None):
        """Create a unit for ``source`` with empty comment and target."""
        super(csvunit, self).__init__(source)
        self.comment = ""
        self.source = source
        self.target = ""

    def add_spreadsheet_escapes(self, source, target):
        """add common spreadsheet escapes to two strings"""
        for unescaped, escaped in self.spreadsheetescapes:
            # only the leading character is ever escaped
            if source.startswith(unescaped):
                source = source.replace(unescaped, escaped, 1)
            if target.startswith(unescaped):
                target = target.replace(unescaped, escaped, 1)
        return source, target

    def remove_spreadsheet_escapes(self, source, target):
        """remove common spreadsheet escapes from two strings"""
        for unescaped, escaped in self.spreadsheetescapes:
            # only a leading escape sequence is ever removed
            if source.startswith(escaped):
                source = source.replace(escaped, unescaped, 1)
            if target.startswith(escaped):
                target = target.replace(escaped, unescaped, 1)
        return source, target

    def _decodefield(self, value):
        """Return ``value`` as unicode text, treating None as empty.

        None has to be handled before decoding (it has no decode method),
        and values that are already unicode are returned as they are rather
        than being decoded a second time.
        """
        if value is None:
            value = ''
        if not isinstance(value, unicode):
            value = value.decode('utf-8')
        return value

    def fromdict(self, cedict):
        """Populate this unit from a row dict.

        Reads the ``location``, ``source`` and ``target`` keys of ``cedict``
        (missing or None entries become empty strings), decodes byte strings
        as UTF-8 and strips spreadsheet escapes from source and target.
        """
        self.comment = self._decodefield(cedict.get('location', ''))
        self.source = self._decodefield(cedict.get('source', ''))
        self.target = self._decodefield(cedict.get('target', ''))
        self.source, self.target = self.remove_spreadsheet_escapes(self.source, self.target)

    def todict(self, encoding='utf-8'):
        """Return this unit as a row dict of encoded strings.

        Spreadsheet escapes are added to source and target, and any unicode
        value is encoded with ``encoding``.  The result has the keys
        ``location``, ``source`` and ``target``.
        """
        comment, source, target = self.comment, self.source, self.target
        source, target = self.add_spreadsheet_escapes(source, target)
        if isinstance(comment, unicode):
            comment = comment.encode(encoding)
        if isinstance(source, unicode):
            source = source.encode(encoding)
        if isinstance(target, unicode):
            target = target.encode(encoding)
        return {'location': comment, 'source': source, 'target': target}
141
class csvfile(base.TranslationStore):
    """This class represents a .csv file with various lines.
    The default format contains three columns: location, source, target"""
    UnitClass = csvunit
    Name = _("Comma Separated Value")
    Mimetypes = ['text/comma-separated-values', 'text/csv']
    Extensions = ["csv"]

    def __init__(self, inputfile=None, fieldnames=None):
        """Create a store, optionally parsing ``inputfile``.

        ``inputfile`` is a file-like object; it is read completely and then
        closed.  ``fieldnames`` is either a list of column names or a single
        comma-separated string of them; it defaults to
        ``['location', 'source', 'target']``.
        """
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.units = []
        if fieldnames is None:
            self.fieldnames = ['location', 'source', 'target']
        else:
            if isinstance(fieldnames, basestring):
                fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
            self.fieldnames = fieldnames
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            # close the file even if reading it fails
            try:
                csvsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(csvsrc)

    def parse(self, csvsrc):
        """Parse the csv text ``csvsrc`` and add one unit per row."""
        # NOTE(review): csv.StringIO is not a documented attribute of the csv
        # module; this relies on the csv module importing StringIO itself.
        srcfile = csv.StringIO(csvsrc)
        reader = SimpleDictReader(srcfile, self.fieldnames)
        for row in reader:
            newce = self.UnitClass()
            newce.fromdict(row)
            self.addunit(newce)

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Return all units serialised as csv text, one row per unit."""
        outputfile = csv.StringIO()
        # Each unit's todict() always supplies location, source and target;
        # ignore whichever of those are not among self.fieldnames instead of
        # letting DictWriter raise ValueError for custom field lists.
        writer = csv.DictWriter(outputfile, self.fieldnames, extrasaction='ignore')
        for ce in self.units:
            writer.writerow(ce.todict())
        # getvalue() returns everything written so far without needing to
        # rewind the buffer first
        return outputfile.getvalue()
187