Package translate :: Package tools :: Module pocount
[hide private]
[frames | no frames]

Source Code for Module translate.tools.pocount

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2003-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Create string and word counts for supported localization files including: 
 22  XLIFF, TMX, Gettext PO and MO, Qt .ts and .qm, Wordfast TM, etc 
 23   
 24  See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and 
 25  usage instructions 
 26  """ 
 27   
 28  from translate.storage import factory 
 29  from translate.storage import statsdb 
 30  from optparse import OptionParser 
 31  import sys 
 32  import os 
 33   
 34  # define style constants 
 35  style_full, style_csv, style_short_strings, style_short_words = range(4) 
 36   
 37  # default output style 
 38  default_style = style_full 
 39   
 40   
41 -def calcstats_old(filename):
42 """This is the previous implementation of calcstats() and is left for 43 comparison and debuging purposes.""" 44 # ignore totally blank or header units 45 try: 46 store = factory.getobject(filename) 47 except ValueError, e: 48 print str(e) 49 return {} 50 units = filter(lambda unit: not unit.isheader(), store.units) 51 translated = translatedmessages(units) 52 fuzzy = fuzzymessages(units) 53 review = filter(lambda unit: unit.isreview(), units) 54 untranslated = untranslatedmessages(units) 55 wordcounts = dict(map(lambda unit: (unit, statsdb.wordsinunit(unit)), units)) 56 sourcewords = lambda elementlist: sum(map(lambda unit: wordcounts[unit][0], elementlist)) 57 targetwords = lambda elementlist: sum(map(lambda unit: wordcounts[unit][1], elementlist)) 58 stats = {} 59 60 #units 61 stats["translated"] = len(translated) 62 stats["fuzzy"] = len(fuzzy) 63 stats["untranslated"] = len(untranslated) 64 stats["review"] = len(review) 65 stats["total"] = stats["translated"] + stats["fuzzy"] + stats["untranslated"] 66 67 #words 68 stats["translatedsourcewords"] = sourcewords(translated) 69 stats["translatedtargetwords"] = targetwords(translated) 70 stats["fuzzysourcewords"] = sourcewords(fuzzy) 71 stats["untranslatedsourcewords"] = sourcewords(untranslated) 72 stats["reviewsourcewords"] = sourcewords(review) 73 stats["totalsourcewords"] = stats["translatedsourcewords"] + \ 74 stats["fuzzysourcewords"] + \ 75 stats["untranslatedsourcewords"] 76 return stats
77
78 -def calcstats(filename):
79 statscache = statsdb.StatsCache() 80 return statscache.filetotals(filename)
81
82 -def summarize(title, stats, style=style_full, indent=8, incomplete_only=False):
83 """ 84 Print summary for a .po file in specified format. 85 86 @param title: name of .po file 87 @param stats: array with translation statistics for the file specified 88 @param indent: indentation of the 2nd column (length of longest filename) 89 @param incomplete_only: omit fully translated files 90 @type incomplete_only: Boolean 91 @rtype: Boolean 92 @return: 1 if counting incomplete files (incomplete_only=True) and the 93 file is completely translated, 0 otherwise 94 """ 95 def percent(denominator, devisor): 96 if devisor == 0: 97 return 0 98 else: 99 return denominator*100/devisor
100 101 if incomplete_only and (stats["total"] == stats["translated"]): 102 return 1 103 104 if (style == style_csv): 105 print "%s, " % title, 106 print "%d, %d, %d," % (stats["translated"], stats["translatedsourcewords"], stats["translatedtargetwords"]), 107 print "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]), 108 print "%d, %d," % (stats["untranslated"], stats["untranslatedsourcewords"]), 109 print "%d, %d" % (stats["total"], stats["totalsourcewords"]), 110 if stats["review"] > 0: 111 print ", %d, %d" % (stats["review"], stats["reviewsourdcewords"]), 112 print 113 elif (style == style_short_strings): 114 spaces = " "*(indent - len(title)) 115 print "%s%s strings: total: %d\t| %dt\t%df\t%du\t| %d%%t\t%d%%f\t%d%%u" % (title, spaces,\ 116 stats["total"], stats["translated"], stats["fuzzy"], stats["untranslated"], \ 117 percent(stats["translated"], stats["total"]), \ 118 percent(stats["fuzzy"], stats["total"]), \ 119 percent(stats["untranslated"], stats["total"])) 120 elif (style == style_short_words): 121 spaces = " "*(indent - len(title)) 122 print "%s%s source words: total: %d\t| %dt\t%df\t%du\t| %d%%t\t%d%%f\t%d%%u" % (title, spaces,\ 123 stats["totalsourcewords"], stats["translatedsourcewords"], stats["fuzzysourcewords"], stats["untranslatedsourcewords"], \ 124 percent(stats["translatedsourcewords"], stats["totalsourcewords"]), \ 125 percent(stats["fuzzysourcewords"], stats["totalsourcewords"]), \ 126 percent(stats["untranslatedsourcewords"], stats["totalsourcewords"])) 127 else: # style == style_full 128 print title 129 print "type strings words (source) words (translation)" 130 print "translated: %5d (%3d%%) %10d (%3d%%) %15d" % \ 131 (stats["translated"], \ 132 percent(stats["translated"], stats["total"]), \ 133 stats["translatedsourcewords"], \ 134 percent(stats["translatedsourcewords"], stats["totalsourcewords"]), \ 135 stats["translatedtargetwords"]) 136 print "fuzzy: %5d (%3d%%) %10d (%3d%%) n/a" % \ 137 (stats["fuzzy"], \ 138 
percent(stats["fuzzy"], stats["total"]), \ 139 stats["fuzzysourcewords"], \ 140 percent(stats["fuzzysourcewords"], stats["totalsourcewords"])) 141 print "untranslated: %5d (%3d%%) %10d (%3d%%) n/a" % \ 142 (stats["untranslated"], \ 143 percent(stats["untranslated"], stats["total"]), \ 144 stats["untranslatedsourcewords"], \ 145 percent(stats["untranslatedsourcewords"], stats["totalsourcewords"])) 146 print "Total: %5d %17d %22d" % \ 147 (stats["total"], \ 148 stats["totalsourcewords"], \ 149 stats["translatedtargetwords"]) 150 if stats["review"] > 0: 151 print "review: %5d %17d n/a" % \ 152 (stats["review"], stats["reviewsourcewords"]) 153 print 154 return 0 155
156 -def fuzzymessages(units):
157 return filter(lambda unit: unit.isfuzzy() and unit.target, units)
158
159 -def translatedmessages(units):
160 return filter(lambda unit: unit.istranslated(), units)
161
162 -def untranslatedmessages(units):
163 return filter(lambda unit: not (unit.istranslated() or unit.isfuzzy()) and unit.source, units)
164
165 -class summarizer:
166 - def __init__(self, filenames, style=default_style, incomplete_only=False):
167 self.totals = {} 168 self.filecount = 0 169 self.longestfilename = 0 170 self.style = style 171 self.incomplete_only = incomplete_only 172 self.complete_count = 0 173 174 if (self.style == style_csv): 175 print "Filename, Translated Messages, Translated Source Words, Translated \ 176 Target Words, Fuzzy Messages, Fuzzy Source Words, Untranslated Messages, \ 177 Untranslated Source Words, Total Message, Total Source Words, \ 178 Review Messages, Review Source Words" 179 if (self.style == style_short_strings or self.style == style_short_words): 180 for filename in filenames: # find longest filename 181 if (len(filename) > self.longestfilename): 182 self.longestfilename = len(filename) 183 for filename in filenames: 184 if not os.path.exists(filename): 185 print >> sys.stderr, "cannot process %s: does not exist" % filename 186 continue 187 elif os.path.isdir(filename): 188 self.handledir(filename) 189 else: 190 self.handlefile(filename) 191 if self.filecount > 1 and (self.style == style_full): 192 if self.incomplete_only: 193 summarize("TOTAL (incomplete only):", self.totals, incomplete_only=True) 194 print "File count (incomplete): %5d" % (self.filecount - self.complete_count) 195 else: 196 summarize("TOTAL:", self.totals, incomplete_only=False) 197 print "File count: %5d" % (self.filecount) 198 print
199
200 - def updatetotals(self, stats):
201 """Update self.totals with the statistics in stats.""" 202 for key in stats.keys(): 203 if not self.totals.has_key(key): 204 self.totals[key] = 0 205 self.totals[key] += stats[key]
206
207 - def handlefile(self, filename):
208 try: 209 stats = calcstats(filename) 210 self.updatetotals(stats) 211 self.complete_count += summarize(filename, stats, self.style, self.longestfilename, self.incomplete_only) 212 self.filecount += 1 213 except: # This happens if we have a broken file. 214 print >> sys.stderr, sys.exc_info()[1]
215
216 - def handlefiles(self, dirname, filenames):
217 for filename in filenames: 218 pathname = os.path.join(dirname, filename) 219 if os.path.isdir(pathname): 220 self.handledir(pathname) 221 else: 222 self.handlefile(pathname)
223
224 - def handledir(self, dirname):
225 path, name = os.path.split(dirname) 226 if name in ["CVS", ".svn", "_darcs", ".git", ".hg", ".bzr"]: 227 return 228 entries = os.listdir(dirname) 229 self.handlefiles(dirname, entries)
230
231 -def main():
232 parser = OptionParser(usage="usage: %prog [options] po-files") 233 parser.add_option("--incomplete", action="store_const", const = True, dest = "incomplete_only", 234 help="skip 100% translated files.") 235 # options controlling output format: 236 parser.add_option("--full", action="store_const", const = style_csv, dest = "style_full", 237 help="(default) statistics in full, verbose format") 238 parser.add_option("--csv", action="store_const", const = style_csv, dest = "style_csv", 239 help="statistics in CSV format") 240 parser.add_option("--short", action="store_const", const = style_csv, dest = "style_short_strings", 241 help="same as --short-strings") 242 parser.add_option("--short-strings", action="store_const", const = style_csv, dest = "style_short_strings", 243 help="statistics of strings in short format - one line per file") 244 parser.add_option("--short-words", action="store_const", const = style_csv, dest = "style_short_words", 245 help="statistics of words in short format - one line per file") 246 247 (options, args) = parser.parse_args() 248 249 if (options.incomplete_only == None): 250 options.incomplete_only = False 251 252 if (options.style_full and options.style_csv) or \ 253 (options.style_full and options.style_short_strings) or \ 254 (options.style_full and options.style_short_words) or \ 255 (options.style_csv and options.style_short_strings) or \ 256 (options.style_csv and options.style_short_words) or \ 257 (options.style_short_strings and options.style_short_words): 258 parser.error("options --full, --csv, --short-strings and --short-words are mutually exclusive") 259 sys.exit(2) 260 261 style = default_style # default output style 262 if options.style_csv: 263 style = style_csv 264 if options.style_full: 265 style = style_full 266 if options.style_short_strings: 267 style = style_short_strings 268 if options.style_short_words: 269 style = style_short_words 270 271 try: 272 import psyco 273 psyco.full() 274 except Exception: 275 pass 
276 277 summarizer(args, style, options.incomplete_only)
278 279 if __name__ == '__main__': 280 main() 281