1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 from UserDict import UserDict
22
23 """Module to provide a cache of statistics in a database.
24
25 @organization: Zuza Software Foundation
26 @copyright: 2007 Zuza Software Foundation
27 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
28 """
29
30 from translate import __version__ as toolkitversion
31 from translate.storage import factory
32 from translate.misc.multistring import multistring
33 from translate.lang.common import Common
34
35 try:
36 from sqlite3 import dbapi2
37 except ImportError:
38 from pysqlite2 import dbapi2
39 import os.path
40 import re
41 import sys
42 import stat
43 import thread
44
# Pre-compiled patterns, written as raw strings so that the backslash
# sequences reach the regex engine untouched.
kdepluralre = re.compile(r"^_n: ")    # a leading "_n: " marker
brtagre = re.compile(r"<br\s*?/?>")   # <br>, <br/>, <br />
xmltagre = re.compile(r"<[^>]+>")     # any single <...> tag
numberre = re.compile(r"\D\.\D")      # a dot between two non-digits

# Name of each numeric unit state stored in the units table.
state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"}
61
def wordsinunit(unit):
    """Counts the words in the unit's source and target, taking plurals into
    account. The target words are only counted if the unit is translated.

    Returns a (sourcewords, targetwords) tuple; targetwords is 0 for a unit
    that is not translated."""

    def count(text):
        # A multistring holds several strings (the plural forms); every one
        # of them contributes to the total. Anything else is counted as a
        # single string, with a missing value treated as empty.
        if isinstance(text, multistring):
            strings = text.strings
        else:
            strings = [text or ""]
        return sum([wordcount(s) for s in strings])

    sourcewords = count(unit.source)
    if not unit.istranslated():
        return sourcewords, 0
    return sourcewords, count(unit.target)
81
83 - def __init__(self, record_keys, record_values=None, compute_derived_values = lambda x: x):
90
92 return tuple(self[key] for key in self.record_keys)
93
100
107
110
def transaction(f):
    """Modifies f to commit database changes if it executes without exceptions.
    Otherwise it rolls back the database.

    ALL publicly accessible methods in StatsCache MUST be decorated with this
    decorator.

    f is called as f(self, *args, **kwargs) and self.con must be the
    connection to commit or roll back. The wrapper returns whatever f
    returns and re-raises whatever f (or the commit) raises.
    """

    def decorated_f(self, *args, **kwargs):
        try:
            result = f(self, *args, **kwargs)
            self.con.commit()
            return result
        except:
            # Deliberately bare: whatever interrupted the call, the partial
            # changes are undone and the exception is passed on unchanged.
            if self.con:
                self.con.rollback()
            raise

    # Carry over the identity of the wrapped callable so that docstrings
    # and names seen through introspection describe f, not the wrapper.
    decorated_f.__name__ = getattr(f, "__name__", decorated_f.__name__)
    decorated_f.__doc__ = getattr(f, "__doc__", None)
    return decorated_f
132
# Numeric unit states as written to the state column of the units table.
UNTRANSLATED, TRANSLATED, FUZZY = 0, 1, 2


def statefordb(unit):
    """Returns the numeric database state for the unit.

    TRANSLATED when unit.istranslated() holds; FUZZY when the unit is fuzzy
    and has a non-empty target; UNTRANSLATED otherwise."""
    if unit.istranslated():
        return TRANSLATED
    # The fuzzy flag only counts when there is target text.
    if unit.isfuzzy() and unit.target:
        return FUZZY
    return UNTRANSLATED
141
# Names of the stored counters, in the same order as the columns that
# follow fileid in the filetotals table.
keys = ['translatedsourcewords',
        'fuzzysourcewords',
        'untranslatedsourcewords',
        'translated',
        'fuzzy',
        'untranslated',
        'translatedtargetwords']
150
153
def __init__(self, cur):
    """Keeps the database cursor and makes sure the filetotals table exists.

    cur: cursor on which this object executes all of its statements.

    NOTE(review): the def line was not part of the reviewed text; the
    signature is inferred from the FileTotals(self.cur) call -- confirm."""
    self.cur = cur
    self.cur.execute("""
        CREATE TABLE IF NOT EXISTS filetotals(
            fileid                  INTEGER PRIMARY KEY AUTOINCREMENT,
            translatedsourcewords   INTEGER NOT NULL,
            fuzzysourcewords        INTEGER NOT NULL,
            untranslatedsourcewords INTEGER NOT NULL,
            translated              INTEGER NOT NULL,
            fuzzy                   INTEGER NOT NULL,
            untranslated            INTEGER NOT NULL,
            translatedtargetwords   INTEGER NOT NULL);""")
166
def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
    """Builds a totals record, optionally seeded with the counts of one unit.

    state_for_db: UNTRANSLATED, TRANSLATED or FUZZY; with None (the default)
        no counter is set here.
    sourcewords: source word count credited to the counter of that state.
    targetwords: target word count, only used for a TRANSLATED unit.
    """
    record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
    # States are plain integers, so they are compared by value; identity
    # comparison only works by accident of small-integer caching.
    if state_for_db == UNTRANSLATED:
        record['untranslated'] = 1
        record['untranslatedsourcewords'] = sourcewords
    elif state_for_db == TRANSLATED:
        record['translated'] = 1
        record['translatedsourcewords'] = sourcewords
        record['translatedtargetwords'] = targetwords
    elif state_for_db == FUZZY:
        record['fuzzy'] = 1
        record['fuzzysourcewords'] = sourcewords
    return record

new_record = classmethod(new_record)
183
def _compute_derived_values(cls, record):
    """Adds the values to record that follow from its stored counters.

    Sets "total" (sum of the three unit counters), "totalsourcewords" (sum
    of the three source word counters) and "review" (always 0). The record
    is changed in place; nothing is returned.

    NOTE(review): the def line was not part of the reviewed text; the
    signature is inferred from the classmethod() wrapping and the body --
    confirm."""
    record["total"] = (record["untranslated"] +
                       record["translated"] +
                       record["fuzzy"])
    record["totalsourcewords"] = (record["untranslatedsourcewords"] +
                                  record["translatedsourcewords"] +
                                  record["fuzzysourcewords"])
    record["review"] = 0
_compute_derived_values = classmethod(_compute_derived_values)
193
200
def __setitem__(self, fileid, record):
    """Stores record as the filetotals row of fileid, replacing an existing
    row with that fileid.

    NOTE(review): the def line was not part of the reviewed text; the
    signature is inferred from the "self.file_totals[fileid] = ..."
    assignment -- confirm."""
    # NOTE(review): the statement is built with string formatting. %d forces
    # fileid to be a number, but the value list is taken verbatim from
    # record.as_string_for_db() -- confirm that it can only ever contain
    # numeric counters before exposing this to untrusted data.
    self.cur.execute("""
        INSERT OR REPLACE into filetotals
        VALUES (%(fileid)d, %(vals)s);
    """ % {'fileid': fileid, 'vals': record.as_string_for_db()})
206
208 self.cur.execute("""
209 DELETE FROM filetotals
210 WHERE fileid=?;
211 """, (fileid,))
212
214 """Returns a dictionary with all statistics initialised to 0."""
215 return FileTotals.new_record()
216
219
def emptyfilestats():
    """Returns a new dictionary with an empty list for "total" and for each
    of the three unit states."""
    return {"total": [], "translated": [], "fuzzy": [], "untranslated": []}
222
def emptyunitstats():
    """Returns a new dictionary with empty "sourcewordcount" and
    "targetwordcount" lists."""
    return {"sourcewordcount": [], "targetwordcount": []}
225
def get_mod_info(file_path):
    """Returns the (st_mtime, st_size) pair reported by os.stat for file_path.

    Raises AssertionError when file_path is a directory, and whatever
    os.stat raises when the path cannot be examined."""
    file_stat = os.stat(file_path)
    # Raised explicitly instead of through an assert statement so that the
    # check is still performed when Python runs with -O.
    if stat.S_ISDIR(file_stat.st_mode):
        raise AssertionError("%s is a directory" % (file_path,))
    return file_stat.st_mtime, file_stat.st_size
236
def suggestion_extension():
    """Returns the file name extension of suggestion files: the platform's
    extension separator followed by "pending"."""
    return os.path.extsep + 'pending'
239
242
"""An object instantiated as a singleton for each statsfile that provides
access to the database cache from a pool of StatsCache objects."""
# Registry of the existing caches: thread ident -> {statsfile: cache}.
_caches = {}
# Path of the default statistics database, resolved on first use.
defaultfile = None
con = None
"""This cache's connection"""
cur = None
"""The current cursor"""
253
def __new__(cls, statsfile=None):
    """Returns the cache object for statsfile that belongs to the calling
    thread, creating and connecting it on first use.

    statsfile: path of the statistics database. When empty, "stats.db" in a
        directory below the user's home directory is used (the directory is
        created if needed).

    NOTE(review): the def line was not part of the reviewed text; the
    statsfile=None default is inferred from the "if not statsfile" test --
    confirm against callers."""
    current_thread = thread.get_ident()

    def make_database(statsfile):
        def connect(cache):
            cache.con = dbapi2.connect(statsfile)
            cache.cur = cache.con.cursor()

        def clear_old_data(cache):
            """Removes the database file when it has no file rows or was
            written by an older toolkit build; returns True if it did."""
            try:
                cache.cur.execute("""SELECT toolkitbuild FROM files""")
                val = cache.cur.fetchone()
                if val is None or val[0] < toolkitversion.build:
                    cache.con.close()
                    os.unlink(statsfile)
                    return True
                return False
            except dbapi2.OperationalError:
                # presumably the files table does not exist yet, i.e. the
                # database is new -- nothing to clear.
                return False

        # Register the new instance under (thread, statsfile) right away.
        cache = cls._caches.setdefault(current_thread, {})[statsfile] = object.__new__(cls)
        connect(cache)
        if clear_old_data(cache):
            # The file was deleted; connect again to start a fresh one.
            connect(cache)
        cache.create()
        return cache

    if not statsfile:
        if not cls.defaultfile:
            userdir = os.path.expanduser("~")
            if os.name == "nt":
                cachedir = os.path.join(userdir, "Translate Toolkit")
            else:
                cachedir = os.path.join(userdir, ".translate_toolkit")
            if not os.path.exists(cachedir):
                os.mkdir(cachedir)
            cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
        statsfile = cls.defaultfile
    else:
        statsfile = os.path.realpath(statsfile)

    if current_thread in cls._caches and statsfile in cls._caches[current_thread]:
        return cls._caches[current_thread][statsfile]

    return make_database(statsfile)
302
@transaction
def create(self):
    """Create all tables and indexes.

    Every statement uses IF NOT EXISTS, so running this against an already
    initialised database leaves it as it is."""
    # Constructed first, as in the original statement order.
    self.file_totals = FileTotals(self.cur)

    schema = (
        """CREATE TABLE IF NOT EXISTS files(
            fileid INTEGER PRIMARY KEY AUTOINCREMENT,
            path VARCHAR NOT NULL UNIQUE,
            st_mtime INTEGER NOT NULL,
            st_size INTEGER NOT NULL,
            toolkitbuild INTEGER NOT NULL);""",

        """CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
            ON files (path);""",

        """CREATE TABLE IF NOT EXISTS units(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            unitid VARCHAR NOT NULL,
            fileid INTEGER NOT NULL,
            unitindex INTEGER NOT NULL,
            source VARCHAR NOT NULL,
            target VARCHAR,
            state INTEGER,
            sourcewords INTEGER,
            targetwords INTEGER);""",

        """CREATE INDEX IF NOT EXISTS fileidindex
            ON units(fileid);""",

        """CREATE TABLE IF NOT EXISTS checkerconfigs(
            configid INTEGER PRIMARY KEY AUTOINCREMENT,
            config VARCHAR);""",

        """CREATE INDEX IF NOT EXISTS configindex
            ON checkerconfigs(config);""",

        """CREATE TABLE IF NOT EXISTS uniterrors(
            errorid INTEGER PRIMARY KEY AUTOINCREMENT,
            unitindex INTEGER NOT NULL,
            fileid INTEGER NOT NULL,
            configid INTEGER NOT NULL,
            name VARCHAR NOT NULL,
            message VARCHAR);""",

        """CREATE INDEX IF NOT EXISTS uniterrorindex
            ON uniterrors(fileid, configid);""",
    )
    for statement in schema:
        self.cur.execute(statement)
349
@transaction
def _getfileid(self, filename, check_mod_info=True, store=None):
    """return fileid representing the given file in the statscache.

    if file not in cache or has been updated since last record
    update, recalculate stats.

    optional argument store can be used to avoid unnecessary
    reparsing of already loaded translation files.

    store can be a TranslationFile object or a callback that returns one.

    With check_mod_info=False the stored modification info of an already
    known file is overwritten with the current one and nothing is
    recalculated; for a file that is not known yet this raises
    AssertionError.
    """
    realpath = os.path.realpath(filename)
    self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
        WHERE path=?;""", (realpath,))
    filerow = self.cur.fetchone()
    mod_info = get_mod_info(realpath)
    if filerow:
        fileid = filerow[0]
        if not check_mod_info:
            # Trust the cached statistics and just record the new mod info.
            self.cur.execute("""UPDATE files
                SET st_mtime=?, st_size=?
                WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
            return fileid
        if (filerow[1], filerow[2]) == mod_info:
            # Same time stamp and size as when the file was cached.
            return fileid

    # From here on the statistics are (re)calculated, which is only valid
    # when the caller asked for the modification info to be checked.
    assert check_mod_info
    if callable(store):
        store = store()
    else:
        store = store or factory.getobject(realpath)

    return self._cachestore(store, realpath, mod_info)
385
def _getstoredcheckerconfig(self, checker):
    """See if this checker configuration has been used before.

    The configuration is identified by str(checker.config.__dict__).
    Returns the configid of the stored configuration, or None when there is
    no stored configuration with exactly that text."""
    config = str(checker.config.__dict__)
    self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
        config=?;""", (config,))
    configrow = self.cur.fetchone()
    # The text is compared once more in Python before the row is trusted.
    if configrow and configrow[1] == config:
        return configrow[0]
    return None
396
@transaction
def _cacheunitstats(self, units, fileid, unitindex=None, file_totals_record=None):
    """Cache the statistics for the supplied unit(s).

    units: the units to store; units that are not translatable are skipped.
    fileid: id of the file the units belong to.
    unitindex: when given, the index to store for the unit(s) instead of
        their position in units (used when a single unit is recached).
    file_totals_record: totals to which the counts of the units are added;
        an empty record is used when it is omitted.

    Returns the state name of units[0] when unitindex is given, otherwise
    an empty string.

    NOTE(review): the def line was not part of the reviewed text; parameter
    order follows the visible call sites and the defaults are inferred --
    confirm."""
    if file_totals_record is None:
        file_totals_record = FileTotals.new_record()
    # Index 0 is a valid unit index, so test against None, not truthiness.
    single_unit = unitindex is not None

    unitvalues = []
    for index, unit in enumerate(units):
        if not unit.istranslatable():
            continue
        sourcewords, targetwords = wordsinunit(unit)
        if single_unit:
            index = unitindex
        state = statefordb(unit)
        unitvalues.append((unit.getid(), fileid, index,
                           unit.source, unit.target,
                           sourcewords, targetwords,
                           state))
        file_totals_record = file_totals_record + \
            FileTotals.new_record(state, sourcewords, targetwords)

    self.cur.executemany("""INSERT INTO units
        (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)
        values (?, ?, ?, ?, ?, ?, ?, ?);""",
        unitvalues)
    self.file_totals[fileid] = file_totals_record
    if single_unit:
        return state_strings[statefordb(units[0])]
    return ""
421
@transaction
def _cachestore(self, store, realpath, mod_info):
    """Calculates and caches the statistics of the given store
    unconditionally.

    store: object whose units are cached.
    realpath: path under which the file is registered in the files table.
    mod_info: (st_mtime, st_size) pair stored with the file.

    Returns the fileid of the newly written files row."""
    # Replace the files row for this path with a fresh one.
    self.cur.execute("""DELETE FROM files WHERE
        path=?;""", (realpath,))
    self.cur.execute("""INSERT INTO files
        (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
        (realpath, mod_info[0], mod_info[1], toolkitversion.build))
    # The id that the INSERT above assigned to the new row.
    fileid = self.cur.lastrowid
    self.cur.execute("""DELETE FROM units WHERE
        fileid=?""", (fileid,))
    self._cacheunitstats(store.units, fileid)
    return fileid
436
438 """Retrieves the statistics for the given file if possible, otherwise
439 delegates to cachestore()."""
440 return self.file_totals[self._getfileid(filename, store=store)]
441
@transaction
def _cacheunitschecks(self, units, fileid, configid, checker, unitindex=None):
    """Helper method for cachestorechecks() and recacheunit()

    Runs checker on every translatable unit and stores one uniterrors row
    per failed check.

    unitindex: when given, the index stored for the unit(s) instead of their
        position in units (a single unit is being recached).

    Returns the list of "check-<name>" strings of the failures found, with
    "total" appended when unitindex is given.

    NOTE(review): the def line and the original indentation were not part of
    the reviewed text; the signature follows the visible call sites and the
    suggestion store is assumed to be reset once, after the loop --
    confirm."""
    # Index 0 is a valid unit index, so test against None, not truthiness.
    single_unit = unitindex is not None

    # When a whole file is checked, a marker row with unit index -1 is
    # stored as well, so that there is at least one row for the file even
    # if no check failed. It is left out when only one unit is updated.
    dummy = (-1, fileid, configid, "noerror", "")
    if single_unit:
        unitvalues = []
    else:
        unitvalues = [dummy]

    errornames = []
    for index, unit in enumerate(units):
        if not unit.istranslatable():
            continue
        if single_unit:
            index = unitindex
        failures = checker.run_filters(unit)
        for checkname, checkmessage in failures.items():
            unitvalues.append((index, fileid, configid, checkname, checkmessage))
            errornames.append("check-" + checkname)
    checker.setsuggestionstore(None)

    if single_unit:
        errornames.append("total")

    self.cur.executemany("""INSERT INTO uniterrors
        (unitindex, fileid, configid, name, message)
        values (?, ?, ?, ?, ?);""",
        unitvalues)
    return errornames
474
@transaction
def _cachestorechecks(self, fileid, store, checker, configid):
    """Calculates and caches the error statistics of the given store
    unconditionally.

    All uniterrors rows of fileid are deleted before the units of store are
    checked again with checker. Returns fileid."""
    self.cur.execute("""DELETE FROM uniterrors WHERE
        fileid=?;""", (fileid,))
    self._cacheunitschecks(store.units, fileid, configid, checker)
    return fileid
485
def get_unit_stats(self, fileid, unitid):
    """Returns the stored (state, sourcewords, targetwords) row of the unit.

    When the units table has no row for fileid and unitid, a warning is
    written to stderr and an empty list is returned."""
    values = self.cur.execute("""
        SELECT state, sourcewords, targetwords
        FROM units
        WHERE fileid=? AND unitid=?
    """, (fileid, unitid))
    result = values.fetchone()
    if result is not None:
        return result
    # %s rather than %d: unitid is stored as text, and a %d conversion
    # would raise TypeError here instead of producing the warning.
    sys.stderr.write("WARNING: Database in inconsistent state.\n"
                     "fileid %s and unitid %s have no entries in the table units.\n"
                     % (fileid, unitid))
    return []
502
503 @transaction
505 """Recalculate all information for a specific unit. This is necessary
506 for updating all statistics when a translation of a unit took place,
507 for example.
508
509 This method assumes that everything was up to date before (file totals,
510 checks, checker config, etc."""
511 fileid = self._getfileid(filename, check_mod_info=False)
512 configid = self._get_config_id(fileid, checker)
513 unitid = unit.getid()
514
515 totals_without_unit = self.file_totals[fileid] - \
516 FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
517 self.cur.execute("""SELECT unitindex FROM units WHERE
518 fileid=? AND unitid=?;""", (fileid, unitid))
519 unitindex = self.cur.fetchone()[0]
520 self.cur.execute("""DELETE FROM units WHERE
521 fileid=? AND unitid=?;""", (fileid, unitid))
522 state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
523
524 self.cur.execute("""DELETE FROM uniterrors WHERE
525 fileid=? AND unitindex=?;""", (fileid, unitindex))
526 if os.path.exists(suggestion_filename(filename)):
527 checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
528 state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
529 return state
530
def _checkerrors(self, filename, fileid, configid, checker, store):
    """Makes sure the check results of the file are cached and starts
    reading them.

    store may be a store object, a callable returning one, or a false value
    (then the file is loaded from filename); it is only used when nothing is
    cached yet for fileid and configid.

    Returns (first, cur): the first (name, unitindex) row, or None if there
    is none, and the cursor from which the remaining rows can be fetched."""
    def geterrors():
        self.cur.execute("""SELECT
            name,
            unitindex
            FROM uniterrors WHERE fileid=? and configid=?
            ORDER BY unitindex;""", (fileid, configid))
        return self.cur.fetchone(), self.cur

    first, cur = geterrors()
    if first is not None:
        return first, cur

    # Nothing stored for this file and configuration: run the checks now
    # and read the results again.
    if callable(store):
        store = store()
    else:
        store = store or factory.getobject(filename)

    if os.path.exists(suggestion_filename(filename)):
        checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
    self._cachestorechecks(fileid, store, checker, configid)
    return geterrors()
555
def _geterrors(self, filename, fileid, configid, checker, store):
    """Returns all (name, unitindex) rows cached for the file and checker
    configuration as a list, caching the check results first if needed."""
    first, cur = self._checkerrors(filename, fileid, configid, checker, store)
    result = []
    # first is None when there is no row at all; leaving it out spares the
    # callers a None entry that cannot be indexed.
    if first is not None:
        result.append(first)
    result.extend(cur.fetchall())
    return result
562
@transaction
def _get_config_id(self, fileid, checker):
    """Returns the configid of the checker's configuration, storing the
    configuration first if it is not in the checkerconfigs table yet.

    fileid is accepted but not used here."""
    configid = self._getstoredcheckerconfig(checker)
    if configid is not None:
        return configid
    self.cur.execute("""INSERT INTO checkerconfigs
        (configid, config) values (NULL, ?);""",
        (str(checker.config.__dict__),))
    return self.cur.lastrowid
572
def filechecks(self, filename, checker, store=None):
    """Retrieves the error statistics for the given file if possible,
    otherwise delegates to cachestorechecks().

    Returns a dictionary that maps "check-<name>" to the list of indices of
    the units that failed that check."""
    fileid = self._getfileid(filename, store=store)
    configid = self._get_config_id(fileid, checker)
    values = self._geterrors(filename, fileid, configid, checker, store)

    errors = emptyfilechecks()
    for value in values:
        # Skip a missing row and the marker row stored with unit index -1;
        # neither stands for a failed check.
        if value is None or value[1] == -1:
            continue
        errors.setdefault('check-' + value[0], []).append(value[1])

    return errors
590
592 fileid = self._getfileid(filename)
593 configid = self._get_config_id(fileid, checker)
594 self._checkerrors(filename, fileid, configid, checker, None)
595 self.cur.execute("""SELECT
596 name,
597 unitindex
598 FROM uniterrors
599 WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
600 return self.cur.fetchone() is not None
601
603 """Return a dictionary of unit stats mapping sets of unit
604 indices with those states"""
605 stats = emptyfilestats()
606 fileid = self._getfileid(filename, store=store)
607
608 self.cur.execute("""SELECT
609 state,
610 unitindex
611 FROM units WHERE fileid=?
612 ORDER BY unitindex;""", (fileid,))
613 values = self.cur.fetchall()
614
615 for value in values:
616 stats[state_strings[value[0]]].append(value[1])
617 stats["total"].append(value[1])
618
619 return stats
620
621 - def filestats(self, filename, checker, store=None):
628
def unitstats(self, filename, _lang=None, store=None):
    """Return a dictionary of property names mapping to arrays which
    map unit indices to property values.

    Please note that this is different from filestats, since filestats
    supplies sets of unit indices with a given property, whereas this
    method supplies arrays which map unit indices to given values.

    _lang is accepted but not used here."""
    stats = emptyunitstats()
    fileid = self._getfileid(filename, store=store)

    self.cur.execute("""SELECT
        sourcewords, targetwords
        FROM units WHERE fileid=?
        ORDER BY unitindex;""", (fileid,))

    sourcecounts = stats["sourcewordcount"]
    targetcounts = stats["targetwordcount"]
    for sourcecount, targetcount in self.cur.fetchall():
        sourcecounts.append(sourcecount)
        targetcounts.append(targetcount)

    return stats
654