Package translate :: Package storage :: Module statsdb
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.statsdb

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """Module to provide a cache of statistics in a database. 
 23   
 24  @organization: Zuza Software Foundation 
 25  @copyright: 2007 Zuza Software Foundation 
 26  @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>} 
 27  """ 
 28   
 29  from translate import __version__ as toolkitversion 
 30  from translate.storage import factory 
 31  from translate.misc.multistring import multistring 
 32  from translate.lang.common import Common 
 33   
 34  try: 
 35      from sqlite3 import dbapi2 
 36  except ImportError: 
 37      from pysqlite2 import dbapi2 
 38  import os.path 
 39  import re 
 40  import sys 
 41   
 42  kdepluralre = re.compile("^_n: ") 
 43  brtagre = re.compile("<br\s*?/?>") 
 44  xmltagre = re.compile("<[^>]+>") 
 45  numberre = re.compile("\\D\\.\\D") 
 46   
 47  state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"} 
 48   
def wordcount(string):
    """Returns the number of words in string, as split by Common.words().

    Before counting, the module-level patterns are applied in order: the
    KDE plural marker is dropped, <br> tags become newlines, remaining tags
    are removed and numberre matches are replaced by a single space.
    """
    # TODO: po class should understand KDE style plurals
    substitutions = (
        (kdepluralre, ""),
        (brtagre, "\n"),
        (xmltagre, ""),
        (numberre, " "),
    )
    text = string
    for pattern, replacement in substitutions:
        text = pattern.sub(replacement, text)
    # TODO: This should still use the correct language to count in the target
    # language
    return len(Common.words(text))
58
def wordsinunit(unit):
    """Counts the words in the unit's source and target, taking plurals into
    account. The target words are only counted if the unit is translated.

    @param unit: the unit to count; its source and target may be multistrings
    @return: a (sourcewords, targetwords) tuple
    """
    def countall(value):
        # A multistring contributes every one of its strings; anything else
        # is counted as a single (possibly empty) string.
        if isinstance(value, multistring):
            strings = value.strings
        else:
            strings = [value or ""]
        return sum([wordcount(s) for s in strings])

    sourcewords = countall(unit.source)
    targetwords = 0
    if unit.istranslated():
        targetwords = countall(unit.target)
    return sourcewords, targetwords
78
def statefordb(unit):
    """Returns the numeric database state for the unit.

    1 means translated, 2 means fuzzy with a non-empty target, and 0 covers
    everything else (see state_strings for the matching names).
    """
    if unit.istranslated():
        state = 1
    elif unit.isfuzzy() and unit.target:
        state = 2
    else:
        state = 0
    return state
86
def emptystats():
    """Returns a new dictionary with all statistics initialised to 0.

    For each of "total", "translated", "fuzzy", "untranslated" and "review"
    there are three keys: the bare name (unit count), the name followed by
    "sourcewords" and the name followed by "targetwords".
    """
    states = ("total", "translated", "fuzzy", "untranslated", "review")
    suffixes = ("", "sourcewords", "targetwords")
    stats = {}
    for state in states:
        for suffix in suffixes:
            stats[state + suffix] = 0
    return stats
95
def suggestioninfo(filename):
    """Provides the filename of the associated file containing suggestions and
    its mtime, if it exists.

    @param filename: path of the translation file
    @return: a (suggestion_filename, suggestion_mtime) tuple, which is
    (None, -1) when filename is not a PO file or has no suggestion file
    """
    ext = os.path.splitext(filename)[1]
    if ext != os.path.extsep + "po":
        return None, -1
    # For a PO file there might be an associated file with suggested
    # translations. If either file changed, we want to regenerate the
    # statistics.
    suggestion_filename = filename + os.path.extsep + 'pending'
    try:
        # Ask for the mtime directly instead of testing for existence first:
        # the file could disappear between the two calls.
        return suggestion_filename, os.path.getmtime(suggestion_filename)
    except OSError:
        return None, -1
112
class StatsCache(object):
    """An object instantiated as a singleton for each statsfile that provides
    access to the database cache from a pool of StatsCache objects.

    Construction goes through __new__, which hands back the already open
    cache when one is registered for the resolved statsfile path.
    """
    caches = {}
    defaultfile = None
    con = None
    """This cache's connection"""
    cur = None
    """The current cursor"""

    def __new__(cls, statsfile=None):
        """Returns the cache bound to statsfile, creating it on first use.

        @param statsfile: path of the database file; when empty, "stats.db"
        in a per-user toolkit directory (created if missing) is used
        @rtype: StatsCache
        """
        if not statsfile:
            if not cls.defaultfile:
                userdir = os.path.expanduser("~")
                if os.name == "nt":
                    cachedir = os.path.join(userdir, "Translate Toolkit")
                else:
                    cachedir = os.path.join(userdir, ".translate_toolkit")
                if not os.path.exists(cachedir):
                    os.mkdir(cachedir)
                cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
            statsfile = cls.defaultfile
        else:
            statsfile = os.path.realpath(statsfile)
        # First see if a cache for this file already exists:
        if statsfile in cls.caches:
            return cls.caches[statsfile]
        # No existing cache. Let's build a new one, but only keep a copy once
        # it is fully usable, so that a failed connect()/create() does not
        # leave a broken object in the pool.
        cache = object.__new__(cls)
        cache.con = dbapi2.connect(statsfile)
        try:
            cache.cur = cache.con.cursor()
            cache.create()
        except Exception:
            cache.con.close()
            raise
        cls.caches[statsfile] = cache
        return cache

    def create(self):
        """Create all tables and indexes."""
        statements = (
            """CREATE TABLE IF NOT EXISTS files(
            fileid INTEGER PRIMARY KEY AUTOINCREMENT,
            path VARCHAR NOT NULL UNIQUE,
            mtime INTEGER NOT NULL,
            toolkitbuild INTEGER NOT NULL);""",

            """CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
            ON files (path);""",

            """CREATE TABLE IF NOT EXISTS units(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            unitid VARCHAR NOT NULL,
            fileid INTEGER NOT NULL,
            unitindex INTEGER NOT NULL,
            source VARCHAR NOT NULL,
            target VARCHAR,
            state INTEGER,
            sourcewords INTEGER,
            targetwords INTEGER);""",

            """CREATE INDEX IF NOT EXISTS fileidindex
            ON units(fileid);""",

            """CREATE TABLE IF NOT EXISTS checkerconfigs(
            configid INTEGER PRIMARY KEY AUTOINCREMENT,
            config VARCHAR);""",

            """CREATE INDEX IF NOT EXISTS configindex
            ON checkerconfigs(config);""",

            """CREATE TABLE IF NOT EXISTS uniterrors(
            errorid INTEGER PRIMARY KEY AUTOINCREMENT,
            unitindex INTEGER NOT NULL,
            fileid INTEGER NOT NULL,
            configid INTEGER NOT NULL,
            name VARCHAR NOT NULL,
            message VARCHAR);""",

            """CREATE INDEX IF NOT EXISTS uniterrorindex
            ON uniterrors(fileid, configid);""",
        )
        for statement in statements:
            self.cur.execute(statement)
        self.con.commit()

    def _getstoredfileid(self, filename, optmtime=-1, checkmtime=True):
        """Attempt to find the fileid of the given file, if it hasn't been
        updated since the last record update.

        None is returned if either the file's record is not found, or if it is
        not up to date.

        @param filename: the filename to retrieve the id for
        @param optmtime: an optional mtime to consider in addition to the mtime of
        the given file
        @param checkmtime: when False, the stored mtime is not compared but
        overwritten with the current one (the update is not committed here)
        @rtype: Integer or None
        """
        realpath = os.path.realpath(filename)
        self.cur.execute("""SELECT fileid, mtime FROM files
                WHERE path=?;""", (realpath,))
        filerow = self.cur.fetchone()
        mtime = max(optmtime, os.path.getmtime(realpath))
        if not filerow:
            return None
        fileid, storedmtime = filerow
        if checkmtime:
            if storedmtime != mtime:
                return None
        else:
            # Update the mtime of the file
            self.cur.execute("""UPDATE files
                    SET mtime=?
                    WHERE fileid=?;""", (mtime, fileid))
        return fileid

    def _getstoredcheckerconfig(self, checker):
        """See if this checker configuration has been used before.

        @return: the configid stored for str(checker.config.__dict__), or
        None if that configuration is not in the database
        """
        config = str(checker.config.__dict__)
        self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
            config=?;""", (config,))
        configrow = self.cur.fetchone()
        if not configrow or configrow[1] != config:
            return None
        return configrow[0]

    def _cacheunitstats(self, units, fileid, unitindex=None):
        """Cache the statistics for the supplied unit(s).

        @param units: the units to store; untranslatable units are skipped
        @param fileid: id of the files row the units belong to
        @param unitindex: when given, units holds a single unit that is stored
        under this index instead of its position in the list
        @return: the state name of units[0] when unitindex is given,
        otherwise an empty string
        """
        # Index 0 is a valid unit index, so compare against None explicitly.
        singleunit = unitindex is not None
        unitvalues = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                sourcewords, targetwords = wordsinunit(unit)
                if singleunit:
                    index = unitindex
                # what about plurals in .source and .target?
                unitvalues.append((unit.getid(), fileid, index,
                                   unit.source, unit.target,
                                   sourcewords, targetwords,
                                   statefordb(unit)))
        # XXX: executemany is non-standard
        self.cur.executemany("""INSERT INTO units
            (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)
            values (?, ?, ?, ?, ?, ?, ?, ?);""",
            unitvalues)
        self.con.commit()
        if singleunit:
            return state_strings[statefordb(units[0])]
        return ""

    def cachestore(self, store):
        """Calculates and caches the statistics of the given store
        unconditionally.

        @param store: the store to cache; store.filename must exist on disk
        @return: the fileid of the newly inserted files row
        """
        realpath = os.path.realpath(store.filename)
        mtime = os.path.getmtime(realpath)
        # The file gets a fresh fileid below, so rows that still refer to the
        # previous id would never be found again: remove them first.
        self.cur.execute("""SELECT fileid FROM files WHERE
            path=?;""", (realpath,))
        for (oldfileid,) in self.cur.fetchall():
            self.cur.execute("""DELETE FROM units WHERE
                fileid=?;""", (oldfileid,))
            self.cur.execute("""DELETE FROM uniterrors WHERE
                fileid=?;""", (oldfileid,))
        self.cur.execute("""DELETE FROM files WHERE
            path=?;""", (realpath,))
        self.cur.execute("""INSERT INTO files
            (fileid, path, mtime, toolkitbuild) values (NULL, ?, ?, ?);""",
            (realpath, mtime, toolkitversion.build))
        fileid = self.cur.lastrowid
        self._cacheunitstats(store.units, fileid)
        return fileid

    def directorytotals(self, dirname):
        """Retrieves the stored statistics for a given directory, all summed.

        Note that this does not check for mtimes or the presence of files.

        @param dirname: the directory whose cached files should be summed
        @return: a list of (state, total, sourcewords, targetwords) rows, one
        per state that occurs
        """
        # The trailing separator keeps "/a/b" from matching "/a/bc/x.po".
        prefix = os.path.join(os.path.realpath(dirname), "")
        # substr() positions are 1-based in SQLite.
        self.cur.execute("""SELECT
            state,
            count(unitid) as total,
            sum(sourcewords) as sourcewords,
            sum(targetwords) as targetwords
            FROM units WHERE fileid IN
                (SELECT fileid from files
                WHERE substr(path, 1, ?)=?)
            GROUP BY state;""", (len(prefix), prefix))
        return self.cur.fetchall()

    def filetotals(self, filename):
        """Retrieves the statistics for the given file if possible, otherwise
        delegates to cachestore().

        @return: a dictionary with the keys of emptystats(), or an empty
        dictionary if parsing or caching the file raised ValueError (the
        error is written to stderr)
        """
        fileid = self._getstoredfileid(filename)
        if not fileid:
            try:
                store = factory.getobject(filename)
                fileid = self.cachestore(store)
            except ValueError as e:
                sys.stderr.write(str(e) + "\n")
                return {}

        self.cur.execute("""SELECT
            state,
            count(unitid) as total,
            sum(sourcewords) as sourcewords,
            sum(targetwords) as targetwords
            FROM units WHERE fileid=?
            GROUP BY state;""", (fileid,))
        values = self.cur.fetchall()

        totals = emptystats()
        for statenumber, count, sourcewords, targetwords in values:
            state = state_strings[statenumber]
            # sum()/count() may come back as NULL; keep the totals numeric
            totals[state] = count or 0
            totals[state + "sourcewords"] = sourcewords or 0
            totals[state + "targetwords"] = targetwords or 0
        totals["total"] = totals["untranslated"] + totals["translated"] + totals["fuzzy"]
        totals["totalsourcewords"] = totals["untranslatedsourcewords"] + \
                totals["translatedsourcewords"] + \
                totals["fuzzysourcewords"]
        return totals

    def _cacheunitschecks(self, units, fileid, configid, checker, unitindex=None):
        """Helper method for cachestorechecks() and recacheunit()

        Runs the checker over the translatable units and stores each failure
        in the uniterrors table.

        @param unitindex: when given, units holds a single unit that is stored
        under this index
        @return: a list of "check-<name>" strings, one per failure, followed
        by "total" when unitindex is given
        """
        # Index 0 is a valid unit index, so compare against None explicitly.
        singleunit = unitindex is not None
        # We always want to store one dummy error to know that we have actually
        # run the checks on this file with the current checker configuration
        dummy = (-1, fileid, configid, "noerror", "")
        unitvalues = [dummy]
        # if we are doing a single unit, we want to return the checknames
        errornames = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                # Correctly assign the unitindex
                if singleunit:
                    index = unitindex
                failures = checker.run_filters(unit)
                for checkname, checkmessage in failures.items():
                    unitvalues.append((index, fileid, configid, checkname, checkmessage))
                    errornames.append("check-" + checkname)
        # All units have been checked; detach the suggestion store again
        checker.setsuggestionstore(None)

        if singleunit:
            # We are only updating a single unit, so we don't want to add an
            # extra noerror-entry
            unitvalues.remove(dummy)
            errornames.append("total")

        # XXX: executemany is non-standard
        self.cur.executemany("""INSERT INTO uniterrors
            (unitindex, fileid, configid, name, message)
            values (?, ?, ?, ?, ?);""",
            unitvalues)
        self.con.commit()
        return errornames

    def cachestorechecks(self, fileid, store, checker, configid):
        """Calculates and caches the error statistics of the given store
        unconditionally.

        @return: the fileid that was passed in
        """
        # Let's purge all previous failures because they will probably just
        # fill up the database without much use.
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=?;""", (fileid,))
        self._cacheunitschecks(store.units, fileid, configid, checker)
        return fileid

    def recacheunit(self, filename, checker, unit):
        """Recalculate all information for a specific unit. This is necessary
        for updating all statistics when a translation of a unit took place,
        for example.

        This method assumes that everything was up to date before (file totals,
        checks, checker config, etc.). In particular the unit must already
        have a row in the units table: the index lookup below fails with a
        TypeError otherwise.

        @return: a list starting with the unit's state name, followed by the
        names returned by _cacheunitschecks()
        """
        suggestion_filename, suggestion_mtime = suggestioninfo(filename)
        fileid = self._getstoredfileid(filename, suggestion_mtime, checkmtime=False)
        configid = self._getstoredcheckerconfig(checker)
        unitid = unit.getid()
        # get the unit index
        self.cur.execute("""SELECT unitindex FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        unitindex = self.cur.fetchone()[0]
        self.cur.execute("""DELETE FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        state = [self._cacheunitstats([unit], fileid, unitindex)]
        # remove the current errors
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=? AND unitindex=?;""", (fileid, unitindex))
        if suggestion_filename:
            checker.setsuggestionstore(factory.getobject(suggestion_filename, ignore=os.path.extsep+ 'pending'))
        state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
        return state

    def filechecks(self, filename, checker, store=None):
        """Retrieves the error statistics for the given file if possible,
        otherwise delegates to cachestorechecks().

        @param store: an already parsed store for filename, used instead of
        parsing the file again when something has to be (re)cached
        @return: a dictionary mapping "check-<name>" to the list of unit
        indexes failing that check, or an empty dictionary if caching the
        file raised ValueError (the error is written to stderr)
        """
        suggestion_filename, suggestion_mtime = suggestioninfo(filename)
        fileid = self._getstoredfileid(filename, suggestion_mtime)
        configid = self._getstoredcheckerconfig(checker)
        try:
            if not fileid:
                store = store or factory.getobject(filename)
                fileid = self.cachestore(store)
            if not configid:
                self.cur.execute("""INSERT INTO checkerconfigs
                    (configid, config) values (NULL, ?);""",
                    (str(checker.config.__dict__),))
                configid = self.cur.lastrowid
        except ValueError as e:
            sys.stderr.write(str(e) + "\n")
            return {}

        def geterrors():
            self.cur.execute("""SELECT
                name,
                unitindex
                FROM uniterrors WHERE fileid=? and configid=?
                ORDER BY unitindex;""", (fileid, configid))
            return self.cur.fetchall()

        values = geterrors()
        if not values:
            # This could happen if we haven't done the checks before, or the
            # file changed, or we are using a different configuration
            store = store or factory.getobject(filename)
            if suggestion_filename:
                checker.setsuggestionstore(factory.getobject(suggestion_filename, ignore=os.path.extsep+ 'pending'))
            self.cachestorechecks(fileid, store, checker, configid)
            values = geterrors()

        errors = {}
        for name, unitindex in values:
            if unitindex == -1:
                # the "noerror" marker row, not a real failure
                continue
            errors.setdefault('check-' + name, []).append(unitindex)

        return errors

    def filestats(self, filename, checker, store=None):
        """Complete stats for a file: the filechecks() result plus the unit
        indexes listed under "total", "translated", "fuzzy" and
        "untranslated".
        """
        stats = {"total": [], "translated": [], "fuzzy": [], "untranslated": []}

        stats.update(self.filechecks(filename, checker, store))
        # NOTE(review): this lookup ignores the suggestion file's mtime, so it
        # looks like it can return None after recacheunit() has stored a newer
        # suggestion mtime, leaving the state lists empty -- confirm.
        fileid = self._getstoredfileid(filename)

        self.cur.execute("""SELECT
            state,
            unitindex
            FROM units WHERE fileid=?
            ORDER BY unitindex;""", (fileid,))

        for state, unitindex in self.cur.fetchall():
            stats[state_strings[state]].append(unitindex)
            stats["total"].append(unitindex)

        return stats
460