Package translate :: Package search :: Package indexing
[hide private]
[frames] | no frames]

Source Code for Package translate.search.indexing

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  # Copyright 2008 Zuza Software Foundation 
  4  #  
  5  # This file is part of translate. 
  6  # 
  7  # translate is free software; you can redistribute it and/or modify 
  8  # it under the terms of the GNU General Public License as published by 
  9  # the Free Software Foundation; either version 2 of the License, or 
 10  # (at your option) any later version. 
 11  #  
 12  # translate is distributed in the hope that it will be useful, 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 15  # GNU General Public License for more details. 
 16  # 
 17  # You should have received a copy of the GNU General Public License 
 18  # along with translate; if not, write to the Free Software 
 19  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 20  # 
 21   
 22   
 23  """ 
 24  interface for different indexing engines for the translate toolkit 
 25   
 26  """ 
 27   
 28  __revision__ = "$Id: __init__.py 8503 2008-09-27 08:58:30Z dwaynebailey $" 
 29   
 30  import CommonIndexer 
 31  import os 
 32  import shutil 
 33   
 34  """ TODO for indexing engines: 
 35      * get rid of jToolkit.glock dependency 
 36      * add partial matching at the beginning of a term 
 37      * do a proper cleanup - e.g.: the pylucene lockfiles remain in /tmp/ 
 38      * do unittests for PyLucene v1.x 
 39      """ 
 40   
def _get_available_indexers():
    """get a list of the available supported indexing engines

    search through the directory of this package for modules that can be
    imported, that report themselves as usable through a callable
    "is_available" and that contain classes derived from the
    CommonIndexer.CommonDatabase class

    @return: the usable indexer classes, collected in sorted file name order
    @rtype: list of subclasses of CommonIndexer.CommonDatabase
    """
    result = []
    # get the package directory
    indexer_dir = os.path.dirname(os.path.abspath(__file__))
    # "__file__" is a path and may refer to compiled code (e.g. ".pyc"), while
    # os.listdir returns bare file names - so only the extension-less base
    # name can be compared reliably to detect our own module
    own_name = os.path.splitext(os.path.basename(__file__))[0]
    # sort the files in the directory by name - to make it determinable,
    # which indexing engine is chosen in case of multiple possibilities
    all_files = os.listdir(indexer_dir)
    all_files.sort()
    for mod_file in all_files:
        mod_path = os.path.join(indexer_dir, mod_file)
        if (not mod_path.endswith(".py")) or (not os.path.isfile(mod_path)) \
                or (not os.access(mod_path, os.R_OK)):
            # no file / wrong extension / not readable -> skip it
            continue
        # strip the ".py" suffix
        mod_name = mod_file[:-3]
        if mod_name == own_name:
            # we should not import ourself
            continue
        # TODO - debug: "[Indexer]: trying to import indexing engines from '%s'" % mod_path
        # NOTE(review): the bare module name is resolved relative to this
        # package via globals() - confirm this before porting to a Python
        # version without implicit relative imports
        try:
            module = __import__(mod_name, globals(), {})
        except ImportError:
            # maybe it is unusable or dependencies are missing
            continue
        # the module function "is_available" must return "True"
        if not (hasattr(module, "is_available")
                and callable(module.is_available)
                and module.is_available()):
            continue
        for item in dir(module):
            try:
                element = getattr(module, item)
            except TypeError:
                # this rarely happens: e.g. for 'item' being 'None'
                continue
            try:
                # the class must inherit CommonDatabase (without being the same)
                if issubclass(element, CommonIndexer.CommonDatabase) \
                        and element is not CommonIndexer.CommonDatabase:
                    # TODO: debug - "[Indexer]: indexing engine found in '%s': %s" % (mod_path, element)
                    # the interface is ok
                    result.append(element)
            except TypeError:
                # 'element' is not a class
                continue
    return result
93
94 -def _sort_indexers_by_preference(indexer_classes, pref_order):
95 """sort a given list of indexer classes according to the given order 96 97 the list of preferred indexers are strings that should match the filenames 98 (without suppix ".py") of the respective modules (e.g.: XapianIndexer or 99 PyLuceneIndexer) 100 101 @param indexer_classes: the list of all available indexer classes 102 @type indexer_classes: list of CommonIndexer.CommonDatabase objects 103 @param pref_order: list of preferred indexer names 104 @type pref_order: str 105 @return: sorted list of indexer classes 106 @rtype: list of CommonIndexer.CommonDatabase objects 107 """ 108 # define useful function for readability 109 get_indexer_name = lambda indexer_class: \ 110 os.path.basename(indexer_class.__module__).split(".")[-1] 111 # use a copy to avoid side effects 112 avail_indexers = indexer_classes[:] 113 result = [] 114 # go through all preferred items and move the matching indexers to 'result' 115 for choice in pref_order: 116 # find matching indexers 117 matches = [ indexer for indexer in avail_indexers 118 if get_indexer_name(indexer) == choice ] 119 # move all matching items to the 'result' queue 120 for match_item in matches: 121 result.append(match_item) 122 avail_indexers.remove(match_item) 123 # append the remaining indexers to the result 124 return result + avail_indexers


# probe for usable indexing engines - this runs only once, during the first
# import of this package
_AVAILABLE_INDEXERS = _get_available_indexers()

# flag for callers that need to check if indexing support is available:
# True if at least one indexing engine was found
HAVE_INDEXER = len(_AVAILABLE_INDEXERS) > 0


def get_indexer(basedir, preference=None):
    """return an appropriate indexer for the given directory

    If the directory already exists, then every available indexer (in order
    of preference) is asked to open it without being allowed to create a new
    database. The first one that succeeds is returned. If none of them can
    handle the directory, then it is removed and the most preferred indexer
    is instantiated for this location. The same indexer is used if the
    directory does not exist yet.

    @raise IndexError: there is no indexing engine available
    @raise ValueError: the database location already exists, but we did not find
        a suitable indexing engine for it
    @raise OSError: any error that could occur while creating or opening the
        database

    @param basedir: the parent directory of (possible) different indexing
        databases
    @type basedir: string
    @param preference: names of the preferred indexer modules (most preferred
        first); None means no preference
    @type preference: list of str
    @return: an instance of the most appropriate indexer class
    @rtype: instance of a subclass of L{CommonIndexer.CommonDatabase}
    """
    if not _AVAILABLE_INDEXERS:
        raise IndexError("Indexer: no indexing engines are available")
    if preference is None:
        preference = []
    # most preferred indexing engines come first
    candidates = _sort_indexers_by_preference(_AVAILABLE_INDEXERS, preference)
    if os.path.exists(basedir):
        for engine_class in candidates:
            # open without permission to create: a database belonging to
            # another indexing engine could live here, so nothing new may
            # be created before all engines had their chance
            try:
                database = engine_class(basedir, create_allowed=False)
            except (ValueError, OSError):
                # invalid type of database or some other error
                continue
            # the first engine that can open the location is sufficient
            return database
        # no engine was able to handle the existing location - so the whole
        # base directory is removed
        shutil.rmtree(basedir, ignore_errors=True)
        print("Deleting invalid indexing directory '%s'" % basedir)
    # the database does not exist or it was deleted (see above):
    # use the most preferred indexing engine
    return candidates[0](basedir)


if __name__ == "__main__":
    # executed as a script: print every supported indexing engine whose
    # requirements are fulfilled (one per line)
    for ONE_INDEX in _AVAILABLE_INDEXERS:
        print(ONE_INDEX)