Main Page | Namespace List | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

perl/HyperEstraierWrapper.cpp

Go to the documentation of this file.
00001 
00004 #include <estraier.h>
00005 #include <estmtdb.h>
00006 #include <cabin.h>
00007 #include <cstdlib>
00008 #include <string>
00009 #include <vector>
00010 #include <map>
00011 #include <cassert>
00012 
00013 namespace estraier {
00014     class Condition {
00015     public:
00016         enum {                              // enumeration for options 
00017             SURE    = ESTCONDSURE,      // check every N-gram key
00018             USUAL   = ESTCONDUSUAL,     // check N-gram keys skipping by one
00019             FAST    = ESTCONDFAST,      // check N-gram keys skipping by two
00020             AGITO   = ESTCONDAGITO,     // check N-gram keys skipping by three
00021             NOIDF   = ESTCONDNOIDF,     // without TF-IDF tuning
00022             SIMPLE  = ESTCONDSIMPLE,    // with the simplefied phrase
00023         };
00024         ESTCOND * cond;
00025         Condition() {
00029             cond = est_cond_new();
00030         }
00031         ~Condition() {
00035             est_cond_delete(cond);
00036         }
00037         void set_phrase(const char *phrase) {
00041             est_cond_set_phrase(cond, phrase);
00042         }
00043         void add_attr(const char *expr) {
00047             est_cond_add_attr(cond, expr);
00048         }
00049         void set_order(const char *expr) {
00053             est_cond_set_order(cond, expr);
00054         }
00055         void set_max(int _max) {
00059             est_cond_set_max(cond, _max);
00060         }
00061         void set_options(int options) {
00065             est_cond_set_options(cond, options);
00066         }
00067     };
00068 
00069     class Document {
00070     private:
00071         std::string text_buf;
00072     public:
00073         ESTDOC *doc;
00074 
00075         Document() {
00079             doc = est_doc_new();
00080         }
00081         Document(const char* draft) {
00085             doc = est_doc_new_from_draft(draft);
00086         }
00087         Document(ESTDOC *_doc) {
00091             doc = _doc;
00092         }
00093         ~Document() {
00097             est_doc_delete(doc);
00098         }
00099         void add_attr(const char * name, const char*value) {
00103             est_doc_add_attr(doc, name, value);
00104         }
00105         void add_text(const char *text) {
00109             est_doc_add_text(doc, text);
00110         }
00111         void add_hidden_text(const char * text) {
00115             est_doc_add_hidden_text(doc, text);
00116         }
00117         int id() {
00121             return est_doc_id(doc);
00122         }
00123         std::vector<std::string> * attr_names() {
00127             std::vector<std::string> * vs = new std::vector<std::string>;
00128             CBLIST * attr_names = est_doc_attr_names(doc);
00129             for (int i=0; i < cblistnum(attr_names); i++) {
00130                 vs->push_back(cblistval(attr_names, i, NULL));
00131             }
00132             cblistclose(attr_names);
00133             return vs;
00134         }
00135         const char * attr(const char *name) {
00139             return est_doc_attr(doc, name);
00140         }
00141         const char * cat_texts() {
00145             // return est_doc_cat_texts(doc);
00146             return "This is mockup!";
00147         }
00148         std::vector<std::string>* texts() {
00152             std::vector<std::string> * vs = new std::vector<std::string>;
00153             const CBLIST *texts;
00154             texts = est_doc_texts(doc);
00155             for(int i = 0; i < cblistnum(texts); i++) {
00156                 vs->push_back(cblistval(texts, i, NULL));
00157             }
00158             return vs;
00159         }
00160         const char * dump_draft() {
00164             return est_doc_dump_draft(doc);
00165         }
00166         const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
00170             CBLIST * words;
00171             std::vector<std::string>::iterator iter;
00172 
00173             words = cblistopen();
00174 
00175             for (iter = _words.begin(); _words.end() != iter; iter++) {
00176                 cblistpush(words, iter->c_str(), -1);
00177             }
00178 
00179             const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); 
00180 
00181             cblistclose(words);
00182 
00183             return result;
00184         }
00185     };
00186 
00187     class Database {
00188     private:
00189         ESTMTDB *db;
00190     public:
00191         enum {                              // enumeration for error codes
00192             ERRNOERR    = ESTENOERR,        // no error
00193             ERRINVAL    = ESTEINVAL,        // invalid argument
00194             ERRACCES    = ESTEACCES,        // access forbidden
00195             ERRLOCK     = ESTELOCK,         // lock failure
00196             ERRDB       = ESTEDB,           // database problem
00197             ERRIO       = ESTEIO,           // I/O problem
00198             ERRNOITEM   = ESTENOITEM,       // no item
00199             ERRMISC     = ESTEMISC          // miscellaneous
00200         };
00201         enum {                              // enumeration for open modes
00202             DBREADER    = ESTDBREADER,      // open as a reader
00203             DBWRITER    = ESTDBWRITER,      // open as a writer
00204             DBCREAT     = ESTDBCREAT,       // a writer creating
00205             DBTRUNC     = ESTDBTRUNC,       // a writer truncating
00206             DBNOLCK     = ESTDBNOLCK,       // open without locking
00207             DBLCKNB     = ESTDBLCKNB,       // lock without blocking
00208             DBPERFNG    = ESTDBPERFNG       // use perfect N-gram analyzer
00209         };
00210         enum {                              // enumeration for options of document registration
00211             PDCLEAN     = ESTPDCLEAN        // clean up dispensable regions
00212         };
00213         enum {                              // enumeration for options of document deletion
00214             ODCLEAN     = ESTODCLEAN        // clean up dispensable regions
00215         };
00216         enum {                              // enumeration for options of optimization
00217             OPTNOPURGE  = ESTOPTNOPURGE,    // omit purging dispensable region of deleted
00218             OPTNODBOPT  = ESTOPTNODBOPT     // omit optimizization of the database files
00219         };
00220         enum {                              // enumeration for options of document retrieval
00221             GDNOATTR    = ESTGDNOATTR,      // no attributes
00222             GDNOTEXT    = ESTGDNOTEXT       // no text
00223         };
00224         Database() {
00228         }
00229         ~Database() {
00230             close();
00231         }
00232         bool open(const char * dbname, int mode) {
00236             int ecode;
00237             db = est_mtdb_open(dbname, mode, &ecode);
00238             return db;
00239         }
00240         bool close() {
00244             if (db) {
00245                 int ecode;
00246                 bool result = est_mtdb_close(db, &ecode);
00247                 db = NULL;
00248                 return result;
00249             } else {
00250                 return false;
00251             }
00252         }
00253         bool put_doc(Document *doc, int options) {
00257             return est_mtdb_put_doc(db, doc->doc, options);
00258         }
00259         std::vector<int> * search(Condition * cond, int options) {
00263             int resnum;
00264             int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
00265             std::vector<int> *numbers = new std::vector<int>;   
00266             for (int i=0; i<resnum; i++) {
00267                 numbers->push_back(result[i]);
00268             }
00269             return numbers;
00270         }
00271         static const char * err_msg(int ecode) {
00275             return est_err_msg(ecode);
00276         }
00277         int error() {
00281             return est_mtdb_error(db);
00282         }
00283         bool fatal() {
00287             return est_mtdb_fatal(db);
00288         }
00289         bool flush(int _max) {
00293             return est_mtdb_flush(db, _max);
00294         }
00295         bool sync() {
00299             return est_mtdb_sync(db);
00300         }
00301         bool optimize(int options) {
00305             return est_mtdb_optimize(db, options);
00306         }
00307         bool out_doc(int id, int options) {
00311             return est_mtdb_out_doc(db, id, options);
00312         }
00313         Document * get_doc(int id, int options) {
00317             ESTDOC *doc = est_mtdb_get_doc(db, id, options);
00318             if (!doc) {
00319                 throw est_err_msg(est_mtdb_error(db));
00320             } else {
00321                 return new Document(doc);
00322             }
00323         }
00324         int uri_to_id(const char *uri) {
00328             return est_mtdb_uri_to_id(db, uri);
00329         }
00330         std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
00334             std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
00335 
00336             CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
00337 
00338             cbmapiterinit(keys);
00339             int ksiz;
00340             while (const char *key = cbmapiternext(keys, &ksiz)) {
00341                 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
00342             }
00343             return mss;
00344         }
00345         bool iter_init() {
00349             return est_mtdb_iter_init(db);
00350         }
00351         int iter_next() {
00355             return est_mtdb_iter_next(db);
00356         }
00357         const char * name() {
00361             return est_mtdb_name(db);
00362         }
00363         int doc_num() {
00367             return est_mtdb_doc_num(db);
00368         }
00369         int word_num() {
00373             return est_mtdb_word_num(db);
00374         }
00375         double size() {
00379             return est_mtdb_size(db);
00380         }
00381         void set_cache_size(size_t size, int anum, int tnum) {
00385             est_mtdb_set_cache_size(db, size, anum, tnum);
00386         }
00387         void set_special_cache(const char *name, int num) {
00392             est_mtdb_set_special_cache(db, name, num);
00393         }
00394     };
00395 };

Generated on Thu Sep 8 02:02:20 2005 for HyperEstraierWrapper by  doxygen 1.4.4