Main Page | Namespace List | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members

ruby/HyperEstraierWrapper.cpp

Go to the documentation of this file.
00001 
00004 #include <estraier.h>
00005 #include <estmtdb.h>
00006 #include <cabin.h>
00007 #include <cstdlib>
00008 #include <string>
00009 #include <vector>
00010 #include <map>
00011 #include <cassert>
00012 #include <stdexcept>
00013 
00014 namespace estraier {
00015 
/**
 * Exception type used by this wrapper to report failures such as
 * operating on a database handle that is not open.
 *
 * It adds nothing to std::runtime_error beyond a distinct type that
 * callers can catch.
 */
class IOError : public std::runtime_error
{
public:
    /** Construct the exception from a human-readable description. */
    explicit IOError(const std::string& message)
        : std::runtime_error(message)
    {
    }
};
00020 
00021     class Condition {
00022     public:
00023         enum {                              // enumeration for options 
00024             SURE    = ESTCONDSURE,      // check every N-gram key
00025             USUAL   = ESTCONDUSUAL,     // check N-gram keys skipping by one
00026             FAST    = ESTCONDFAST,      // check N-gram keys skipping by two
00027             AGITO   = ESTCONDAGITO,     // check N-gram keys skipping by three
00028             NOIDF   = ESTCONDNOIDF,     // without TF-IDF tuning
00029             SIMPLE  = ESTCONDSIMPLE,    // with the simplefied phrase
00030         };
00031         ESTCOND * cond;
00032         Condition() {
00036             cond = est_cond_new();
00037         }
00038         ~Condition() {
00042             est_cond_delete(cond);
00043         }
00044         void set_phrase(const char *phrase) {
00048             est_cond_set_phrase(cond, phrase);
00049         }
00050         void add_attr(const char *expr) {
00054             est_cond_add_attr(cond, expr);
00055         }
00056         void set_order(const char *expr) {
00060             est_cond_set_order(cond, expr);
00061         }
00062         void set_max(int _max) {
00066             est_cond_set_max(cond, _max);
00067         }
00068         void set_options(int options) {
00072             est_cond_set_options(cond, options);
00073         }
00074     };
00075 
00076     class Document {
00077     private:
00078         std::string text_buf;
00079     public:
00080         ESTDOC *doc;
00081         Document() {
00085             doc = est_doc_new();
00086         }
00087         Document(const char* draft) {
00091             doc = est_doc_new_from_draft(draft);
00092         }
00093         Document(ESTDOC *_doc) {
00097             doc = _doc;
00098         }
00099         ~Document() {
00103             est_doc_delete(doc);
00104         }
00105         void add_attr(const char * name, const char*value) {
00109             est_doc_add_attr(doc, name, value);
00110         }
00111         void add_text(const char *text) {
00115             est_doc_add_text(doc, text);
00116         }
00117         void add_hidden_text(const char * text) {
00121             est_doc_add_hidden_text(doc, text);
00122         }
00123         int id() {
00127             return est_doc_id(doc);
00128         }
00129         std::vector<std::string> * attr_names() {
00133             std::vector<std::string> * vs = new std::vector<std::string>;
00134             CBLIST * attr_names = est_doc_attr_names(doc);
00135             for (int i=0; i < cblistnum(attr_names); i++) {
00136                 vs->push_back(cblistval(attr_names, i, NULL));
00137             }
00138             cblistclose(attr_names);
00139             return vs;
00140         }
00141         const char * attr(const char *name) {
00145             return est_doc_attr(doc, name);
00146         }
00147         const char * cat_texts() {
00151             return est_doc_cat_texts(doc);
00152         }
00153         std::vector<std::string>* texts() {
00157             std::vector<std::string> * vs = new std::vector<std::string>;
00158             const CBLIST *texts;
00159             texts = est_doc_texts(doc);
00160             for(int i = 0; i < cblistnum(texts); i++) {
00161                 vs->push_back(cblistval(texts, i, NULL));
00162             }
00163             return vs;
00164         }
00165         const char * dump_draft() {
00169             return est_doc_dump_draft(doc);
00170         }
00171         const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
00175             CBLIST * words;
00176             std::vector<std::string>::iterator iter;
00177             words = cblistopen();
00178             for (iter = _words.begin(); _words.end() != iter; iter++) {
00179                 cblistpush(words, iter->c_str(), -1);
00180             }
00181             const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); 
00182             cblistclose(words);
00183             return result;
00184         }
00185         const char * hidden_texts() {
00189             return est_doc_hidden_texts(doc);
00190         }
00191     };
00192 
00193     class Database {
00194     private:
00195         ESTMTDB *db;
00196         int ecode;
00197     public:
00198         enum {                              // enumeration for error codes
00199             ERRNOERR    = ESTENOERR,        // no error
00200             ERRINVAL    = ESTEINVAL,        // invalid argument
00201             ERRACCES    = ESTEACCES,        // access forbidden
00202             ERRLOCK     = ESTELOCK,         // lock failure
00203             ERRDB       = ESTEDB,           // database problem
00204             ERRIO       = ESTEIO,           // I/O problem
00205             ERRNOITEM   = ESTENOITEM,       // no item
00206             ERRMISC     = ESTEMISC          // miscellaneous
00207         };
00208         enum {                              // enumeration for open modes
00209             DBREADER    = ESTDBREADER,      // open as a reader
00210             DBWRITER    = ESTDBWRITER,      // open as a writer
00211             DBCREAT     = ESTDBCREAT,       // a writer creating
00212             DBTRUNC     = ESTDBTRUNC,       // a writer truncating
00213             DBNOLCK     = ESTDBNOLCK,       // open without locking
00214             DBLCKNB     = ESTDBLCKNB,       // lock without blocking
00215             DBPERFNG    = ESTDBPERFNG       // use perfect N-gram analyzer
00216         };
00217         enum {                              // enumeration for options of document registration
00218             PDCLEAN     = ESTPDCLEAN        // clean up dispensable regions
00219         };
00220         enum {                              // enumeration for options of document deletion
00221             ODCLEAN     = ESTODCLEAN        // clean up dispensable regions
00222         };
00223         enum {                              // enumeration for options of optimization
00224             OPTNOPURGE  = ESTOPTNOPURGE,    // omit purging dispensable region of deleted
00225             OPTNODBOPT  = ESTOPTNODBOPT     // omit optimizization of the database files
00226         };
00227         enum {                              // enumeration for options of document retrieval
00228             GDNOATTR    = ESTGDNOATTR,      // no attributes
00229             GDNOTEXT    = ESTGDNOTEXT       // no text
00230         };
00231         Database() {
00235             db = NULL;
00236             ecode = ERRNOERR;
00237         }
00238         ~Database() {
00239             if (db) close();
00240         }
00241         bool open(const char * dbname, int mode) {
00245             if (db) close();
00246             int ec;
00247             db = est_mtdb_open(dbname, mode, &ec);
00248             if (!db) ecode = ec;
00249             return db;
00250         }
00251         bool close() {
00255             if (!db) throw IOError("closed database");
00256             int ec;
00257             bool result = est_mtdb_close(db, &ec);
00258             if (!result) ecode = ec;
00259             db = NULL;
00260             return result;
00261         }
00262         bool put_doc(Document *doc, int options) {
00266             if (!db) throw IOError("closed database");
00267             bool result = est_mtdb_put_doc(db, doc->doc, options);
00268             if (!result) ecode = est_mtdb_error(db);
00269             return result;
00270         }
00271         std::vector<int> * search(Condition * cond, int options) {
00275             if (!db) throw IOError("closed database");
00276             int resnum;
00277             int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
00278             std::vector<int> *numbers = new std::vector<int>;   
00279             for (int i=0; i<resnum; i++) {
00280                 numbers->push_back(result[i]);
00281             }
00282             return numbers;
00283         }
00284         static const char * err_msg(int ecode) {
00288             return est_err_msg(ecode);
00289         }
00290         int error() {
00294             return ecode;
00295         }
00296         bool fatal() {
00300             if (!db) throw IOError("closed database");
00301             return est_mtdb_fatal(db);
00302         }
00303         bool flush(int _max) {
00307             if (!db) throw IOError("closed database");
00308             bool result = est_mtdb_flush(db, _max);
00309             if (!result) ecode = est_mtdb_error(db);
00310             return result;
00311         }
00312         bool sync() {
00316             if (!db) throw IOError("closed database");
00317             bool result = est_mtdb_sync(db);
00318             if (!result) ecode = est_mtdb_error(db);
00319             return result;
00320         }
00321         bool optimize(int options) {
00325             if (!db) throw IOError("closed database");
00326             bool result = est_mtdb_optimize(db, options);
00327             if (!result) ecode = est_mtdb_error(db);
00328             return result;
00329         }
00330         bool out_doc(int id, int options) {
00334             if (!db) throw IOError("closed database");
00335             bool result = est_mtdb_out_doc(db, id, options);
00336             if (!result) ecode = est_mtdb_error(db);
00337             return result;
00338         }
00339         bool edit_doc(Document *doc) {
00343             if (!db) throw IOError("closed database");
00344             bool result = est_mtdb_edit_doc(db, doc->doc);
00345             if (!result) ecode = est_mtdb_error(db);
00346             return result;
00347         }
00348         Document * get_doc(int id, int options) {
00352             if (!db) throw IOError("closed database");
00353             ESTDOC *doc = est_mtdb_get_doc(db, id, options);
00354             if (!doc) {
00355                 ecode = est_mtdb_error(db);
00356                 throw est_err_msg(est_mtdb_error(db));
00357             } else {
00358                 return new Document(doc);
00359             }
00360         }
00361         int uri_to_id(const char *uri) {
00365             if (!db) throw IOError("closed database");
00366             int result = est_mtdb_uri_to_id(db, uri);
00367             if(result == -1) ecode = est_mtdb_error(db);
00368             return result;
00369         }
00370         std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
00374             if (!db) throw IOError("closed database");
00375             std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
00376             CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
00377             cbmapiterinit(keys);
00378             int ksiz;
00379             while (const char *key = cbmapiternext(keys, &ksiz)) {
00380                 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
00381             }
00382             return mss;
00383         }
00384         const char * name() {
00388             if (!db) throw IOError("closed database");
00389             return est_mtdb_name(db);
00390         }
00391         int doc_num() {
00395             if (!db) throw IOError("closed database");
00396             return est_mtdb_doc_num(db);
00397         }
00398         int word_num() {
00402             if (!db) throw IOError("closed database");
00403             return est_mtdb_word_num(db);
00404         }
00405         double size() {
00409             if (!db) throw IOError("closed database");
00410             return est_mtdb_size(db);
00411         }
00412         void set_cache_size(size_t size, int anum, int tnum) {
00416             if (!db) throw IOError("closed database");
00417             est_mtdb_set_cache_size(db, size, anum, tnum);
00418         }
00419         void set_special_cache(const char *name, int num) {
00424             est_mtdb_set_special_cache(db, name, num);
00425         }
00426     };
00427 
00428     static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
00429         std::vector<std::string> * vs = new std::vector<std::string>;
00430         CBLIST *list;
00431         list = cblistopen();
00432         est_break_text(text, list, norm, tail);
00433         for (int i=0; i < cblistnum(list); i++) {
00434             vs->push_back(cblistval(list, i, NULL));
00435         }
00436         cblistclose(list);
00437         return vs;
00438     }
00439 
00440     static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
00441         std::vector<std::string> * vs = new std::vector<std::string>;
00442         CBLIST *list;
00443         list = cblistopen();
00444         est_break_text_perfng(text, list, norm, tail);
00445         for (int i=0; i < cblistnum(list); i++) {
00446             vs->push_back(cblistval(list, i, NULL));
00447         }
00448         cblistclose(list);
00449         return vs;
00450     }
00451 
00452 };

Generated on Thu Sep 8 02:02:20 2005 for HyperEstraierWrapper by  doxygen 1.4.4