00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00025 #define XAPIAN_INCLUDED_ENQUIRE_H
00026
00027 #include <string>
00028 #include <time.h>
00029
00030 #include <xapian/base.h>
00031 #include <xapian/error.h>
00032 #include <xapian/types.h>
00033 #include <xapian/termiterator.h>
00034
00035 namespace Xapian {
00036
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class Weight;
00043
00047 class MSet {
00048 public:
00049 class Internal;
00051 Xapian::Internal::RefCntPtr<Internal> internal;
00052
00054 explicit MSet(MSet::Internal * internal_);
00055
00057 MSet();
00058
00060 ~MSet();
00061
00063 MSet(const MSet & other);
00064
00066 void operator=(const MSet &other);
00067
00083 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00084
00087 void fetch(const MSetIterator &item) const;
00088
00091 void fetch() const;
00092
00097 Xapian::percent convert_to_percent(Xapian::weight wt) const;
00098
00100 Xapian::percent convert_to_percent(const MSetIterator &it) const;
00101
00109 Xapian::doccount get_termfreq(const std::string &tname) const;
00110
00118 Xapian::weight get_termweight(const std::string &tname) const;
00119
00127 Xapian::doccount get_firstitem() const;
00128
00138 Xapian::doccount get_matches_lower_bound() const;
00139
00152 Xapian::doccount get_matches_estimated() const;
00153
00163 Xapian::doccount get_matches_upper_bound() const;
00164
00170 Xapian::weight get_max_possible() const;
00171
00185 Xapian::weight get_max_attained() const;
00186
00188 Xapian::doccount size() const;
00189
00191 Xapian::doccount max_size() const { return size(); }
00192
00194 bool empty() const;
00195
00197 void swap(MSet & other);
00198
00200 MSetIterator begin() const;
00201
00203 MSetIterator end() const;
00204
00206 MSetIterator back() const;
00207
00217 MSetIterator operator[](Xapian::doccount i) const;
00218
00220
00221 typedef MSetIterator value_type;
00222 typedef MSetIterator iterator;
00223 typedef MSetIterator const_iterator;
00224 typedef MSetIterator & reference;
00225 typedef MSetIterator & const_reference;
00226 typedef MSetIterator * pointer;
00227 typedef Xapian::doccount_diff difference_type;
00228 typedef Xapian::doccount size_type;
00230
00234 std::string get_description() const;
00235 };
00236
00240 class MSetIterator {
00241 private:
00242 friend class MSet;
00243 friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00244 friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00245
00246 MSetIterator(Xapian::doccount index_, const MSet & mset_)
00247 : index(index_), mset(mset_) { }
00248
00249 Xapian::doccount index;
00250 MSet mset;
00251
00252 public:
00256 MSetIterator() : index(0), mset() { }
00257
00258 ~MSetIterator() { }
00259
00261 MSetIterator(const MSetIterator &other) {
00262 index = other.index;
00263 mset = other.mset;
00264 }
00265
00267 void operator=(const MSetIterator &other) {
00268 index = other.index;
00269 mset = other.mset;
00270 }
00271
00273 MSetIterator & operator++() {
00274 ++index;
00275 return *this;
00276 }
00277
00279 MSetIterator operator++(int) {
00280 MSetIterator tmp = *this;
00281 ++index;
00282 return tmp;
00283 }
00284
00286 MSetIterator & operator--() {
00287 --index;
00288 return *this;
00289 }
00290
00292 MSetIterator operator--(int) {
00293 MSetIterator tmp = *this;
00294 --index;
00295 return tmp;
00296 }
00297
00299 Xapian::docid operator*() const;
00300
00317 Xapian::Document get_document() const;
00318
00325 Xapian::doccount get_rank() const {
00326 return mset.get_firstitem() + index;
00327 }
00328
00330 Xapian::weight get_weight() const;
00331
00348 Xapian::doccount get_collapse_count() const;
00349
00355 Xapian::percent get_percent() const;
00356
00360 std::string get_description() const;
00361
00363
00364 typedef std::bidirectional_iterator_tag iterator_category;
00365 typedef Xapian::docid value_type;
00366 typedef Xapian::doccount_diff difference_type;
00367 typedef Xapian::docid * pointer;
00368 typedef Xapian::docid & reference;
00370 };
00371
00372 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00373 {
00374 return (a.index == b.index);
00375 }
00376
00377 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00378 {
00379 return (a.index != b.index);
00380 }
00381
00382 class ESetIterator;
00383
00388 class ESet {
00389 public:
00390 class Internal;
00392 Xapian::Internal::RefCntPtr<Internal> internal;
00393
00395 ESet();
00396
00398 ~ESet();
00399
00401 ESet(const ESet & other);
00402
00404 void operator=(const ESet &other);
00405
00410 Xapian::termcount get_ebound() const;
00411
00413 Xapian::termcount size() const;
00414
00416 Xapian::termcount max_size() const { return size(); }
00417
00419 bool empty() const;
00420
00422 void swap(ESet & other);
00423
00425 ESetIterator begin() const;
00426
00428 ESetIterator end() const;
00429
00431 ESetIterator back() const;
00432
00434 ESetIterator operator[](Xapian::termcount i) const;
00435
00440 std::string get_description() const;
00441 };
00442
00444 class ESetIterator {
00445 private:
00446 friend class ESet;
00447 friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00448 friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00449
00450 ESetIterator(Xapian::termcount index_, const ESet & eset_)
00451 : index(index_), eset(eset_) { }
00452
00453 Xapian::termcount index;
00454 ESet eset;
00455
00456 public:
00460 ESetIterator() : index(0), eset() { }
00461
00462 ~ESetIterator() { }
00463
00465 ESetIterator(const ESetIterator &other) {
00466 index = other.index;
00467 eset = other.eset;
00468 }
00469
00471 void operator=(const ESetIterator &other) {
00472 index = other.index;
00473 eset = other.eset;
00474 }
00475
00477 ESetIterator & operator++() {
00478 ++index;
00479 return *this;
00480 }
00481
00483 ESetIterator operator++(int) {
00484 ESetIterator tmp = *this;
00485 ++index;
00486 return tmp;
00487 }
00488
00490 ESetIterator & operator--() {
00491 --index;
00492 return *this;
00493 }
00494
00496 ESetIterator operator--(int) {
00497 ESetIterator tmp = *this;
00498 --index;
00499 return tmp;
00500 }
00501
00503 const std::string & operator *() const;
00504
00506 Xapian::weight get_weight() const;
00507
00511 std::string get_description() const;
00512
00514
00515 typedef std::bidirectional_iterator_tag iterator_category;
00516 typedef std::string value_type;
00517 typedef Xapian::termcount_diff difference_type;
00518 typedef std::string * pointer;
00519 typedef std::string & reference;
00521 };
00522
00523 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00524 {
00525 return (a.index == b.index);
00526 }
00527
00528 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00529 {
00530 return (a.index != b.index);
00531 }
00532
00537 class RSet {
00538 public:
00540 class Internal;
00541
00543 Xapian::Internal::RefCntPtr<Internal> internal;
00544
00546 RSet(const RSet &rset);
00547
00549 void operator=(const RSet &rset);
00550
00552 RSet();
00553
00555 ~RSet();
00556
00558 Xapian::doccount size() const;
00559
00561 bool empty() const;
00562
00564 void add_document(Xapian::docid did);
00565
00567 void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00568
00570 void remove_document(Xapian::docid did);
00571
00573 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00574
00576 bool contains(Xapian::docid did) const;
00577
00579 bool contains(const Xapian::MSetIterator & i) { return contains(*i); }
00580
00585 std::string get_description() const;
00586 };
00587
00590 class MatchDecider {
00591 public:
00594 virtual int operator()(const Xapian::Document &doc) const = 0;
00595
00597 virtual ~MatchDecider() {}
00598 };
00599
/** Abstract functor interface invoked with a term name.
 *
 *  Subclasses implement operator(); Enquire::get_eset() accepts a pointer to
 *  one.
 */
class ExpandDecider {
  public:
    /** Decide about the term `tname`.
     *
     *  NOTE(review): the result is an int; presumably non-zero means
     *  "accept" -- confirm against the caller.
     */
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual so that deleting through a base pointer is well defined.
    virtual ~ExpandDecider() { }
};
00611
00622 class Enquire {
00623 private:
00625 Enquire(const Enquire &);
00626
00628 void operator=(const Enquire &);
00629
00630 public:
00631 class Internal;
00633 Xapian::Internal::RefCntPtr<Internal> internal;
00634
00650 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00651
00654 ~Enquire();
00655
00662 void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00663
00670 const Xapian::Query & get_query();
00671
00678 void set_weighting_scheme(const Weight &weight_);
00679
00706 void set_collapse_key(Xapian::valueno collapse_key);
00707
00708 typedef enum {
00709 ASCENDING = 1,
00710 DESCENDING = 0,
00711 DONT_CARE = 2
00712 } docid_order;
00713
00737 void set_docid_order(docid_order order);
00738
00745 XAPIAN_DEPRECATED(void set_sort_forward(bool sort_forward));
00746
00765 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00766
00783 XAPIAN_DEPRECATED(void set_sorting(Xapian::valueno sort_key, int sort_bands,
00784 bool sort_by_relevance = false));
00785
00788 void set_sort_by_relevance();
00789
00800 void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00801
00813 void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00814 bool ascending = true);
00815
00833 void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
00834 bool ascending = true);
00835
00847 void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00848
00874 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00875 Xapian::doccount checkatleast = 0,
00876 const RSet * omrset = 0,
00877 const MatchDecider * mdecider = 0) const;
00878 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00879 const RSet * omrset,
00880 const MatchDecider * mdecider = 0) const {
00881 return get_mset(first, maxitems, 0, omrset, mdecider);
00882 }
00883
00884 static const int include_query_terms = 1;
00885 static const int use_exact_termfreq = 2;
00908 ESet get_eset(Xapian::termcount maxitems,
00909 const RSet & omrset,
00910 int flags = 0,
00911 double k = 1.0,
00912 const Xapian::ExpandDecider * edecider = 0) const;
00913
00927 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00928 const Xapian::ExpandDecider * edecider) const {
00929 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00930 }
00931
00960 TermIterator get_matching_terms_begin(Xapian::docid did) const;
00961
00963 TermIterator get_matching_terms_end(Xapian::docid ) const {
00964 return TermIterator(NULL);
00965 }
00966
00989 TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00990
00992 TermIterator get_matching_terms_end(const MSetIterator &) const {
00993 return TermIterator(NULL);
00994 }
00995
01002 void register_match_decider(const std::string &name,
01003 const MatchDecider *mdecider = NULL);
01004
01008 std::string get_description() const;
01009 };
01010
01011 }
01012
01013 class SocketServer;
01014
01015 namespace Xapian {
01016
01018 class Weight {
01019 friend class Enquire;
01020 friend class ::SocketServer;
01021 public:
01022 class Internal;
01023 protected:
01024 Weight(const Weight &);
01025 private:
01026 void operator=(Weight &);
01027
01037 virtual Weight * clone() const = 0;
01038
01039 protected:
01040 const Internal * internal;
01041 Xapian::doclength querysize;
01042 Xapian::termcount wqf;
01043 std::string tname;
01044
01045 public:
01046 Weight() { }
01047 virtual ~Weight() { }
01048
01061 Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01062 Xapian::termcount wqf_, std::string tname_) const {
01063 Weight * wt = clone();
01064 wt->internal = internal_;
01065 wt->querysize = querysize_;
01066 wt->wqf = wqf_;
01067 wt->tname = tname_;
01068 return wt;
01069 }
01070
01075 virtual std::string name() const = 0;
01076
01078 virtual std::string serialise() const = 0;
01079
01081 virtual Weight * unserialise(const std::string &s) const = 0;
01082
01090 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01091 Xapian::doclength len) const = 0;
01092
01098 virtual Xapian::weight get_maxpart() const = 0;
01099
01108 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01109
01113 virtual Xapian::weight get_maxextra() const = 0;
01114
01116 virtual bool get_sumpart_needs_doclength() const { return true; }
01117 };
01118
01120 class BoolWeight : public Weight {
01121 public:
01122 BoolWeight * clone() const {
01123 return new BoolWeight;
01124 }
01125 BoolWeight() { }
01126 ~BoolWeight() { }
01127 std::string name() const { return "Bool"; }
01128 std::string serialise() const { return ""; }
01129 BoolWeight * unserialise(const std::string & ) const {
01130 return new BoolWeight;
01131 }
01132 Xapian::weight get_sumpart(Xapian::termcount , Xapian::doclength ) const { return 0; }
01133 Xapian::weight get_maxpart() const { return 0; }
01134
01135 Xapian::weight get_sumextra(Xapian::doclength ) const { return 0; }
01136 Xapian::weight get_maxextra() const { return 0; }
01137
01138 bool get_sumpart_needs_doclength() const { return false; }
01139 };
01140
01153 class BM25Weight : public Weight {
01154 private:
01155 mutable Xapian::weight termweight;
01156 mutable Xapian::doclength lenpart;
01157
01158 double k1, k2, k3, b;
01159 Xapian::doclength min_normlen;
01160
01161 mutable bool weight_calculated;
01162
01163 void calc_termweight() const;
01164
01165 public:
01184 BM25Weight(double k1_, double k2_, double k3_, double b_,
01185 double min_normlen_)
01186 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01187 weight_calculated(false)
01188 {
01189 if (k1 < 0) k1 = 0;
01190 if (k2 < 0) k2 = 0;
01191 if (k3 < 0) k3 = 0;
01192 if (b < 0) b = 0; else if (b > 1) b = 1;
01193 }
01194 BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01195 weight_calculated(false) { }
01196
01197 BM25Weight * clone() const;
01198 ~BM25Weight() { }
01199 std::string name() const;
01200 std::string serialise() const;
01201 BM25Weight * unserialise(const std::string & s) const;
01202 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01203 Xapian::weight get_maxpart() const;
01204
01205 Xapian::weight get_sumextra(Xapian::doclength len) const;
01206 Xapian::weight get_maxextra() const;
01207
01208 bool get_sumpart_needs_doclength() const;
01209 };
01210
01224 class TradWeight : public Weight {
01225 private:
01226 mutable Xapian::weight termweight;
01227 mutable Xapian::doclength lenpart;
01228
01229 double param_k;
01230
01231 mutable bool weight_calculated;
01232
01233 void calc_termweight() const;
01234
01235 public:
01243 explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01244 if (param_k < 0) param_k = 0;
01245 }
01246
01247 TradWeight() : param_k(1.0), weight_calculated(false) { }
01248
01249 TradWeight * clone() const;
01250 ~TradWeight() { }
01251 std::string name() const;
01252 std::string serialise() const;
01253 TradWeight * unserialise(const std::string & s) const;
01254
01255 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01256 Xapian::weight get_maxpart() const;
01257
01258 Xapian::weight get_sumextra(Xapian::doclength len) const;
01259 Xapian::weight get_maxextra() const;
01260
01261 bool get_sumpart_needs_doclength() const;
01262 };
01263
01264 }
01265
01266 #endif