Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
wordrec.h
Go to the documentation of this file.
1 
2 // File: wordrec.h
3 // Description: wordrec class.
4 // Author: Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_WORDREC_WORDREC_H__
20 #define TESSERACT_WORDREC_WORDREC_H__
21 
22 #include "associate.h"
23 #include "classify.h"
24 #include "dict.h"
25 #include "language_model.h"
26 #include "ratngs.h"
27 #include "matrix.h"
28 #include "matchtab.h"
29 #include "oldheap.h"
30 #include "gradechop.h"
31 #include "seam.h"
32 #include "states.h"
33 #include "findseam.h"
34 #include "callcpp.h"
35 
36 struct CHUNKS_RECORD;
37 struct SEARCH_RECORD;
38 class WERD_RES;
39 
40 // A struct for storing child/parent pairs of the BLOB_CHOICE_LISTs
41 // to be processed by the segmentation search.
42 struct SEG_SEARCH_PENDING : public ELIST_LINK {
 // Constructor: copies each argument into the member of the matching name
 // (child_row, parent, changed).
 // NOTE(review): listing line 45 is absent from this rendering; it presumably
 // declares the changed_arg parameter used in the initializer list below.
43  SEG_SEARCH_PENDING(int child_row_arg,
44  BLOB_CHOICE_LIST *parent_arg,
46  child_row(child_row_arg), parent(parent_arg), changed(changed_arg) {}
47 
48  // Comparator function for add_sorted().
 // p1 and p2 each point to a (const SEG_SEARCH_PENDING *). Returns 0 when
 // both child_row and parent are equal, -1 when the first entry has the
 // smaller child_row, and 1 in every other case.
 // NOTE(review): two entries with the same child_row but different parents
 // compare as 1 in either argument order, so this is not a symmetric
 // ordering for such pairs - confirm callers only rely on the 0 result to
 // detect duplicates.
49  static int compare(const void *p1, const void *p2) {
50  const SEG_SEARCH_PENDING *e1 = *reinterpret_cast<
51  const SEG_SEARCH_PENDING * const *>(p1);
52  const SEG_SEARCH_PENDING *e2 = *reinterpret_cast<
53  const SEG_SEARCH_PENDING * const *>(p2);
54  if (e1->child_row == e2->child_row &&
55  e1->parent == e2->parent) return 0;
56  return (e1->child_row < e2->child_row) ? -1 : 1;
57  }
58 
59  int child_row; // row of the child in the ratings matrix
60  BLOB_CHOICE_LIST *parent; // pointer to the parent BLOB_CHOICE_LIST
61  // Flags that indicate which language model components are still active
62  // on the parent path (i.e. recorded some changes to the language model
63  // state) and need to be invoked for this pending entry.
64  // This field is used as an argument to LanguageModel::UpdateState()
65  // in Wordrec::UpdateSegSearchNodes().
 // NOTE(review): listing line 66 is absent from this rendering; it presumably
 // declares the `changed` member that the comment above describes.
67 };
68 
70 
71 
72 namespace tesseract {
73 
74 /* ccmain/tstruct.cpp *********************************************************/
75 class FRAGMENT:public ELIST_LINK
76 {
 // List element (derives from ELIST_LINK) describing one fragment by its two
 // ends: as coordinates (head, tail) and as EDGEPT pointers (headpt, tailpt).
77  public:
78  FRAGMENT() { // default constructor: empty body, so headpt and tailpt
 // are not assigned here
79  }
 // Two-point constructor; only declared in this header.
80  FRAGMENT(EDGEPT *head_pt, // start point of the fragment
81  EDGEPT *tail_pt); // end point of the fragment
82 
83  ICOORD head; // coords of start
84  ICOORD tail; // coords of end
85  EDGEPT *headpt; // start point
86  EDGEPT *tailpt; // end point
87 };
89 
90 
91 class Wordrec : public Classify {
92  public:
93  // config parameters *******************************************************
94  BOOL_VAR_H(merge_fragments_in_matrix, TRUE,
95  "Merge the fragments in the ratings matrix and delete them "
96  "after merging");
97  BOOL_VAR_H(wordrec_no_block, FALSE, "Don't output block information");
98  BOOL_VAR_H(wordrec_enable_assoc, TRUE, "Associator Enable");
99  BOOL_VAR_H(force_word_assoc, FALSE,
100  "force associator to run regardless of what enable_assoc is."
101  "This is used for CJK where component grouping is necessary.");
102  INT_VAR_H(wordrec_num_seg_states, 30, "Segmentation states");
103  double_VAR_H(wordrec_worst_state, 1, "Worst segmentation state");
104  BOOL_VAR_H(fragments_guide_chopper, FALSE,
105  "Use information from fragments to guide chopping process");
106  INT_VAR_H(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped");
107  double_VAR_H(tessedit_certainty_threshold, -2.25, "Good blob limit");
108  INT_VAR_H(chop_debug, 0, "Chop debug");
109  BOOL_VAR_H(chop_enable, 1, "Chop enable");
110  BOOL_VAR_H(chop_vertical_creep, 0, "Vertical creep");
111  INT_VAR_H(chop_split_length, 10000, "Split Length");
112  INT_VAR_H(chop_same_distance, 2, "Same distance");
113  INT_VAR_H(chop_min_outline_points, 6, "Min Number of Points on Outline");
114  INT_VAR_H(chop_inside_angle, -50, "Min Inside Angle Bend");
115  INT_VAR_H(chop_min_outline_area, 2000, "Min Outline Area");
116  double_VAR_H(chop_split_dist_knob, 0.5, "Split length adjustment");
117  double_VAR_H(chop_overlap_knob, 0.9, "Split overlap adjustment");
118  double_VAR_H(chop_center_knob, 0.15, "Split center adjustment");
119  double_VAR_H(chop_sharpness_knob, 0.06, "Split sharpness adjustment");
120  double_VAR_H(chop_width_change_knob, 5.0, "Width change adjustment");
121  double_VAR_H(chop_ok_split, 100.0, "OK split limit");
122  double_VAR_H(chop_good_split, 50.0, "Good split limit");
123  INT_VAR_H(chop_x_y_weight, 3, "X / Y length weight");
124  INT_VAR_H(segment_adjust_debug, 0, "Segmentation adjustment debug");
125  BOOL_VAR_H(assume_fixed_pitch_char_segment, FALSE,
126  "include fixed-pitch heuristics in char segmentation");
127  BOOL_VAR_H(use_new_state_cost, FALSE,
128  "use new state cost heuristics for segmentation state evaluation");
129  double_VAR_H(heuristic_segcost_rating_base, 1.25,
130  "base factor for adding segmentation cost into word rating."
131  "It's a multiplying factor, the larger the value above 1, "
132  "the bigger the effect of segmentation cost.");
133  double_VAR_H(heuristic_weight_rating, 1,
134  "weight associated with char rating in combined cost of state");
135  double_VAR_H(heuristic_weight_width, 0,
136  "weight associated with width evidence in combined cost of state");
137  double_VAR_H(heuristic_weight_seamcut, 0,
138  "weight associated with seam cut in combined cost of state");
139  double_VAR_H(heuristic_max_char_wh_ratio, 2.0,
140  "max char width-to-height ratio allowed in segmentation");
141  INT_VAR_H(wordrec_debug_level, 0, "Debug level for wordrec");
142  BOOL_VAR_H(wordrec_debug_blamer, false, "Print blamer debug messages");
143  BOOL_VAR_H(wordrec_run_blamer, false, "Try to set the blame for errors");
144  BOOL_VAR_H(enable_new_segsearch, false,
145  "Enable new segmentation search path.");
146  INT_VAR_H(segsearch_debug_level, 0, "SegSearch debug level");
147  INT_VAR_H(segsearch_max_pain_points, 2000,
148  "Maximum number of pain points stored in the queue");
149  INT_VAR_H(segsearch_max_futile_classifications, 10,
150  "Maximum number of pain point classifications per word.");
151  double_VAR_H(segsearch_max_char_wh_ratio, 2.0,
152  "Maximum character width-to-height ratio");
153  double_VAR_H(segsearch_max_fixed_pitch_char_wh_ratio, 2.0,
154  "Maximum character width-to-height ratio for"
155  "fixed pitch fonts");
156  BOOL_VAR_H(save_alt_choices, false,
157  "Save alternative paths found during chopping "
158  "and segmentation search");
159 
160  // methods from wordrec/*.cpp ***********************************************
161  Wordrec();
162  virtual ~Wordrec();
163 
164  void CopyCharChoices(const BLOB_CHOICE_LIST_VECTOR &from,
166 
167  // Returns true if text recorded in choice is the same as truth_text.
168  bool ChoiceIsCorrect(const UNICHARSET& uni_set,
169  const WERD_CHOICE *choice,
170  const GenericVector<STRING> &truth_text);
171 
172  // Fills word->alt_choices with alternative paths found during
173  // chopping/segmentation search that are kept in best_choices.
174  void SaveAltChoices(const LIST &best_choices, WERD_RES *word);
175 
176  // Fills character choice lattice in the given BlamerBundle
177  // using the given ratings matrix and best choice list.
178  void FillLattice(const MATRIX &ratings, const LIST &best_choices,
179  const UNICHARSET &unicharset, BlamerBundle *blamer_bundle);
180 
181  // Invokes, on this object, the member function that fill_lattice_ points
182  // to, forwarding all four arguments unchanged.
 // No NULL check is made here: fill_lattice_ must be non-NULL before this
 // is called.
183  void CallFillLattice(const MATRIX &ratings, const LIST &best_choices,
184  const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) {
185  (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
186  }
187 
188  // tface.cpp
189  void program_editup(const char *textbase,
190  bool init_classifier,
191  bool init_permute);
192  BLOB_CHOICE_LIST_VECTOR *cc_recog(WERD_RES *word);
193  void program_editdown(inT32 elasped_time);
194  void set_pass1();
195  void set_pass2();
196  int end_recog();
197  BLOB_CHOICE_LIST *call_matcher(const DENORM* denorm, TBLOB* blob);
198  int dict_word(const WERD_CHOICE &word);
199  // wordclass.cpp
200  BLOB_CHOICE_LIST *classify_blob(TBLOB *blob,
201  const DENORM& denorm,
202  const char *string,
203  C_COL color,
204  BlamerBundle *blamer_bundle);
205  BLOB_CHOICE_LIST *fake_classify_blob(UNICHAR_ID class_id,
206  float rating, float certainty);
207  void update_blob_classifications(TWERD *word,
208  const BLOB_CHOICE_LIST_VECTOR &choices);
209 
210  // bestfirst.cpp
211  BLOB_CHOICE_LIST_VECTOR *evaluate_chunks(CHUNKS_RECORD *chunks_record,
212  SEARCH_STATE search_state,
213  BlamerBundle *blamer_bundle);
214  void update_ratings(const BLOB_CHOICE_LIST_VECTOR &new_choices,
215  const CHUNKS_RECORD *chunks_record,
216  const SEARCH_STATE search_state);
217  inT16 evaluate_state(CHUNKS_RECORD *chunks_record,
218  SEARCH_RECORD *the_search,
219  DANGERR *fixpt,
220  BlamerBundle *blamer_bundle);
221  SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
222  int num_joints,
223  BLOB_CHOICE_LIST_VECTOR *best_char_choices,
224  WERD_CHOICE *best_choice,
225  WERD_CHOICE *raw_choice,
226  STATE *state);
227  void best_first_search(CHUNKS_RECORD *chunks_record,
228  BLOB_CHOICE_LIST_VECTOR *best_char_choices,
229  WERD_RES *word,
230  STATE *state,
231  DANGERR *fixpt,
232  STATE *best_state);
233  void delete_search(SEARCH_RECORD *the_search);
234  void expand_node(FLOAT32 worst_priority,
235  CHUNKS_RECORD *chunks_record,
236  SEARCH_RECORD *the_search);
237  void replace_char_widths(CHUNKS_RECORD *chunks_record,
238  SEARCH_STATE state);
239  // Transfers the given state to the word's output fields: rebuild_word,
240  // best_state, box_word, and returns the corresponding blob choices.
241  BLOB_CHOICE_LIST_VECTOR *rebuild_current_state(
242  WERD_RES *word,
243  STATE *state,
244  BLOB_CHOICE_LIST_VECTOR *char_choices,
245  MATRIX *ratings);
246  // Creates a fake blob choice from the combination of the given fragments.
247  // unichar is the class to be made from the combination,
248  // expanded_fragment_lengths[choice_index] is the number of fragments to use.
249  // old_choices[choice_index] has the classifier output for each fragment.
250  // choice index initially indexes the last fragment and should be decremented
251  // expanded_fragment_lengths[choice_index] times to get the earlier fragments.
252  // Guarantees to return something non-null, or abort!
253  BLOB_CHOICE* rebuild_fragments(
254  const char* unichar,
255  const char* expanded_fragment_lengths,
256  int choice_index,
257  BLOB_CHOICE_LIST_VECTOR *old_choices);
258  // Creates a joined copy of the blobs between x and y (inclusive) and
259  // inserts it into the rebuild_word in word.
260  // Returns a deep copy of the classifier results for the blob.
261  BLOB_CHOICE_LIST *join_blobs_and_classify(
262  WERD_RES* word, int x, int y, int choice_index, MATRIX *ratings,
263  BLOB_CHOICE_LIST_VECTOR *old_choices);
264  STATE *pop_queue(HEAP *queue);
265  void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority,
266  FLOAT32 priority, bool debug);
267 
268  // segsearch.cpp
269  // SegSearch works on the lower diagonal matrix of BLOB_CHOICE_LISTs.
270  // Each entry in the matrix represents the classification choice
271  // for a chunk, i.e. an entry in row 2, column 1 represents the list
272  // of ratings for the chunks 1 and 2 classified as a single blob.
273  // The entries on the diagonal of the matrix are classifier choice lists
274  // for a single chunk from the maximal segmentation.
275  //
276  // The ratings matrix given to SegSearch represents the segmentation
277  // graph / trellis for the current word. The nodes in the graph are the
278  // individual BLOB_CHOICEs in each of the BLOB_CHOICE_LISTs in the ratings
279  // matrix. The children of each node (nodes connected by outgoing links)
280  // are the entries in the column that is equal to node's row+1. The parents
281  // (nodes connected by the incoming links) are the entries in the row that
282  // is equal to the node's column-1. Here is an example ratings matrix:
283  //
284  // 0 1 2 3 4
285  // -------------------------
286  // 0| c,( |
287  // 1| d l,1 |
288  // 2| o |
289  // 3| c,( |
290  // 4| g,y l,1 |
291  // -------------------------
292  //
293  // In the example above node "o" has children (outgoing connection to nodes)
294  // "c","(","g","y" and parents (incoming connections from nodes) "l","1","d".
295  //
296  // The objective of the search is to find the least cost path, where the cost
297  // is determined by the language model components and the properties of the
298  // cut between the blobs on the path. SegSearch starts by populating the
299  // matrix with all the entries that were classified by the chopper and
300  // finding the initial best path. Based on the classifier ratings, language
301  // model scores and the properties of each cut, a list of "pain points" is
302  // constructed - those are the points on the path where the choices do not
303  // look consistent with the neighboring choices, the cuts look particularly
304  // problematic, or the certainties of the blobs are low. The most troublesome
305  // "pain point" is picked from the list and the new entry in the ratings
306  // matrix corresponding to this "pain point" is filled in. Then the language
307  // model state is updated to reflect the new classification and the new
308  // "pain points" are added to the list and the next most troublesome
309  // "pain point" is determined. This continues until either the word choice
310  // composed from the best paths in the segmentation graph is "good enough"
311  // (e.g. above a certain certainty threshold, is an unambiguous dictionary
312  // word, etc) or there are no more "pain points" to explore.
313  void SegSearch(CHUNKS_RECORD *chunks_record,
314  WERD_CHOICE *best_choice,
315  BLOB_CHOICE_LIST_VECTOR *best_char_choices,
316  WERD_CHOICE *raw_choice,
317  STATE *output_best_state,
318  BlamerBundle *blamer_bundle);
319 
320  // chop.cpp
321  PRIORITY point_priority(EDGEPT *point);
322  void add_point_to_list(POINT_GROUP point_list, EDGEPT *point);
323  int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3);
324  int is_little_chunk(EDGEPT *point1, EDGEPT *point2);
325  int is_small_area(EDGEPT *point1, EDGEPT *point2);
326  EDGEPT *pick_close_point(EDGEPT *critical_point,
327  EDGEPT *vertical_point,
328  int *best_dist);
329  void prioritize_points(TESSLINE *outline, POINT_GROUP points);
330  void new_min_point(EDGEPT *local_min, POINT_GROUP points);
331  void new_max_point(EDGEPT *local_max, POINT_GROUP points);
332  void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point,
333  EDGEPT** best_point,
334  EDGEPT_CLIST *new_points);
335 
336  // chopper.cpp
337  SEAM *attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number,
338  bool italic_blob, SEAMS seam_list);
339  SEAM *chop_numbered_blob(TWERD *word, inT32 blob_number,
340  bool italic_blob, SEAMS seam_list);
341  SEAM *chop_overlapping_blob(const GenericVector<TBOX>& boxes,
342  WERD_RES *word_res, inT32 *blob_number,
343  bool italic_blob, SEAMS seam_list);
344  bool improve_one_blob(WERD_RES *word_res,
345  BLOB_CHOICE_LIST_VECTOR *char_choices,
346  inT32 *blob_number,
347  SEAMS *seam_list,
348  DANGERR *fixpt,
349  bool split_next_to_fragment,
350  BlamerBundle *blamer_bundle);
351  void modify_blob_choice(BLOB_CHOICE_LIST *answer,
352  int chop_index);
353  bool chop_one_blob(TWERD *word,
354  BLOB_CHOICE_LIST_VECTOR *char_choices,
355  inT32 *blob_number,
356  SEAMS *seam_list,
357  int *right_chop_index);
358  bool chop_one_blob2(const GenericVector<TBOX>& boxes,
359  WERD_RES *word_res, SEAMS *seam_list);
360  BLOB_CHOICE_LIST_VECTOR *chop_word_main(WERD_RES *word);
361  void improve_by_chopping(WERD_RES *word,
362  BLOB_CHOICE_LIST_VECTOR *char_choices,
363  STATE *best_state,
364  BLOB_CHOICE_LIST_VECTOR *best_char_choices,
365  DANGERR *fixpt,
366  bool *updated_best_choice);
367  MATRIX *word_associator(bool only_create_ratings_matrtix,
368  WERD_RES *word,
369  STATE *state,
370  BLOB_CHOICE_LIST_VECTOR *best_char_choices,
371  DANGERR *fixpt,
372  STATE *best_state);
373  inT16 select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices,
374  float rating_ceiling,
375  bool split_next_to_fragment);
376  inT16 select_blob_to_split_from_fixpt(DANGERR *fixpt);
377  void set_chopper_blame(WERD_RES *word);
378 
379  // findseam.cpp
380  void junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam, float new_priority);
381  void choose_best_seam(SEAM_QUEUE seam_queue,
382  SEAM_PILE *seam_pile,
383  SPLIT *split,
384  PRIORITY priority,
385  SEAM **seam_result,
386  TBLOB *blob);
387  void combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam);
388  inT16 constrained_split(SPLIT *split, TBLOB *blob);
389  void delete_seam_pile(SEAM_PILE seam_pile);
390  SEAM *pick_good_seam(TBLOB *blob);
391  PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax);
392  void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS],
393  inT16 num_points,
394  SEAM_QUEUE seam_queue,
395  SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob);
396  void try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
397  inT16 num_points,
398  EDGEPT_CLIST *new_points,
399  SEAM_QUEUE seam_queue,
400  SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob);
401 
402  // gradechop.cpp
403  PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax);
404  PRIORITY grade_center_of_blob(register BOUNDS_RECT rect);
405  PRIORITY grade_overlap(register BOUNDS_RECT rect);
406  PRIORITY grade_split_length(register SPLIT *split);
407  PRIORITY grade_sharpness(register SPLIT *split);
408  PRIORITY grade_width_change(register BOUNDS_RECT rect);
409  void set_outline_bounds(register EDGEPT *point1,
410  register EDGEPT *point2,
411  BOUNDS_RECT rect);
412 
413  // outlines.cpp
414  int crosses_outline(EDGEPT *p0, EDGEPT *p1, EDGEPT *outline);
415  int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1);
416  int is_same_edgept(EDGEPT *p1, EDGEPT *p2);
417  bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1,
418  EDGEPT **near_pt);
419  void reverse_outline(EDGEPT *outline);
420 
421  // pieces.cpp
422  virtual BLOB_CHOICE_LIST *classify_piece(TBLOB *pieces,
423  const DENORM& denorm,
424  SEAMS seams,
425  inT16 start,
426  inT16 end,
427  BlamerBundle *blamer_bundle);
428  // Try to merge fragments in the ratings matrix and put the result in
429  // the corresponding row and column
430  void merge_fragments(MATRIX *ratings,
431  inT16 num_blobs);
432  // Recursively go through the ratings matrix to find lists of fragments
433  // to be merged in the function merge_and_put_fragment_lists.
434  // current_frag is the position of the piece we are looking for.
435  // current_row is the row in the rating matrix we are currently at.
436  // start is the row we started initially, so that we can know where
437  // to append the results to the matrix. num_frag_parts is the total
438  // number of pieces we are looking for and num_blobs is the size of the
439  // ratings matrix.
440  void get_fragment_lists(inT16 current_frag,
441  inT16 current_row,
442  inT16 start,
443  inT16 num_frag_parts,
444  inT16 num_blobs,
445  MATRIX *ratings,
446  BLOB_CHOICE_LIST *choice_lists);
447  // Merge the fragment lists in choice_lists and append it to the
448  // ratings matrix
449  void merge_and_put_fragment_lists(inT16 row,
450  inT16 column,
451  inT16 num_frag_parts,
452  BLOB_CHOICE_LIST *choice_lists,
453  MATRIX *ratings);
454  // Filter the fragment list so that the filtered_choices only contain
455  // fragments that are in the correct position. choices is the list
456  // that we are going to filter. fragment_pos is the position in the
457  // fragment that we are looking for and num_frag_parts is the
458  // total number of pieces. The result will be appended to
459  // filtered_choices.
460  void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices,
461  int fragment_pos,
462  int num_frag_parts,
463  BLOB_CHOICE_LIST *filtered_choices);
464  BLOB_CHOICE_LIST *get_piece_rating(MATRIX *ratings,
465  TBLOB *blobs,
466  const DENORM& denorm,
467  SEAMS seams,
468  inT16 start,
469  inT16 end,
470  BlamerBundle *blamer_bundle);
471  // returns an array of bounding boxes for the given list of blobs.
472  TBOX *record_blob_bounds(TBLOB *blobs);
473  MATRIX *record_piece_ratings(TBLOB *blobs);
474 
475  // heuristic.cpp
476  WIDTH_RECORD* state_char_widths(WIDTH_RECORD *chunk_widths,
477  STATE *state,
478  int num_joints);
479  FLOAT32 get_width_variance(WIDTH_RECORD *wrec, float norm_height);
480  FLOAT32 get_gap_variance(WIDTH_RECORD *wrec, float norm_height);
481  FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record,
482  SEARCH_RECORD *the_search);
483  FLOAT32 width_priority(CHUNKS_RECORD *chunks_record,
484  STATE *state,
485  int num_joints);
486  FLOAT32 seamcut_priority(SEAMS seams,
487  STATE *state,
488  int num_joints);
489  FLOAT32 rating_priority(CHUNKS_RECORD *chunks_record,
490  STATE *state,
491  int num_joints);
492 
493  // Member variables.
494 
503  // Stores the best choice for the previous word in the paragraph.
504  // This variable is modified by PAGE_RES_IT when iterating over
505  // words to OCR on the page.
507  // Sums of blame reasons computed by the blamer.
509  // Function used to fill char choice lattices.
510  void (Wordrec::*fill_lattice_)(const MATRIX &ratings,
511  const LIST &best_choices,
512  const UNICHARSET &unicharset,
513  BlamerBundle *blamer_bundle);
514 
515  protected:
516  inline bool SegSearchDone(int num_futile_classifications) {
 // Returns true when the language model reports that an acceptable choice
 // was found, or when num_futile_classifications has reached the
 // segsearch_max_futile_classifications parameter.
 // NOTE(review): language_model_ is not declared in the visible part of
 // this listing; it is dereferenced without a NULL check.
517  return (language_model_->AcceptableChoiceFound() ||
518  num_futile_classifications >=
519  segsearch_max_futile_classifications);
520  }
521 
522  // Updates the language model state recorded for the child entries specified
523  // in pending[starting_col]. Enqueues the children of the updated entries
524  // into pending and proceeds to update (and remove from pending) all the
525  // remaining entries in pending[col] (col >= starting_col). Upon termination
526  // of this function all the pending[col] lists will be empty.
527  //
528  // The arguments:
529  //
530  // starting_col: index of the column in chunks_record->ratings from
531  // which the update should be started
532  //
533  // pending: list of entries listing chunks_record->ratings entries
534  // that should be updated
535  //
536  // pain_points: priority heap listing the pain points generated by
537  // the language model
538  //
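539  // temp_pain_points: temporary storage for tentative pain points generated
540  // by the language model after a single call to LanguageModel::UpdateState()
541  // (the argument is passed in rather than created before each
542  // LanguageModel::UpdateState() call to avoid dynamic memory re-allocation)
 // NOTE(review): the UpdateSegSearchNodes declaration that follows has no
 // temp_pain_points parameter, and its best_path_by_column, chunks_record
 // and blamer_bundle parameters are not described here - this list may be
 // out of date.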
543  //
544  // best_choice_bundle: a collection of variables that should be updated
545  // if a new best choice is found
546  //
547  void UpdateSegSearchNodes(int starting_col,
548  SEG_SEARCH_PENDING_LIST *pending[],
549  BestPathByColumn *best_path_by_column[],
550  CHUNKS_RECORD *chunks_record,
551  HEAP *pain_points,
552  BestChoiceBundle *best_choice_bundle,
553  BlamerBundle *blamer_bundle);
554 
555  // Process the given pain point: classify the corresponding blob, enqueue
556  // new pain points to join the newly classified blob with its neighbors.
557  void ProcessSegSearchPainPoint(float pain_point_priority,
558  const MATRIX_COORD &pain_point,
559  const WERD_CHOICE *best_choice,
560  SEG_SEARCH_PENDING_LIST *pending[],
561  CHUNKS_RECORD *chunks_record,
562  HEAP *pain_points,
563  BlamerBundle *blamer_bundle);
564 
565  // Add pain points for classifying blobs on the correct segmentation path
566  // (so that we can evaluate correct segmentation path and discover the reason
567  // for incorrect result).
568  void InitBlamerForSegSearch(const WERD_CHOICE *best_choice,
569  CHUNKS_RECORD *chunks_record,
570  HEAP *pain_points,
571  BlamerBundle *blamer_bundle,
572  STRING *blamer_debug);
573 
574  // Analyze the contents of BlamerBundle and set incorrect result reason.
575  void FinishBlamerForSegSearch(const WERD_CHOICE *best_choice,
576  BlamerBundle *blamer_bundle,
577  STRING *blamer_debug);
578 
579 };
580 
581 
582 } // namespace tesseract
583 
584 #endif // TESSERACT_WORDREC_WORDREC_H__