21 #pragma warning(disable:4244) // Conversion warnings
33 #include "config_auto.h"
59 "max fraction of mean blob width allowed for vertical gaps in vertical text");
62 "Fraction of box matches required to declare a line vertical");
// Fragment (enclosing signature not visible in this excerpt): allocates a
// new constraint list, appends `constraint` to it and hands the list to
// `vector`.
69 TabConstraint_LIST* constraints =
new TabConstraint_LIST;
70 TabConstraint_IT it(constraints);
71 it.add_to_end(constraint);
// NOTE(review): the same list is passed to both setters below; the lines
// between them are missing from this excerpt, so presumably they sit in the
// two arms of a conditional -- confirm against the full file.
73 vector->set_top_constraints(constraints);
75 vector->set_bottom_constraints(constraints);
// Fragment: tail of a signature taking `list2`; the head of the definition
// and the declarations of y_min/y_max are not visible in this excerpt.
80 TabConstraint_LIST* list2) {
86 tprintf(
"Testing constraint compatibility\n");
// Both lists are folded into the same y_min/y_max pair.
87 GetConstraints(list1, &y_min, &y_max);
88 GetConstraints(list2, &y_min, &y_max);
90 tprintf(
"Resulting range = [%d,%d]\n", y_min, y_max);
// The result is true when the combined range is non-empty.
91 return y_max >= y_min;
// Fragment: tail of a signature taking `list2`; the head of the definition
// is not visible in this excerpt.
97 TabConstraint_LIST* list2) {
100 TabConstraint_IT it(list2);
102 tprintf(
"Merging constraints\n");
// Visit every constraint currently held in list2.
104 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
107 constraint->vector_->
Print(
"Merge");
// The action taken for top vs. non-top constraints is not visible here.
108 if (constraint->is_top_)
// NOTE(review): `it` was constructed over list2 above; the lines just before
// this call are missing, so presumably the iterator has been re-targeted
// before list2 is spliced in -- confirm against the full file.
114 it.add_list_before(list2);
// Fragment (enclosing signature not visible): derive a single y value from
// the range of `constraints` and then walk the list.
123 GetConstraints(constraints, &y_min, &y_max);
// Midpoint of the range (integer division).
124 int y = (y_min + y_max) / 2;
125 TabConstraint_IT it(constraints);
126 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
// Top constraints are handled separately; the bodies of both branches are
// missing from this excerpt.
129 if (constraint->is_top_) {
// Fragment of a constructor: the initializer list stores the vector and the
// is_top flag.
141 : vector_(vector), is_top_(is_top) {
// y_min_ is taken from the y coordinate of the vector's end point.
// NOTE(review): the line before this one is missing, so this assignment may
// be conditional -- confirm against the full file.
143 y_min_ = vector->
endpt().
y();
// Narrows the range [*y_min, *y_max] by every constraint in `constraints`:
// *y_min is raised to at least each constraint's y_min_, and *y_max is
// lowered to at most each constraint's y_max_.
// The incoming values of *y_min and *y_max are used as the starting range
// (they are read by MAX/MIN below), so callers must initialize them.
// Several body lines and the closing braces are missing from this excerpt.
152 void TabConstraint::GetConstraints(TabConstraint_LIST* constraints,
153 int* y_min,
int* y_max) {
154 TabConstraint_IT it(constraints);
155 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
// Debug trace of the individual constraint and the vector it belongs to.
// NOTE(review): the condition guarding this output, if any, is not visible.
158 tprintf(
"Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_);
159 constraint->vector_->Print(
" for");
// Intersect the running range with this constraint's range.
161 *y_min =
MAX(*y_min, constraint->y_min_);
162 *y_max =
MIN(*y_max, constraint->y_max_);
// Fragment: tail of a parameter list; the head of the definition is not
// visible. vertical_x and vertical_y are in/out accumulators (they are
// updated with += below).
184 int extended_start_y,
int extended_end_y,
185 BLOBNBOX_CLIST* good_points,
186 int* vertical_x,
int* vertical_y) {
// Tail of a call whose start is missing from this excerpt.
188 alignment, good_points);
// Handling of a failed Fit is not visible here.
189 if (!vector->
Fit(vertical,
false)) {
// Direction of the fitted vector, from its start point to its end point.
194 vertical = vector->endpt_ - vector->startpt_;
// Add the weighted direction to the caller's running totals. The definition
// of `weight` is missing from this excerpt.
196 *vertical_x += vertical.
x() * weight;
197 *vertical_y += vertical.
y() * weight;
// Fragment of a constructor's initializer list: the extended y range is
// copied from `src`, the alignment comes from the `alignment` argument, and
// the scores, flags and constraint pointers start at their defaults
// (0 / true / false / NULL).
207 : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
208 sort_key_(0), percent_score_(0), mean_width_(0),
209 needs_refit_(true), needs_evaluation_(true), intersects_other_lines_(false),
210 alignment_(alignment),
211 top_constraints_(
NULL), bottom_constraints_(
NULL) {
212 BLOBNBOX_C_IT it(&boxes_);
// The sort key is computed at the midpoint of the start and end points
// (integer averages of x and y).
222 sort_key_ =
SortKey(vertical_skew,
223 (startpt_.
x() + endpt_.
x()) / 2,
224 (startpt_.
y() + endpt_.
y()) / 2);
// NOTE(review): the condition guarding this debug print, if any, is not
// visible in this excerpt.
226 Print(
"Constructed a new tab vector:");
// Fragment (enclosing signature not visible): copies the end points,
// alignment, extended y range and intersection flag of this vector into
// `copy`. No other members are copied in the visible lines.
236 copy->startpt_ = startpt_;
237 copy->endpt_ = endpt_;
238 copy->alignment_ = alignment_;
239 copy->extended_ymax_ = extended_ymax_;
240 copy->extended_ymin_ = extended_ymin_;
241 copy->intersects_other_lines_ = intersects_other_lines_;
// Fragment (enclosing signature not visible): inserts new_blob into boxes_
// at a position chosen by comparing box tops.
249 BLOBNBOX_C_IT it(&boxes_);
// Advance while the current box's top does not exceed the new box's top and
// the iterator is not on the last element.
253 while (!it.at_last() && box.
top() <= new_box.
top()) {
// new_blob was found among the existing boxes; the action taken is missing
// from this excerpt.
254 if (blob == new_blob)
// Insert before the current element when it is at or above the new box ...
260 if (box.
top() >= new_box.
top()) {
261 it.add_before_stay_put(new_blob);
// ... otherwise insert after it (the `else` line itself is not visible).
267 it.add_after_stay_put(new_blob);
// Fragment (enclosing signature not visible): sets the y coordinate of the
// start point to start_y.
273 startpt_.
set_y(start_y);
// Fragment (enclosing signature not visible): rotates the start point by
// `rotation`, then inspects the direction from the start to the end point.
283 startpt_.
rotate(rotation);
285 int dx = endpt_.
x() - startpt_.
x();
286 int dy = endpt_.
y() - startpt_.
y();
// True when the dominant component of (dx, dy) is negative. The action taken
// is missing from this excerpt.
287 if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) {
// Fragment (enclosing signature(s) not visible): walks partners_ and passes
// the partners' top/bottom constraint lists to calls whose names are missing
// from this excerpt.
// NOTE(review): the gaps in the embedded line numbers suggest the later
// lines may belong to a second function -- confirm against the full file.
307 TabVector_C_IT it(&partners_);
309 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
// A partner lacking either constraint list is reported as an error state.
311 if (partner->top_constraints_ ==
NULL ||
312 partner->bottom_constraints_ ==
NULL) {
313 partner->
Print(
"Impossible: has no constraints");
314 Print(
"This vector has it as a partner");
// First partner seen so far.
317 if (prev_partner ==
NULL) {
320 partner->bottom_constraints_))
322 partner->bottom_constraints_);
326 partner->bottom_constraints_))
328 partner->bottom_constraints_);
// Remember this partner for the next iteration.
330 prev_partner = partner;
334 partner->top_constraints_))
336 partner->top_constraints_);
344 partner->bottom_constraints_))
346 partner->bottom_constraints_);
348 partner->top_constraints_))
350 partner->top_constraints_);
// Fragment (enclosing signature not visible): each constraint list is only
// acted on when it is non-NULL. The guarded statements are missing from this
// excerpt.
355 if (top_constraints_ !=
NULL)
357 if (bottom_constraints_ !=
NULL)
// Fragment: part of a parameter list followed by a pairwise scan of
// `vectors`; the head of the definition is not visible.
363 TabVector_LIST* vectors,
365 TabVector_IT it1(vectors);
366 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
// it2 starts as a copy of it1 and advances until it wraps round to the first
// element of the list.
368 TabVector_IT it2(it1);
369 for (it2.forward(); !it2.at_first(); it2.forward()) {
371 if (v2->
SimilarTo(vertical, *v1, grid)) {
// Debug trace of the merge; the merge call itself is missing from this
// excerpt.
375 v2->
Print(
"Merging");
376 v1->
Print(
"by deleting");
380 v2->
Print(
"Producing");
// Sanity check on the merged result: an x extent over 100 is reported.
// merged_vector's initial value is not visible here.
383 merged_vector -= v2->
startpt();
384 if (abs(merged_vector.
x()) > 100) {
385 v2->
Print(
"Garbage result of merge?");
// Fragment (enclosing signature not visible): compares this vector with
// `other` using their sort keys.
403 int v_scale = abs(vertical.
y());
// Tail of a conditional expression choosing `mover` between this and other;
// the first part of the condition is missing from this excerpt.
423 sort_key_ < other.sort_key_) ?
this : &other;
424 int top_y = mover->endpt_.
y();
425 int bottom_y = mover->startpt_.
y();
// Horizontal extent of mover between its two end heights.
426 int left =
MIN(mover->
XAtY(top_y), mover->
XAtY(bottom_y));
427 int right =
MAX(mover->
XAtY(top_y), mover->
XAtY(bottom_y));
// Sort-key difference divided by |vertical.y()|.
// NOTE(review): this divides by v_scale; no guard against vertical.y() == 0
// is visible in this excerpt -- confirm one exists.
428 int shift = abs(sort_key_ - other.sort_key_) / v_scale;
440 if (box.
top() > bottom_y)
// The box's horizontal range is widened by `shift` on one side; the
// condition choosing the side is missing from this excerpt.
445 int right_at_box = left_at_box;
447 right_at_box += shift;
449 left_at_box -= shift;
// Fragment (enclosing signature not visible): absorbs `other` into this
// vector.
// The extended y range becomes the union of both ranges.
460 extended_ymin_ =
MIN(extended_ymin_, other->extended_ymin_);
461 extended_ymax_ =
MAX(extended_ymax_, other->extended_ymax_);
// NOTE(review): the line before this one is missing, so this assignment may
// be conditional -- confirm against the full file.
463 alignment_ = other->alignment_;
// Move boxes from other->boxes_ into boxes_, positioned by box bottom.
466 BLOBNBOX_C_IT it1(&boxes_);
467 BLOBNBOX_C_IT it2(&other->boxes_);
// Loops until other's list is empty; the statement removing elements from it
// is missing from this excerpt.
468 while (!it2.empty()) {
// Advance it1 past boxes whose bottom lies below box2's bottom.
474 while (box1.
bottom() < box2.
bottom() && !it1.at_last()) {
480 it1.add_to_end(bbox2);
481 }
// Do not insert a box that is already the current element of it1.
else if (bbox1 != bbox2) {
482 it1.add_before_stay_put(bbox2);
// Fragment (enclosing signature not visible): adds `partner` to partners_.
496 TabVector_C_IT it(&partners_);
// An existing entry equal to `partner` is detected here; the action taken is
// missing from this excerpt.
499 if (it.data() == partner)
502 it.add_after_then_move(partner);
// Fragment (enclosing signature not visible): scans partners_ for an entry
// equal to `other`. The result handling is missing from this excerpt.
507 TabVector_C_IT it(&partners_);
508 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
509 if (it.data() == other)
// Fragment (enclosing signature not visible): debug print of a vector,
// prefixed by `prefix`.
// Null case. NOTE(review): the null test itself is missing from this excerpt.
528 tprintf(
"%s <null>\n", prefix);
// Normal case: end points, mean width, score, sort key and list lengths. The
// rest of the format string and its first arguments are missing from this
// excerpt.
530 tprintf(
"%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
533 startpt_.
x(), startpt_.
y(), endpt_.
x(), endpt_.
y(),
534 mean_width_, percent_score_, sort_key_,
535 boxes_.length(), partners_.length());
// Fragment (enclosing signature not visible): prints one line per box in
// boxes_. The printed arguments are missing from this excerpt.
542 BLOBNBOX_C_IT it(&boxes_);
543 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
546 tprintf(
"Box at (%d,%d)->(%d,%d)\n",
// Fragment: drawing code, compiled only when GRAPHICS_DISABLED is not
// defined. The enclosing signature and the matching #endif are not visible.
553 #ifndef GRAPHICS_DISABLED
// Main segment from the start point to the end point.
566 tab_win->
Line(startpt_.
x(), startpt_.
y(), endpt_.
x(), endpt_.
y());
// Vertical extensions from each end point to the extended y limits.
568 tab_win->
Line(startpt_.
x(), startpt_.
y(), startpt_.
x(), extended_ymin_);
569 tab_win->
Line(endpt_.
x(), extended_ymax_, endpt_.
x(), endpt_.
y());
// Label the start point with the percent score; snprintf is bounded by the
// buffer size.
571 snprintf(score_buf,
sizeof(score_buf),
"%d", percent_score_);
573 tab_win->
Text(startpt_.
x(), startpt_.
y(), score_buf);
// Fragment (enclosing signature not visible): work guarded by the
// needs_evaluation_ flag. The guarded statement is missing from this excerpt.
582 if (needs_evaluation_)
// Fragment (enclosing signature not visible): scores this vector from its
// boxes and their gutters. Many lines are missing from this excerpt; the
// comments below describe only what the visible lines do.
594 needs_evaluation_ =
false;
595 int length = endpt_.
y() - startpt_.
y();
// Degenerate vector: zero height or no boxes. Handling beyond the debug
// print is missing from this excerpt.
596 if (length == 0 || boxes_.empty()) {
598 Print(
"Zero length in evaluate");
// Pass over the boxes accumulating heights for the mean.
602 BLOBNBOX_C_IT it(&boxes_);
604 int height_count = 0;
605 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
608 int height = box.
height();
609 mean_height += height;
// NOTE(review): divides by height_count; its increment is missing from this
// excerpt -- confirm it cannot be zero here.
612 mean_height /= height_count;
// Histogram of gutter widths.
620 STATS gutters(0, max_gutter + 1);
624 int num_deleted_boxes = 0;
625 bool text_on_image =
false;
// Pass over the boxes measuring the gutter beside each one.
628 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
631 int mid_y = (box.
top() + box.
bottom()) / 2;
634 tprintf(
"After already deleting %d boxes, ", num_deleted_boxes);
635 Print(
"Starting evaluation");
// x position of this vector at the box's vertical midpoint.
643 int tab_x =
XAtY(mid_y);
647 bbox, &gutter_width, &neighbour_gap);
649 tprintf(
"Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
651 gutter_width, neighbour_gap);
657 gutters.
add(gutter_width, 1);
// Compare this box with the previous good box.
661 if (prev_good_box !=
NULL) {
662 int vertical_gap = box.
bottom() - prev_good_box->
top();
663 double size1 = sqrt(static_cast<double>(prev_good_box->
area()));
664 double size2 = sqrt(static_cast<double>(box.
area()));
// NOTE(review): the condition under which the gap is added is missing from
// this excerpt.
666 good_length += vertical_gap;
668 tprintf(
"Box and prev good, gap=%d, target %g, goodlength=%d\n",
676 prev_good_box = &box;
678 text_on_image =
true;
682 tprintf(
"Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
684 gutter_width, neighbour_gap);
691 Print(
"Evaluating:");
// The search range starts inverted (top = end point y, bottom = start point
// y) and is updated from the good boxes below.
696 int search_top = endpt_.
y();
697 int search_bottom = startpt_.
y();
700 prev_good_box =
NULL;
// Another pass over the boxes, using median_gutter.
701 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
704 int mid_y = (box.
top() + box.
bottom()) / 2;
708 int tab_x =
XAtY(mid_y);
718 bbox, &gutter_width, &neighbour_gap);
// First good box: record its top as the search bottom.
721 if (prev_good_box ==
NULL) {
724 search_bottom = box.
top();
726 prev_good_box = &box;
727 search_top = box.
bottom();
731 tprintf(
"Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
733 gutter_width, median_gutter);
// NOTE(review): as written this increments num_deleted_boxes and then
// assigns `true` (1) to it, so the counter would always end up as 1. The
// neighbouring lines are missing from this excerpt, so this may be two
// statements fused by extraction (an increment plus a separate `... = true;`)
// -- confirm against the full file.
736 ++num_deleted_boxes =
true;
741 if (prev_good_box !=
NULL) {
// Score is the percentage of the vector's height covered by good length
// (integer arithmetic).
// NOTE(review): divides by length; confirm the zero-length case returns
// before this point.
744 int length = endpt_.
y() - startpt_.
y();
745 percent_score_ = 100 * good_length / length;
// Handling after deletions is missing from this excerpt.
746 if (num_deleted_boxes > 0) {
// If the search range is still inverted, fall back to the vector's own y
// range.
754 if (search_bottom > search_top) {
755 search_bottom = startpt_.
y();
756 search_top = endpt_.
y();
// Scale the minimum gutter width by the mean box height.
760 min_gutter_width *= mean_height;
// The maximum gutter width is raised to at least the median gutter.
762 if (median_gutter > max_gutter_width)
763 max_gutter_width = median_gutter;
764 int gutter_width = finder->
GutterWidth(search_bottom, search_top, *
this,
765 text_on_image, max_gutter_width,
// Too narrow a gutter rejects the vector: all boxes are dropped.
767 if (gutter_width < min_gutter_width) {
769 tprintf(
"Rejecting bad tab Vector with %d gutter vs %g min\n",
770 gutter_width, min_gutter_width);
772 boxes_.shallow_clear();
775 tprintf(
"Final gutter %d, vs limit of %g, required shift = %d\n",
776 gutter_width, min_gutter_width, required_shift);
784 Print(
"Evaluation complete:");
// Fragment (enclosing signature not visible): refits the start and end
// points of this vector to its boxes. Many lines are missing from this
// excerpt.
794 needs_refit_ =
false;
// No boxes: the sort key is computed from a midpoint and the result reports
// whether the vector has non-zero height. The computation of midpt is
// missing from this excerpt.
795 if (boxes_.empty()) {
804 sort_key_ =
SortKey(vertical, midpt.
x(), midpt.
y());
805 return startpt_.
y() != endpt_.
y();
// Free fit: only when not forced parallel and the vector is not ragged.
807 if (!force_parallel && !
IsRagged()) {
810 BLOBNBOX_C_IT it(&boxes_);
812 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
// Points derived from each box are added to the line fitter.
817 linepoints.
Add(boxpt);
820 linepoints.
Add(top_pt);
823 linepoints.
Fit(&startpt_, &endpt_);
// A fit with non-zero height replaces `vertical`. The assignment that
// precedes the subtraction is missing from this excerpt.
824 if (startpt_.
y() != endpt_.
y()) {
826 vertical -= startpt_;
// Remember the fitted y range before the start point is overwritten below.
829 int start_y = startpt_.
y();
830 int end_y = endpt_.
y();
// Pass over the boxes: accumulate widths and pick a start point by sort key.
// The comparisons choosing the key are missing from this excerpt.
832 BLOBNBOX_C_IT it(&boxes_);
837 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
840 mean_width_ += box.
width();
845 int bottom_y = box.
bottom();
846 int top_y = box.
top();
847 int key =
SortKey(vertical, x1, bottom_y);
850 startpt_ =
ICOORD(x1, bottom_y);
852 key =
SortKey(vertical, x1, top_y);
855 startpt_ =
ICOORD(x1, top_y);
// Mean width, rounded up: (sum + n - 1) / n.
862 if (width_count > 0) {
863 mean_width_ = (mean_width_ + width_count - 1) / width_count;
865 endpt_ = startpt_ + vertical;
866 needs_evaluation_ =
true;
// Restore the fitted y range, recomputing x at those heights from the sort
// key. The matching endpt_ y assignment is missing from this excerpt.
867 if (start_y != end_y) {
869 startpt_.
set_x(
XAtY(vertical, sort_key_, start_y));
870 startpt_.
set_y(start_y);
871 endpt_.
set_x(
XAtY(vertical, sort_key_, end_y));
// Fragment (enclosing signature not visible): partners_ not holding exactly
// one element is handled separately (the action is missing from this
// excerpt); otherwise an iterator over partners_ is created.
880 if (!partners_.singleton())
882 TabVector_C_IT partner_it(&partners_);
// Fragment (enclosing signature not visible): decides whether this vector
// and its partner look like a vertical text line, and returns the partner if
// so, otherwise NULL.
// More or fewer than one partner is handled separately; the action is
// missing from this excerpt.
890 if (!partners_.singleton())
892 TabVector_C_IT partner_it(&partners_);
894 BLOBNBOX_C_IT box_it1(&boxes_);
895 BLOBNBOX_C_IT box_it2(&partner->boxes_);
899 Print(
"Testing for vertical text");
900 partner->
Print(
" partner");
903 int num_unmatched = 0;
904 int total_widths = 0;
// Histogram of gaps between boxes.
908 STATS gaps(0, width * 2);
// Walk this vector's boxes, advancing through the partner's box list to look
// for the same box in both.
910 box_it2.mark_cycle_pt();
911 for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
914 if (prev_bbox !=
NULL) {
917 while (!box_it2.cycled_list() && box_it2.data() != bbox &&
921 if (!box_it2.cycled_list() && box_it2.data() == bbox &&
927 total_widths += box.
width();
// Mean box width as a double.
// NOTE(review): the divisor is the number of boxes counted; confirm it is
// non-zero on this path.
930 double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
932 int min_box_match =
static_cast<int>((num_matched + num_unmatched) *
// Vertical when at least one gap was recorded, enough boxes matched and the
// median gap is within max_gap.
934 bool is_vertical = (gaps.
get_total() > 0 &&
935 num_matched >= min_box_match &&
936 gaps.
median() <= max_gap);
938 tprintf(
"gaps=%d, matched=%d, unmatched=%d, min_match=%d "
939 "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
940 gaps.
get_total(), num_matched, num_unmatched, min_box_match,
941 gaps.
median(), avg_width, max_gap, is_vertical?
"Yes":
"No");
943 return (is_vertical) ? partner :
NULL;
// Fragment of a constructor: the extended y range and alignment come from
// the arguments, and the scores, flags and constraint pointers start at
// their defaults.
// NOTE(review): intersects_other_lines_ does not appear in the visible part
// of this initializer list, although another constructor in this file sets
// it -- confirm it is initialized elsewhere.
949 : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax),
950 sort_key_(0), percent_score_(0), mean_width_(0),
951 needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
952 top_constraints_(
NULL), bottom_constraints_(
NULL) {
// The contents of `boxes` are added to boxes_.
953 BLOBNBOX_C_IT it(&boxes_);
954 it.add_list_after(boxes);
// Updates each partner's own partner list so that references to this vector
// are handled, offering `replacement` (when non-NULL) in its place.
// The function continues past the end of this excerpt and several body lines
// are missing; the comments below describe only what the visible lines do.
960 void TabVector::Delete(TabVector* replacement) {
961 TabVector_C_IT it(&partners_);
962 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
// Iterator over this partner's own partner list.
964 TabVector_C_IT p_it(&partner->partners_);
// If the partner already lists the replacement, it is not inserted again.
967 TabVector* partner_replacement = replacement;
968 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
970 if (p_partner == partner_replacement) {
971 partner_replacement =
NULL;
// Find this vector in the partner's list and insert the replacement before
// it. The removal of `this` itself is missing from this excerpt.
976 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
978 if (p_partner ==
this) {
980 if (partner_replacement !=
NULL)
981 p_it.add_before_stay_put(partner_replacement);
// Make the relationship mutual: the replacement also gains the partner.
984 if (partner_replacement !=
NULL) {
985 partner_replacement->AddPartner(partner);