Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
textlineprojection.cpp
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #include "textlineprojection.h"
15 #include "allheaders.h"
16 #include "bbgrid.h" // Base class.
17 #include "blobbox.h" // BlobNeighourDir.
18 #include "blobs.h"
19 #include "colpartition.h"
20 #include "normalis.h"
21 
// Tuning constants for building and querying the textline projection.

// Blobs whose orientation is definite get padded by this multiple of their
// size along the textline direction.
const int kOrientedPadFactor = 8;
// Blobs whose orientation is not definite get padded by this smaller multiple.
const int kDefaultPadFactor = 2;
// Cost multiplier applied to each step that moves away from the line center.
const int kWrongWayPenalty = 4;
// When measuring the total distance of a box from a target box in curved
// textline space, the parallel gap is divided by this ratio relative to the
// perpendicular gap. Treating the parallel gap more favorably allows quotes
// and ellipses at the end of textlines to be caught.
const int kParaPerpDistRatio = 4;
// The inter-line gap must exceed this multiple of scale_factor_ before the
// increment box is padded perpendicular to the text line.
const int kMinLineSpacingFactor = 4;
// Largest allowed overrun of a tab-stop by horizontal padding, measured in
// projection pixels.
const int kMaxTabStopOverrun = 6;
38 
39 namespace tesseract {
40 
42  : x_origin_(0), y_origin_(0), pix_(NULL) {
43  // The projection map should be about 100 ppi, whatever the input.
44  scale_factor_ = IntCastRounded(resolution / 100.0);
45  if (scale_factor_ < 1) scale_factor_ = 1;
46 }
48  pixDestroy(&pix_);
49 }
50 
51 // Build the projection profile given the input_block containing lists of
52 // blobs, a rotation to convert to image coords,
53 // and a full-resolution nontext_map, marking out areas to avoid.
54 // During construction, we have the following assumptions:
55 // The rotation is a multiple of 90 degrees, ie no deskew yet.
56 // The blobs have had their left and right rules set to also limit
57 // the range of projection.
59  const FCOORD& rotation,
60  Pix* nontext_map) {
61  pixDestroy(&pix_);
62  TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
63  x_origin_ = 0;
64  y_origin_ = image_box.height();
65  int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
66  int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
67 
68  pix_ = pixCreate(width, height, 8);
69  ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
70  ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
71  Pix* final_pix = pixBlockconv(pix_, 1, 1);
72 // Pix* final_pix = pixBlockconv(pix_, 2, 2);
73  pixDestroy(&pix_);
74  pix_ = final_pix;
75 }
76 
77 // Display the blobs in the window colored according to textline quality.
78 void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
79  ScrollView* win) {
80  #ifndef GRAPHICS_DISABLED
81  BLOBNBOX_IT it(blobs);
82  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
83  BLOBNBOX* blob = it.data();
84  const TBOX& box = blob->bounding_box();
85  bool bad_box = BoxOutOfHTextline(box, NULL, false);
86  if (blob->UniquelyVertical())
87  win->Pen(ScrollView::YELLOW);
88  else
89  win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
90  win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
91  }
92  win->Update();
93  #endif // GRAPHICS_DISABLED
94 }
95 
96 // Moves blobs that look like they don't sit well on a textline from the
97 // input blobs list to the output small_blobs list.
98 // This gets them away from initial textline finding to stop diacritics
99 // from forming incorrect textlines. (Introduced mainly to fix Thai.)
101  BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
102  BLOBNBOX_IT it(blobs);
103  BLOBNBOX_IT small_it(small_blobs);
104  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
105  BLOBNBOX* blob = it.data();
106  const TBOX& box = blob->bounding_box();
107  bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
108  box.bottom());
109  if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) {
110  blob->ClearNeighbours();
111  small_it.add_to_end(it.extract());
112  }
113  }
114 }
115 
116 // Create a window and display the projection in it.
118  int width = pixGetWidth(pix_);
119  int height = pixGetHeight(pix_);
120  Pix* pixc = pixCreate(width, height, 32);
121  int src_wpl = pixGetWpl(pix_);
122  int col_wpl = pixGetWpl(pixc);
123  uinT32* src_data = pixGetData(pix_);
124  uinT32* col_data = pixGetData(pixc);
125  for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
126  for (int x = 0; x < width; ++x) {
127  int pixel = GET_DATA_BYTE(src_data, x);
128  l_uint32 result;
129  if (pixel <= 17)
130  composeRGBPixel(0, 0, pixel * 15, &result);
131  else if (pixel <= 145)
132  composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
133  else
134  composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
135  col_data[x] = result;
136  }
137  }
138 #if 0
139  // TODO(rays) uncomment when scrollview can display non-binary images.
140  ScrollView* win = new ScrollView("Projection", 0, 0,
141  width, height, width, height);
142  win->Image(pixc, 0, 0);
143  win->Update();
144 #else
145  pixWrite("projection.png", pixc, IFF_PNG);
146 #endif
147  pixDestroy(&pixc);
148 }
149 
150 // Compute the distance of the box from the partition using curved projection
151 // space. As DistanceOfBoxFromBox, except that the direction is taken from
152 // the ColPartition and the median bounds of the ColPartition are used as
153 // the to_box.
155  const ColPartition& part,
156  const DENORM* denorm,
157  bool debug) const {
158  // Compute a partition box that uses the median top/bottom of the blobs
159  // within and median left/right for vertical.
160  TBOX part_box = part.bounding_box();
161  if (part.IsHorizontalType()) {
162  part_box.set_top(part.median_top());
163  part_box.set_bottom(part.median_bottom());
164  } else {
165  part_box.set_left(part.median_left());
166  part_box.set_right(part.median_right());
167  }
168  // Now use DistanceOfBoxFromBox to make the actual calculation.
169  return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(),
170  denorm, debug);
171 }
172 
173 // Compute the distance from the from_box to the to_box using curved
174 // projection space. Separation that involves a decrease in projection
175 // density (moving from the from_box to the to_box) is weighted more heavily
176 // than constant density, and an increase is weighted less.
177 // If horizontal_textline is true, then curved space is used vertically,
178 // as for a diacritic on the edge of a textline.
179 // The projection uses original image coords, so denorm is used to get
180 // back to the image coords from box/part space.
181 // How the calculation works: Think of a diacritic near a textline.
182 // Distance is measured from the far side of the from_box to the near side of
183 // the to_box. Shown is the horizontal textline case.
184 //                  |------^-----|
185 //                  | from | box |
186 //                  |------|-----|
187 //    perpendicular        |
188 //                  <------v-------->|--------------------|
189 //                      parallel     |       to box       |
190 //                                   |--------------------|
191 // Perpendicular distance uses "curved space" See VerticalDistance below.
192 // Parallel distance is linear.
193 // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
195  const TBOX& to_box,
196  bool horizontal_textline,
197  const DENORM* denorm,
198  bool debug) const {
199  // The parallel_gap is the horizontal gap between a horizontal textline and
200  // the box. Analogous for vertical.
201  int parallel_gap = 0;
202  // start_pt is the box end of the line to be modified for curved space.
203  TPOINT start_pt;
204  // end_pt is the partition end of the line to be modified for curved space.
205  TPOINT end_pt;
206  if (horizontal_textline) {
207  parallel_gap = from_box.x_gap(to_box) + from_box.width();
208  start_pt.x = (from_box.left() + from_box.right()) / 2;
209  end_pt.x = start_pt.x;
210  if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
211  start_pt.y = from_box.top();
212  end_pt.y = MIN(to_box.top(), start_pt.y);
213  } else {
214  start_pt.y = from_box.bottom();
215  end_pt.y = MAX(to_box.bottom(), start_pt.y);
216  }
217  } else {
218  parallel_gap = from_box.y_gap(to_box) + from_box.height();
219  if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
220  start_pt.x = from_box.right();
221  end_pt.x = MIN(to_box.right(), start_pt.x);
222  } else {
223  start_pt.x = from_box.left();
224  end_pt.x = MAX(to_box.left(), start_pt.x);
225  }
226  start_pt.y = (from_box.bottom() + from_box.top()) / 2;
227  end_pt.y = start_pt.y;
228  }
229  // The perpendicular gap is the max vertical distance gap out of:
230  // top of from_box to to_box top and bottom of from_box to to_box bottom.
231  // This value is then modified for curved projection space.
232  // Analogous for vertical.
233  int perpendicular_gap = 0;
234  // If start_pt == end_pt, then the from_box lies entirely within the to_box
235  // (in the perpendicular direction), so we don't need to calculate the
236  // perpendicular_gap.
237  if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
238  if (denorm != NULL) {
239  // Denormalize the start and end.
240  denorm->DenormTransform(start_pt, &start_pt);
241  denorm->DenormTransform(end_pt, &end_pt);
242  }
243  if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
244  perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
245  end_pt.y);
246  } else {
247  perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
248  start_pt.y);
249  }
250  }
251  // The parallel_gap weighs less than the perpendicular_gap.
252  return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
253 }
254 
255 // Compute the distance between (x, y1) and (x, y2) using the rule that
256 // a decrease in textline density is weighted more heavily than an increase.
257 // The coordinates are in source image space, ie processed by any denorm
258 // already, but not yet scaled by scale_factor_.
259 // Going from the outside of a textline to the inside should measure much
260 // less distance than going from the inside of a textline to the outside.
261 // How it works:
262 // An increase is cheap (getting closer to a textline).
263 // Constant costs unity.
264 // A decrease is expensive (getting further from a textline).
265 // Pixels in projection map    Counted distance
266 //            2
267 //            3                      1/x
268 //            3                       1
269 //            2                       x
270 //            5                      1/x
271 //            7                      1/x
272 // Total: 1 + x + 3/x where x = kWrongWayPenalty.
274  int y1, int y2) const {
275  x = ImageXToProjectionX(x);
276  y1 = ImageYToProjectionY(y1);
277  y2 = ImageYToProjectionY(y2);
278  if (y1 == y2) return 0;
279  int wpl = pixGetWpl(pix_);
280  int step = y1 < y2 ? 1 : -1;
281  uinT32* data = pixGetData(pix_) + y1 * wpl;
282  wpl *= step;
283  int prev_pixel = GET_DATA_BYTE(data, x);
284  int distance = 0;
285  int right_way_steps = 0;
286  for (int y = y1; y != y2; y += step) {
287  data += wpl;
288  int pixel = GET_DATA_BYTE(data, x);
289  if (debug)
290  tprintf("At (%d,%d), pix = %d, prev=%d\n",
291  x, y + step, pixel, prev_pixel);
292  if (pixel < prev_pixel)
293  distance += kWrongWayPenalty;
294  else if (pixel > prev_pixel)
295  ++right_way_steps;
296  else
297  ++distance;
298  prev_pixel = pixel;
299  }
300  return distance * scale_factor_ +
301  right_way_steps * scale_factor_ / kWrongWayPenalty;
302 }
303 
304 // Compute the distance between (x1, y) and (x2, y) using the rule that
305 // a decrease in textline density is weighted more heavily than an increase.
306 int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
307  int y) const {
308  x1 = ImageXToProjectionX(x1);
309  x2 = ImageXToProjectionX(x2);
310  y = ImageYToProjectionY(y);
311  if (x1 == x2) return 0;
312  int wpl = pixGetWpl(pix_);
313  int step = x1 < x2 ? 1 : -1;
314  uinT32* data = pixGetData(pix_) + y * wpl;
315  int prev_pixel = GET_DATA_BYTE(data, x1);
316  int distance = 0;
317  int right_way_steps = 0;
318  for (int x = x1; x != x2; x += step) {
319  int pixel = GET_DATA_BYTE(data, x + step);
320  if (debug)
321  tprintf("At (%d,%d), pix = %d, prev=%d\n",
322  x + step, y, pixel, prev_pixel);
323  if (pixel < prev_pixel)
324  distance += kWrongWayPenalty;
325  else if (pixel > prev_pixel)
326  ++right_way_steps;
327  else
328  ++distance;
329  prev_pixel = pixel;
330  }
331  return distance * scale_factor_ +
332  right_way_steps * scale_factor_ / kWrongWayPenalty;
333 }
334 
335 // Returns true if the blob appears to be outside of a textline.
336 // Such blobs are potentially diacritics (even if large in Thai) and should
337 // be kept away from initial textline finding.
339  const DENORM* denorm,
340  bool debug) const {
341  int grad1 = 0;
342  int grad2 = 0;
343  EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, NULL, NULL);
344  int worst_result = MIN(grad1, grad2);
345  int total_result = grad1 + grad2;
346  if (total_result >= 6) return false; // Strongly in textline.
347  // Medium strength: if either gradient is negative, it is likely outside
348  // the body of the textline.
349  if (worst_result < 0)
350  return true;
351  return false;
352 }
353 
354 // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
355 // but uses the median top/bottom for horizontal and median left/right for
356 // vertical instead of the bounding box edges.
357 // Evaluates for both horizontal and vertical and returns the best result,
358 // with a positive value for horizontal and a negative value for vertical.
360  const DENORM* denorm,
361  bool debug) const {
362  if (part.IsSingleton())
363  return EvaluateBox(part.bounding_box(), denorm, debug);
364  // Test vertical orientation.
365  TBOX box = part.bounding_box();
366  // Use the partition median for left/right.
367  box.set_left(part.median_left());
368  box.set_right(part.median_right());
369  int vresult = EvaluateBox(box, denorm, debug);
370 
371  // Test horizontal orientation.
372  box = part.bounding_box();
373  // Use the partition median for top/bottom.
374  box.set_top(part.median_top());
375  box.set_bottom(part.median_bottom());
376  int hresult = EvaluateBox(box, denorm, debug);
377  if (debug) {
378  tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
379  part.bounding_box().print();
380  part.Print();
381  }
382  return hresult >= -vresult ? hresult : vresult;
383 }
384 
385 // Computes the mean projection gradients over the horizontal and vertical
386 // edges of the box:
387 // -h-h-h-h-h-h
388 // |------------| mean=htop -v|+v--------+v|-v
389 // |+h+h+h+h+h+h| -v|+v +v|-v
390 // | | -v|+v +v|-v
391 // | box | -v|+v box +v|-v
392 // | | -v|+v +v|-v
393 // |+h+h+h+h+h+h| -v|+v +v|-v
394 // |------------| mean=hbot -v|+v--------+v|-v
395 // -h-h-h-h-h-h
396 // mean=vleft mean=vright
397 //
398 // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
399 // for a horizontal textline, a negative number for a vertical textline,
400 // and near zero for undecided. Undecided is most likely non-text.
401 // All the gradients are truncated to remain non-negative, since negative
402 // horizontal gradients don't give any indication of being vertical and
403 // vice versa.
404 // Additional complexity: The coordinates have to be transformed to original
405 // image coordinates with denorm (if not null), scaled to match the projection
406 // pix, and THEN step out 2 pixels each way from the edge to compute the
407 // gradient, and tries 3 positions, each measuring the gradient over a
408 // 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by
409 // several layers of helpers below.
410 int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
411  bool debug) const {
412  return EvaluateBoxInternal(box, denorm, debug, NULL, NULL, NULL, NULL);
413 }
414 
415 // Internal version of EvaluateBox returns the unclipped gradients as well
416 // as the result of EvaluateBox.
417 // hgrad1 and hgrad2 are the gradients for the horizontal textline.
418 int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
419  const DENORM* denorm, bool debug,
420  int* hgrad1, int* hgrad2,
421  int* vgrad1, int* vgrad2) const {
422  int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
423  box.top(), true);
424  int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
425  box.bottom(), false);
426  int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
427  box.top(), true);
428  int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
429  box.bottom(), box.top(),
430  false);
431  int top_clipped = MAX(top_gradient, 0);
432  int bottom_clipped = MAX(bottom_gradient, 0);
433  int left_clipped = MAX(left_gradient, 0);
434  int right_clipped = MAX(right_gradient, 0);
435  if (debug) {
436  tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
437  top_gradient, bottom_gradient, left_gradient, right_gradient);
438  box.print();
439  }
440  int result = MAX(top_clipped, bottom_clipped) -
441  MAX(left_clipped, right_clipped);
442  if (hgrad1 != NULL && hgrad2 != NULL) {
443  *hgrad1 = top_gradient;
444  *hgrad2 = bottom_gradient;
445  }
446  if (vgrad1 != NULL && vgrad2 != NULL) {
447  *vgrad1 = left_gradient;
448  *vgrad2 = right_gradient;
449  }
450  return result;
451 }
452 
453 // Helper returns the mean gradient value for the horizontal row at the given
454 // y, (in the external coordinates) by subtracting the mean of the transformed
455 // row 2 pixels above from the mean of the transformed row 2 pixels below.
456 // This gives a positive value for a good top edge and negative for bottom.
457 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
458 int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
459  inT16 min_x, inT16 max_x, inT16 y,
460  bool best_is_max) const {
461  TPOINT start_pt(min_x, y);
462  TPOINT end_pt(max_x, y);
463  int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
464  int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
465  int best_gradient = lower - upper;
466  upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
467  lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
468  int gradient = lower - upper;
469  if ((gradient > best_gradient) == best_is_max)
470  best_gradient = gradient;
471  upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
472  lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
473  gradient = lower - upper;
474  if ((gradient > best_gradient) == best_is_max)
475  best_gradient = gradient;
476  return best_gradient;
477 }
478 
479 // Helper returns the mean gradient value for the vertical column at the
480 // given x, (in the external coordinates) by subtracting the mean of the
481 // transformed column 2 pixels left from the mean of the transformed column
482 // 2 pixels to the right.
483 // This gives a positive value for a good left edge and negative for right.
484 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
485 int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, inT16 x,
486  inT16 min_y, inT16 max_y,
487  bool best_is_max) const {
488  TPOINT start_pt(x, min_y);
489  TPOINT end_pt(x, max_y);
490  int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
491  int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
492  int best_gradient = right - left;
493  left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
494  right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
495  int gradient = right - left;
496  if ((gradient > best_gradient) == best_is_max)
497  best_gradient = gradient;
498  left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
499  right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
500  gradient = right - left;
501  if ((gradient > best_gradient) == best_is_max)
502  best_gradient = gradient;
503  return best_gradient;
504 }
505 
506 // Helper returns the mean pixel value over the line between the start_pt and
507 // end_pt (inclusive), but shifted perpendicular to the line in the projection
508 // image by offset pixels. For simplicity, it is assumed that the vector is
509 // either nearly horizontal or nearly vertical. It works on skewed textlines!
510 // The end points are in external coordinates, and will be denormalized with
511 // the denorm if not NULL before further conversion to pix coordinates.
512 // After all the conversions, the offset is added to the direction
513 // perpendicular to the line direction. The offset is thus in projection image
514 // coordinates, which allows the caller to get a guaranteed displacement
515 // between pixels used to calculate gradients.
516 int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
517  int offset,
518  TPOINT start_pt,
519  TPOINT end_pt) const {
520  TransformToPixCoords(denorm, &start_pt);
521  TransformToPixCoords(denorm, &end_pt);
522  TruncateToImageBounds(&start_pt);
523  TruncateToImageBounds(&end_pt);
524  int wpl = pixGetWpl(pix_);
525  uinT32* data = pixGetData(pix_);
526  int total = 0;
527  int count = 0;
528  int x_delta = end_pt.x - start_pt.x;
529  int y_delta = end_pt.y - start_pt.y;
530  if (abs(x_delta) >= abs(y_delta)) {
531  if (x_delta == 0)
532  return 0;
533  // Horizontal line. Add the offset vertically.
534  int x_step = x_delta > 0 ? 1 : -1;
535  // Correct offset for rotation, keeping it anti-clockwise of the delta.
536  offset *= x_step;
537  start_pt.y += offset;
538  end_pt.y += offset;
539  TruncateToImageBounds(&start_pt);
540  TruncateToImageBounds(&end_pt);
541  x_delta = end_pt.x - start_pt.x;
542  y_delta = end_pt.y - start_pt.y;
543  count = x_delta * x_step + 1;
544  for (int x = start_pt.x; x != end_pt.x; x += x_step) {
545  int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
546  total += GET_DATA_BYTE(data + wpl * y, x);
547  }
548  } else {
549  // Vertical line. Add the offset horizontally.
550  int y_step = y_delta > 0 ? 1 : -1;
551  // Correct offset for rotation, keeping it anti-clockwise of the delta.
552  // Pix holds the image with y=0 at the top, so the offset is negated.
553  offset *= -y_step;
554  start_pt.x += offset;
555  end_pt.x += offset;
556  TruncateToImageBounds(&start_pt);
557  TruncateToImageBounds(&end_pt);
558  x_delta = end_pt.x - start_pt.x;
559  y_delta = end_pt.y - start_pt.y;
560  count = y_delta * y_step + 1;
561  for (int y = start_pt.y; y != end_pt.y; y += y_step) {
562  int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
563  total += GET_DATA_BYTE(data + wpl * y, x);
564  }
565  }
566  return DivRounded(total, count);
567 }
568 
569 // Given an input pix, and a box, the sides of the box are shrunk inwards until
570 // they bound any black pixels found within the original box.
571 // The function converts between tesseract coords and the pix coords assuming
572 // that this pix is full resolution equal in size to the original image.
573 // Returns an empty box if there are no black pixels in the source box.
574 static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
575  int im_height = pixGetHeight(pix);
576  Box* input_box = boxCreate(box.left(), im_height - box.top(),
577  box.width(), box.height());
578  Box* output_box = NULL;
579  pixClipBoxToForeground(pix, input_box, NULL, &output_box);
580  TBOX result_box;
581  if (output_box != NULL) {
582  l_int32 x, y, width, height;
583  boxGetGeometry(output_box, &x, &y, &width, &height);
584  result_box.set_left(x);
585  result_box.set_right(x + width);
586  result_box.set_top(im_height - y);
587  result_box.set_bottom(result_box.top() - height);
588  boxDestroy(&output_box);
589  }
590  boxDestroy(&input_box);
591  return result_box;
592 }
593 
594 // Splits the given box in half at x_middle or y_middle according to split_on_x
595 // and checks for nontext_map pixels in each half. Reduces the bbox so that it
596 // still includes the middle point, but does not touch any fg pixels in
597 // nontext_map. An empty box may be returned if there is no such box.
598 static void TruncateBoxToMissNonText(int x_middle, int y_middle,
599  bool split_on_x, Pix* nontext_map,
600  TBOX* bbox) {
601  TBOX box1(*bbox);
602  TBOX box2(*bbox);
603  TBOX im_box;
604  if (split_on_x) {
605  box1.set_right(x_middle);
606  im_box = BoundsWithinBox(nontext_map, box1);
607  if (!im_box.null_box()) box1.set_left(im_box.right());
608  box2.set_left(x_middle);
609  im_box = BoundsWithinBox(nontext_map, box2);
610  if (!im_box.null_box()) box2.set_right(im_box.left());
611  } else {
612  box1.set_bottom(y_middle);
613  im_box = BoundsWithinBox(nontext_map, box1);
614  if (!im_box.null_box()) box1.set_top(im_box.bottom());
615  box2.set_top(y_middle);
616  im_box = BoundsWithinBox(nontext_map, box2);
617  if (!im_box.null_box()) box2.set_bottom(im_box.top());
618  }
619  box1 += box2;
620  *bbox = box1;
621 }
622 
623 
624 // Helper function to add 1 to a rectangle in source image coords to the
625 // internal projection pix_.
626 void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
627  int scaled_left = ImageXToProjectionX(box.left());
628  int scaled_top = ImageYToProjectionY(box.top());
629  int scaled_right = ImageXToProjectionX(box.right());
630  int scaled_bottom = ImageYToProjectionY(box.bottom());
631  int wpl = pixGetWpl(pix_);
632  uinT32* data = pixGetData(pix_) + scaled_top * wpl;
633  for (int y = scaled_top; y <= scaled_bottom; ++y) {
634  for (int x = scaled_left; x <= scaled_right; ++x) {
635  int pixel = GET_DATA_BYTE(data, x);
636  if (pixel < 255)
637  SET_DATA_BYTE(data, x, pixel + 1);
638  }
639  data += wpl;
640  }
641 }
642 
643 // Inserts a list of blobs into the projection.
644 // Rotation is a multiple of 90 degrees to get from blob coords to
645 // nontext_map coords, nontext_map_box is the bounds of the nontext_map.
646 // Blobs are spread horizontally or vertically according to their internal
647 // flags, but the spreading is truncated by set pixels in the nontext_map
648 // and also by the horizontal rule line limits on the blobs.
649 void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
650  const FCOORD& rotation,
651  const TBOX& nontext_map_box,
652  Pix* nontext_map) {
653  BLOBNBOX_IT blob_it(blobs);
654  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
655  BLOBNBOX* blob = blob_it.data();
656  TBOX bbox = blob->bounding_box();
657  ICOORD middle((bbox.left() + bbox.right()) / 2,
658  (bbox.bottom() + bbox.top()) / 2);
659  bool spreading_horizontally = PadBlobBox(blob, &bbox);
660  // Rotate to match the nontext_map.
661  bbox.rotate(rotation);
662  middle.rotate(rotation);
663  if (rotation.x() == 0.0f)
664  spreading_horizontally = !spreading_horizontally;
665  // Clip to the image before applying the increments.
666  bbox &= nontext_map_box; // This is in-place box intersection.
667  // Check for image pixels before spreading.
668  TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
669  nontext_map, &bbox);
670  if (bbox.area() > 0) {
671  IncrementRectangle8Bit(bbox);
672  }
673  }
674 }
675 
676 // Pads the bounding box of the given blob according to whether it is on
677 // a horizontal or vertical text line, taking into account tab-stops near
678 // the blob. Returns true if padding was in the horizontal direction.
679 bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
680  // Determine which direction to spread.
681  // If text is well spaced out, it can be useful to pad perpendicular to
682  // the textline direction, so as to ensure diacritics get absorbed
683  // correctly, but if the text is tightly spaced, this will destroy the
684  // blank space between textlines in the projection map, and that would
685  // be very bad.
686  int pad_limit = scale_factor_ * kMinLineSpacingFactor;
687  int xpad = 0;
688  int ypad = 0;
689  bool padding_horizontally = false;
690  if (blob->UniquelyHorizontal()) {
691  xpad = bbox->height() * kOrientedPadFactor;
692  padding_horizontally = true;
693  // If the text appears to be very well spaced, pad the other direction by a
694  // single pixel in the projection profile space to help join diacritics to
695  // the textline.
696  if ((blob->neighbour(BND_ABOVE) == NULL ||
697  bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
698  (blob->neighbour(BND_BELOW) == NULL ||
699  bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
700  ypad = scale_factor_;
701  }
702  } else if (blob->UniquelyVertical()) {
703  ypad = bbox->width() * kOrientedPadFactor;
704  if ((blob->neighbour(BND_LEFT) == NULL ||
705  bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
706  (blob->neighbour(BND_RIGHT) == NULL ||
707  bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
708  xpad = scale_factor_;
709  }
710  } else {
711  if ((blob->neighbour(BND_ABOVE) != NULL &&
712  blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
713  (blob->neighbour(BND_BELOW) != NULL &&
714  blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
715  ypad = bbox->width() * kDefaultPadFactor;
716  }
717  if ((blob->neighbour(BND_RIGHT) != NULL &&
718  blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
719  (blob->neighbour(BND_LEFT) != NULL &&
720  blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
721  xpad = bbox->height() * kDefaultPadFactor;
722  padding_horizontally = true;
723  }
724  }
725  bbox->pad(xpad, ypad);
726  pad_limit = scale_factor_ * kMaxTabStopOverrun;
727  // Now shrink horizontally to avoid stepping more than pad_limit over a
728  // tab-stop.
729  if (bbox->left() < blob->left_rule() - pad_limit) {
730  bbox->set_left(blob->left_rule() - pad_limit);
731  }
732  if (bbox->right() > blob->right_rule() + pad_limit) {
733  bbox->set_right(blob->right_rule() + pad_limit);
734  }
735  return padding_horizontally;
736 }
737 
738 // Helper denormalizes the TPOINT with the denorm if not NULL, then
739 // converts to pix_ coordinates.
740 void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
741  TPOINT* pt) const {
742  if (denorm != NULL) {
743  // Denormalize the point.
744  denorm->DenormTransform(*pt, pt);
745  }
746  pt->x = ImageXToProjectionX(pt->x);
747  pt->y = ImageYToProjectionY(pt->y);
748 }
749 
750 // Helper truncates the TPOINT to be within the pix_.
751 void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
752  pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
753  pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
754 }
755 
756 // Transform tesseract image coordinates to coordinates used in the projection.
757 int TextlineProjection::ImageXToProjectionX(int x) const {
758  x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
759  return x;
760 }
761 int TextlineProjection::ImageYToProjectionY(int y) const {
762  y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
763  return y;
764 }
765 
766 } // namespace tesseract.