Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::ImageFind Class Reference

#include <imagefind.h>

List of all members.

Static Public Member Functions

static Pix * FindImages (Pix *pix)
static void ConnCompAndRectangularize (Pix *pix, Boxa **boxa, Pixa **pixa)
static bool pixNearlyRectangular (Pix *pix, double min_fraction, double max_fraction, double max_skew_gradient, int *x_start, int *y_start, int *x_end, int *y_end)
static bool BoundsWithinRect (Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end)
static double ColorDistanceFromLine (const uinT8 *line1, const uinT8 *line2, const uinT8 *point)
static uinT32 ComposeRGB (uinT32 r, uinT32 g, uinT32 b)
static uinT8 ClipToByte (double pixel)
static void ComputeRectangleColors (const TBOX &rect, Pix *pix, int factor, Pix *color_map1, Pix *color_map2, Pix *rms_map, uinT8 *color1, uinT8 *color2)
static bool BlankImageInBetween (const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
static int CountPixelsInRotatedBox (TBOX box, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
static void TransferImagePartsToImageMask (const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)
static void FindImagePartitions (Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)

Detailed Description

Definition at line 42 of file imagefind.h.


Member Function Documentation

bool tesseract::ImageFind::BlankImageInBetween ( const TBOX &  box1,
const TBOX &  box2,
const TBOX &  im_box,
const FCOORD &  rotation,
Pix *  pix 
)
static

Definition at line 553 of file imagefind.cpp.

{
  // Returns true when the strip separating box1 from box2 holds no set
  // pixels in pix, or when the two boxes have no positive gap along the axis
  // on which they are compared.
  // Start from the union of the two boxes; it is narrowed to the gap below.
  TBOX gap_box(box1);
  gap_box += box2;
  if (box1.x_gap(box2) < box1.y_gap(box2)) {
    // The vertical gap is the larger one: keep only the band between the
    // boxes in y.
    if (box1.y_gap(box2) <= 0)
      return true;
    gap_box.set_top(MAX(box1.bottom(), box2.bottom()));
    gap_box.set_bottom(MIN(box1.top(), box2.top()));
  } else {
    // The horizontal gap is at least as large: keep only the band between
    // the boxes in x.
    if (box1.x_gap(box2) <= 0)
      return true;
    gap_box.set_left(MIN(box1.right(), box2.right()));
    gap_box.set_right(MAX(box1.left(), box2.left()));
  }
  const int pixels_in_gap =
      CountPixelsInRotatedBox(gap_box, im_box, rotation, pix);
  return pixels_in_gap == 0;
}
bool tesseract::ImageFind::BoundsWithinRect ( Pix *  pix,
int *  x_start,
int *  y_start,
int *  x_end,
int *  y_end 
)
static

Definition at line 309 of file imagefind.cpp.

{
  // Shrinks the rectangle [*x_start, *x_end) x [*y_start, *y_end) to the box
  // that pixClipBoxToForeground reports for pix. Returns false, leaving the
  // four coordinates untouched, when no output box is produced.
  Box* query_box = boxCreate(*x_start, *y_start,
                             *x_end - *x_start, *y_end - *y_start);
  Box* clipped_box = NULL;
  pixClipBoxToForeground(pix, query_box, NULL, &clipped_box);
  // The query box is only needed for the call above.
  boxDestroy(&query_box);
  if (clipped_box == NULL)
    return false;
  // Convert the (x, y, width, height) box back to start/end coordinates.
  l_int32 left, top, box_width, box_height;
  boxGetGeometry(clipped_box, &left, &top, &box_width, &box_height);
  boxDestroy(&clipped_box);
  *x_start = left;
  *y_start = top;
  *x_end = left + box_width;
  *y_end = top + box_height;
  return true;
}
uinT8 tesseract::ImageFind::ClipToByte ( double  pixel)
static

Definition at line 373 of file imagefind.cpp.

{
  // Clamps pixel to the range [0, 255] and truncates it to a byte.
  // The lower-bound test is written as a negated >= so that NaN, for which
  // every ordered comparison is false, is mapped to 0 rather than falling
  // through to the cast: converting NaN to an integer type is undefined.
  if (!(pixel >= 0.0))
    return 0;
  if (pixel >= 255.0)
    return 255;
  return static_cast<uinT8>(pixel);
}
double tesseract::ImageFind::ColorDistanceFromLine ( const uinT8 *  line1,
const uinT8 *  line2,
const uinT8 *  point 
)
static

Definition at line 332 of file imagefind.cpp.

{
  // Returns the squared distance of point from the line through line1 and
  // line2 in RGB space, computed as |direction x offset|^2 / |direction|^2.
  // Returns 0 when line1 and line2 coincide in R, G and B.
  int direction[kRGBRMSColors];  // line2 - line1, per channel.
  int offset[kRGBRMSColors];     // point - line1, per channel.
  for (int ch = 0; ch < kRGBRMSColors; ++ch) {
    const int origin = static_cast<int>(line1[ch]);
    direction[ch] = static_cast<int>(line2[ch]) - origin;
    offset[ch] = static_cast<int>(point[ch]) - origin;
  }
  // The alpha slot must not contribute to the length of the direction.
  direction[L_ALPHA_CHANNEL] = 0;
  // 3-d cross product of direction and offset over the R, G, B channels.
  int normal[kRGBRMSColors];
  normal[COLOR_RED] = direction[COLOR_GREEN] * offset[COLOR_BLUE] -
                      direction[COLOR_BLUE] * offset[COLOR_GREEN];
  normal[COLOR_GREEN] = direction[COLOR_BLUE] * offset[COLOR_RED] -
                        direction[COLOR_RED] * offset[COLOR_BLUE];
  normal[COLOR_BLUE] = direction[COLOR_RED] * offset[COLOR_GREEN] -
                       direction[COLOR_GREEN] * offset[COLOR_RED];
  normal[L_ALPHA_CHANNEL] = 0;
  // Squared magnitudes of the cross product and of the direction.
  double normal_sq = 0.0;
  double direction_sq = 0.0;
  for (int ch = 0; ch < kRGBRMSColors; ++ch) {
    const double n = normal[ch];
    const double d = direction[ch];
    normal_sq += n * n;
    direction_sq += d * d;
  }
  // A zero-length direction would divide by zero; report zero distance.
  return direction_sq == 0.0 ? 0.0 : normal_sq / direction_sq;
}
uinT32 tesseract::ImageFind::ComposeRGB ( uinT32  r,
uinT32  g,
uinT32  b 
)
static

Definition at line 366 of file imagefind.cpp.

{
  // Thin wrapper that returns the pixel value composeRGBPixel writes through
  // its output argument for the given r, g, b components.
  l_uint32 packed_pixel;
  composeRGBPixel(r, g, b, &packed_pixel);
  return packed_pixel;
}
void tesseract::ImageFind::ComputeRectangleColors ( const TBOX &  rect,
Pix *  pix,
int  factor,
Pix *  color_map1,
Pix *  color_map2,
Pix *  rms_map,
uinT8 *  color1,
uinT8 *  color2 
)
static

Definition at line 391 of file imagefind.cpp.

{
// Estimates two representative colors for the area of pix covered by rect
// (padded outwards, and with coordinates divided by factor) and writes them
// to color1 and color2, each indexed by COLOR_RED/GREEN/BLUE with an extra
// value in the L_ALPHA_CHANNEL slot. If color_map1 is non-NULL, the results
// are also painted into color_map1, color_map2 and rms_map over the same
// scaled rectangle.
// pix must be non-NULL and 32 bits deep.
ASSERT_HOST(pix != NULL && pixGetDepth(pix) == 32);
// Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
// background.
int width = pixGetWidth(pix);
int height = pixGetHeight(pix);
// The left/bottom edges round down and the top/right edges round up when
// dividing by factor; all four are clamped to the bounds of pix.
int left_pad = MAX(rect.left() - 2 * factor, 0) / factor;
int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
top_pad = MIN(height, top_pad);
int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
right_pad = MIN(width, right_pad);
int bottom_pad = MAX(rect.bottom() - 2 * factor, 0) / factor;
int width_pad = right_pad - left_pad;
int height_pad = top_pad - bottom_pad;
// Give up on degenerate regions. Note that color1/color2 and the maps are
// left unwritten on this path.
if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
return;
// Now crop the pix to the rectangle.
// The box y coordinate is measured from the other end of the image than the
// TBOX coordinates, hence height - top_pad.
Box* scaled_box = boxCreate(left_pad, height - top_pad,
width_pad, height_pad);
Pix* scaled = pixClipRectangle(pix, scaled_box, NULL);
// Compute stats over the whole image.
STATS red_stats(0, 256);
STATS green_stats(0, 256);
STATS blue_stats(0, 256);
uinT32* data = pixGetData(scaled);
// With one word per pixel and no row padding, a single running pointer can
// walk every pixel of the clipped image in the loops below.
ASSERT_HOST(pixGetWpl(scaled) == width_pad);
for (int y = 0; y < height_pad; ++y) {
for (int x = 0; x < width_pad; ++x, ++data) {
int r = GET_DATA_BYTE(data, COLOR_RED);
int g = GET_DATA_BYTE(data, COLOR_GREEN);
int b = GET_DATA_BYTE(data, COLOR_BLUE);
red_stats.add(r, 1);
green_stats.add(g, 1);
blue_stats.add(b, 1);
}
}
// Find the RGB component with the greatest 8th-ile-range.
// 8th-iles are used instead of quartiles to get closer to the true
// foreground color, which is going to be faint at best because of the
// pre-scaling of the input image.
// x_color is the channel with the widest range so far; y1_color and y2_color
// are the other two channels. Red is the initial candidate.
int best_l8 = static_cast<int>(red_stats.ile(0.125f));
int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
int best_i8r = best_u8 - best_l8;
int x_color = COLOR_RED;
int y1_color = COLOR_GREEN;
int y2_color = COLOR_BLUE;
int l8 = static_cast<int>(green_stats.ile(0.125f));
int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
if (u8 - l8 > best_i8r) {
best_i8r = u8 - l8;
best_l8 = l8;
best_u8 = u8;
// Green takes over as x; red becomes y1 and blue stays as y2.
x_color = COLOR_GREEN;
y1_color = COLOR_RED;
}
l8 = static_cast<int>(blue_stats.ile(0.125f));
u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
if (u8 - l8 > best_i8r) {
best_i8r = u8 - l8;
best_l8 = l8;
best_u8 = u8;
x_color = COLOR_BLUE;
y1_color = COLOR_GREEN;
y2_color = COLOR_RED;
}
if (best_i8r >= kMinColorDifference) {
// The dominant channel varies enough to treat the region as two colors:
// fit each of the other two channels as a function of the dominant one.
LLSQ line1;
LLSQ line2;
// Restart the pixel walk from the first pixel (this shadows the outer data).
uinT32* data = pixGetData(scaled);
for (int im_y = 0; im_y < height_pad; ++im_y) {
for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
int x = GET_DATA_BYTE(data, x_color);
int y1 = GET_DATA_BYTE(data, y1_color);
int y2 = GET_DATA_BYTE(data, y2_color);
line1.add(x, y1);
line2.add(x, y2);
}
}
double m1 = line1.m();
double c1 = line1.c(m1);
double m2 = line2.m();
double c2 = line2.c(m2);
double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
// Save the results.
// color1 is evaluated at the lower 8th-ile of the dominant channel and
// color2 at the upper one; both carry the summed rms in the alpha slot.
color1[x_color] = ClipToByte(best_l8);
color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
color2[x_color] = ClipToByte(best_u8);
color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
} else {
// There is only one color.
// Use the per-channel medians, a zero alpha slot, and make color2 a copy.
color1[COLOR_RED] = ClipToByte(red_stats.median());
color1[COLOR_GREEN] = ClipToByte(green_stats.median());
color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
color1[L_ALPHA_CHANNEL] = 0;
memcpy(color2, color1, 4);
}
// Only color_map1 is tested here; color_map2 and rms_map are used without
// their own NULL checks.
if (color_map1 != NULL) {
pixSetInRectArbitrary(color_map1, scaled_box,
ComposeRGB(color1[COLOR_RED],
color1[COLOR_GREEN],
color1[COLOR_BLUE]));
pixSetInRectArbitrary(color_map2, scaled_box,
ComposeRGB(color2[COLOR_RED],
color2[COLOR_GREEN],
color2[COLOR_BLUE]));
pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
}
// Release the temporary clipped image and its box.
pixDestroy(&scaled);
boxDestroy(&scaled_box);
}
void tesseract::ImageFind::ConnCompAndRectangularize ( Pix *  pix,
Boxa **  boxa,
Pixa **  pixa 
)
static

Definition at line 134 of file imagefind.cpp.

{
// Splits pix into its 8-connected components, returning their boxes in *boxa
// and their images in *pixa, then replaces qualifying components by a solid
// rectangle together with a matching box.
*boxa = NULL;
*pixa = NULL;
// NOTE(review): as listed, this debug write is unconditional. This listing
// appears to have lost some lines (see below), so a guard may be missing
// here - confirm against imagefind.cpp.
pixWrite("junkconncompimage.png", pix, IFF_PNG);
// Find the individual image regions in the mask image.
*boxa = pixConnComp(pix, pixa, 8);
// Rectangularize the individual images. If a sharp edge in vertical and/or
// horizontal occupancy can be found, it indicates a probably rectangular
// image with unwanted bits merged on, so clip to the approximate rectangle.
int npixes = pixaGetCount(*pixa);
for (int i = 0; i < npixes; ++i) {
int x_start, x_end, y_start, y_end;
// Each handle obtained with L_CLONE is released by a pixDestroy below.
Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
pixDisplayWrite(img_pix, textord_tabfind_show_images);
// NOTE(review): the next line is only the tail of a statement; the start of
// the condition is missing from this listing. Presumably it is an
// "if (pixNearlyRectangular(img_pix, ..." test that fills in x_start, y_start,
// x_end and y_end - TODO confirm against imagefind.cpp.
&x_start, &y_start, &x_end, &y_end)) {
// Build an all-set 1 bpp image of the size of the detected rectangle.
Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
pixSetAll(simple_pix);
pixDestroy(&img_pix);
// pixaReplacePix takes ownership of the simple_pix.
pixaReplacePix(*pixa, i, simple_pix, NULL);
// Re-fetch the handle so the pixDestroy at the end of the loop body has
// something to release.
img_pix = pixaGetPix(*pixa, i, L_CLONE);
// Fix the box to match the new pix.
l_int32 x, y, width, height;
boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
// x_start/y_start are offsets inside the component, so add the component's
// own origin to place the new box in the coordinates of pix.
Box* simple_box = boxCreate(x + x_start, y + y_start,
x_end - x_start, y_end - y_start);
boxaReplaceBox(*boxa, i, simple_box);
}
pixDestroy(&img_pix);
}
}
int tesseract::ImageFind::CountPixelsInRotatedBox ( TBOX  box,
const TBOX &  im_box,
const FCOORD &  rotation,
Pix *  pix 
)
static

Definition at line 574 of file imagefind.cpp.

{
  // Returns the count that pixCountPixels reports for the part of pix that
  // corresponds to box, after box has been clipped to im_box and both have
  // been rotated by rotation. Returns 0 if the clipped box is null.
  box &= im_box;  // In-place intersection; box is a by-value copy.
  if (box.null_box())
    return 0;
  TBOX im_box_rotated(im_box);
  im_box_rotated.rotate(rotation);
  box.rotate(rotation);
  // Copy the matching window of pix into a scratch 1 bpp image. The source
  // offsets are taken relative to the left and top edges of the rotated
  // image box.
  Pix* window = pixCreate(box.width(), box.height(), 1);
  pixRasterop(window, 0, 0, box.width(), box.height(), PIX_SRC, pix,
              box.left() - im_box_rotated.left(),
              im_box_rotated.top() - box.top());
  l_int32 set_pixels;
  pixCountPixels(window, &set_pixels, NULL);
  pixDestroy(&window);
  return set_pixels;
}
void tesseract::ImageFind::FindImagePartitions ( Pix *  image_pix,
const FCOORD &  rotation,
const FCOORD &  rerotation,
TO_BLOCK *  block,
TabFind *  tab_grid,
ColPartitionGrid *  part_grid,
ColPartition_LIST *  big_parts 
)
static

Definition at line 1275 of file imagefind.cpp.

{
// Converts each connected component of the image mask image_pix into one or
// more image partitions and inserts them into part_grid.
// NOTE(review): the braces in this listing do not balance (see the notes
// below), so some source lines appear to be missing - confirm the control
// flow against imagefind.cpp before relying on it.
int imageheight = pixGetHeight(image_pix);
Boxa* boxa;
Pixa* pixa;
ConnCompAndRectangularize(image_pix, &boxa, &pixa);
// Iterate the connected components in the image regions mask.
int nboxes = boxaGetCount(boxa);
for (int i = 0; i < nboxes; ++i) {
l_int32 x, y, width, height;
boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
Pix* pix = pixaGetPix(pixa, i, L_CLONE);
// Convert the box to a TBOX, flipping the y coordinates about imageheight.
TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
im_box.rotate(rotation); // Now matches all partitions and blobs.
ColPartitionGridSearch rectsearch(part_grid);
rectsearch.SetUniqueMode(true);
ColPartition_LIST part_list;
DivideImageIntoParts(im_box, rotation, rerotation, pix,
&rectsearch, &part_list);
// NOTE(review): the closing brace after the next two debug statements has no
// visible opening; presumably a debug-only "if (...) {" line was dropped from
// this listing - TODO confirm.
pixWrite("junkimagecomponent.png", pix, IFF_PNG);
tprintf("Component has %d parts\n", part_list.length());
}
// Release the handle obtained from pixaGetPix above.
pixDestroy(&pix);
if (!part_list.empty()) {
ColPartition_IT part_it(&part_list);
if (part_list.singleton()) {
// We didn't have to chop it into a polygon to fit around text, so
// try expanding it to merge fragmented image parts, as long as it
// doesn't touch strong text.
ColPartition* part = part_it.extract();
TBOX text_box(im_box);
MaximalImageBoundingBox(part_grid, &text_box);
// Keep expanding for as long as ExpandImageIntoParts returns true.
while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
// Put the (possibly replaced) part back on the list and adopt its box.
part_it.set_to_list(&part_list);
part_it.add_after_then_move(part);
im_box = part->bounding_box();
}
EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
// Iterate the part_list and put the parts into the grid.
for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
ColPartition* image_part = part_it.extract();
im_box = image_part->bounding_box();
part_grid->InsertBBox(true, true, image_part);
// Link each part and the part that follows it in the list as partners.
if (!part_it.at_last()) {
ColPartition* neighbour = part_it.data_relative(1);
image_part->AddPartner(false, neighbour);
neighbour->AddPartner(true, image_part);
}
}
}
}
// Release the component boxes and images from ConnCompAndRectangularize.
boxaDestroy(&boxa);
pixaDestroy(&pixa);
DeleteSmallImages(part_grid);
// NOTE(review): the extra closing brace after the two display statements
// below suggests they were wrapped in a debug-only conditional whose opening
// line is missing from this listing - TODO confirm.
ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
part_grid->DisplayBoxes(images_win_);
}
}
Pix * tesseract::ImageFind::FindImages ( Pix *  pix)
static

Definition at line 66 of file imagefind.cpp.

{
// Builds a 1 bpp image mask, of the same size as pix, from the halftone mask
// that Leptonica finds in pix. Returns a newly created Pix on every path.
// Not worth looking at small images.
if (pixGetWidth(pix) < kMinImageFindSize ||
pixGetHeight(pix) < kMinImageFindSize)
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
// Reduce by factor 2.
Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
pixDisplayWrite(pixr, textord_tabfind_show_images);
// Get the halftone mask directly from Leptonica.
l_int32 ht_found = 0;
// NOTE(review): the next statement is truncated in this listing; the final
// argument(s) and the closing ");" of the pixGenHalftoneMask call are
// missing - confirm against imagefind.cpp.
Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, &ht_found,
pixDestroy(&pixr);
// Discard a mask that was returned without any halftone being found, so
// that both failure cases are handled by the NULL test below.
if (!ht_found && pixht2 != NULL)
pixDestroy(&pixht2);
if (pixht2 == NULL)
return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
// Expand back up again.
Pix *pixht = pixExpandReplicate(pixht2, 2);
pixDisplayWrite(pixht, textord_tabfind_show_images);
pixDestroy(&pixht2);
// Fill to capture pixels near the mask edges that were missed
Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8);
pixOr(pixht, pixht, pixt);
pixDestroy(&pixt);
// Eliminate lines and bars that may be joined to images.
// Two masks are derived from pixht: a fine one (one reduction step) and a
// coarse one (further reduction steps, expanded again by 8).
Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
pixDisplayWrite(pixfinemask, textord_tabfind_show_images);
Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
pixDestroy(&pixreduced);
pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
pixDestroy(&pixreduced2);
pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images);
// Combine the coarse and fine image masks.
pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
pixDestroy(&pixfinemask);
// Dilate a bit to make sure we get everything.
pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
pixDestroy(&pixcoarsemask);
// NOTE(review): this debug write and the one further down are unconditional
// as listed; since other lines are missing from this listing, a guard may
// have been dropped - confirm.
pixWrite("junkexpandedcoarsemask.png", pixmask, IFF_PNG);
// And the image mask with the line and bar remover.
pixAnd(pixht, pixht, pixmask);
pixDestroy(&pixmask);
pixWrite("junkfinalimagemask.png", pixht, IFF_PNG);
// Make the result image the same size as the input.
Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
pixOr(result, result, pixht);
pixDestroy(&pixht);
return result;
}
bool tesseract::ImageFind::pixNearlyRectangular ( Pix *  pix,
double  min_fraction,
double  max_fraction,
double  max_skew_gradient,
int *  x_start,
int *  y_start,
int *  x_end,
int *  y_end 
)
static

Definition at line 243 of file imagefind.cpp.

{
// Searches pix for the four edges of an approximately rectangular region.
// *x_start, *y_start, *x_end and *y_end are first set to the full extent of
// pix and are then updated by the edge scans. Returns true only if each of
// the four scans has succeeded at least once.
// NOTE(review): the exact success criterion of HScanForEdge/VScanForEdge is
// not visible here; the roles of min_fraction, max_fraction and
// max_skew_gradient below are inferred from how they are passed - confirm.
ASSERT_HOST(pix != NULL);
*x_start = 0;
*x_end = pixGetWidth(pix);
*y_start = 0;
*y_end = pixGetHeight(pix);
uinT32* data = pixGetData(pix);
int wpl = pixGetWpl(pix);
bool any_cut = false;
// Each flag records that the scan for that edge has succeeded once.
bool left_done = false;
bool right_done = false;
bool top_done = false;
bool bottom_done = false;
// Repeat while some edge is found for the first time, because each new edge
// changes the extent over which the other scans run.
do {
any_cut = false;
// Find the top/bottom edges.
// The thresholds are fractions of the current width.
int width = *x_end - *x_start;
int min_count = static_cast<int>(width * min_fraction);
int max_count = static_cast<int>(width * max_fraction);
int edge_width = static_cast<int>(width * max_skew_gradient);
// The scan is the left operand of &&, so it runs (and may update *y_start)
// on every pass, even after top_done has been set.
if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
max_count, *y_end, 1, y_start) && !top_done) {
top_done = true;
any_cut = true;
}
// *y_end is stepped back by one for the reverse (-1) scan and restored
// afterwards; presumably this converts between an exclusive end and the
// index of the last row - TODO confirm.
--(*y_end);
if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
max_count, *y_start, -1, y_end) && !bottom_done) {
bottom_done = true;
any_cut = true;
}
++(*y_end);
// Find the left/right edges.
// The thresholds are now fractions of the (possibly reduced) height.
int height = *y_end - *y_start;
min_count = static_cast<int>(height * min_fraction);
max_count = static_cast<int>(height * max_fraction);
edge_width = static_cast<int>(height * max_skew_gradient);
if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
max_count, *x_end, 1, x_start) && !left_done) {
left_done = true;
any_cut = true;
}
// Same temporary adjustment as for *y_end above.
--(*x_end);
if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
max_count, *x_start, -1, x_end) && !right_done) {
right_done = true;
any_cut = true;
}
++(*x_end);
} while (any_cut);
// All edges must satisfy the condition of sharp gradient in pixel density
// in order for the full rectangle to be present.
return left_done && right_done && top_done && bottom_done;
}
void tesseract::ImageFind::TransferImagePartsToImageMask ( const FCOORD &  rerotation,
ColPartitionGrid *  part_grid,
Pix *  image_mask 
)
static

Definition at line 1222 of file imagefind.cpp.

{
  // Moves every partition of type BRT_NOISE, BRT_RECTIMAGE or BRT_POLYIMAGE
  // out of part_grid onto a temporary list, then passes that list to
  // MarkAndDeleteImageParts together with image_mask.
  ColPartition_LIST collected_parts;
  ColPartition_IT collector(&collected_parts);
  ColPartitionGridSearch grid_walk(part_grid);
  grid_walk.StartFullSearch();
  for (ColPartition* candidate = grid_walk.NextFullSearch();
       candidate != NULL; candidate = grid_walk.NextFullSearch()) {
    switch (candidate->blob_type()) {
      case BRT_NOISE:
      case BRT_RECTIMAGE:
      case BRT_POLYIMAGE:
        // Append to the temporary list and take it out of the grid.
        collector.add_after_then_move(candidate);
        grid_walk.RemoveBBox();
        break;
      default:
        // Every other partition type stays in the grid.
        break;
    }
  }
  // Render the collected partitions to the image mask.
  MarkAndDeleteImageParts(rerotation, part_grid, &collected_parts, image_mask);
}

The documentation for this class was generated from the following files: