Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
ccnontextdetect.h
Go to the documentation of this file.
1
2
// File: ccnontextdetect.h
3
// Description: Connected-Component-based non-text detection.
4
// Copyright 2011 Google Inc. All Rights Reserved.
5
// Author: rays@google.com (Ray Smith)
6
// Created: Sat Jun 11 09:52:01 PST 2011
7
//
8
// Licensed under the Apache License, Version 2.0 (the "License");
9
// you may not use this file except in compliance with the License.
10
// You may obtain a copy of the License at
11
// http://www.apache.org/licenses/LICENSE-2.0
12
// Unless required by applicable law or agreed to in writing, software
13
// distributed under the License is distributed on an "AS IS" BASIS,
14
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
// See the License for the specific language governing permissions and
16
// limitations under the License.
17
//
19
20
#ifndef TESSERACT_TEXTORD_CCPHOTODETECT_H_
21
#define TESSERACT_TEXTORD_CCPHOTODETECT_H_
22
23
#include "
blobgrid.h
"
24
#include "
scrollview.h
"
25
26
namespace
tesseract
{
27
28
// The CCNonTextDetect class contains grid-based operations on blobs to create
29
// a full-resolution image mask analogous yet complementary to
30
// pixGenHalftoneMask as it is better at line-drawings, graphs and charts.
31
class
CCNonTextDetect
:
public
BlobGrid
{
32
public
:
33
// Constructs the detector from a grid cell size and two corner coordinates
// (NOTE(review): bleft/tright look like the bottom-left and top-right of the
// grid bounding box, as in the BlobGrid base class; confirm).
CCNonTextDetect
(
int
gridsize
,
const
ICOORD
&
bleft
,
const
ICOORD
&
tright
);
34
virtual
~CCNonTextDetect
();
35
36
// Creates and returns a Pix with the same resolution as the original
37
// in which 1 (black) pixels represent likely non text (photo, line drawing)
38
// areas of the page, deleting from the blob_block the blobs that were
39
// determined to be non-text.
40
// The photo_map (binary image mask) is used to bias the decision towards
41
// non-text, rather than supplying a definite decision.
42
// The blob_block is the usual result of connected component analysis,
43
// holding the detected blobs.
44
// The returned Pix should be PixDestroyed after use.
45
Pix*
ComputeNonTextMask
(
bool
debug, Pix* photo_map,
TO_BLOCK
* blob_block);
46
47
private
:
48
// Computes and returns the noise_density IntGrid, at the same gridsize as
49
// this by summing the number of small elements in a 3x3 neighbourhood of
50
// each grid cell. good_grid is filled with blobs that are considered most
51
// likely good text, and this is filled with small and medium blobs that are
52
// more likely non-text.
53
// The photo_map is used to bias the decision towards non-text, rather than
54
// supplying a definite decision.
55
IntGrid
* ComputeNoiseDensity(
bool
debug, Pix* photo_map,
BlobGrid
* good_grid);
56
57
// Tests each blob in the list to see if it is certain non-text using 2
58
// conditions:
59
// 1. blob overlaps a cell with high value in noise_density_ (previously set
60
// by ComputeNoiseDensity).
61
// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
62
// condition is disabled with max_blob_overlaps == -1.
63
// If it does, the blob is declared non-text, and is used to mark up the
64
// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
65
// neighbours reset, as they may now point to deleted data.
66
// WARNING: The blobs in the blobs list may be in the *this grid, but they are
67
// not removed. If any deleted blobs might be in *this, then this must be
68
// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
69
// If the win is not NULL, deleted blobs are drawn on it in red, and kept
// blobs are presumably drawn on it in ok_color (NOTE(review): the original
// comment is cut off after "kept"; confirm against the implementation).
70
void
MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
71
int
max_blob_overlaps,
72
ScrollView
* win,
ScrollView::Color
ok_color,
73
Pix* nontext_mask);
74
// Returns true if the given blob overlaps more than max_overlaps blobs
75
// in the current grid.
76
bool
BlobOverlapsTooMuch(
BLOBNBOX
* blob,
int
max_overlaps);
77
78
// Max entry in noise_density_ before the cell is declared noisy.
79
int
max_noise_count_;
80
// Completed noise density map, which we keep around to use for secondary
81
// noise detection.
82
IntGrid
* noise_density_;
83
};
84
85
}
// namespace tesseract.
86
87
#endif // TESSERACT_TEXTORD_CCPHOTODETECT_H_
mnt
data
src
tesseract-ocr
textord
ccnontextdetect.h
Generated on Thu Nov 1 2012 20:19:49 for Tesseract by
Doxygen 1.8.1