Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
makerow.h
Go to the documentation of this file.
1
/**********************************************************************
 * File:        makerow.h  (Formerly makerows.h)
 * Description: Code to arrange blobs into rows of text.
 * Author:      Ray Smith
 * Created:     Mon Sep 21 14:34:48 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/
19
20
#ifndef MAKEROW_H
21
#define MAKEROW_H
22
23
#include "
params.h
"
24
#include "
ocrblock.h
"
25
#include "
blobs.h
"
26
#include "
blobbox.h
"
27
#include "
statistc.h
"
28
#include "
notdll.h
"
29
30
// Result type of most_overlapping_row() (declared later in this header).
// Enumerator values are the implicit 0, 1, 2.
enum OVERLAP_STATE {
  ASSIGN,   // assign it to row
  REJECT,   // reject it - dual overlap
  NEW_ROW   // NOTE(review): presumably "start a new row" - confirm with callers
};
36
37
// Category of a row as computed by get_row_category() in this header.
// Enumerator values are the implicit 0..3.
enum ROW_CATEGORY {
  ROW_ASCENDERS_FOUND,   // xheight > 0 and ascrise > 0
  ROW_DESCENDERS_FOUND,  // xheight > 0, ascrise <= 0 and descdrop != 0
  ROW_UNKNOWN,           // xheight > 0 but neither of the above
  ROW_INVALID            // xheight <= 0
};
43
44
extern
BOOL_VAR_H
(
textord_show_initial_rows
,
FALSE
,
45
"Display row accumulation"
);
46
extern
BOOL_VAR_H
(
textord_show_parallel_rows
,
FALSE
,
47
"Display page correlated rows"
);
48
extern
BOOL_VAR_H
(
textord_show_expanded_rows
,
FALSE
,
49
"Display rows after expanding"
);
50
extern
BOOL_VAR_H
(
textord_show_final_rows
,
FALSE
,
51
"Display rows after final fitting"
);
52
extern
BOOL_VAR_H
(
textord_show_final_blobs
,
FALSE
,
53
"Display blob bounds after pre-ass"
);
54
extern
BOOL_VAR_H
(
textord_test_landscape
,
FALSE
,
"Tests refer to land/port"
);
55
extern
BOOL_VAR_H
(
textord_parallel_baselines
,
TRUE
,
56
"Force parallel baselines"
);
57
extern
BOOL_VAR_H
(
textord_straight_baselines
,
FALSE
,
58
"Force straight baselines"
);
59
extern
BOOL_VAR_H
(textord_quadratic_baselines,
FALSE
,
60
"Use quadratic splines"
);
61
extern
BOOL_VAR_H
(
textord_old_baselines
,
TRUE
,
"Use old baseline algorithm"
);
62
extern
BOOL_VAR_H
(
textord_old_xheight
,
TRUE
,
"Use old xheight algorithm"
);
63
extern
BOOL_VAR_H
(
textord_fix_xheight_bug
,
TRUE
,
"Use spline baseline"
);
64
extern
BOOL_VAR_H
(
textord_fix_makerow_bug
,
TRUE
,
65
"Prevent multiple baselines"
);
66
extern
BOOL_VAR_H
(textord_cblob_blockocc,
TRUE
,
67
"Use new projection for underlines"
);
68
extern
BOOL_VAR_H
(
textord_debug_xheights
,
FALSE
,
"Test xheight algorithms"
);
69
extern
INT_VAR_H
(
textord_test_x
, 0,
"coord of test pt"
);
70
extern
INT_VAR_H
(
textord_test_y
, 0,
"coord of test pt"
);
71
extern
INT_VAR_H
(
textord_min_blobs_in_row
, 4,
72
"Min blobs before gradient counted"
);
73
extern
INT_VAR_H
(
textord_spline_minblobs
, 8,
74
"Min blobs in each spline segment"
);
75
extern
INT_VAR_H
(
textord_spline_medianwin
, 6,
76
"Size of window for spline segmentation"
);
77
extern
INT_VAR_H
(
textord_min_xheight
, 10,
"Min credible pixel xheight"
);
78
extern
double_VAR_H
(
textord_spline_shift_fraction
, 0.02,
79
"Fraction of line spacing for quad"
);
80
extern
double_VAR_H
(
textord_spline_outlier_fraction
, 0.1,
81
"Fraction of line spacing for outlier"
);
82
extern
double_VAR_H
(
textord_skew_ile
, 0.5,
"Ile of gradients for page skew"
);
83
extern
double_VAR_H
(
textord_skew_lag
, 0.75,
84
"Lag for skew on row accumulation"
);
85
extern
double_VAR_H
(
textord_linespace_iqrlimit
, 0.2,
86
"Max iqr/median for linespace"
);
87
extern
double_VAR_H
(
textord_width_limit
, 8,
88
"Max width of blobs to make rows"
);
89
extern
double_VAR_H
(
textord_chop_width
, 1.5,
"Max width before chopping"
);
90
extern
double_VAR_H
(
textord_minxh
, 0.25,
91
"fraction of linesize for min xheight"
);
92
extern
double_VAR_H
(
textord_min_linesize
, 1.25,
93
"* blob height for initial linesize"
);
94
extern
double_VAR_H
(
textord_excess_blobsize
, 1.3,
95
"New row made if blob makes row this big"
);
96
extern
double_VAR_H
(
textord_occupancy_threshold
, 0.4,
97
"Fraction of neighbourhood"
);
98
extern
double_VAR_H
(
textord_underline_width
, 2.0,
99
"Multiple of line_size for underline"
);
100
extern
double_VAR_H
(
textord_min_blob_height_fraction
, 0.75,
101
"Min blob height/top to include blob top into xheight stats"
);
102
extern
double_VAR_H
(
textord_xheight_mode_fraction
, 0.4,
103
"Min pile height to make xheight"
);
104
extern
double_VAR_H
(
textord_ascheight_mode_fraction
, 0.15,
105
"Min pile height to make ascheight"
);
106
extern
double_VAR_H
(
textord_ascx_ratio_min
, 1.2,
"Min cap/xheight"
);
107
extern
double_VAR_H
(
textord_ascx_ratio_max
, 1.7,
"Max cap/xheight"
);
108
extern
double_VAR_H
(
textord_descx_ratio_min
, 0.15,
"Min desc/xheight"
);
109
extern
double_VAR_H
(
textord_descx_ratio_max
, 0.6,
"Max desc/xheight"
);
110
extern
double_VAR_H
(
textord_xheight_error_margin
, 0.1,
"Accepted variation"
);
111
extern
INT_VAR_H
(
textord_lms_line_trials
, 12,
"Number of linew fits to do"
);
112
extern
BOOL_VAR_H
(
textord_new_initial_xheight
,
TRUE
,
113
"Use test xheight mechanism"
);
114
115
inline
void
get_min_max_xheight
(
int
block_linesize,
116
int
*min_height,
int
*max_height) {
117
*min_height =
static_cast<
inT32
>
(floor(block_linesize *
textord_minxh
));
118
if
(*min_height <
textord_min_xheight
) *min_height =
textord_min_xheight
;
119
*max_height =
static_cast<
inT32
>
(ceil(block_linesize * 3.0));
120
}
121
122
// Classifies a row from its xheight, ascrise and descdrop members.
// Checks are applied in this order:
//   xheight <= 0   -> ROW_INVALID
//   ascrise > 0    -> ROW_ASCENDERS_FOUND
//   descdrop != 0  -> ROW_DESCENDERS_FOUND
//   otherwise      -> ROW_UNKNOWN
// row is dereferenced unconditionally and must be non-NULL.
inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
  if (row->xheight <= 0) return ROW_INVALID;
  if (row->ascrise > 0) return ROW_ASCENDERS_FOUND;
  if (row->descdrop != 0) return ROW_DESCENDERS_FOUND;
  return ROW_UNKNOWN;
}
127
128
// Returns true when test lies in the closed interval
// [num * (1 - margin), num * (1 + margin)], i.e. test is within a relative
// tolerance of margin around num.
// NOTE(review): for num < 0 and margin > 0 the lower bound exceeds the upper
// bound, so the result is always false.
inline bool within_error_margin(float test, float num, float margin) {
  return (test >= num * (1 - margin) && test <= num * (1 + margin));
}
131
132
void
fill_heights
(
TO_ROW
*row,
float
gradient,
int
min_height,
133
int
max_height,
STATS
*heights,
STATS
*floating_heights);
134
135
float
make_single_row
(
ICOORD
page_tr,
TO_BLOCK
* block,
136
TO_BLOCK_LIST* blocks);
137
float
make_rows
(
ICOORD
page_tr,
// top right
138
TO_BLOCK_LIST *port_blocks);
139
void
make_initial_textrows
(
ICOORD
page_tr,
140
TO_BLOCK
*block,
// block to do
141
FCOORD
rotation,
// for drawing
142
BOOL8
testing_on);
// correct orientation
143
void
fit_lms_line
(
TO_ROW
*row);
144
void
compute_page_skew
(TO_BLOCK_LIST *blocks,
// list of blocks
145
float
&page_m,
// average gradient
146
float
&page_err);
// average error
147
void
cleanup_rows_making
(
ICOORD
page_tr,
// top right
148
TO_BLOCK
*block,
// block to do
149
float
gradient,
// gradient to fit
150
FCOORD
rotation,
// for drawing
151
inT32
block_edge,
// edge of block
152
BOOL8
testing_on);
// correct orientation
153
void
delete_non_dropout_rows
(
//find lines
154
TO_BLOCK
*block,
//block to do
155
float
gradient,
//global skew
156
FCOORD
rotation,
//deskew vector
157
inT32
block_edge,
//left edge
158
BOOL8
testing_on
//correct orientation
159
);
160
BOOL8
find_best_dropout_row
(
//find neighbours
161
TO_ROW
*row,
//row to test
162
inT32
distance,
//dropout dist
163
float
dist_limit,
//threshold distance
164
inT32
line_index,
//index of row
165
TO_ROW_IT *row_it,
//current position
166
BOOL8
testing_on
//correct orientation
167
);
168
TBOX
deskew_block_coords
(
//block box
169
TO_BLOCK
*block,
//block to do
170
float
gradient
//global skew
171
);
172
void
compute_line_occupation
(
//project blobs
173
TO_BLOCK
*block,
//block to do
174
float
gradient,
//global skew
175
inT32
min_y,
//min coord in block
176
inT32
max_y,
//in block
177
inT32
*occupation,
//output projection
178
inT32
*deltas
//derivative
179
);
180
void
compute_occupation_threshold
(
//project blobs
181
inT32
low_window,
//below result point
182
inT32
high_window,
//above result point
183
inT32
line_count,
//array sizes
184
inT32
*occupation,
//input projection
185
inT32
*thresholds
//output thresholds
186
);
187
void
compute_dropout_distances
(
//project blobs
188
inT32
*occupation,
//input projection
189
inT32
*thresholds,
//output thresholds
190
inT32
line_count
//array sizes
191
);
192
void
expand_rows
(
//find lines
193
ICOORD
page_tr,
//top right
194
TO_BLOCK
*block,
//block to do
195
float
gradient,
//gradient to fit
196
FCOORD
rotation,
//for drawing
197
inT32
block_edge,
//edge of block
198
BOOL8
testing_on
//correct orientation
199
);
200
void
adjust_row_limits
(
//tidy limits
201
TO_BLOCK
*block
//block to do
202
);
203
void
compute_row_stats
(
//find lines
204
TO_BLOCK
*block,
//block to do
205
BOOL8
testing_on
//correct orientation
206
);
207
float
median_block_xheight
(
//find lines
208
TO_BLOCK
*block,
//block to do
209
float
gradient
//global skew
210
);
211
212
int
compute_xheight_from_modes
(
213
STATS
*heights,
STATS
*floating_heights,
bool
cap_only,
int
min_height,
214
int
max_height,
float
*xheight,
float
*ascrise);
215
216
inT32
compute_row_descdrop
(
TO_ROW
*row,
// row to do
217
float
gradient,
// global skew
218
int
xheight_blob_count,
219
STATS
*heights);
220
inT32
compute_height_modes
(
STATS
*heights,
// stats to search
221
inT32
min_height,
// bottom of range
222
inT32
max_height,
// top of range
223
inT32
*modes,
// output array
224
inT32
maxmodes);
// size of modes
225
void
correct_row_xheight
(
TO_ROW
*row,
// row to fix
226
float
xheight,
// average values
227
float
ascrise,
228
float
descdrop);
229
void
separate_underlines
(
TO_BLOCK
*block,
// block to do
230
float
gradient,
// skew angle
231
FCOORD
rotation,
// inverse landscape
232
BOOL8
testing_on);
// correct orientation
233
void
pre_associate_blobs
(
ICOORD
page_tr,
// top right
234
TO_BLOCK
*block,
// block to do
235
FCOORD
rotation,
// inverse landscape
236
BOOL8
testing_on);
// correct orientation
237
void
fit_parallel_rows
(
TO_BLOCK
*block,
// block to do
238
float
gradient,
// gradient to fit
239
FCOORD
rotation,
// for drawing
240
inT32
block_edge,
// edge of block
241
BOOL8
testing_on);
// correct orientation
242
void
fit_parallel_lms
(
float
gradient,
// forced gradient
243
TO_ROW
*row);
// row to fit
244
void
make_baseline_spline
(
TO_ROW
*row,
// row to fit
245
TO_BLOCK
*block);
// block it came from
246
BOOL8
segment_baseline
(
//split baseline
247
TO_ROW
* row,
//row to fit
248
TO_BLOCK
* block,
//block it came from
249
inT32
& segments,
//no fo segments
250
inT32
xstarts[]
//coords of segments
251
);
252
double
*
linear_spline_baseline
(
//split baseline
253
TO_ROW
* row,
//row to fit
254
TO_BLOCK
* block,
//block it came from
255
inT32
& segments,
//no fo segments
256
inT32
xstarts[]
//coords of segments
257
);
258
void
assign_blobs_to_rows
(
//find lines
259
TO_BLOCK
*block,
//block to do
260
float
*gradient,
//block skew
261
int
pass,
//identification
262
BOOL8
reject_misses,
//chuck big ones out
263
BOOL8
make_new_rows,
//add rows for unmatched
264
BOOL8
drawing_skew
//draw smoothed skew
265
);
266
//find best row
267
OVERLAP_STATE
most_overlapping_row
(TO_ROW_IT *row_it,
//iterator
268
TO_ROW
*&best_row,
//output row
269
float
top,
//top of blob
270
float
bottom,
//bottom of blob
271
float
rowsize,
//max row size
272
BOOL8
testing_blob
//test stuff
273
);
274
int
blob_x_order
(
//sort function
275
const
void
*item1,
//items to compare
276
const
void
*item2);
277
int
row_y_order
(
//sort function
278
const
void
*item1,
//items to compare
279
const
void
*item2);
280
int
row_spacing_order
(
//sort function
281
const
void
*item1,
//items to compare
282
const
void
*item2);
283
284
void
mark_repeated_chars
(
TO_ROW
*row);
285
#endif
mnt
data
src
tesseract-ocr
textord
makerow.h
Generated on Thu Nov 1 2012 20:19:50 for Tesseract by
1.8.1