Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
rejctmap.h
Go to the documentation of this file.
1
/**********************************************************************
2
* File: rejctmap.h (Formerly rejmap.h)
3
* Description: REJ and REJMAP class functions.
4
* Author: Phil Cheatle
5
* Created: Thu Jun 9 13:46:38 BST 1994
6
*
7
* (C) Copyright 1994, Hewlett-Packard Ltd.
8
** Licensed under the Apache License, Version 2.0 (the "License");
9
** you may not use this file except in compliance with the License.
10
** You may obtain a copy of the License at
11
** http://www.apache.org/licenses/LICENSE-2.0
12
** Unless required by applicable law or agreed to in writing, software
13
** distributed under the License is distributed on an "AS IS" BASIS,
14
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
** See the License for the specific language governing permissions and
16
** limitations under the License.
17
*
18
19
This module may look unnecessarily verbose, but here's the philosophy...
20
21
ALL processing of the reject map is done in this module. There are lots of
22
separate calls to set reject/accept flags. These have DELIBERATELY been kept
23
distinct so that this module can decide what to do.
24
25
Basically, there is a flag for each sort of rejection or acceptance. This
26
provides a history of what has happened to EACH character.
27
28
Determining whether a character is CURRENTLY rejected depends on implicit
29
understanding of the SEQUENCE of possible calls. The flags are defined and
30
grouped in the REJ_FLAGS enum. These groupings are used in determining a
31
character's CURRENT rejection status. Basically, a character is ACCEPTED if
32
33
none of the permanent rej flags are set
34
AND ( the character has never been rejected
35
OR an accept flag is set which is LATER than the latest reject flag )
36
37
IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
38
OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
39
**********************************************************************/
40
41
#ifndef REJCTMAP_H
42
#define REJCTMAP_H
43
44
#ifdef __UNIX__
45
#include <assert.h>
46
#endif
47
#include "
memry.h
"
48
#include "
bits16.h
"
49
#include "
params.h
"
50
#include "
notdll.h
"
51
52
// One enumerator per kind of rejection or acceptance that can be recorded
// against a character. The numeric value is the bit position used by REJ:
// values below 16 are kept in REJ::flags1 and the remainder in REJ::flags2
// (at value - 16), so the values are spelled out here. The grouping and
// ordering is significant: the file header explains that a character's
// current status depends on the implied temporal order of the flags.
enum REJ_FLAGS
{
  /* Reject modes which are NEVER overridden */
  R_TESS_FAILURE       = 0,   // PERM Tess didn't classify
  R_SMALL_XHT          = 1,   // PERM Xht too small
  R_EDGE_CHAR          = 2,   // PERM Too close to edge of image
  R_1IL_CONFLICT       = 3,   // PERM 1Il confusion
  R_POSTNN_1IL         = 4,   // PERM 1Il unrejected by NN
  R_REJ_CBLOB          = 5,   // PERM Odd blob
  R_MM_REJECT          = 6,   // PERM Matrix match rejection (m's)
  // NOTE(review): tagged TEMP although it sits in the never-overridden
  // group - confirm which is intended.
  R_BAD_REPETITION     = 7,   // TEMP Repeated char which doesn't match trend

  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH         = 8,   // TEMP Ray's original heuristic (Not used)
  R_NOT_TESS_ACCEPTED  = 9,   // TEMP Tess didn't accept WERD
  R_CONTAINS_BLANKS    = 10,  // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER       = 11,  // POTENTIAL Bad permuter for WERD

  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
  R_HYPHEN             = 12,  // TEMP Post NN dodgy hyphen or full stop
  R_DUBIOUS            = 13,  // TEMP Post NN dodgy chars
  R_NO_ALPHANUMS       = 14,  // TEMP No alphanumerics in word after NN
  R_MOSTLY_REJ         = 15,  // TEMP Most of word rejected so rej the rest
  R_XHT_FIXUP          = 16,  // TEMP Xht tests unsure

  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
  R_BAD_QUALITY        = 17,  // TEMP Quality metrics bad for WERD

  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
  R_DOC_REJ            = 18,  // TEMP Document rejection
  R_BLOCK_REJ          = 19,  // TEMP Block rejection
  R_ROW_REJ            = 20,  // TEMP Row rejection
  R_UNLV_REJ           = 21,  // TEMP ~ turned to - or ^ turned to space

  /* Accept modes which occur in between the above rejection groups */
  R_NN_ACCEPT          = 22,  // NN acceptance
  R_HYPHEN_ACCEPT      = 23,  // Hyphen acceptance
  R_MM_ACCEPT          = 24,  // Matrix match acceptance
  R_QUALITY_ACCEPT     = 25,  // Accept word in good quality doc
  R_MINIMAL_REJ_ACCEPT = 26   // Accept EVERYTHING except tess failures
};
93
94
/* REJECT MAP VALUES
 *
 * Single-character codes returned by REJ::display_char() to summarise the
 * state of one character. display_char() tests the conditions in the order
 * PERM, POTENTIAL, TEMP and falls back to ACCEPT.
 */

#define MAP_ACCEPT            '1'  /* none of the reject tests fired */
#define MAP_REJECT_PERM       '0'  /* perm_rejected() is true */
#define MAP_REJECT_TEMP       '2'  /* rejected() is true */
#define MAP_REJECT_POTENTIAL  '3'  /* accept_if_good_quality() is true */
100
101
class
REJ
102
{
103
BITS16
flags1;
104
BITS16
flags2;
105
106
void
set_flag(
REJ_FLAGS
rej_flag) {
107
if
(rej_flag < 16)
108
flags1.
turn_on_bit
(rej_flag);
109
else
110
flags2.
turn_on_bit
(rej_flag - 16);
111
}
112
113
BOOL8
rej_before_nn_accept();
114
BOOL8
rej_between_nn_and_mm();
115
BOOL8
rej_between_mm_and_quality_accept();
116
BOOL8
rej_between_quality_and_minimal_rej_accept();
117
BOOL8
rej_before_mm_accept();
118
BOOL8
rej_before_quality_accept();
119
120
public
:
121
REJ
() {
//constructor
122
}
123
124
REJ
(
//classwise copy
125
const
REJ
&source) {
126
flags1 = source.flags1;
127
flags2 = source.flags2;
128
}
129
130
REJ
&
operator=
(
//assign REJ
131
const
REJ
& source) {
//from this
132
flags1 = source.flags1;
133
flags2 = source.flags2;
134
return
*
this
;
135
}
136
137
BOOL8
flag
(
REJ_FLAGS
rej_flag) {
138
if
(rej_flag < 16)
139
return
flags1.
bit
(rej_flag);
140
else
141
return
flags2.
bit
(rej_flag - 16);
142
}
143
144
char
display_char
() {
145
if
(
perm_rejected
())
146
return
MAP_REJECT_PERM
;
147
else
if
(
accept_if_good_quality
())
148
return
MAP_REJECT_POTENTIAL
;
149
else
if
(
rejected
())
150
return
MAP_REJECT_TEMP
;
151
else
152
return
MAP_ACCEPT
;
153
}
154
155
BOOL8
perm_rejected
();
//Is char perm reject?
156
157
BOOL8
rejected
();
//Is char rejected?
158
159
BOOL8
accepted
() {
//Is char accepted?
160
return
!
rejected
();
161
}
162
163
//potential rej?
164
BOOL8
accept_if_good_quality
();
165
166
BOOL8
recoverable
() {
167
return
(
rejected
() && !
perm_rejected
());
168
}
169
170
void
setrej_tess_failure
();
//Tess generated blank
171
void
setrej_small_xht
();
//Small xht char/wd
172
void
setrej_edge_char
();
//Close to image edge
173
void
setrej_1Il_conflict
();
//Initial reject map
174
void
setrej_postNN_1Il
();
//1Il after NN
175
void
setrej_rej_cblob
();
//Insert duff blob
176
void
setrej_mm_reject
();
//Matrix matcher
177
//Odd repeated char
178
void
setrej_bad_repetition
();
179
void
setrej_poor_match
();
//Failed Rays heuristic
180
//TEMP reject_word
181
void
setrej_not_tess_accepted
();
182
//TEMP reject_word
183
void
setrej_contains_blanks
();
184
void
setrej_bad_permuter
();
//POTENTIAL reject_word
185
void
setrej_hyphen
();
//PostNN dubious hyph or .
186
void
setrej_dubious
();
//PostNN dubious limit
187
void
setrej_no_alphanums
();
//TEMP reject_word
188
void
setrej_mostly_rej
();
//TEMP reject_word
189
void
setrej_xht_fixup
();
//xht fixup
190
void
setrej_bad_quality
();
//TEMP reject_word
191
void
setrej_doc_rej
();
//TEMP reject_word
192
void
setrej_block_rej
();
//TEMP reject_word
193
void
setrej_row_rej
();
//TEMP reject_word
194
void
setrej_unlv_rej
();
//TEMP reject_word
195
void
setrej_nn_accept
();
//NN Flipped a char
196
void
setrej_hyphen_accept
();
//Good aspect ratio
197
void
setrej_mm_accept
();
//Matrix matcher
198
//Quality flip a char
199
void
setrej_quality_accept
();
200
//Accept all except blank
201
void
setrej_minimal_rej_accept
();
202
203
void
full_print
(FILE *fp);
204
};
205
206
class
REJMAP
207
{
208
REJ
*ptr;
//ptr to the chars
209
inT16
len;
//Number of chars
210
211
public
:
212
REJMAP
() {
//constructor
213
ptr =
NULL
;
214
len = 0;
215
}
216
217
REJMAP
(
//classwise copy
218
const
REJMAP
&rejmap);
219
220
REJMAP
&
operator=
(
//assign REJMAP
221
const
REJMAP
& source);
//from this
222
223
~REJMAP
() {
//destructor
224
if
(ptr !=
NULL
)
225
free_struct
(ptr, len *
sizeof
(
REJ
),
"REJ"
);
226
}
227
228
void
initialise
(
//Redefine map
229
inT16
length
);
230
231
REJ
&
operator[]
(
//access function
232
inT16
index)
const
//map index
233
{
234
ASSERT_HOST
(index < len);
235
return
ptr[index];
//no bounds checks
236
}
237
238
inT32
length
()
const
{
//map length
239
return
len;
240
}
241
242
inT16
accept_count
();
//How many accepted?
243
244
inT16
reject_count
() {
//How many rejects?
245
return
len -
accept_count
();
246
}
247
248
void
remove_pos
(
//Cut out an element
249
inT16
pos);
//element to remove
250
251
void
print
(FILE *fp);
252
253
void
full_print
(FILE *fp);
254
255
BOOL8
recoverable_rejects
();
//Any non perm rejs?
256
257
BOOL8
quality_recoverable_rejects
();
258
//Any potential rejs?
259
260
void
rej_word_small_xht
();
//Reject whole word
261
//Reject whole word
262
void
rej_word_tess_failure
();
263
void
rej_word_not_tess_accepted
();
264
//Reject whole word
265
//Reject whole word
266
void
rej_word_contains_blanks
();
267
//Reject whole word
268
void
rej_word_bad_permuter
();
269
void
rej_word_xht_fixup
();
//Reject whole word
270
//Reject whole word
271
void
rej_word_no_alphanums
();
272
void
rej_word_mostly_rej
();
//Reject whole word
273
void
rej_word_bad_quality
();
//Reject whole word
274
void
rej_word_doc_rej
();
//Reject whole word
275
void
rej_word_block_rej
();
//Reject whole word
276
void
rej_word_row_rej
();
//Reject whole word
277
};
278
#endif
mnt
data
src
tesseract-ocr
ccstruct
rejctmap.h
Generated on Thu Nov 1 2012 20:19:45 for Tesseract by
1.8.1