Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
commontraining.h
Go to the documentation of this file.
1
// Copyright 2008 Google Inc. All Rights Reserved.
2
// Author: scharron@google.com (Samuel Charron)
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
// http://www.apache.org/licenses/LICENSE-2.0
8
// Unless required by applicable law or agreed to in writing, software
9
// distributed under the License is distributed on an "AS IS" BASIS,
10
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
// See the License for the specific language governing permissions and
12
// limitations under the License.
13
14
#ifndef TESSERACT_TRAINING_COMMONTRAINING_H__
15
#define TESSERACT_TRAINING_COMMONTRAINING_H__
16
17
#include "
oldlist.h
"
18
#include "
cluster.h
"
19
#include "
intproto.h
"
20
#include "
featdefs.h
"
21
22
// Macros to merge tesseract params with command-line flags.
23
// Flag helper macros. USE_STD_NAMESPACE selects the backing implementation:
//  - defined:     flags expand to INT_VAR / STRING_VAR (and the *_VAR_H
//                 extern forms) on a variable named FLAGS_<name>; these
//                 macros are presumably supplied by params.h.
//  - not defined: flags expand to DEFINE_int32 / DEFINE_string and the
//                 matching DECLARE_* macros, presumably supplied by
//                 base/commandlineflags.h.
// In both branches the caller-facing spelling is the same:
//   INT_PARAM_FLAG(name, default_value, help_text)
//   STRING_PARAM_FLAG(name, default_value, help_text)
//   DECLARE_INT_PARAM_FLAG(name) / DECLARE_STRING_PARAM_FLAG(name)
#ifdef USE_STD_NAMESPACE
#include "params.h"
# define INT_PARAM_FLAG(name, val, comment) \
    INT_VAR(FLAGS_##name, val, comment)
# define DECLARE_INT_PARAM_FLAG(name) extern INT_VAR_H(FLAGS_##name, 0, "")
# define STRING_PARAM_FLAG(name, val, comment) \
    STRING_VAR(FLAGS_##name, val, comment)
# define DECLARE_STRING_PARAM_FLAG(name) \
    extern STRING_VAR_H(FLAGS_##name, "", "")
// WARNING: object-like macro that rewrites EVERY later `c_str` token in the
// including translation unit to `string` (including std::string::c_str).
// NOTE(review): presumably so that FLAGS_x.c_str() works on the string-param
// type used in this branch -- confirm before including this header ahead of
// code that calls c_str() on std::string.
# define c_str string
#else
#include "base/commandlineflags.h"
# define INT_PARAM_FLAG(name, val, comment) \
    DEFINE_int32(name, val, comment)
# define DECLARE_INT_PARAM_FLAG(name) DECLARE_int32(name)
# define STRING_PARAM_FLAG(name, val, comment) \
    DEFINE_string(name, val, comment)
# define DECLARE_STRING_PARAM_FLAG(name) DECLARE_string(name)
#endif
42
43
// Forward declarations of tesseract classes. In the visible part of this
// header MasterTrainer and ShapeTable are used only as pointer/reference
// types; Classify is not referenced by any prototype shown here.
namespace tesseract {
class Classify;
class MasterTrainer;
class ShapeTable;
}  // namespace tesseract
48
50
// Globals ///////////////////////////////////////////////////////////////////
52
53
// Global feature-definition table; declared here, defined in another
// translation unit (not visible from this header).
extern FEATURE_DEFS_STRUCT feature_defs;
54
55
// Must be defined in the file that "implements" commonTraining facilities.
56
extern
CLUSTERCONFIG
Config
;
57
59
// Structs ///////////////////////////////////////////////////////////////////
61
typedef
struct
62
{
63
char
*
Label
;
64
int
SampleCount
;
65
int
font_sample_count
;
66
LIST
List
;
67
}
68
LABELEDLISTNODE
, *
LABELEDLIST
;
69
70
typedef
struct
71
{
72
char
*
Label
;
73
int
NumMerged[
MAX_NUM_PROTOS
];
74
CLASS_TYPE
Class
;
75
}
MERGE_CLASS_NODE
;
76
// Pointer alias for MERGE_CLASS_NODE; this is the type FindClass and
// NewLabeledClass return.
typedef MERGE_CLASS_NODE *MERGE_CLASS;
77
78
80
// Functions /////////////////////////////////////////////////////////////////
82
// Command-line handling entry point (definition not visible in this header).
// Both argc and argv are passed by pointer, so the implementation is able to
// modify the caller's values. NOTE(review): presumably it consumes the
// recognised flags -- confirm against the definition.
void ParseArguments(int* argc, char*** argv);
83
84
namespace
tesseract
{
85
// Helper loads shape table from the given file.
86
ShapeTable*
LoadShapeTable
(
const
STRING
& file_prefix);
87
// Helper to write the shape_table.
88
void
WriteShapeTable
(
const
STRING
& file_prefix,
const
ShapeTable& shape_table);
89
90
// Creates a MasterTraininer and loads the training data into it:
91
// Initializes feature_defs and IntegerFX.
92
// Loads the shape_table if shape_table != NULL.
93
// Loads initial unicharset from -U command-line option.
94
// If FLAGS_input_trainer is set, loads the majority of data from there, else:
95
// Loads font info from -F option.
96
// Loads xheights from -X option.
97
// Loads samples from .tr files in remaining command-line args.
98
// Deletes outliers and computes canonical samples.
99
// If FLAGS_output_trainer is set, saves the trainer for future use.
100
// Computes canonical and cloud features.
101
// If shape_table is not NULL, but failed to load, make a fake flat one,
102
// as shape clustering was not run.
103
MasterTrainer*
LoadTrainingData
(
int
argc,
const
char
*
const
* argv,
104
bool
replication,
105
ShapeTable** shape_table,
106
STRING
* file_prefix);
107
}
// namespace tesseract.
108
109
// Returns a C string derived from the given argument vector (definition not
// visible in this header). NOTE(review): presumably yields the next
// unprocessed filename argument on each call and a null pointer when none
// remain -- confirm against the definition.
const char* GetNextFilename(int argc, const char* const* argv);
110
111
// Takes a LIST and a label string and returns a LABELEDLIST (definition not
// visible in this header). NOTE(review): presumably returns the entry whose
// Label matches -- confirm. Label is a non-const char*, unlike FindClass,
// which takes const char*.
LABELEDLIST FindList(LIST List, char *Label);
114
115
// Returns a LABELEDLIST for the given label string (definition not visible
// in this header). NOTE(review): presumably allocates a fresh node that is
// later released with FreeLabeledList -- confirm.
LABELEDLIST NewLabeledList(const char *Label);
117
118
void
ReadTrainingSamples
(
const
FEATURE_DEFS_STRUCT
& feature_defs,
119
const
char
*feature_name,
int
max_samples,
120
UNICHARSET
* unicharset,
121
FILE* file,
LIST
* training_samples);
122
123
void
WriteTrainingSamples
(
124
const
FEATURE_DEFS_STRUCT
&FeatureDefs,
125
char
*Directory,
126
LIST
CharList,
127
const
char
*program_feature_type);
128
129
void
FreeTrainingSamples
(
130
LIST
CharList);
131
132
void
FreeLabeledList
(
133
LABELEDLIST LabeledList);
134
135
void
FreeLabeledClassList
(
136
LIST
ClassListList);
137
138
// Returns a CLUSTERER pointer built from one labeled sample list (definition
// not visible in this header).
//   FeatureDefs          - feature definitions, read-only.
//   CharSample           - labeled list supplying the samples.
//   program_feature_type - feature type name, read-only C string.
// NOTE(review): ownership of the returned CLUSTERER is not stated -- confirm.
CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs,
                              LABELEDLIST CharSample,
                              const char *program_feature_type);
142
143
// Takes a proto LIST plus two BOOL8 keep-flags and an int, and returns a
// LIST (definition not visible in this header).
// NOTE(review): presumably filters ProtoList by significance, keeping the
// kinds selected by KeepSigProtos / KeepInsigProtos; the meaning of N is not
// shown here -- confirm against the definition.
LIST RemoveInsignificantProtos(LIST ProtoList,
                               BOOL8 KeepSigProtos,
                               BOOL8 KeepInsigProtos,
                               int N);
148
149
void
CleanUpUnusedData
(
150
LIST
ProtoList);
151
152
void
MergeInsignificantProtos
(
153
LIST
ProtoList,
154
const
char
*label,
155
CLUSTERER
*Clusterer,
156
CLUSTERCONFIG
*Config);
157
158
// Takes a LIST and a read-only label string and returns a MERGE_CLASS
// (definition not visible in this header). NOTE(review): presumably returns
// the entry whose Label matches -- confirm, including the not-found result.
MERGE_CLASS FindClass(LIST List, const char *Label);
161
162
// Returns a MERGE_CLASS for the given label string (definition not visible
// in this header). NOTE(review): presumably allocates a new node --
// confirm ownership against the definition.
MERGE_CLASS NewLabeledClass(const char *Label);
164
165
void
FreeTrainingSamples
(
166
LIST
CharList);
167
168
// Returns a CLASS_STRUCT pointer built from a unicharset and a LIST of
// labeled classes (definition not visible in this header).
// NOTE(review): presumably returns an array indexed by unichar id --
// confirm layout and ownership against the definition.
CLASS_STRUCT *SetUpForFloat2Int(const UNICHARSET& unicharset,
                                LIST LabeledClassList);
170
171
// Operates in place on a float array (definition not visible in this
// header). NOTE(review): the expected number of elements and the
// normalisation applied are not shown here -- confirm before calling.
void Normalize(float *Values);
173
174
void
FreeNormProtoList
(
175
LIST
CharList);
176
177
void
AddToNormProtosList
(
178
LIST
* NormProtoList,
179
LIST
ProtoList,
180
char
*CharName);
181
182
int
NumberOfProtos
(
183
LIST
ProtoList,
184
BOOL8
CountSigProtos,
185
BOOL8
CountInsigProtos);
186
187
188
// Takes no arguments and returns nothing (definition not visible in this
// header). NOTE(review): presumably allocates global norm-proto storage --
// confirm against the definition.
void allocNormProtos();
189
#endif // TESSERACT_TRAINING_COMMONTRAINING_H__
mnt
data
src
tesseract-ocr
training
commontraining.h
Generated on Thu Nov 1 2012 20:19:51 for Tesseract by Doxygen 1.8.1