Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
cluster.h
Go to the documentation of this file.
1
/******************************************************************************
2
** Filename: cluster.h
3
** Purpose: Definition of feature space clustering routines
4
** Author: Dan Johnson
5
** History: 5/29/89, DSJ, Created.
6
**
7
** (c) Copyright Hewlett-Packard Company, 1988.
8
** Licensed under the Apache License, Version 2.0 (the "License");
9
** you may not use this file except in compliance with the License.
10
** You may obtain a copy of the License at
11
** http://www.apache.org/licenses/LICENSE-2.0
12
** Unless required by applicable law or agreed to in writing, software
13
** distributed under the License is distributed on an "AS IS" BASIS,
14
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
** See the License for the specific language governing permissions and
16
** limitations under the License.
17
******************************************************************************/
18
#ifndef CLUSTER_H
19
#define CLUSTER_H
20
21
#include "
kdtree.h
"
22
#include "
oldlist.h
"
23
24
/* Forward declaration only: this header refers to BUCKETS through pointers
 * (see CLUSTERER::bucket_cache); the full definition is not in this file. */
struct BUCKETS;
25
26
/* Smallest and largest bucket counts covered by the histogram cache.
 * CLUSTERER::bucket_cache has MAXBUCKETS + 1 - MINBUCKETS slots per
 * distribution, i.e. one slot for every count in [MINBUCKETS, MAXBUCKETS]. */
#define MINBUCKETS 5
#define MAXBUCKETS 39
28
29
/*----------------------------------------------------------------------
30
Types
31
----------------------------------------------------------------------*/
32
typedef
struct
sample
{
33
unsigned
Clustered
:1;
// TRUE if included in a higher cluster
34
unsigned
Prototype
:1;
// TRUE if cluster represented by a proto
35
unsigned
SampleCount
:30;
// number of samples in this cluster
36
struct
sample
*
Left
;
// ptr to left sub-cluster
37
struct
sample
*
Right
;
// ptr to right sub-cluster
38
inT32
CharID
;
// identifier of char sample came from
39
FLOAT32
Mean
[1];
// mean of cluster - SampleSize floats
40
}
CLUSTER
;
41
42
/* SAMPLE is a second name for CLUSTER: the same structure can be referred to
 * as either a sample or a cluster. */
typedef CLUSTER SAMPLE;
43
44
/**
 * Kind of prototype to build (selected through CLUSTERCONFIG::ProtoStyle).
 * Enumerators take the default values 0..3 in the order listed.
 */
typedef enum {
  spherical,
  elliptical,
  mixed,
  automatic
} PROTOSTYLE;
47
48
typedef
struct
{
// parameters to control clustering
49
PROTOSTYLE
ProtoStyle
;
// specifies types of protos to be made
50
FLOAT32
MinSamples
;
// min # of samples per proto - % of total
51
FLOAT32
MaxIllegal
;
// max percentage of samples in a cluster which have
52
// more than 1 feature in that cluster
53
FLOAT32
Independence
;
// desired independence between dimensions
54
FLOAT64
Confidence
;
// desired confidence in prototypes created
55
int
MagicSamples
;
// Ideal number of samples in a cluster.
56
}
CLUSTERCONFIG
;
57
58
/**
 * Distribution kinds. DISTRIBUTION_COUNT is not a distribution itself: it is
 * the trailing enumerator, equal to the number of kinds before it, and is used
 * as an array dimension (see CLUSTERER::bucket_cache).
 */
typedef enum {
  normal,
  uniform,
  D_random,
  DISTRIBUTION_COUNT
} DISTRIBUTION;
61
62
typedef
union
{
63
FLOAT32
Spherical
;
64
FLOAT32
*
Elliptical
;
65
}
FLOATUNION
;
66
67
typedef
struct
{
68
unsigned
Significant:1;
// TRUE if prototype is significant
69
unsigned
Merged:1;
// Merged after clustering so do not output
70
// but kept for display purposes. If it has no
71
// samples then it was actually merged.
72
// Otherwise it matched an already significant
73
// cluster.
74
unsigned
Style:2;
// spherical, elliptical, or mixed
75
unsigned
NumSamples:28;
// number of samples in the cluster
76
CLUSTER
*
Cluster
;
// ptr to cluster which made prototype
77
DISTRIBUTION
*
Distrib
;
// different distribution for each dimension
78
FLOAT32
*
Mean
;
// prototype mean
79
FLOAT32
TotalMagnitude
;
// total magnitude over all dimensions
80
FLOAT32
LogMagnitude
;
// log base e of TotalMagnitude
81
FLOATUNION
Variance
;
// prototype variance
82
FLOATUNION
Magnitude
;
// magnitude of density function
83
FLOATUNION
Weight
;
// weight of density function
84
}
PROTOTYPE
;
85
86
typedef
struct
{
87
inT16
SampleSize
;
// number of parameters per sample
88
PARAM_DESC
*
ParamDesc
;
// description of each parameter
89
inT32
NumberOfSamples
;
// total number of samples being clustered
90
KDTREE
*
KDTree
;
// for optimal nearest neighbor searching
91
CLUSTER
*
Root
;
// ptr to root cluster of cluster tree
92
LIST
ProtoList
;
// list of prototypes
93
inT32
NumChar
;
// # of characters represented by samples
94
// cache of reusable histograms by distribution type and number of buckets.
95
BUCKETS
* bucket_cache[
DISTRIBUTION_COUNT
][
MAXBUCKETS
+ 1 -
MINBUCKETS
];
96
}
CLUSTERER
;
97
98
typedef
struct
{
99
inT32
NumSamples
;
// number of samples in list
100
inT32
MaxNumSamples
;
// maximum size of list
101
SAMPLE
*Sample[1];
// array of ptrs to sample data structures
102
}
SAMPLELIST
;
103
104
// low level cluster tree analysis routines.
105
/* Sets search state S to NIL_LIST when C is NULL, otherwise to
 * push(NIL_LIST, C). The whole expansion is an expression whose value is the
 * value assigned to S. Note that C is evaluated twice when it is not NULL. */
#define InitSampleSearch(S,C) \
  ((S) = (((C) == NULL) ? NIL_LIST : push(NIL_LIST, (C))))
106
107
/*--------------------------------------------------------------------------
108
Public Function Prototypes
109
--------------------------------------------------------------------------*/
110
/**
 * Returns a pointer to a CLUSTERER.
 * The parameter names match the CLUSTERER fields SampleSize (number of
 * parameters per sample) and ParamDesc (description of each parameter).
 * NOTE(review): defined elsewhere; the result is presumably released with
 * FreeClusterer -- confirm against the definition.
 */
CLUSTERER *MakeClusterer(inT16 SampleSize, const PARAM_DESC ParamDesc[]);
111
112
/**
 * Returns a pointer to a SAMPLE associated with the given clusterer.
 * Feature is read-only to this function (const); CharID matches the
 * CLUSTER::CharID field (identifier of the char the sample came from).
 * NOTE(review): defined elsewhere; Feature presumably holds
 * Clusterer->SampleSize floats -- confirm against the definition.
 */
SAMPLE *MakeSample(CLUSTERER *Clusterer, const FLOAT32 *Feature, inT32 CharID);
113
114
/**
 * Takes a clusterer and a clustering configuration and returns a LIST.
 * NOTE(review): defined elsewhere; the returned list is presumably the list
 * of prototypes (cf. CLUSTERER::ProtoList) -- confirm against the definition.
 */
LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config);
115
116
/**
 * Takes a clusterer and returns nothing.
 * NOTE(review): by its name this releases the clusterer; exactly which owned
 * members are freed is not visible in this header.
 */
void FreeClusterer(CLUSTERER *Clusterer);
117
118
/**
 * Takes the address of a LIST and returns nothing.
 * NOTE(review): by its name this releases a prototype list; whether
 * *ProtoList is reset afterwards is not visible in this header.
 */
void FreeProtoList(LIST *ProtoList);
119
120
/**
 * Takes an untyped pointer; the original annotation indicates the argument is
 * really a PROTOTYPE *.
 * NOTE(review): the void * signature presumably lets it serve as a generic
 * per-element callback -- confirm against its callers.
 */
void FreePrototype(void *arg);  // PROTOTYPE *Prototype);
121
122
/**
 * Takes the address of a LIST used as search state and returns a CLUSTER *.
 * NOTE(review): presumably paired with InitSampleSearch, which initialises
 * such a LIST; the end-of-search return value is not visible in this header.
 */
CLUSTER *NextSample(LIST *SearchState);
123
124
/**
 * Returns a FLOAT32 for the given prototype and dimension index.
 * NOTE(review): by its name this is the prototype's mean in that dimension;
 * no range check on Dimension is visible from this header.
 */
FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension);
125
126
/**
 * Returns a FLOAT32 for the given prototype and dimension index.
 * NOTE(review): by its name this is the prototype's standard deviation in
 * that dimension; how it depends on Proto->Style is not visible here.
 */
FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension);
127
128
/**
 * Takes a count N, an array of parameter descriptions, two sample counts and
 * three float arrays; returns an inT32.
 * NOTE(review): the roles below are inferred from the names only -- N as the
 * number of dimensions, n1/n2 as the sample counts of the two clusters, m1/m2
 * as their means and m as the output mean. Confirm against the definition.
 */
inT32 MergeClusters(inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2,
                    FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]);
130
131
//--------------Global Data Definitions and Declarations---------------------------
132
// define errors that can be trapped
133
// Numeric error code. Where it is raised is not visible in this header; by
// its name it signals an attempt to cluster something already clustered.
#define ALREADYCLUSTERED 4000
134
#endif
mnt
data
src
tesseract-ocr
classify
cluster.h
Generated on Thu Nov 1 2012 20:19:46 for Tesseract by
1.8.1