Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
adaptive.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptive.c
3  ** Purpose: Adaptive matcher.
4  ** Author: Dan Johnson
5  ** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 /*----------------------------------------------------------------------------
20  Include Files and Type Defines
21 ----------------------------------------------------------------------------*/
22 #include "adaptive.h"
23 #include "emalloc.h"
24 #include "freelist.h"
25 #include "globals.h"
26 #include "classify.h"
27 
28 #ifdef __UNIX__
29 #include <assert.h>
30 #endif
31 #include <stdio.h>
32 
33 /*----------------------------------------------------------------------------
34  Public Code
35 ----------------------------------------------------------------------------*/
36 /*---------------------------------------------------------------------------*/
50  ADAPT_CLASS Class,
51  CLASS_ID ClassId) {
52  INT_CLASS IntClass;
53 
54  assert (Templates != NULL);
55  assert (Class != NULL);
56  assert (LegalClassId (ClassId));
57  assert (UnusedClassIdIn (Templates->Templates, ClassId));
58  assert (Class->NumPermConfigs == 0);
59 
60  IntClass = NewIntClass (1, 1);
61  AddIntClass (Templates->Templates, ClassId, IntClass);
62 
63  assert (Templates->Class[ClassId] == NULL);
64  Templates->Class[ClassId] = Class;
65 
66 } /* AddAdaptedClass */
67 
68 
69 /*---------------------------------------------------------------------------*/
81  assert (Config != NULL);
82 
84  FreeBitVector (Config->Protos);
85  free_struct (Config, sizeof (TEMP_CONFIG_STRUCT), "TEMP_CONFIG_STRUCT");
86 
87 } /* FreeTempConfig */
88 
89 /*---------------------------------------------------------------------------*/
90 void FreeTempProto(void *arg) {
91  PROTO proto = (PROTO) arg;
92 
93  free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT");
94 }
95 
97  assert(Config != NULL);
98  Efree(Config->Ambigs);
99  free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT");
100 }
101 
102 /*---------------------------------------------------------------------------*/
114  ADAPT_CLASS Class;
115  int i;
116 
117  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
118  Class->NumPermConfigs = 0;
119  Class->MaxNumTimesSeen = 0;
120  Class->TempProtos = NIL_LIST;
121 
126 
127  for (i = 0; i < MAX_NUM_CONFIGS; i++)
128  TempConfigFor (Class, i) = NULL;
129 
130  return (Class);
131 
132 } /* NewAdaptedClass */
133 
134 
135 /*-------------------------------------------------------------------------*/
136 void free_adapted_class(ADAPT_CLASS adapt_class) {
137  int i;
138 
139  for (i = 0; i < MAX_NUM_CONFIGS; i++) {
140  if (ConfigIsPermanent (adapt_class, i)
141  && PermConfigFor (adapt_class, i) != NULL)
142  FreePermConfig (PermConfigFor (adapt_class, i));
143  else if (!ConfigIsPermanent (adapt_class, i)
144  && TempConfigFor (adapt_class, i) != NULL)
145  FreeTempConfig (TempConfigFor (adapt_class, i));
146  }
147  FreeBitVector (adapt_class->PermProtos);
148  FreeBitVector (adapt_class->PermConfigs);
149  destroy_nodes (adapt_class->TempProtos, FreeTempProto);
150  Efree(adapt_class);
151 }
152 
153 
154 /*---------------------------------------------------------------------------*/
155 namespace tesseract {
168  ADAPT_TEMPLATES Templates;
169  int i;
170 
171  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
172 
173  Templates->Templates = NewIntTemplates ();
174  Templates->NumPermClasses = 0;
175  Templates->NumNonEmptyClasses = 0;
176 
177  /* Insert an empty class for each unichar id in unicharset */
178  for (i = 0; i < MAX_NUM_CLASSES; i++) {
179  Templates->Class[i] = NULL;
180  if (InitFromUnicharset && i < unicharset.size()) {
181  AddAdaptedClass(Templates, NewAdaptedClass(), i);
182  }
183  }
184 
185  return (Templates);
186 
187 } /* NewAdaptedTemplates */
188 
189 // Returns FontinfoId of the given config of the given adapted class.
191  return (ConfigIsPermanent(Class, ConfigId) ?
192  PermConfigFor(Class, ConfigId)->FontinfoId :
193  TempConfigFor(Class, ConfigId)->FontinfoId);
194 }
195 
196 } // namespace tesseract
197 
198 /*----------------------------------------------------------------------------*/
200 
201  if (templates != NULL) {
202  int i;
203  for (i = 0; i < (templates->Templates)->NumClasses; i++)
204  free_adapted_class (templates->Class[i]);
205  free_int_templates (templates->Templates);
206  Efree(templates);
207  }
208 }
209 
210 
211 /*---------------------------------------------------------------------------*/
223 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
225  int NumProtos = MaxProtoId + 1;
226 
227  Config =
229  "TEMP_CONFIG_STRUCT");
230  Config->Protos = NewBitVector (NumProtos);
231 
232  Config->NumTimesSeen = 1;
233  Config->MaxProtoId = MaxProtoId;
234  Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
235  Config->ContextsSeen = NIL_LIST;
236  zero_all_bits (Config->Protos, Config->ProtoVectorSize);
237  Config->FontinfoId = FontinfoId;
238 
239  return (Config);
240 
241 } /* NewTempConfig */
242 
243 
244 /*---------------------------------------------------------------------------*/
255  return ((TEMP_PROTO)
256  alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"));
257 } /* NewTempProto */
258 
259 
260 /*---------------------------------------------------------------------------*/
261 namespace tesseract {
274  int i;
275  INT_CLASS IClass;
276  ADAPT_CLASS AClass;
277 
278  #ifndef SECURE_NAMES
279  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
280  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
281  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
282  fprintf (File, " Id NC NPC NP NPP\n");
283  fprintf (File, "------------------------\n");
284 
285  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
286  IClass = Templates->Templates->Class[i];
287  AClass = Templates->Class[i];
288  if (!IsEmptyAdaptedClass (AClass)) {
289  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
291  IClass->NumConfigs, AClass->NumPermConfigs,
292  IClass->NumProtos,
293  IClass->NumProtos - count (AClass->TempProtos));
294  }
295  }
296  #endif
297  fprintf (File, "\n");
298 
299 } /* PrintAdaptedTemplates */
300 } // namespace tesseract
301 
302 
303 /*---------------------------------------------------------------------------*/
316  int NumTempProtos;
317  int NumConfigs;
318  int i;
319  ADAPT_CLASS Class;
320  TEMP_PROTO TempProto;
321 
322  /* first read high level adapted class structure */
323  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
324  fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
325 
326  /* then read in the definitions of the permanent protos and configs */
329  fread ((char *) Class->PermProtos, sizeof (uinT32),
331  fread ((char *) Class->PermConfigs, sizeof (uinT32),
333 
334  /* then read in the list of temporary protos */
335  fread ((char *) &NumTempProtos, sizeof (int), 1, File);
336  Class->TempProtos = NIL_LIST;
337  for (i = 0; i < NumTempProtos; i++) {
338  TempProto =
340  "TEMP_PROTO_STRUCT");
341  fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File);
342  Class->TempProtos = push_last (Class->TempProtos, TempProto);
343  }
344 
345  /* then read in the adapted configs */
346  fread ((char *) &NumConfigs, sizeof (int), 1, File);
347  for (i = 0; i < NumConfigs; i++)
348  if (test_bit (Class->PermConfigs, i))
349  Class->Config[i].Perm = ReadPermConfig (File);
350  else
351  Class->Config[i].Temp = ReadTempConfig (File);
352 
353  return (Class);
354 
355 } /* ReadAdaptedClass */
356 
357 
358 /*---------------------------------------------------------------------------*/
359 namespace tesseract {
372  int i;
373  ADAPT_TEMPLATES Templates;
374 
375  /* first read the high level adaptive template struct */
376  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
377  fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
378 
379  /* then read in the basic integer templates */
380  Templates->Templates = ReadIntTemplates (File);
381 
382  /* then read in the adaptive info for each class */
383  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
384  Templates->Class[i] = ReadAdaptedClass (File);
385  }
386  return (Templates);
387 
388 } /* ReadAdaptedTemplates */
389 } // namespace tesseract
390 
391 
392 /*---------------------------------------------------------------------------*/
406  "PERM_CONFIG_STRUCT");
407  uinT8 NumAmbigs;
408  fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
409  Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1));
410  fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
411  Config->Ambigs[NumAmbigs] = -1;
412  fread(&(Config->FontinfoId), sizeof(int), 1, File);
413 
414  return (Config);
415 
416 } /* ReadPermConfig */
417 
418 
419 /*---------------------------------------------------------------------------*/
433 
434  Config =
436  "TEMP_CONFIG_STRUCT");
437  fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
438 
439  Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
440  fread ((char *) Config->Protos, sizeof (uinT32),
441  Config->ProtoVectorSize, File);
442 
443  return (Config);
444 
445 } /* ReadTempConfig */
446 
447 
448 /*---------------------------------------------------------------------------*/
461 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
462  int NumTempProtos;
463  LIST TempProtos;
464  int i;
465 
466  /* first write high level adapted class structure */
467  fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
468 
469  /* then write out the definitions of the permanent protos and configs */
470  fwrite ((char *) Class->PermProtos, sizeof (uinT32),
472  fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
474 
475  /* then write out the list of temporary protos */
476  NumTempProtos = count (Class->TempProtos);
477  fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
478  TempProtos = Class->TempProtos;
479  iterate (TempProtos) {
480  void* proto = first_node(TempProtos);
481  fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
482  }
483 
484  /* then write out the adapted configs */
485  fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
486  for (i = 0; i < NumConfigs; i++)
487  if (test_bit (Class->PermConfigs, i))
488  WritePermConfig (File, Class->Config[i].Perm);
489  else
490  WriteTempConfig (File, Class->Config[i].Temp);
491 
492 } /* WriteAdaptedClass */
493 
494 
495 /*---------------------------------------------------------------------------*/
496 namespace tesseract {
508  int i;
509 
510  /* first write the high level adaptive template struct */
511  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
512 
513  /* then write out the basic integer templates */
514  WriteIntTemplates (File, Templates->Templates, unicharset);
515 
516  /* then write out the adaptive info for each class */
517  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
518  WriteAdaptedClass (File, Templates->Class[i],
519  Templates->Templates->Class[i]->NumConfigs);
520  }
521 } /* WriteAdaptedTemplates */
522 } // namespace tesseract
523 
524 
525 /*---------------------------------------------------------------------------*/
537 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
538  uinT8 NumAmbigs = 0;
539 
540  assert (Config != NULL);
541  while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
542 
543  fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File);
544  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
545  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
546 } /* WritePermConfig */
547 
548 
549 /*---------------------------------------------------------------------------*/
561 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
562  assert (Config != NULL);
563  /* contexts not yet implemented */
564  assert (Config->ContextsSeen == NULL);
565 
566  fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
567  fwrite ((char *) Config->Protos, sizeof (uinT32),
568  Config->ProtoVectorSize, File);
569 
570 } /* WriteTempConfig */