Class DTokenizer

Inherits from:
Object
Declared in:
DTokenizer.h

Class Hierarchy

    Object
      |
      +---DTokenizer

Class Description

The class DTokenizer implements a (simple) lexical scanner. The tokenizer scans a text stream for whitespace, strings, keywords, comments, operators, numbers and three extra token formats that are left to be implemented by a subclass. The scanner processes the text line by line. The tokenizer does not use regular expressions; instead, it uses a separate method to scan for each kind of token. Each of these methods can be overridden to customise the scanner for a specific token format. There are two methods to request a token: nextToken and checkToken. The first method checks for a token and, if there is a match, moves the scanner location to the next location in the source. The second method only returns the result of the match; the client must call the service 'next' to move the scanner location. The token methods return one of the following: DTK_UNKNOWN, DTK_EOFF, DTK_EXTRA1, DTK_EXTRA2, DTK_EXTRA3, DTK_WHITESPACE, DTK_COMMENT, DTK_NUMBER, DTK_KEYWORD, DTK_STRING, DTK_OPERATOR.

Example:
#include <stdio.h>
#include <ctype.h>
#include "ofc/DTokenizer.h"
#include "ofc/DFile.h"

//
// MyTokenizer is a DTokenizer that replaces two of the token scanners:
// the comment scanner and the first extra token scanner.
//
@interface MyTokenizer : DTokenizer

// Replacement for the default comment scanner
- (DText *) comment :(const char *) text;

// Replacement for the first extra token scanner
- (DText *) extraToken1 :(const char *) text;

@end

@implementation MyTokenizer

//
// Scan for a line comment, i.e. text that starts with '//'.
// Overrides the default comment checker.
//
// @param text     the text to be scanned
//
// @return a new DText with all characters from the '//' up to EOS, or
//         nil if the text does not start with '//'
//
- (DText *) comment :(const char *) text
{
  DText *scanned = nil;

  // text[1] is only inspected after text[0] matched, so an empty text is
  // never read past its first character.
  if ((text[0] == '/') && (text[1] == '/'))
  {
    scanned = [DText new];

    while (*text != EOS)                      // the comment runs up to EOS
    {
      [scanned push :*text++];
    }
  }
  return scanned;
}

//
// Scan for a directive, i.e. text that starts with '#'.
// Overrides the extra token checker.
//
// @param text     the text to be scanned
//
// @return a new DText with all characters from the '#' up to (excluding)
//         the first whitespace character or EOS, or nil if the text does
//         not start with '#'
//
- (DText *) extraToken1 :(const char *) text
{
  DText *scanned = nil;

  if (*text == '#')
  {
    scanned = [DText new];

    // The cast is required: passing a negative char value (e.g. a byte
    // >= 0x80 on platforms where char is signed) to isspace is undefined.
    while ((*text != EOS) && (!isspace((unsigned char) *text)))
    {
      [scanned push :*text++];
    }
  }
  return scanned;
}

@end


//
// Tokenize the file "example.tok" with MyTokenizer and print every
// scanned token together with its kind.
//
// @param argc     the number of command line arguments (not used)
// @param argv     the command line arguments (not used)
//
// @return 0 if the file was tokenized, 1 if the file could not be opened
//         or could not be used by the tokenizer
//
int main(int argc, char *argv[])
{
  MyTokenizer *srce   = [MyTokenizer new];
  DFile       *file   = [DFile       new];
  int          status = 0;

  if ([file open :"example.tok" :"r"]) // Open the file
  {
    if ([srce source :file :"example.tok"]) // Give the file to the tokenizer
    {
      [srce skipWhiteSpace :YES];      // Skip whitespace

      while (![srce isEof])            // Do for the whole file
      {
        int token = [srce nextToken];  // Get the next token and ..

        switch (token)                 // .. print the tokenized info
        {
          // DTK_EOFF is one of the documented token results; it is not an
          // unknown token, so nothing is printed for it.
          case DTK_EOFF      : break;
          case DTK_EXTRA1    : printf("Directive: %s\n", [srce text]); break;
          case DTK_COMMENT   : printf("Comment  : %s\n", [srce text]); break;
          case DTK_NUMBER    : printf("Number   : %s\n", [srce text]); break;
          case DTK_KEYWORD   : printf("Keyword  : %s\n", [srce text]); break;
          case DTK_STRING    : printf("String   : %s\n", [srce text]); break;
          case DTK_OPERATOR  : printf("Operator : %s\n", [srce text]); break;
          default:             printf("Unknown  : %s\n", [srce text]); break;
        }
      }
    }
    else
    {
      printf("File \"example.tok\" could not be used by tokenizer.\n");

      status = 1;                      // Report the failure to the caller
    }

    [file close];
  }
  else
  {
    printf("File \"example.tok\" could not be opened:%d.\n", [file error]);

    status = 1;                        // Report the failure to the caller
  }

  [file free];                         // Cleanup
  [srce free];

  return status;
}
Last modified:
07-Aug-2008 (DTokenizer.h)

Instance Variables

private DList *_sources
the stack with the sources
private DSource *_source
the source for the tokenizer
private DText *_text
the last scanned text
private BOOL _eoff
has the end of file been reached?
private unsigned _scanned
the length of the scanned text
private BOOL _skipWhiteSpace
should whitespace be skipped? (default: YES)

Method Index


generated 06-Sep-2008 by ObjcDoc 3.0.0