- Inherits from:
- Object
- Declared in:
- DTokenizer.h
Object
|
+---DTokenizer
Class Description
The class DTokenizer implements a (simple) lexical scanner. The
tokenizer scans a text stream for whitespace, strings, keywords,
comments, operators, numbers and three extra token formats that are left for subclasses to implement. The
scanner scans the text on a line basis.
The tokenizer does not use regular expressions; instead, every kind of token
has its own method that scans for it. Each of these methods can be overridden
to provide a specific implementation of the token scanner.
There are two methods: nextToken and checkToken. The first method checks
for a token and if there is a match, the scanner location is moved to
the next location in the source. The second method only returns
the result of the match; the client must then call the method
'next' to move the scanner location.
The token methods return one of the following: DTK_UNKNOWN, DTK_EOFF,
DTK_EXTRA1, DTK_EXTRA2, DTK_EXTRA3, DTK_WHITESPACE, DTK_COMMENT,
DTK_NUMBER, DTK_KEYWORD, DTK_STRING, DTK_OPERATOR.
- Example:
#include <stdio.h>
#include <ctype.h>
#include "ofc/DTokenizer.h"
#include "ofc/DFile.h"
// MyTokenizer specialises DTokenizer by overriding two of its scanner
// methods: 'comment' and 'extraToken1'.
@interface MyTokenizer : DTokenizer
{
@private
}
- (DText *) comment :(const char *) text;     // Override the comment checker
- (DText *) extraToken1 :(const char *) text; // Override the extra token checker
@end

@implementation MyTokenizer

//
// Override the default comment checker: a comment starts with '//'.
//
// @param text     the text to be scanned
//
// @return a new DText holding every character of 'text' up to EOS, or
//         nil if 'text' does not start with '//'
//
- (DText *) comment :(const char *) text
{
  DText *scanned = nil;

  if ((*text == '/') && (*(text+1) == '/'))
  {
    scanned = [DText new];

    // Everything up to the end of the supplied text belongs to the comment
    while (*text != EOS)
    {
      [scanned push :*text++];
    }
  }

  return scanned;
}

//
// Override the extra token checker for the scanning of directives: #...
//
// @param text     the text to be scanned
//
// @return a new DText holding the characters of 'text' up to (not
//         including) the first white space character or EOS, or nil if
//         'text' does not start with '#'
//
- (DText *) extraToken1 :(const char *) text
{
  DText *scanned = nil;

  if (*text == '#')
  {
    scanned = [DText new];

    // isspace() is only defined for values representable as unsigned char
    // (or EOF); a plain char can be negative, so convert it first.
    while ((!isspace((unsigned char) *text)) && (*text != EOS))
    {
      [scanned push :*text++];
    }
  }

  return scanned;
}

@end
//
// Example driver: tokenize the file "example.tok" with MyTokenizer and
// print every token together with the text that was scanned for it.
//
// @return always 0
//
int main(int argc, char *argv[])
{
  MyTokenizer *srce = [MyTokenizer new];
  DFile       *file = [DFile new];

  if ([file open :"example.tok" :"r"])           // Open the file
  {
    if ([srce source :file :"example.tok"])      // Give the file to the tokenizer
    {
      [srce skipWhiteSpace :YES];                // Skip whitespace

      while (![srce isEof])                      // Do for the whole file
      {
        int         token = [srce nextToken];    // Get the next token and ..
        const char *text  = [srce text];         // .. the text scanned for it

        // 'text' is documented to return NULL when there is no scanned
        // text; passing NULL to a %s conversion is undefined behaviour.
        if (text == NULL)
        {
          text = "";
        }

        switch (token)                           // .. print the tokenized info
        {
          case DTK_EXTRA1   : printf("Directive: %s\n", text); break;
          case DTK_COMMENT  : printf("Comment : %s\n", text); break;
          case DTK_NUMBER   : printf("Number : %s\n", text); break;
          case DTK_KEYWORD  : printf("Keyword : %s\n", text); break;
          case DTK_STRING   : printf("String : %s\n", text); break;
          case DTK_OPERATOR : printf("Operator : %s\n", text); break;
          default           : printf("Unknown : %s\n", text); break;
        }
      }
    }
    else
      printf("File \"example.tok\" could not be used by tokenizer.\n");

    [file close];
  }
  else
    printf("File \"example.tok\" could not be opened:%d.\n", [file error]);

  [file free];                                   // Cleanup
  [srce free];

  return 0;
}
- Last modified:
- 07-Aug-2008 (DTokenizer.h)
Instance Variables
- private DList *_sources
- the stack with the sources
- private DSource *_source
- the source for the tokenizer
- private DText *_text
- the last scanned text
- private BOOL _eoff
- is end of file reached ?
- private unsigned _scanned
- the length of the scanned text
- private BOOL _skipWhiteSpace
- should whitespace be skipped ? (def. YES)
- Constructors
- - (DTokenizer *) init
- Initialise a tokenizer
- Returns:
- the object
- - (DTokenizer *) init :(id <DTextReadable>) source :(const char *) name
- Initialise a tokenizer with a file
- Parameters:
- source - the source
name - the name for the source
- Returns:
- the object
- Copy related methods
- - shallowCopy
- Do a shallow copy of the object (not implemented)
- Returns:
- the object
- Deconstructor
- - free
- Free the tokenizer (but *NOT* the TextReadables)
- Returns:
- the object
- Member methods
- - (int) columnNumber
- Get the current column number
- Returns:
- the current column number
- - (BOOL) isEof
- Is the end of file reached ?
- Returns:
- is it ?
- - (int) lineNumber
- Get the current line number
- Returns:
- the current line number
- - (const char *) name
- Get the name of the current file
- Returns:
- the name of the file (or NULL)
- - (BOOL) skipWhiteSpace
- Return whether white space is to be skipped
- Returns:
- is it ?
- - (DTokenizer *) skipWhiteSpace :(BOOL) skip
- Set whether white space should be skipped
- Parameters:
- skip - should white space be skipped
- Returns:
- the object
- - (const char *) text
- Get the last scanned text
- Returns:
- the last scanned text (or NULL)
- Basic scanner methods (to be overridden)
- - (DText *) comment :(const char *) text
- Check for a comment (starts with '#' and runs until the end of the line)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- - (DText *) extraToken1 :(const char *) text
- Check for extra token 1 (to be overridden)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- - (DText *) extraToken2 :(const char *) text
- Check for extra token 2 (to be overridden)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- - (DText *) extraToken3 :(const char *) text
- Check for extra token 3 (to be overridden)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- - (DText *) keyword :(const char *) text
- Check for a keyword ([A-Za-z_][A-Za-z0-9_]*)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- - (DText *) number :(const char *) text
- Check for a number (isdigit)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- - (DText *) operator :(const char *) text
- Check for an operator (ispunct)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- - (DText *) string :(const char *) text
- Check for a string (".." or '..', " and ' allowed)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- - (DText *) whiteSpace :(const char *) text
- Check for white space (based on isspace)
- Parameters:
- text - the text to be scanned
- Returns:
- the scanned text (or nil)
- Source methods
- - (BOOL) popSource
- Pop a source from the sources stack
- Returns:
- is a source popped from the sources stack ?
- - (BOOL) source :(id <DTextReadable>) source :(const char *) name
- Start using a source (an already open source is pushed on the sources stack)
- Parameters:
- source - the source
name - the name for the source
- Returns:
- success
- Scanner methods
- - (int) checkToken
- Check the source for the next token (white space is not skipped)
- Returns:
- the found token
- - (DTokenizer *) next
- Move the scanned indicator after a checkToken method
- Returns:
- the object
- - (DTokenizer *) next :(unsigned) positions
- Move the scanned indicator a number of positions (private)
- Parameters:
- positions - the number of positions to move
- Returns:
- the object
- - (int) nextToken
- Scan the source for the next token (white space is skipped if
this is active)
- Returns:
- the next token
- Error methods
- - (DTokenizer *) error :(const char *) msg
- Generate an error on stderr for the current file and line
- Parameters:
- msg - the error message
- Returns:
- the object
generated 06-Sep-2008 by ObjcDoc 3.0.0