|
__init__(self,
defaulttokenlist=None,
whitespacechars=' \t\r\n',
includewhitespacetokens=0) |
source code
|
|
|
stringtokenize(self,
text)
makes strings in text into tokens... |
source code
|
|
|
keeptogether(self,
text)
checks whether a token should be kept together |
source code
|
|
|
isstringtoken(self,
text)
checks whether a token is a string token |
source code
|
|
|
separatetokens(self,
text,
tokenlist=None)
this separates out the tokens in tokenlist from whitespace, etc. |
source code
|
|
|
removewhitespace(self,
text)
this removes whitespace but lets it separate things out into separate
tokens |
source code
|
|
|
applytokenizer(self,
inputlist,
tokenizer)
apply a tokenizer to a set of text, flattening the result |
source code
|
|
|
applytokenizers(self,
inputlist,
tokenizers)
apply a set of tokenizers to a set of text, flattening each time |
source code
|
|
|
tokenize(self,
source,
tokenizers=None)
tokenize the text string with the standard tokenizers |
source code
|
|
|
findtokenpos(self,
tokennum)
finds the position of the given token in the text |
source code
|
|
|
getlinepos(self,
tokenpos)
finds the line and character position of the given character offset |
source code
|
|
|
raiseerror(self,
message,
tokennum)
raises a ParserError |
source code
|
|