Package pyparsing :: Module pyparsing
[frames | no frames]

Source Code for Module pyparsing.pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2010  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24  #from __future__ import generators 
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.5" 
  62  __versionTime__ = "12 Aug 2010 03:56" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy 
  68  import sys 
  69  import warnings 
  70  import re 
  71  import sre_constants 
  72  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  73   
  74  __all__ = [ 
  75  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  76  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  77  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  78  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  79  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  80  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  81  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  82  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  83  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  84  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 
  85  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  86  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  87  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  88  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  89  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  90  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  91  'indentedBlock', 'originalTextFor', 
  92  ] 
  93   
  94  """ 
  95  Detect if we are running version 3.X and make appropriate changes 
  96  Robert A. Clark 
  97  """ 
  98  _PY3K = sys.version_info[0] > 2 
  99  if _PY3K: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104      _str2dict = set 
 105      alphas = string.ascii_lowercase + string.ascii_uppercase 
 106  else: 
 107      _MAX_INT = sys.maxint 
 108      range = xrange 
 109   
110 - def _ustr(obj):
111 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 112 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 113 then < returns the unicode object | encodes it with the default encoding | ... >. 114 """ 115 if isinstance(obj,unicode): 116 return obj 117 118 try: 119 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 120 # it won't break any existing code. 121 return str(obj) 122 123 except UnicodeEncodeError: 124 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 125 # state that "The return value must be a string object". However, does a 126 # unicode object (being a subclass of basestring) count as a "string 127 # object"? 128 # If so, then return a unicode object: 129 return unicode(obj)
130 # Else encode it... but how? There are many choices... :) 131 # Replace unprintables with escape codes? 132 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 133 # Replace unprintables with question marks? 134 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 135 # ... 136
137 - def _str2dict(strg):
138 return dict( [(c,0) for c in strg] )
139 140 alphas = string.lowercase + string.uppercase 141 142 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 143 singleArgBuiltins = [] 144 import __builtin__ 145 for fname in "sum len enumerate sorted reversed list tuple set any all".split(): 146 try: 147 singleArgBuiltins.append(getattr(__builtin__,fname)) 148 except AttributeError: 149 continue 150
151 -def _xml_escape(data):
152 """Escape &, <, >, ", ', etc. in a string of data.""" 153 154 # ampersand must be replaced first 155 from_symbols = '&><"\'' 156 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] 157 for from_,to_ in zip(from_symbols, to_symbols): 158 data = data.replace(from_, to_) 159 return data
160
161 -class _Constants(object):
162 pass

# Character-set constants built from the string module and from alphas.
nums = string.digits                    # decimal digits
hexnums = nums + "ABCDEFabcdef"         # hexadecimal digits, either case
alphanums = alphas + nums               # letters followed by digits
_bslash = chr(92)                       # the backslash character
# every character of string.printable that is not whitespace
printables = "".join( [ c for c in string.printable
                          if c not in string.whitespace ] )

class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

       Instance attributes:
        - loc - character offset at which the exception occurred
        - msg - the error message
        - pstr - the string being parsed ("" when only a message was given)
        - parserElement - the element passed as C{elem}, or None
       Computed attributes (see L{__getattr__}): lineno, col/column, line.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """If C{msg} is omitted, the first argument is taken to be the
           message and the parsed string is recorded as empty."""
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
           C{column} is accepted as an alias for C{col}; any other name
           raises AttributeError.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )
    def __repr__( self ):
        return _ustr(self)
    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()
    def __dir__(self):
        """Names reported by dir(): the stored attributes, the computed
           attributes served by __getattr__, and the public methods."""
        # "markInputline" must match the method's actual spelling, and
        # "column" is listed because __getattr__ serves it as well
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
217
class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match the input.

       Attributes available by name:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """
226
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
231
class ParseSyntaxException(ParseFatalException):
    """Same as C{ParseFatalException}, but raised internally when an
       C{ErrorStop} ('-' operator) signals that parsing must stop at once
       because an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        """Build this exception from the fields of the exception C{pe}."""
        base_init = super(ParseSyntaxException, self).__init__
        base_init(pe.pstr, pe.loc, pe.msg, pe.parserElement)

239 240 #~ class ReparseException(ParseBaseException): 241 #~ """Experimental class - parse actions can raise this exception to cause 242 #~ pyparsing to reparse the input string: 243 #~ - with a modified input string, and/or 244 #~ - with a modified start location 245 #~ Set the values of the ReparseException in the constructor, and raise the 246 #~ exception in a parse action to cause pyparsing to use the new string/location. 247 #~ Setting the values as None causes no change to be made. 248 #~ """ 249 #~ def __init_( self, newstring, restartLoc ): 250 #~ self.newParseText = newstring 251 #~ self.reparseLoc = restartLoc 252
class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        """Record the sequence of parse elements that forms the suspected
           recursion; it is kept in C{parseElementTrace}."""
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple so that a trace which is itself a
        # tuple is formatted as one value rather than unpacked by '%'
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)

260
261 -class _ParseResultsWithOffset(object):
262 - def __init__(self,p1,p2):
263 self.tup = (p1,p2)
264 - def __getitem__(self,i):
265 return self.tup[i]
266 - def __repr__(self):
267 return repr(self.tup)
268 - def setOffset(self,i):
269 self.tup = (self.tup[0],i)
270
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})

       Internally the tokens are kept in a list, and the named results in a
       dict mapping each name to a list of (value, position) pairs.
       """
    #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # re-wrapping an existing ParseResults returns that same object;
        # the __doinit flag tells __init__ whether state must be set up
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Wrap C{toklist} (a list is copied, anything else becomes a
           one-element list) and, if C{name} is given, register the tokens
           under that results name.  C{modal=False} marks the name as
           cumulative, so that lookups return all values stored for it."""
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # modal name: report only the most recently stored value
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # nested results keep only a weak reference back to their parent
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results; a name that has no
           stored result yields the empty string."""
        if name in self.__tokdict:
            if name not in self.__accumNames:
                return self.__tokdict[name][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[name] ])
        else:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # a negative position maps to the join point; written out
                # as an if-statement because the and/or idiom returned the
                # unshifted negative value whenever offset was 0
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # lets the sum() builtin, which starts from 0, merge results
            return self.copy()
        # tell Python the operation is unsupported so that it raises
        # TypeError instead of the expression silently evaluating to None
        return NotImplemented

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens;
           nested C{ParseResults} become nested lists, and all other tokens
           are included unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                        for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list() is required: on Python 3 values()/keys() return
               # views, which cannot be indexed
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the weak parent reference is resolved to the parent object itself
        # (or None) in the returned state
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # keys() is converted with list() because on Python 3 it is a view,
        # which cannot be concatenated to a list
        return dir(super(ParseResults,self)) + list(self.keys())

622
def col (loc,strg):
    """Return the column number of position loc within strg, treating
       newlines as line separators; columns are numbered from 1.  A
       position that holds a newline character is reported as column 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the last newline before loc (rfind gives -1 if none)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline

634
def lineno(loc,strg):
    """Return the line number of position loc within strg, treating
       newlines as line separators; lines are numbered from 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
    """
    newlines_before = strg.count("\n",0,loc)
    return newlines_before + 1

646
def line( loc, strg ):
    """Return the line of text within strg that contains position loc,
       treating newlines as line separators (the newline is not included).
       """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no further newline: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]

656
def _defaultStartDebugAction( instring, loc, expr ):
    """Print a message naming the expression about to be matched, the
       location, and that location's (line,column) in instring."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + expr_text + " at loc " + loc_text + position)

659
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print a message showing the matched expression and its tokens
       rendered as a list."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + tokens_text)

662
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print a message showing the exception that was raised."""
    message = "Exception raised:" + _ustr(exc)
    print (message)

665
def nullDebugAction(*args):
    """'Do-nothing' debug action: accepts any positional arguments, ignores
       them and returns None.  Used to suppress debugging output during
       parsing."""
    return None

669
670 -class ParserElement(object):
671 """Abstract base level parser element class.""" 672 DEFAULT_WHITE_CHARS = " \n\t\r" 673 verbose_stacktrace = False 674
675 - def setDefaultWhitespaceChars( chars ):
676 """Overrides the default whitespace chars 677 """ 678 ParserElement.DEFAULT_WHITE_CHARS = chars
679 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 680
681 - def __init__( self, savelist=False ):
682 self.parseAction = list() 683 self.failAction = None 684 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 685 self.strRepr = None 686 self.resultsName = None 687 self.saveAsList = savelist 688 self.skipWhitespace = True 689 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 690 self.copyDefaultWhiteChars = True 691 self.mayReturnEmpty = False # used when checking for left-recursion 692 self.keepTabs = False 693 self.ignoreExprs = list() 694 self.debug = False 695 self.streamlined = False 696 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 697 self.errmsg = "" 698 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 699 self.debugActions = ( None, None, None ) #custom debug actions 700 self.re = None 701 self.callPreparse = True # used to avoid redundant calls to preParse 702 self.callDuringTry = False
703
704 - def copy( self ):
705 """Make a copy of this C{ParserElement}. Useful for defining different parse actions 706 for the same parsing pattern, using copies of the original parse element.""" 707 cpy = copy.copy( self ) 708 cpy.parseAction = self.parseAction[:] 709 cpy.ignoreExprs = self.ignoreExprs[:] 710 if self.copyDefaultWhiteChars: 711 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 712 return cpy
713
714 - def setName( self, name ):
715 """Define name for this expression, for use in debugging.""" 716 self.name = name 717 self.errmsg = "Expected " + self.name 718 if hasattr(self,"exception"): 719 self.exception.msg = self.errmsg 720 return self
721
722 - def setResultsName( self, name, listAllMatches=False ):
723 """Define name for referencing matching tokens as a nested attribute 724 of the returned parse results. 725 NOTE: this returns a *copy* of the original C{ParserElement} object; 726 this is so that the client can define a basic element, such as an 727 integer, and reference it in multiple places with different names. 728 729 You can also set results names using the abbreviated syntax, 730 C{expr("name")} in place of C{expr.setResultsName("name")} - 731 see L{I{__call__}<__call__>}. 732 """ 733 newself = self.copy() 734 newself.resultsName = name 735 newself.modalResults = not listAllMatches 736 return newself
737
738 - def setBreak(self,breakFlag = True):
739 """Method to invoke the Python pdb debugger when this element is 740 about to be parsed. Set C{breakFlag} to True to enable, False to 741 disable. 742 """ 743 if breakFlag: 744 _parseMethod = self._parse 745 def breaker(instring, loc, doActions=True, callPreParse=True): 746 import pdb 747 pdb.set_trace() 748 return _parseMethod( instring, loc, doActions, callPreParse )
749 breaker._originalParseMethod = _parseMethod 750 self._parse = breaker 751 else: 752 if hasattr(self._parse,"_originalParseMethod"): 753 self._parse = self._parse._originalParseMethod 754 return self
755
    def _normalizeParseActionArgs( f ):
        """Internal method used to decorate parse actions that take fewer than 3 arguments,
           so that all parse actions can be called as C{f(s,l,t)}.

           The number of arguments f accepts is read from its code object
           (co_argcount), trying in turn f itself, the function behind a
           bound C{__call__}, and C{__call__} directly.  A callable whose
           code object has the *args flag set, or that takes exactly 3
           arguments, is returned unchanged; otherwise a wrapper C{tmp(s,l,t)}
           is returned that forwards only the arguments f accepts and
           carries over f's __name__, __doc__ and __dict__ where possible.
           """
        # bit tested in code.co_flags to detect a *args parameter
        STAR_ARGS = 4

        # special handling for single-argument builtins
        if (f in singleArgBuiltins):
            numargs = 1
        else:
            try:
                restore = None
                if isinstance(f,type):
                    # a class was passed: inspect its __init__, then put the
                    # class back (see "restore" below)
                    restore = f
                    f = f.__init__
                if not _PY3K:
                    codeObj = f.func_code
                else:
                    # NOTE(review): function objects expose "__code__", not
                    # "code"; this lookup presumably raises AttributeError
                    # and falls through to the handler below - confirm.
                    # If it does while restore is set, f is left bound to
                    # __init__ rather than the class - confirm.
                    codeObj = f.code
                if codeObj.co_flags & STAR_ARGS:
                    return f
                numargs = codeObj.co_argcount
                # do not count the implicit self of a method
                if not _PY3K:
                    if hasattr(f,"im_self"):
                        numargs -= 1
                else:
                    if hasattr(f,"__self__"):
                        numargs -= 1
                if restore:
                    f = restore
            except AttributeError:
                try:
                    if not _PY3K:
                        call_im_func_code = f.__call__.im_func.func_code
                    else:
                        call_im_func_code = f.__code__

                    # not a function, must be a callable object, get info from the
                    # im_func binding of its bound __call__ method
                    if call_im_func_code.co_flags & STAR_ARGS:
                        return f
                    numargs = call_im_func_code.co_argcount
                    if not _PY3K:
                        if hasattr(f.__call__,"im_self"):
                            numargs -= 1
                    else:
                        if hasattr(f.__call__,"__self__"):
                            # NOTE(review): subtracting 0 is a no-op, so the
                            # count is left as read - confirm this is intended
                            numargs -= 0
                except AttributeError:
                    if not _PY3K:
                        call_func_code = f.__call__.func_code
                    else:
                        call_func_code = f.__call__.__code__
                    # not a bound method, get info directly from __call__ method
                    if call_func_code.co_flags & STAR_ARGS:
                        return f
                    numargs = call_func_code.co_argcount
                    if not _PY3K:
                        if hasattr(f.__call__,"im_self"):
                            numargs -= 1
                    else:
                        if hasattr(f.__call__,"__self__"):
                            numargs -= 1


        #~ print ("adding function %s with %d args" % (f.func_name,numargs))
        if numargs == 3:
            return f
        else:
            # NOTE(review): the chain below starts with a fresh "if", not an
            # "elif", so when numargs > 3 its final "else" appears to rebind
            # tmp to the zero-argument wrapper, discarding this one - confirm
            if numargs > 3:
                def tmp(s,l,t):
                    return f(f.__call__.__self__, s,l,t)
            if numargs == 2:
                def tmp(s,l,t):
                    return f(l,t)
            elif numargs == 1:
                def tmp(s,l,t):
                    return f(t)
            else: #~ numargs == 0:
                def tmp(s,l,t):
                    return f()
            # make the wrapper look like the wrapped callable
            try:
                tmp.__name__ = f.__name__
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__doc__ = f.__doc__
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__dict__.update(f.__dict__)
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            return tmp
    _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)

854 - def setParseAction( self, *fns, **kwargs ):
855 """Define action to perform when successfully matching parse element definition. 856 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 857 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 858 - s = the original string being parsed (see note below) 859 - loc = the location of the matching substring 860 - toks = a list of the matched tokens, packaged as a ParseResults object 861 If the functions in fns modify the tokens, they can return them as the return 862 value from fn, and the modified list of tokens will replace the original. 863 Otherwise, fn does not need to return any value. 864 865 Note: the default parsing behavior is to expand tabs in the input string 866 before starting the parsing process. See L{I{parseString}<parseString>} for more information 867 on parsing strings containing <TAB>s, and suggested methods to maintain a 868 consistent view of the parsed string, the parse location, and line and column 869 positions within the parsed string. 870 """ 871 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) 872 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 873 return self
874
875 - def addParseAction( self, *fns, **kwargs ):
876 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 877 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) 878 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 879 return self
880
881 - def setFailAction( self, fn ):
882 """Define action to perform if parsing fails at this expression. 883 Fail acton fn is a callable function that takes the arguments 884 C{fn(s,loc,expr,err)} where: 885 - s = string being parsed 886 - loc = location where expression match was attempted and failed 887 - expr = the parse expression that failed 888 - err = the exception thrown 889 The function returns no value. It may throw C{ParseFatalException} 890 if it is desired to stop parsing immediately.""" 891 self.failAction = fn 892 return self
893
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every run of ignorable expressions found at that position.

       Each expression in C{self.ignoreExprs} is matched repeatedly until it
       fails; the whole list is retried for as long as any expression matched
       in the previous pass.  Returns the resulting location.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # keep consuming until this ignorable no longer matches
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                pass
    return loc
906
def preParse( self, instring, loc ):
    """Return the location at which matching should really begin.

       Ignorable expressions are skipped first (when any are registered),
       then - if C{self.skipWhitespace} is set - any leading characters
       contained in C{self.whiteChars}.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
918
def parseImpl( self, instring, loc, doActions=True ):
    """Default matching implementation: consume nothing and produce no tokens.

       Returns a C{(loc, tokens)} pair with C{loc} unchanged and an empty
       token list.  Subclasses visible in this module override this method.
    """
    return loc, []
921
def postParse( self, instring, loc, tokenlist ):
    """Hook called on the tokens returned by C{parseImpl}; this default
       implementation returns C{tokenlist} unchanged."""
    return tokenlist
924 925 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core (non-memoizing) parse routine for a single expression.

       Steps: optional C{preParse}, C{parseImpl}, C{postParse}, wrapping of the
       tokens in a C{ParseResults}, then the parse actions.

        - instring = string being parsed
        - loc = location at which to attempt the match
        - doActions = if false, parse actions run only when C{self.callDuringTry} is set
        - callPreParse = if false, C{preParse} is skipped and matching starts exactly at C{loc}

       Returns C{(loc, tokens)}: the location after the match and the resulting
       C{ParseResults}.  An C{IndexError} raised by C{parseImpl} while running
       off the end of the input is reported as a C{ParseException} located at
       C{len(instring)}.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # fetch the exception object at most once, and only if somebody wants it
            err = None
            if self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                if err is None:
                    err = sys.exc_info()[1]
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # Fast path: skip the try/except unless an IndexError is possible.
        # The test must use preloc (where parseImpl actually starts), not loc:
        # preParse may have skipped trailing whitespace up to the end of input.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the current results in place
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                if (self.debugActions[2] ):
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1002
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} with C{doActions=False} and return only the
       end location.  A C{ParseFatalException} raised during the attempt is
       replaced by an ordinary C{ParseException} located at C{loc}."""
    try:
        result = self._parse( instring, loc, doActions=False )
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return result[0]
1008 1009 # this method gets repeatedly called during backtracking with the same arguments - 1010 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around C{_parseNoCache}.

       Results - successful C{(loc, tokens)} pairs as well as raised parse
       exceptions - are stored in C{ParserElement._exprArgCache}, keyed on the
       expression and the call arguments.  A cached exception is re-raised; a
       cached success is returned with a fresh copy of the tokens, so that
       callers modifying the returned results cannot alter the cache entry.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in ParserElement._exprArgCache:
        value = ParserElement._exprArgCache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # hand out a copy, mirroring the copy taken when the entry was stored
        return (value[0],value[1].copy())
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException:
            pe = sys.exc_info()[1]
            ParserElement._exprArgCache[ lookup ] = pe
            raise
# parse entry point used by the other methods; starts out as the
# non-memoizing version and is rebound to _parseCache by enablePackrat()
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Discard every entry in the class-wide parse cache C{_exprArgCache}."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)

# set to True by enablePackrat() so that the switch is performed only once
_packratEnabled = False
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set C{parseAll} to True (equivalent to ending
       the grammar with C{StringEnd()}).

       Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the C{loc} argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling C{parseWithTabs} on your grammar before calling C{parseString}
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full C{(s,loc,toks)} signature, and
          reference the input string using the parse action's C{s} argument
        - explicitly expand the tabs in your input string before calling
          C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        endLoc, tokens = self._parse( instring, 0 )
        if parseAll:
            # nothing but end-of-string may follow the match
            StringEnd()._parse( instring, endLoc )
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise sys.exc_info()[1]
    return tokens
1103
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  Each match will return the
       matching tokens, start location, and end location.  May be called with optional
       C{maxMatches} argument, to clip scanning after 'n' matches are found.

       This is a generator; each item yielded is a C{(tokens, start, end)} tuple.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                # pre-parse explicitly so the true match start (preloc) can be reported
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here - retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    loc = nextLoc
                else:
                    # match did not advance past loc: step forward so the scan cannot stall
                    loc = preloc+1
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
1146
def transformString( self, instring ):
    """Extension to C{scanString}, to modify matching text with modified tokens that may
       be returned from a parse action.  To use C{transformString}, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking C{transformString()} on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  C{transformString()} returns the resulting transformed string.

       Note: this sets C{keepTabs} on the expression, and leaves it set."""
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # untouched text between the previous match and this one
            pieces.append( instring[prevEnd:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces.extend( toks.asList() )
                elif isinstance(toks,list):
                    pieces.extend( toks )
                else:
                    pieces.append( toks )
            prevEnd = end
        pieces.append( instring[prevEnd:] )
        return "".join(map(_ustr,_flatten(pieces)))
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise sys.exc_info()[1]
1179
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{scanString}, simplifying the access to the tokens found
       to match the given parse expression.  Returns a C{ParseResults} holding the
       tokens of each match, without the start and end locations.  May be called
       with optional C{maxMatches} argument, to clip searching after 'n' matches
       are found.
    """
    try:
        found = [ toks for toks, start, end in self.scanString( instring, maxMatches ) ]
        return ParseResults( found )
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise sys.exc_info()[1]
1194
def __add__(self, other ):
    """Implementation of + operator - returns C{And}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1204
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{ParserElement}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1214
def __sub__(self, other):
    """Implementation of - operator, returns C{And} with error stop.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1224
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{ParserElement}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1234
def __mul__(self,other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
       C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
       tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
       may also include C{None} as in:
        - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + ZeroOrMore(expr)}
          (read as "at least n instances of C{expr}")
        - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
        - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)}
        - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)}

       Note that C{expr*(None,n)} does not raise an exception if
       more than n exprs exist in the input stream; that is,
       C{expr*(None,n)} does not enforce a maximum number of expr
       occurrences.  If this behavior is desired, then write
       C{expr*(None,n) + ~expr}

       Raises C{TypeError} if the multiplier is neither an integer nor a tuple
       of integers/C{None}, and C{ValueError} for a negative count, a maximum
       below the minimum, or a multiplier of 0 or (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None, and ignore anything past the second item
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(e + Optional(e + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the multiplier is the left operand;
       delegates to C{__mul__}, so C{3 * expr} behaves like C{expr * 3}."""
    return self.__mul__(other)
1305
def __or__(self, other ):
    """Implementation of | operator - returns C{MatchFirst}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1315
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{ParserElement}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1325
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{Or}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1335
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{ParserElement}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1345
def __and__(self, other ):
    """Implementation of & operator - returns C{Each}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1355
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{ParserElement}.
       A string operand is converted to a C{Literal}; for any other
       non-C{ParserElement} operand a C{SyntaxWarning} is issued and
       C{None} is returned."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1365
def __invert__( self ):
    """Implementation of ~ operator - returns C{NotAny} wrapping this expression."""
    return NotAny( self )
1369
def __call__(self, name):
    """Shortcut for C{setResultsName}, with C{listAllMatches=default}::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

       Returns whatever C{setResultsName(name)} returns.
       """
    return self.setResultsName(name)
1377
def suppress( self ):
    """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
       cluttering up returned output.  Returns a new C{Suppress} expression
       wrapping this one.
    """
    return Suppress( self )
1383
def leaveWhitespace( self ):
    """Disables the skipping of whitespace before matching the characters in the
       C{ParserElement}'s defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.
       Returns C{self}.
    """
    self.skipWhitespace = False
    return self
1391
def setWhitespaceChars( self, chars ):
    """Overrides the default whitespace chars with C{chars} and (re)enables
       whitespace skipping for this expression.  Returns C{self}.
    """
    self.skipWhitespace = True
    self.whiteChars = chars
    # NOTE(review): presumably stops copy() from restoring the module-wide
    # default whitespace set - confirm against ParserElement.copy
    self.copyDefaultWhiteChars = False
    return self
1399
def parseWithTabs( self ):
    """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
       Must be called before C{parseString} when the input grammar contains elements that
       match <TAB> characters.  Returns C{self}."""
    self.keepTabs = True
    return self
1406
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       A copy of C{other} is stored, wrapped in C{Suppress} unless it already
       is one; a C{Suppress} that is already registered is not added again.
       Returns C{self}.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other.copy() )
    return self
1418
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       using the given callables for the start, success and exception events.
       Any callable passed as a false value (e.g. C{None}) is replaced by the
       corresponding module default.  Returns C{self}."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1426
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set C{flag} to True to enable (installing the default debug actions),
       False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1435
def __str__( self ):
    """Return the expression's C{name} attribute."""
    return self.name
1438
def __repr__( self ):
    """Return the same text as C{__str__}, converted through C{_ustr}."""
    return _ustr(self)
1441
def streamline( self ):
    """Mark this expression as streamlined and discard the cached string
       representation C{strRepr}.  Returns C{self}."""
    self.streamlined = True
    self.strRepr = None
    return self
1446
def checkRecursion( self, parseElementList ):
    """Recursion-check hook called by C{validate}; this base implementation does nothing."""
    pass
1449
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

       C{validateTrace} is not used by this base implementation; it defaults
       to C{None} rather than a shared mutable list.  The check itself is
       delegated to C{checkRecursion}, started with an empty list.
    """
    self.checkRecursion( [] )
1453
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       C{file_or_filename} is treated as a file object if it has a C{read()}
       method; otherwise it is opened in binary mode (C{"rb"}).  The result of
       C{parseString(contents, parseAll)} is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # release the handle even if the read fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1471
def getException(self):
    """Build a new C{ParseException} carrying this expression's C{errmsg},
       with an empty string and location 0 as placeholders."""
    return ParseException("",0,self.errmsg,self)
1474
def __getattr__(self,aname):
    """Fallback attribute lookup: create C{myException} on first use.

       The exception object is built with C{getException()} and stored on the
       instance before being returned.  Any other attribute name raises
       C{AttributeError}.
    """
    if aname == "myException":
        # store on the instance first, then return the same object
        self.myException = ret = self.getException();
        return ret;
    else:
        raise AttributeError("no such attribute " + aname)
1481
def __eq__(self,other):
    """Equality test with three cases:
        - another C{ParserElement}: true if it is the same object or has an equal C{__dict__}
        - a string: true if this expression parses the whole string (C{parseAll=True})
        - anything else: deferred to the superclass comparison
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if not isinstance(other, basestring):
        return super(ParserElement,self)==other
    try:
        self.parseString(_ustr(other), parseAll=True)
    except ParseBaseException:
        return False
    return True
1493
def __ne__(self,other):
    """Inverse of C{__eq__}."""
    return not (self == other)
1496
def __hash__(self):
    """Hash on object identity (C{id(self)}), independent of C{__eq__}."""
    return hash(id(self))
1499
def __req__(self,other):
    """Reflected form of the equality test; same result as C{self == other}."""
    return self == other
1502
def __rne__(self,other):
    """Reflected form of the inequality test; same result as C{not (self == other)}."""
    return not (self == other)
1505 1506
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name via the base class, then rebuild C{errmsg} as
           C{"Expected " + name}.  Returns what the base C{setName} returned."""
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1518 1519
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # this token can succeed without consuming any input
        self.mayReturnEmpty = True
        # matching never indexes into the input string
        self.mayIndexError = False
1527 1528
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this token's reusable exception object,
           updated with the current string and location."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1544 1545
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        """Store C{matchString}, its length and its first character.
           An empty string triggers a C{SyntaxWarning} and turns the instance
           into an C{Empty} token."""
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # re-class the instance so that it behaves as an always-matching Empty
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        #self.myException.msg = self.errmsg
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at C{loc}; returns C{(loc+matchLen, match)} or
           raises this token's reusable exception object.

           C{instring[loc]} raises C{IndexError} when C{loc} is at the end of
           the input; that exception is left for the caller to handle.
        """
        if (instring[loc] == self.firstMatchChar and
            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
            return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
# short alias for Literal
_L = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{Literal}::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        """Store the keyword and the set of identifier characters that may
           not appear directly before or after it."""
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # NOTE(review): unlike Literal, only a warning is issued here;
            # firstMatchChar stays unset for an empty keyword
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # comparisons are done in upper case, so upper-case both sides up front
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}, requiring that the characters directly
           before and after it (if any) are not in C{identChars}.
           Returns C{(loc+matchLen, match)} or raises the reusable exception."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy whose C{identChars} is reset to the current class default."""
        c = super(Keyword,self).copy()
        # NOTE(review): this discards any custom identChars given to the
        # constructor and stores the plain string rather than the
        # _str2dict() result - confirm this is intended
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1639
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base class holds the upper-cased form used for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice with the stored upper-case
           literal; on success return C{(end, returnString)}, otherwise raise
           the reusable exception."""
        end = loc + self.matchLen
        if instring[ loc:end ].upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1661
class CaselessKeyword(Keyword):
    """C{Keyword} that is always constructed with C{caseless=True}.
       The matched result is the keyword string as given to the constructor."""
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Case-insensitive keyword match at C{loc}.  Only the character
           following the keyword is checked against C{identChars}."""
        end = loc + self.matchLen
        if instring[ loc:end ].upper() == self.caselessmatch:
            if loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars:
                return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1675
1676 -class Word(Token):
1677 """Token for matching words composed of allowed character sets. 1678 Defined with string containing all allowed initial characters, 1679 an optional string containing allowed body characters (if omitted, 1680 defaults to the initial character set), and an optional minimum, 1681 maximum, and/or exact length. The default value for C{min} is 1 (a 1682 minimum value < 1 is not valid); the default values for C{max} and C{exact} 1683 are 0, meaning no maximum or exact length restriction. 1684 """
1685 - def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
1686 super(Word,self).__init__() 1687 self.initCharsOrig = initChars 1688 self.initChars = _str2dict(initChars) 1689 if bodyChars : 1690 self.bodyCharsOrig = bodyChars 1691 self.bodyChars = _str2dict(bodyChars) 1692 else: 1693 self.bodyCharsOrig = initChars 1694 self.bodyChars = _str2dict(initChars) 1695 1696 self.maxSpecified = max > 0 1697 1698 if min < 1: 1699 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 1700 1701 self.minLen = min 1702 1703 if max > 0: 1704 self.maxLen = max 1705 else: 1706 self.maxLen = _MAX_INT 1707 1708 if exact > 0: 1709 self.maxLen = exact 1710 self.minLen = exact 1711 1712 self.name = _ustr(self) 1713 self.errmsg = "Expected " + self.name 1714 #self.myException.msg = self.errmsg 1715 self.mayIndexError = False 1716 self.asKeyword = asKeyword 1717 1718 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 1719 if self.bodyCharsOrig == self.initCharsOrig: 1720 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 1721 elif len(self.bodyCharsOrig) == 1: 1722 self.reString = "%s[%s]*" % \ 1723 (re.escape(self.initCharsOrig), 1724 _escapeRegexRangeChars(self.bodyCharsOrig),) 1725 else: 1726 self.reString = "[%s][%s]*" % \ 1727 (_escapeRegexRangeChars(self.initCharsOrig), 1728 _escapeRegexRangeChars(self.bodyCharsOrig),) 1729 if self.asKeyword: 1730 self.reString = r"\b"+self.reString+r"\b" 1731 try: 1732 self.re = re.compile( self.reString ) 1733 except: 1734 self.re = None
1735
1736 - def parseImpl( self, instring, loc, doActions=True ):
1737 if self.re: 1738 result = self.re.match(instring,loc) 1739 if not result: 1740 exc = self.myException 1741 exc.loc = loc 1742 exc.pstr = instring 1743 raise exc 1744 1745 loc = result.end() 1746 return loc,result.group() 1747 1748 if not(instring[ loc ] in self.initChars): 1749 #~ raise ParseException( instring, loc, self.errmsg ) 1750 exc = self.myException 1751 exc.loc = loc 1752 exc.pstr = instring 1753 raise exc 1754 start = loc 1755 loc += 1 1756 instrlen = len(instring) 1757 bodychars = self.bodyChars 1758 maxloc = start + self.maxLen 1759 maxloc = min( maxloc, instrlen ) 1760 while loc < maxloc and instring[loc] in bodychars: 1761 loc += 1 1762 1763 throwException = False 1764 if loc - start < self.minLen: 1765 throwException = True 1766 if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 1767 throwException = True 1768 if self.asKeyword: 1769 if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 1770 throwException = True 1771 1772 if throwException: 1773 #~ raise ParseException( instring, loc, self.errmsg ) 1774 exc = self.myException 1775 exc.loc = loc 1776 exc.pstr = instring 1777 raise exc 1778 1779 return loc, instring[start:loc]
1780
def __str__( self ):
    """Return a printable description of this C{Word}.

    The base-class C{__str__} is tried first.  If it raises, a compact
    C{W:(init)} or C{W:(init,body)} summary of the character sets is built
    (each set abbreviated to its first four characters), cached in
    C{self.strRepr} and returned.
    """
    try:
        return super(Word,self).__str__()
    except Exception:
        # Only ordinary errors mean "no usable base-class string"; a bare
        # except would also swallow KeyboardInterrupt / SystemExit.
        pass

    if self.strRepr is None:

        def charsAsStr(s):
            # abbreviate long character sets so the summary stays short
            if len(s)>4:
                return s[:4]+"..."
            else:
                return s

        if ( self.initCharsOrig != self.bodyCharsOrig ):
            self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
        else:
            self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

    return self.strRepr
1802 1803
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
       A pre-compiled regular expression object is accepted as well.
    """
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

           If C{pattern} is already a compiled RE object it is used directly;
           C{flags} is then only recorded on the instance, not applied.

           Raises C{ValueError} if C{pattern} is neither a string nor a
           compiled RE object, and re-raises the compilation error (after
           issuing a C{SyntaxWarning}) for an invalid pattern string.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Keep the regex source text.  str(pattern) would store the
            # object's repr, which then leaks into reString and __str__.
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the regular expression at C{loc}.

           Returns C{(endLoc, results)} where C{results} is a C{ParseResults}
           holding the matched text, with every named group of the match
           also stored under its group name.  Raises the instance's reusable
           C{myException} when the expression does not match.
        """
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the base-class string if available, else C{Re:(<repr of pattern>)}."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt / SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1870 1871
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises C{SyntaxError} (after a C{SyntaxWarning}) when C{quoteChar}
           or C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError("quoteChar cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError("endQuoteChar cannot be the empty string")

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # The escape character, if any, is excluded from the "ordinary
        # character" set so it can only match through the escape alternative.
        if escChar is not None:
            escCharInSet = _escapeRegexRangeChars(escChar)
        else:
            escCharInSet = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInSet )
        else:
            # single-line strings may not contain raw line breaks
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInSet )
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside
            # the body as long as the full end quote is not completed
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}; return C{(endLoc, text)}.

           With C{unquoteResults} set, the delimiters are stripped, escaped
           characters are replaced by the character following the escape,
           and each C{escQuote} sequence is replaced by C{endQuoteChar}.
           Raises the instance's reusable C{myException} on mismatch.
        """
        # cheap first-character test before running the regular expression
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a normal literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base-class string if available, else a description of the delimiters."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt / SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1986 1987
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the disallowed characters and length limits.

           Raises C{ValueError} when C{min} is less than 1.  A positive
           C{exact} overrides both C{min} and C{max}.  Leading whitespace
           skipping is turned off for this token.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        #self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters not in C{notChars}; return C{(endLoc, text)}.

           Raises the instance's reusable C{myException} when the first
           character is disallowed or fewer than C{minLen} characters match.
        """
        if instring[loc] in self.notChars:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string if available, else C{!W:(chars)} (abbreviated past four characters)."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt / SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2058
class White(Token):
    """Token that matches a run of whitespace characters.

       Whitespace is normally skipped by pyparsing grammars; use this class
       where a whitespace structure is significant.  Construct it with a
       string of the whitespace characters to match (default C{" \\t\\r\\n"})
       and optional C{min}, C{max} and C{exact} lengths, as for C{Word}."""
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Record the characters to match and the length limits.

           The characters being matched are removed from this element's own
           skippable-whitespace set; the element name is built from the
           C{whiteStrs} labels of the characters in C{ws}.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        #~ self.leaveWhitespace()
        labels = [White.whiteStrs[ch] for ch in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg

        self.minLen = min
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max
        if exact > 0:
            # an exact length overrides both limits
            self.minLen = self.maxLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Match whitespace at C{loc}; return C{(endLoc, text)}.

           Raises the instance's reusable C{myException} if the first
           character is not in C{matchWhite} or fewer than C{minLen}
           characters match.
        """
        def fail(where):
            exc = self.myException
            exc.loc = where
            exc.pstr = instring
            raise exc

        if instring[loc] not in self.matchWhite:
            fail(loc)

        start = loc
        stop = min( start + self.maxLen, len(instring) )
        loc = start + 1
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            fail(loc)

        return loc, instring[start:loc]
2115 2116
class _PositionToken(Token):
    """Common base for the position-oriented tokens defined below it
       (C{GoToColumn}, C{LineStart}, C{LineEnd}, C{StringStart}, ...)."""
    def __init__( self ):
        """Name the element after its class and mark it as possibly empty."""
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
2123
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; handy when
       scraping tabular reports."""
    def __init__( self, colno ):
        """Remember the target column C{colno}."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorables and whitespace until the target column or a
           non-space character is reached; return the new location."""
        if col(loc, instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(newLoc, skippedText)} for the span up to the target column.

           Raises C{ParseException} when the current column is already past
           the target.
        """
        here = col( loc, instring )
        if here > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - here
        return target, instring[loc:target]
2146
class LineStart(_PositionToken):
    """Succeeds when the current position is the beginning of a line of the
       parse string."""
    def __init__( self ):
        """Exclude the newline from this element's skippable whitespace."""
        super(LineStart,self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"
        #self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Return C{loc + 1} if the base-class pre-parse stops on a newline,
           otherwise C{loc} unchanged."""
        stoppedAt = super(LineStart,self).preParse(instring,loc)
        if instring[stoppedAt] == "\n":
            return loc + 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at a line start, else raise C{myException}.

           A position counts as a line start when it is 0, when it equals
           the pre-parsed position of the very start of the string, or when
           the preceding character is a newline.
        """
        atLineStart = ( loc == 0
                        or loc == self.preParse( instring, 0 )
                        or instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []

        #~ raise ParseException( instring, loc, "Expected start of line" )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2171
class LineEnd(_PositionToken):
    """Succeeds when the current position is the end of a line of the parse
       string."""
    def __init__( self ):
        """Exclude the newline from this element's skippable whitespace."""
        super(LineEnd,self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a newline, or accept the end of the string.

           Returns C{(loc+1, "\\n")} on a newline and C{(loc+1, [])} exactly
           at the end of the input; otherwise raises C{myException}.
        """
        instrlen = len(instring)
        if loc == instrlen:
            return loc+1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"

        #~ raise ParseException( instring, loc, "Expected end of line" )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2197
class StringStart(_PositionToken):
    """Succeeds when the current position is the beginning of the parse
       string."""
    def __init__( self ):
        """Set the error message used when the match fails."""
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at the start of the text, else raise C{myException}.

           A non-zero C{loc} is still accepted when it equals the position
           that pre-parsing reaches from offset 0, i.e. everything before it
           is just whitespace and ignorables.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            #~ raise ParseException( instring, loc, "Expected start of text" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2215
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        """Set the error message used when the match fails."""
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed only at or beyond the end of C{instring}.

           Returns C{(loc+1, [])} exactly at the end and C{(loc, [])} when
           C{loc} is already past it; raises C{myException} when input
           remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            #~ raise ParseException( instring, loc, "Expected end of text" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen is the only remaining case; the original trailing
        # else branch after the <, ==, > tests could never execute
        return loc, []
2239
class WordStart(_PositionToken):
    """Succeeds when the current position begins a word: the character at
       the position is in C{wordChars} and the one before it is not
       (default C{wordChars} is C{printables}).  Position 0 always matches.
       Use C{WordStart(alphanums)} to emulate the C{\\b} behavior of
       regular expressions.
    """
    def __init__(self, wordChars = printables):
        """Store the word characters as a lookup table."""
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at a word start, else raise C{myException}."""
        wordChars = self.wordChars
        if loc != 0 and (instring[loc-1] in wordChars or
                         instring[loc] not in wordChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2261
class WordEnd(_PositionToken):
    """Succeeds when the current position ends a word: the character before
       the position is in C{wordChars} and the one at it is not
       (default C{wordChars} is C{printables}).  The end of the input always
       matches.  Use C{WordEnd(alphanums)} to emulate the C{\\b} behavior of
       regular expressions.
    """
    def __init__(self, wordChars = printables):
        """Store the word characters and turn off whitespace skipping."""
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at a word end, else raise C{myException}."""
        instrlen = len(instring)
        insideInput = instrlen > 0 and loc < instrlen
        if insideInput and (instring[loc] in self.wordChars or
                            instring[loc-1] not in self.wordChars):
            #~ raise ParseException( instring, loc, "Expected end of word" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2286 2287
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Normalise C{exprs} into the list C{self.exprs}.

           A list is stored as given (not copied), a string becomes a single
           C{Literal}, any other iterable is converted with C{list()}, and a
           non-iterable object is wrapped in a one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions and drop the cached string form; returns C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are untouched
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on every contained expression.

           A C{Suppress} expression that is already registered is not added
           again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string if available, else C{ClassName:(exprs)}."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt / SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline contained expressions and flatten same-class nesting; returns C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check this one for recursion."""
        # the default list is never mutated: a new list is built here
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )
2383
class And(ParseExpression):
    """Requires all given C{ParseExpressions} to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once C{And.parseImpl} has passed one, failures of
           later elements are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the lookup *after*
            # Empty, so Empty.__init__ is skipped - confirm this is intended.
            super(Empty,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence; it may return empty only if every element may.

           Whitespace settings are copied from the first element.
        """
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Read from the normalised self.exprs, not the raw argument: the raw
        # value may be a string or a one-shot iterable that cannot be indexed
        # (or no longer holds the elements).  An empty sequence keeps the
        # base-class whitespace defaults instead of raising IndexError.
        if self.exprs:
            self.setWhitespaceChars( self.exprs[0].whiteChars )
            self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every element in order; return C{(endLoc, combinedTokens)}.

           After an C{_ErrorStop} marker has been seen, a C{ParseBaseException}
           or C{IndexError} from a later element is re-raised as
           C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    pe = sys.exc_info()[1]
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that have tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} (a string becomes a C{Literal}) in place; returns C{self}."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check elements for recursion, stopping after the first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the element name if set, else C{{a b c}} built from the elements."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
2451 2452
class Or(ParseExpression):
    """Succeeds when at least one of the given C{ParseExpression}s matches.
       When several match, the alternative that reaches the furthest
       position in the input wins.
       May be constructed using the '^' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any alternative may."""
        super(Or,self).__init__(exprs, savelist)
        emptyCapable = [ e for e in self.exprs if e.mayReturnEmpty ]
        self.mayReturnEmpty = bool(emptyCapable)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse with the one that matched furthest.

           If none matches, the failure that occurred furthest into the
           input is raised; with no alternatives at all, a
           C{ParseException} saying so is raised.
        """
        furthestFailLoc = -1
        furthestFail = None
        bestEnd = -1
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException:
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFail = err
                    furthestFailLoc = err.loc
                continue
            except IndexError:
                # running off the end of the input counts as failing there
                if len(instring) > furthestFailLoc:
                    furthestFail = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestFailLoc = len(instring)
                continue

            if endLoc > bestEnd:
                bestEnd = endLoc
                bestAlt = alt

        if bestEnd >= 0:
            return bestAlt._parse( instring, loc, doActions )

        if furthestFail is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise furthestFail

    def __ixor__(self, other ):
        """Append C{other} (a string becomes a C{Literal}) in place; returns C{self}."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the element name if set, else C{{a ^ b ^ c}} built from the alternatives."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " ^ ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
2513 2514
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any alternative may,
           or if no alternatives were given."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses.

           If none matches, the failure that occurred furthest into the
           input is raised; with no alternatives at all, a
           C{ParseException} saying so is raised.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                ret = e._parse( instring, loc, doActions )
                return ret
            except ParseException:
                # fetched via sys.exc_info(), as Or and And do, instead of
                # the Python-2-only "except ParseException, err" form
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # running off the end of the input counts as failing there
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Append C{other} (a string becomes a C{Literal}) in place; returns C{self}."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the element name if set, else C{{a | b | c}} built from the alternatives."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2572 2573
class Each(ParseExpression):
    """Requires all given C{ParseExpressions} to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Build the element; it may return empty only if every member may.

           The grouping of members into required/optional lists is deferred
           to the first call of C{parseImpl} (flag C{initExprGroups}).
        """
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match all members in whatever order they appear in the input.

           A first pass uses C{tryParse} repeatedly to discover the order in
           which members match; a second pass calls C{_parse} in that order
           and merges the results.  Raises C{ParseException} naming the
           required members that never matched.
        """
        if self.initExprGroups:
            # one-time classification of the members (cached on the instance)
            # opt1: the expressions wrapped by Optional members
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            # opt2: members that may match empty and are not found in opt1
            # NOTE(review): opt1 holds the *wrapped* expressions, so this test
            # compares members against inner expressions - confirm that
            # Optional/ZeroOrMore wrappers are meant to end up in opt2.
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        # per-call working copies; entries are removed as they are matched
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # first pass: keep sweeping over the remaining candidates until a
        # whole sweep produces no match
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    # presumably a trial parse that only reports the end
                    # location - TODO confirm against tryParse
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # second pass: parse for real, from the original location, in the
        # order discovered above
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        # merge the per-member results; a key already present in the merged
        # result gets old and new values combined instead of being replaced
        finalResults = ParseResults([])
        for r in resultlist:
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the element name if set, else C{{a & b & c}} built from the members."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every member."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2660 2661
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr} (a string becomes a C{Literal}; C{None} is allowed).

           When an expression is given, its index-error/empty-match flags,
           whitespace settings, list-saving flag, pre-parse flag and ignore
           expressions are copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a copy of the wrapped expression; returns C{self}."""
        self.skipWhitespace = False
        # The None test must come before the copy: the original called
        # self.expr.copy() first and failed with AttributeError when no
        # expression was set.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on the wrapped expression.

           A C{Suppress} expression that is already registered is not added
           again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression; returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element is already in
           C{parseElementList}; otherwise continue into the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for recursion."""
        # the default list is never mutated: a new list is built here
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string if available, else C{ClassName:(expr)}."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt / SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2732 2733
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.  C{FollowedBy}
       never advances the parsing position; it only checks that the
       expression matches at the current position, and it always returns a
       null token list."""
    def __init__( self, expr ):
        """Wrap C{expr}; a lookahead may always match without consuming input."""
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression at C{loc} and return C{(loc, [])}.

           Any exception raised by the trial parse propagates to the caller.
        """
        # only the success or failure of the trial parse matters here
        self.expr.tryParse( instring, loc )
        return loc, []
2746 2747
class NotAny(ParseElementEnhance):
    """Negative lookahead.  C{NotAny} checks that the given expression does
       *not* match at the current position.  It does *not* advance the parsing
       position, does *not* skip leading whitespace, and always returns a null
       token list.  May be constructed using the '~' operator."""

    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # set the flag directly instead of calling leaveWhitespace(), which
        # would propagate the setting to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent: succeed without consuming input
            return loc, []
        # the unwanted expression matched: report failure at this location
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name
        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
2783 2784
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        tokens = []
        try:
            # first repetition; if it fails, the empty token list is returned
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = ( len(self.ignoreExprs) > 0 )
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            # the repetition ends at the first failed match
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name
        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2822 2823
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl( self, instring, loc, doActions=True ):
        # the first match is mandatory, so a failure here propagates to the caller
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            skipIgnorables = ( len(self.ignoreExprs) > 0 )
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            # further repetitions are optional; stop at the first failure
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name
        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2857
class _NullToken(object):
    """Placeholder object that is always false and whose string form is empty."""
    def __nonzero__(self):
        return False
    # same truth-value hook under its Python 3 name
    __bool__ = __nonzero__
    def __str__(self):
        return ""

# module-level instance used as the default for Optional's 'default' argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       If the expression is not found, a default return value can be
       supplied in its place.
    """

    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            if self.defaultValue is _optionalNotMatched:
                # no default was given: an absent match yields no tokens
                tokens = []
            elif self.expr.resultsName:
                # also file the default under the contained expression's results name
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name
        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
2898 2899
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The C{ignore}
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If C{failOn} is given and matches before the target
       expression does, the skip fails.
    """

    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string is promoted to a Literal; None or an expression is stored as given
        if isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        startLoc = loc
        instrlen = len(instring)
        target = self.expr
        # set only while the failOn exception is in flight, so the handler
        # below re-raises it instead of advancing
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # step over any run of ignorable text at this position
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                target._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [ skipText ]
                loc,mat = target._parse(instring,loc,doActions,callPreParse=False)
                if not mat:
                    return loc, [ skipText ]
                skipRes = ParseResults( skipText )
                skipRes += mat
                return loc, [ skipRes ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                # target not found here: retry one character further on
                loc += 1
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2965
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign the real expression to this placeholder and copy its parsing
        attributes.  A plain string is converted to a C{Literal}.  Returns C{None}."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        # drop any cached string form of the previous definition
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the setting is
        not passed on to the contained expression.  Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once.  The C{streamlined} flag
        is set before descending, so a recursive grammar cannot loop forever."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=None ):
        """Validate the contained expression unless this element is already in
        C{validateTrace}, then check for recursion.  C{validateTrace} is copied,
        never modified; omitting it (or passing C{None}) starts a new trace."""
        # None replaces the former shared mutable default ([]).
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # Swap in a class whose __str__ returns "..." while the contained
        # expression is rendered, so a self-referencing grammar does not
        # recurse endlessly; the real class is always restored.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Return a copy; while still undefined, return a new C{Forward} that
        refers back to this one instead."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
3037
class _ForwardNoRecurse(Forward):
    """Stand-in class that C{Forward.__str__} switches to temporarily; its
       string form is a fixed ellipsis."""
    def __str__( self ):
        ellipsis = "..."
        return ellipsis
3041
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""

    def __init__( self, expr, savelist=False ):
        # savelist is accepted for signature compatibility but is not passed
        # on; saveAsList is always reset to False here
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
3047
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.
       Deprecated: use the C{upcaseTokens} parse action instead."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list holding the upper-cased form of each token."""
        # Call the string method directly: the module-level string.upper()
        # helper exists only on Python 2.
        return [ tok.upper() for tok in tokenlist ]
3057 3058
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """

    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # turn off whitespace skipping inside the contained expressions ...
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but re-enable it on the Combine itself
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            # adjacent mode calls the ParserElement implementation directly,
            # bypassing the intermediate classes
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        joined = "".join(tokenlist._asStringList(self.joinString))
        # start from a copy with its list contents removed, then add the joined string
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks
3090
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning
       tokens of ZeroOrMore and OneOrMore expressions."""

    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # wrap the matched tokens in a one-element list so they stay together
        grouped = [ tokenlist ]
        return grouped
3099
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as an item key.
    """

    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for i,tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey,int):
                # integer keys are stored under their stripped string form
                ikey = _ustr(tok[0]).strip()

            ntoks = len(tok)
            if ntoks == 1:
                # a key with nothing after it maps to the empty string
                value = ""
            elif ntoks == 2 and not isinstance(tok[1],ParseResults):
                value = tok[1]
            else:
                remainder = tok.copy()
                del remainder[0]
                if len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value,i)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3132 3133
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse( self, instring, loc, tokenlist ):
        # discard whatever was matched
        nothing = []
        return nothing

    def suppress( self ):
        # already a Suppress, so there is nothing further to wrap
        return self
3141 3142
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            # every call after the first successful one fails the parse
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # marked only after the wrapped action returned without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
3156
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the parse action, and a "leaving" line afterwards showing either
    the return value or the exception raised (which is then re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    # Use __name__ (available on both Python 2 and 3) instead of the
    # Python 2-only func_name, and fall back to the class name for callables
    # that carry no __name__ at all.
    baseName = getattr(f, "__name__", f.__class__.__name__)
    def z(*paArgs):
        thisFunc = baseName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # more than (s,l,t): prefix the name with the class of the first argument
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace and
       comments, and the matching tokens are returned as a list with the delimiters
       suppressed.  If C{combine} is set to True, the matching tokens are instead
       returned as a single token string, with the delimiters included.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3196
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens are returned as a list of expr tokens - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # redefine the array part on the fly from the count just parsed
        count = int(t[0])
        if count:
            arrayExpr << Group(And([expr]*count))
        else:
            arrayExpr << Group(empty)
        # returning an empty list drops the count token from the results
        return []
    countField = Word(nums).setName("arrayLen")
    countField = countField.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countField + arrayExpr )
def _flatten(L):
    """Return the items of an arbitrarily nested list as one flat list.

    A value that is not exactly of type C{list} is returned wrapped in a
    one-element list; an empty list is returned unchanged.
    """
    if type(L) is not list:
        return [L]
    if not L:
        return L
    # Walk the items in a loop and recurse only into nested lists, so the
    # recursion depth follows the nesting depth rather than the list length
    # (the old head/tail recursion failed on long flat lists).
    flat = []
    for item in L:
        if type(item) is list:
            flat.extend(_flatten(item))
        else:
            flat.append(item)
    return flat
3215
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever the first expression just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: require each one in sequence
            flatTokens = _flatten(t.asList())
            rep << And( [ Literal(tok) for tok in flatTokens ] )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of the original expression ...
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # ... and is then rejected unless it produced the same tokens
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Escape the characters that are special inside a regex '[]' range
       (backslash, '^', '-' and ']'), and spell newline and tab as
       two-character escapes."""
    # the backslash must come first so that the backslashes added for the
    # other characters are not themselves doubled
    for special in r"\^-]":
        s = s.replace(special,_bslash+special)
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)
3275
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{MatchFirst} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       C{SyntaxWarning} and an empty C{MatchFirst}.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # There are no symbols to work with; return an empty alternative
        # instead of failing below on an unbound 'symbols' name.
        return MatchFirst( [] )

    # Remove duplicates and move any symbol that is masked by an earlier,
    # shorter symbol (one that is a prefix of it) in front of that symbol.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no change was made for this position; move on to the next one
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character class suffices
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # Best-effort optimization only; fall through to MatchFirst.
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3334
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    # one group per key/value pair, repeated any number of times
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3344
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{keepOriginalText}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as False, then the return value is a
       C{ParseResults} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{originalTextFor} contains expressions with defined
       results names, you must set C{asString} to False if you want to preserve those
       results name values."""
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")
    if not asString:
        def extractText(s,l,t):
            # replace the token list with the raw text, keeping any other named results
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    matchExpr.setParseAction(extractText)
    return matchExpr

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the '[...]' range syntax accepted by srange()
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a parsed range (a ParseResults pair) expands to every character it spans;
# anything else is passed through unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Deliberate best-effort: any parse or expansion error yields "".
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return ""
3411
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises C{ParseException}
       when the match location is not at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{transformString()}.  The returned parse action accepts any
       arguments and always returns C{replStr} wrapped in a new one-element list.
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement
    return _replFunc
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       Returns the first token without its first and last characters.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3435
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    # each token is converted to a string before it is upper-cased
    asStrings = map(_ustr,t)
    return [ tok.upper() for tok in asStrings ]
3439
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    # each token is converted to a string before it is lower-cased
    asStrings = map(_ustr,t)
    return [ tok.lower() for tok in asStrings ]
3443
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{originalTextFor}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    else:
        # replace the tokens in place with the raw input text they were parsed from
        originalText = s[startLoc:endloc]
        del t[:]
        t += ParseResults(originalText)
        return t
3455
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    callStack = inspect.stack()
    try:
        # skip this frame and its direct caller, then search up the stack
        # (through intervening argument normalizers) for the parsing routine
        for frameRecord in callStack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            # that routine's local 'loc' is the end location of the tokens
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the references to the frame objects
        del callStack
3471
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    attrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML branch: attribute values must be double-quoted strings
        attrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( attrName + Suppress("=") + attrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + \
                Suppress(">")
    else:
        # HTML branch: values may be quoted or bare, the '=value' part is
        # optional, and attribute names are lower-cased
        unquotedChars = "".join( [ c for c in printables if c not in ">" ] )
        attrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(unquotedChars)
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(Group( attrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + attrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + \
                Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names: "start"/"end" plus the tag name title-cased, with ':' removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3500
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    htmlTags = _makeTags( tagStr, False )
    return htmlTags
3504
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    xmlTags = _makeTags( tagStr, True )
    return xmlTags
3508
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
    """
    # positional name/value tuples take precedence over keyword arguments
    if args:
        pairs = args[:]
    else:
        pairs = attrDict.items()
    attrs = [ (name,value) for name,value in pairs ]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique marker object meaning "attribute must be present, with any value"
withAttribute.ANY_VALUE = object()

# associativity markers for operatorPrecedence
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
    """
    ret = Forward()
    # the innermost level is the base operand or a parenthesized full expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is None:
                    # no operator expression: the terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
                else:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is None:
                    # no operator expression: the terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")

        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or falls back to the level below
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret

# Ready-made expressions for quoted string literals.  Inside the quotes each
# pattern accepts: any character other than the quote itself, a newline, a
# carriage return or a backslash; a doubled quote character; a backslash-x
# hex escape; or a backslash followed by any single character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")

# Alternation of the two patterns above: double-quoted first, then single-quoted.
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")

# A quoted string (either style) prefixed with a literal 'u', combined into
# a single token; works on a copy so quotedString itself is left untouched.
unicodeString = Combine(_L('u') + quotedString.copy())

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method that builds an expression for nested lists enclosed in
       opening and closing delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also
          be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also
          be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters
          (default=quotedString)

       When no content expression is supplied, the nested expression captures
       all whitespace-delimited content between the delimiters as a list of
       separate values.  In that case opener and closer must both be strings,
       otherwise ValueError is raised.  ValueError is also raised when opener
       and closer compare equal.

       Use ignoreExpr for expressions that may themselves contain opening or
       closing characters which must not be treated as nesting delimiters,
       such as quotedString or a comment expression.  Specify multiple
       expressions using an Or or MatchFirst.  The default is quotedString;
       pass None for this argument if no expressions are to be ignored.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        def stripToken(t):
            # parse action: trim surrounding whitespace from the matched token
            return t[0].strip()

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can simply be excluded by CharsNotIn
            excludedChars = opener + closer + whiteChars
            if ignoreExpr is None:
                content = empty.copy() + CharsNotIn(excludedChars).setParseAction(stripToken)
            else:
                oneChar = ~ignoreExpr + CharsNotIn(excludedChars, exact=1)
                content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)
        else:
            # multi-character delimiters need a negative lookahead before
            # every single character that is consumed
            if ignoreExpr is None:
                oneChar = ~Literal(opener) + ~Literal(closer) + CharsNotIn(whiteChars, exact=1)
            else:
                oneChar = ~ignoreExpr + ~Literal(opener) + ~Literal(closer) + CharsNotIn(whiteChars, exact=1)
            content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)

    # the result refers to itself so that groups may nest to any depth
    ret = Forward()
    if ignoreExpr is None:
        item = ret | content
    else:
        item = ignoreExpr | ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3671
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining the syntax of the statement
          that is repeated within the indented block
        - indentStack - list created by the caller to manage the indentation
          stack (multiple statementWithIndentedBlock expressions within a
          single grammar should share a common indentStack)
        - indent - boolean indicating whether the block must be indented beyond
          the current level; set to False for a block of left-most statements
          (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr is modified in place: it is told to ignore
       a backslash followed by a line end.
    """
    def checkPeerIndent(s,l,t):
        # nothing to verify once the end of the input has been reached
        if l >= len(s):
            return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol > expectedCol:
            # deeper than the current block: abort parsing altogether
            raise ParseFatalException(s,l,"illegal nesting")
        if curCol < expectedCol:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # entering a deeper block: remember its column
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        curCol = col(l,s)
        # must be left of the current block and no deeper than the enclosing one
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    newlines = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    indentMarker = Empty() + Empty().setParseAction(checkSubIndent)
    peerMarker = Empty().setParseAction(checkPeerIndent)
    unindentMarker = Empty().setParseAction(checkUnindent)

    leadingNewlines = Optional(newlines)
    statements = OneOrMore( peerMarker + Group(blockStatementExpr) + Optional(newlines) )
    if indent:
        smExpr = Group( leadingNewlines + indentMarker + statements + unindentMarker )
    else:
        smExpr = Group( leadingNewlines + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# character sets built by srange from 8-bit code ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic opening/closing HTML tag expressions and common entity handling
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))

def replaceHTMLEntity(t):
    # parse action: map the "entity" result to its character; None for an
    # unknown entity (or a falsy mapping)
    return _htmlEntityMap.get(t.entity) or None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
    ).setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# comma-separated lists: items are quoted strings or runs of non-comma
# printables, optionally joined by interior spaces/tabs
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":

    def test( teststring ):
        # parse one sample statement with simpleSQL and print either the
        # parsed results or the location of the parse error
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() keeps this working on both Python 2 and 3
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    # a tiny "select <columns> from <tables>" grammar for the demo
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    # samples are run in order; several are deliberately malformed
    for sample in (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
        ):
        test( sample )
