| Package pyparsing ::
        Module pyparsing |  | 
   1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16   
  17   
  18   
  19   
  20   
  21   
  22   
  23   
  24   
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.5" 
  62  __versionTime__ = "12 Aug 2010 03:56" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy 
  68  import sys 
  69  import warnings 
  70  import re 
  71  import sre_constants 
  72   
  73   
  74  __all__ = [ 
  75  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  76  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  77  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  78  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  79  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  80  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  81  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  82  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  83  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  84  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 
  85  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  86  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  87  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  88  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  89  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  90  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  91  'indentedBlock', 'originalTextFor', 
  92  ] 
  93   
  94  """ 
  95  Detect if we are running version 3.X and make appropriate changes 
  96  Robert A. Clark 
  97  """ 
  98  _PY3K = sys.version_info[0] > 2 
  99  if _PY3K: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104      _str2dict = set 
 105      alphas = string.ascii_lowercase + string.ascii_uppercase 
 106  else: 
 107      _MAX_INT = sys.maxint 
 108      range = xrange 
 109   
 111          """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 
 112             str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 
 113             then < returns the unicode object | encodes it with the default encoding | ... >. 
 114          """ 
 115          if isinstance(obj,unicode): 
 116              return obj 
 117   
 118          try: 
 119               
 120               
 121              return str(obj) 
 122   
 123          except UnicodeEncodeError: 
 124               
 125               
 126               
 127               
 128               
 129              return unicode(obj) 
  130               
 131               
 132               
 133               
 134               
 135               
 136               
 138          return dict( [(c,0) for c in strg] ) 
  139               
 140      alphas = string.lowercase + string.uppercase 
 141   
 142   
 143  singleArgBuiltins = [] 
 144  import __builtin__ 
 145  for fname in "sum len enumerate sorted reversed list tuple set any all".split(): 
 146      try: 
 147          singleArgBuiltins.append(getattr(__builtin__,fname)) 
 148      except AttributeError: 
 149          continue 
 150   
 152      """Escape &, <, >, ", ', etc. in a string of data.""" 
 153   
 154       
 155      from_symbols = '&><"\'' 
 156      to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] 
 157      for from_,to_ in zip(from_symbols, to_symbols): 
 158          data = data.replace(from_, to_) 
 159      return data 
  160   
 163   
# Character-set constants used when building Word/CharsNotIn style expressions.
nums       = string.digits
hexnums    = nums + "ABCDEFabcdef"
# alphas is assigned earlier in the file, in the Python-version-specific setup
alphanums  = alphas + nums
_bslash    = chr(92)    # chr(92) is the backslash character
# every character of string.printable that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
 169   
 171      """base exception class for all parsing runtime exceptions""" 
 172       
 173       
 174 -    def __init__( self, pstr, loc=0, msg=None, elem=None ): 
  175          self.loc = loc 
 176          if msg is None: 
 177              self.msg = pstr 
 178              self.pstr = "" 
 179          else: 
 180              self.msg = msg 
 181              self.pstr = pstr 
 182          self.parserElement = elem 
  183   
 185          """supported attributes by name are: 
 186              - lineno - returns the line number of the exception text 
 187              - col - returns the column number of the exception text 
 188              - line - returns the line containing the exception text 
 189          """ 
 190          if( aname == "lineno" ): 
 191              return lineno( self.loc, self.pstr ) 
 192          elif( aname in ("col", "column") ): 
 193              return col( self.loc, self.pstr ) 
 194          elif( aname == "line" ): 
 195              return line( self.loc, self.pstr ) 
 196          else: 
 197              raise AttributeError(aname) 
  198   
 200          return "%s (at char %d), (line:%d, col:%d)" % \ 
 201                  ( self.msg, self.loc, self.lineno, self.column ) 
  215          return "loc msg pstr parserElement lineno col line " \ 
 216                 "markInputLine __str__ __repr__".split() 
   217   
 219      """exception thrown when parse expressions don't match class; 
 220         supported attributes by name are: 
 221          - lineno - returns the line number of the exception text 
 222          - col - returns the column number of the exception text 
 223          - line - returns the line containing the exception text 
 224      """ 
 225      pass 
  226   
 228      """user-throwable exception thrown when inconsistent parse content 
 229         is found; stops all parsing immediately""" 
 230      pass 
  231   
 233      """just like C{ParseFatalException}, but thrown internally when an 
 234         C{ErrorStop} ('-' operator) indicates that parsing is to stop immediately because 
 235         an unbacktrackable syntax error has been found""" 
  239   
 240   
 241       
 242          
 243           
 244           
 245          
 246          
 247          
 248          
 249       
 250           
 251           
 252   
 254      """exception thrown by C{validate()} if the grammar could be improperly recursive""" 
    def __init__( self, parseElementList ):
        """Store C{parseElementList} as C{parseElementTrace}, the value this
           exception reports when converted to a string."""
        self.parseElementTrace = parseElementList
  257   
 259          return "RecursiveGrammarException: %s" % self.parseElementTrace 
   260   
 267          return repr(self.tup) 
  269          self.tup = (self.tup[0],i) 
  272      """Structured parse results, to provide multiple means of access to the parsed data: 
 273         - as a list (C{len(results)}) 
 274         - by list index (C{results[0], results[1]}, etc.) 
 275         - by attribute (C{results.<resultsName>}) 
 276         """ 
 277       
 278 -    def __new__(cls, toklist, name=None, asList=True, modal=True ): 
  279          if isinstance(toklist, cls): 
 280              return toklist 
 281          retobj = object.__new__(cls) 
 282          retobj.__doinit = True 
 283          return retobj 
  284   
 285       
 286       
 287 -    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ): 
  288          if self.__doinit: 
 289              self.__doinit = False 
 290              self.__name = None 
 291              self.__parent = None 
 292              self.__accumNames = {} 
 293              if isinstance(toklist, list): 
 294                  self.__toklist = toklist[:] 
 295              else: 
 296                  self.__toklist = [toklist] 
 297              self.__tokdict = dict() 
 298   
 299          if name is not None and name: 
 300              if not modal: 
 301                  self.__accumNames[name] = 0 
 302              if isinstance(name,int): 
 303                  name = _ustr(name)  
 304              self.__name = name 
 305              if not toklist in (None,'',[]): 
 306                  if isinstance(toklist,basestring): 
 307                      toklist = [ toklist ] 
 308                  if asList: 
 309                      if isinstance(toklist,ParseResults): 
 310                          self[name] = _ParseResultsWithOffset(toklist.copy(),0) 
 311                      else: 
 312                          self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 
 313                      self[name].__name = name 
 314                  else: 
 315                      try: 
 316                          self[name] = toklist[0] 
 317                      except (KeyError,TypeError,IndexError): 
 318                          self[name] = toklist 
  319   
 321          if isinstance( i, (int,slice) ): 
 322              return self.__toklist[i] 
 323          else: 
 324              if i not in self.__accumNames: 
 325                  return self.__tokdict[i][-1][0] 
 326              else: 
 327                  return ParseResults([ v[0] for v in self.__tokdict[i] ]) 
  328   
 330          if isinstance(v,_ParseResultsWithOffset): 
 331              self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 
 332              sub = v[0] 
 333          elif isinstance(k,int): 
 334              self.__toklist[k] = v 
 335              sub = v 
 336          else: 
 337              self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 
 338              sub = v 
 339          if isinstance(sub,ParseResults): 
 340              sub.__parent = wkref(self) 
  341   
 343          if isinstance(i,(int,slice)): 
 344              mylen = len( self.__toklist ) 
 345              del self.__toklist[i] 
 346   
 347               
 348              if isinstance(i, int): 
 349                  if i < 0: 
 350                      i += mylen 
 351                  i = slice(i, i+1) 
 352               
 353              removed = list(range(*i.indices(mylen))) 
 354              removed.reverse() 
 355               
 356              for name in self.__tokdict: 
 357                  occurrences = self.__tokdict[name] 
 358                  for j in removed: 
 359                      for k, (value, position) in enumerate(occurrences): 
 360                          occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 
 361          else: 
 362              del self.__tokdict[i] 
  363   
 365          return k in self.__tokdict 
  366   
 367 -    def __len__( self ): return len( self.__toklist ) 
  368 -    def __bool__(self): return len( self.__toklist ) > 0 
  369      __nonzero__ = __bool__ 
 370 -    def __iter__( self ): return iter( self.__toklist ) 
  371 -    def __reversed__( self ): return iter( self.__toklist[::-1] ) 
  373          """Returns all named result keys.""" 
 374          return self.__tokdict.keys() 
  375   
 376 -    def pop( self, index=-1 ): 
  377          """Removes and returns item at specified index (default=last). 
 378             Will work with either numeric indices or dict-key indicies.""" 
 379          ret = self[index] 
 380          del self[index] 
 381          return ret 
  382   
 383 -    def get(self, key, defaultValue=None): 
  384          """Returns named result matching the given key, or if there is no 
 385             such name, then returns the given C{defaultValue} or C{None} if no 
 386             C{defaultValue} is specified.""" 
 387          if key in self: 
 388              return self[key] 
 389          else: 
 390              return defaultValue 
  391   
 392 -    def insert( self, index, insStr ): 
  393          """Inserts new element at location index in the list of parsed tokens.""" 
 394          self.__toklist.insert(index, insStr) 
 395           
 396          for name in self.__tokdict: 
 397              occurrences = self.__tokdict[name] 
 398              for k, (value, position) in enumerate(occurrences): 
 399                  occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 
  400   
 402          """Returns all named result keys and values as a list of tuples.""" 
 403          return [(k,self[k]) for k in self.__tokdict] 
  404   
 406          """Returns all named result values.""" 
 407          return [ v[-1][0] for v in self.__tokdict.values() ] 
  408   
 410          if True:  
 411              if name in self.__tokdict: 
 412                  if name not in self.__accumNames: 
 413                      return self.__tokdict[name][-1][0] 
 414                  else: 
 415                      return ParseResults([ v[0] for v in self.__tokdict[name] ]) 
 416              else: 
 417                  return "" 
 418          return None 
  419   
 421          ret = self.copy() 
 422          ret += other 
 423          return ret 
  424   
 426          if other.__tokdict: 
 427              offset = len(self.__toklist) 
 428              addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) 
 429              otheritems = other.__tokdict.items() 
 430              otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 
 431                                  for (k,vlist) in otheritems for v in vlist] 
 432              for k,v in otherdictitems: 
 433                  self[k] = v 
 434                  if isinstance(v[0],ParseResults): 
 435                      v[0].__parent = wkref(self) 
 436               
 437          self.__toklist += other.__toklist 
 438          self.__accumNames.update( other.__accumNames ) 
 439          return self 
  440   
 442          if isinstance(other,int) and other == 0: 
 443              return self.copy() 
  444           
 446          return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 
  447   
 449          out = "[" 
 450          sep = "" 
 451          for i in self.__toklist: 
 452              if isinstance(i, ParseResults): 
 453                  out += sep + _ustr(i) 
 454              else: 
 455                  out += sep + repr(i) 
 456              sep = ", " 
 457          out += "]" 
 458          return out 
  459   
 461          out = [] 
 462          for item in self.__toklist: 
 463              if out and sep: 
 464                  out.append(sep) 
 465              if isinstance( item, ParseResults ): 
 466                  out += item._asStringList() 
 467              else: 
 468                  out.append( _ustr(item) ) 
 469          return out 
  470   
 472          """Returns the parse results as a nested list of matching tokens, all converted to strings.""" 
 473          out = [] 
 474          for res in self.__toklist: 
 475              if isinstance(res,ParseResults): 
 476                  out.append( res.asList() ) 
 477              else: 
 478                  out.append( res ) 
 479          return out 
  480   
 482          """Returns the named parse results as dictionary.""" 
 483          return dict( self.items() ) 
  484   
 486          """Returns a new copy of a C{ParseResults} object.""" 
 487          ret = ParseResults( self.__toklist ) 
 488          ret.__tokdict = self.__tokdict.copy() 
 489          ret.__parent = self.__parent 
 490          ret.__accumNames.update( self.__accumNames ) 
 491          ret.__name = self.__name 
 492          return ret 
  493   
 494 -    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): 
  495          """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" 
 496          nl = "\n" 
 497          out = [] 
 498          namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() 
 499                                                              for v in vlist ] ) 
 500          nextLevelIndent = indent + "  " 
 501   
 502           
 503          if not formatted: 
 504              indent = "" 
 505              nextLevelIndent = "" 
 506              nl = "" 
 507   
 508          selfTag = None 
 509          if doctag is not None: 
 510              selfTag = doctag 
 511          else: 
 512              if self.__name: 
 513                  selfTag = self.__name 
 514   
 515          if not selfTag: 
 516              if namedItemsOnly: 
 517                  return "" 
 518              else: 
 519                  selfTag = "ITEM" 
 520   
 521          out += [ nl, indent, "<", selfTag, ">" ] 
 522   
 523          worklist = self.__toklist 
 524          for i,res in enumerate(worklist): 
 525              if isinstance(res,ParseResults): 
 526                  if i in namedItems: 
 527                      out += [ res.asXML(namedItems[i], 
 528                                          namedItemsOnly and doctag is None, 
 529                                          nextLevelIndent, 
 530                                          formatted)] 
 531                  else: 
 532                      out += [ res.asXML(None, 
 533                                          namedItemsOnly and doctag is None, 
 534                                          nextLevelIndent, 
 535                                          formatted)] 
 536              else: 
 537                   
 538                  resTag = None 
 539                  if i in namedItems: 
 540                      resTag = namedItems[i] 
 541                  if not resTag: 
 542                      if namedItemsOnly: 
 543                          continue 
 544                      else: 
 545                          resTag = "ITEM" 
 546                  xmlBodyText = _xml_escape(_ustr(res)) 
 547                  out += [ nl, nextLevelIndent, "<", resTag, ">", 
 548                                                  xmlBodyText, 
 549                                                  "</", resTag, ">" ] 
 550   
 551          out += [ nl, indent, "</", selfTag, ">" ] 
 552          return "".join(out) 
  553   
 555          for k,vlist in self.__tokdict.items(): 
 556              for v,loc in vlist: 
 557                  if sub is v: 
 558                      return k 
 559          return None 
  560   
 562          """Returns the results name for this token expression.""" 
 563          if self.__name: 
 564              return self.__name 
 565          elif self.__parent: 
 566              par = self.__parent() 
 567              if par: 
 568                  return par.__lookup(self) 
 569              else: 
 570                  return None 
 571          elif (len(self) == 1 and 
 572                 len(self.__tokdict) == 1 and 
 573                 self.__tokdict.values()[0][0][1] in (0,-1)): 
 574              return self.__tokdict.keys()[0] 
 575          else: 
 576              return None 
  577   
 578 -    def dump(self,indent='',depth=0): 
  579          """Diagnostic method for listing out the contents of a C{ParseResults}. 
 580             Accepts an optional C{indent} argument so that this string can be embedded 
 581             in a nested display of other data.""" 
 582          out = [] 
 583          out.append( indent+_ustr(self.asList()) ) 
 584          keys = self.items() 
 585          keys.sort() 
 586          for k,v in keys: 
 587              if out: 
 588                  out.append('\n') 
 589              out.append( "%s%s- %s: " % (indent,('  '*depth), k) ) 
 590              if isinstance(v,ParseResults): 
 591                  if v.keys(): 
 592                      out.append( v.dump(indent,depth+1) ) 
 593                  else: 
 594                      out.append(_ustr(v)) 
 595              else: 
 596                  out.append(_ustr(v)) 
 597          return "".join(out) 
  598   
 599       
 601          return ( self.__toklist, 
 602                   ( self.__tokdict.copy(), 
 603                     self.__parent is not None and self.__parent() or None, 
 604                     self.__accumNames, 
 605                     self.__name ) ) 
  606   
 608          self.__toklist = state[0] 
 609          self.__tokdict, \ 
 610          par, \ 
 611          inAccumNames, \ 
 612          self.__name = state[1] 
 613          self.__accumNames = {} 
 614          self.__accumNames.update(inAccumNames) 
 615          if par is not None: 
 616              self.__parent = wkref(par) 
 617          else: 
 618              self.__parent = None 
  619   
  622   
 624      """Returns current column within a string, counting newlines as line separators. 
 625     The first column is number 1. 
 626   
 627     Note: the default parsing behavior is to expand tabs in the input string 
 628     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 629     on parsing strings containing <TAB>s, and suggested methods to maintain a 
 630     consistent view of the parsed string, the parse location, and line and column 
 631     positions within the parsed string. 
 632     """ 
 633      return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) 
  634   
 636      """Returns current line number within a string, counting newlines as line separators. 
 637     The first line is number 1. 
 638   
 639     Note: the default parsing behavior is to expand tabs in the input string 
 640     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 641     on parsing strings containing <TAB>s, and suggested methods to maintain a 
 642     consistent view of the parsed string, the parse location, and line and column 
 643     positions within the parsed string. 
 644     """ 
 645      return strg.count("\n",0,loc) + 1 
  646   
 647 -def line( loc, strg ): 
  648      """Returns the line of text containing loc within a string, counting newlines as line separators. 
 649         """ 
 650      lastCR = strg.rfind("\n", 0, loc) 
 651      nextCR = strg.find("\n", loc) 
 652      if nextCR >= 0: 
 653          return strg[lastCR+1:nextCR] 
 654      else: 
 655          return strg[lastCR+1:] 
  656   
 658      print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 
  659   
 661      print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 
  662   
 664      print ("Exception raised:" + _ustr(exc)) 
  665   
 667      """'Do-nothing' debug action, to suppress debugging output during parsing.""" 
 668      pass 
  669   
 671      """Abstract base level parser element class.""" 
 672      DEFAULT_WHITE_CHARS = " \n\t\r" 
 673      verbose_stacktrace = False 
 674   
 679      setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 
 680   
 682          self.parseAction = list() 
 683          self.failAction = None 
 684           
 685          self.strRepr = None 
 686          self.resultsName = None 
 687          self.saveAsList = savelist 
 688          self.skipWhitespace = True 
 689          self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
 690          self.copyDefaultWhiteChars = True 
 691          self.mayReturnEmpty = False  
 692          self.keepTabs = False 
 693          self.ignoreExprs = list() 
 694          self.debug = False 
 695          self.streamlined = False 
 696          self.mayIndexError = True  
 697          self.errmsg = "" 
 698          self.modalResults = True  
 699          self.debugActions = ( None, None, None )  
 700          self.re = None 
 701          self.callPreparse = True  
 702          self.callDuringTry = False 
  703   
 705          """Make a copy of this C{ParserElement}.  Useful for defining different parse actions 
 706             for the same parsing pattern, using copies of the original parse element.""" 
 707          cpy = copy.copy( self ) 
 708          cpy.parseAction = self.parseAction[:] 
 709          cpy.ignoreExprs = self.ignoreExprs[:] 
 710          if self.copyDefaultWhiteChars: 
 711              cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
 712          return cpy 
  713   
 715          """Define name for this expression, for use in debugging.""" 
 716          self.name = name 
 717          self.errmsg = "Expected " + self.name 
 718          if hasattr(self,"exception"): 
 719              self.exception.msg = self.errmsg 
 720          return self 
  721   
 723          """Define name for referencing matching tokens as a nested attribute 
 724             of the returned parse results. 
 725             NOTE: this returns a *copy* of the original C{ParserElement} object; 
 726             this is so that the client can define a basic element, such as an 
 727             integer, and reference it in multiple places with different names. 
 728              
 729             You can also set results names using the abbreviated syntax, 
 730             C{expr("name")} in place of C{expr.setResultsName("name")} -  
 731             see L{I{__call__}<__call__>}. 
 732          """ 
 733          newself = self.copy() 
 734          newself.resultsName = name 
 735          newself.modalResults = not listAllMatches 
 736          return newself 
  737   
 739          """Method to invoke the Python pdb debugger when this element is 
 740             about to be parsed. Set C{breakFlag} to True to enable, False to 
 741             disable. 
 742          """ 
 743          if breakFlag: 
 744              _parseMethod = self._parse 
 745              def breaker(instring, loc, doActions=True, callPreParse=True): 
 746                  import pdb 
 747                  pdb.set_trace() 
 748                  return _parseMethod( instring, loc, doActions, callPreParse ) 
  749              breaker._originalParseMethod = _parseMethod 
 750              self._parse = breaker 
 751          else: 
 752              if hasattr(self._parse,"_originalParseMethod"): 
 753                  self._parse = self._parse._originalParseMethod 
 754          return self 
  755   
 757          """Internal method used to decorate parse actions that take fewer than 3 arguments, 
 758             so that all parse actions can be called as C{f(s,l,t)}.""" 
 759          STAR_ARGS = 4 
 760   
 761           
 762          if (f in singleArgBuiltins): 
 763              numargs = 1 
 764          else: 
 765                  try: 
 766                      restore = None 
 767                      if isinstance(f,type): 
 768                          restore = f 
 769                          f = f.__init__ 
 770                      if not _PY3K: 
 771                          codeObj = f.func_code 
 772                      else: 
 773                          codeObj = f.code 
 774                      if codeObj.co_flags & STAR_ARGS: 
 775                          return f 
 776                      numargs = codeObj.co_argcount 
 777                      if not _PY3K: 
 778                          if hasattr(f,"im_self"): 
 779                              numargs -= 1 
 780                      else: 
 781                          if hasattr(f,"__self__"): 
 782                              numargs -= 1 
 783                      if restore: 
 784                          f = restore 
 785                  except AttributeError: 
 786                      try: 
 787                          if not _PY3K: 
 788                              call_im_func_code = f.__call__.im_func.func_code 
 789                          else: 
 790                              call_im_func_code = f.__code__ 
 791           
 792                           
 793                           
 794                          if call_im_func_code.co_flags & STAR_ARGS: 
 795                              return f 
 796                          numargs = call_im_func_code.co_argcount 
 797                          if not _PY3K: 
 798                              if hasattr(f.__call__,"im_self"): 
 799                                  numargs -= 1 
 800                          else: 
 801                              if hasattr(f.__call__,"__self__"): 
 802                                  numargs -= 0 
 803                      except AttributeError: 
 804                          if not _PY3K: 
 805                              call_func_code = f.__call__.func_code 
 806                          else: 
 807                              call_func_code = f.__call__.__code__ 
 808                           
 809                          if call_func_code.co_flags & STAR_ARGS: 
 810                              return f 
 811                          numargs = call_func_code.co_argcount 
 812                          if not _PY3K: 
 813                              if hasattr(f.__call__,"im_self"): 
 814                                  numargs -= 1 
 815                          else: 
 816                              if hasattr(f.__call__,"__self__"): 
 817                                  numargs -= 1 
 818   
 819   
 820           
 821          if numargs == 3: 
 822              return f 
 823          else: 
 824              if numargs > 3: 
 825                  def tmp(s,l,t): 
 826                      return f(f.__call__.__self__, s,l,t) 
  827              if numargs == 2: 
 828                  def tmp(s,l,t): 
 829                      return f(l,t) 
 830              elif numargs == 1: 
 831                  def tmp(s,l,t): 
 832                      return f(t) 
 833              else:  
 834                  def tmp(s,l,t): 
 835                      return f() 
 836              try: 
 837                  tmp.__name__ = f.__name__ 
 838              except (AttributeError,TypeError): 
 839                   
 840                  pass 
 841              try: 
 842                  tmp.__doc__ = f.__doc__ 
 843              except (AttributeError,TypeError): 
 844                   
 845                  pass 
 846              try: 
 847                  tmp.__dict__.update(f.__dict__) 
 848              except (AttributeError,TypeError): 
 849                   
 850                  pass 
 851              return tmp 
 852      _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) 
 853   
 855          """Define action to perform when successfully matching parse element definition. 
 856             Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 
 857             C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 
 858              - s   = the original string being parsed (see note below) 
 859              - loc = the location of the matching substring 
 860              - toks = a list of the matched tokens, packaged as a ParseResults object 
 861             If the functions in fns modify the tokens, they can return them as the return 
 862             value from fn, and the modified list of tokens will replace the original. 
 863             Otherwise, fn does not need to return any value. 
 864   
 865             Note: the default parsing behavior is to expand tabs in the input string 
 866             before starting the parsing process.  See L{I{parseString}<parseString>} for more information 
 867             on parsing strings containing <TAB>s, and suggested methods to maintain a 
 868             consistent view of the parsed string, the parse location, and line and column 
 869             positions within the parsed string. 
 870             """ 
 871          self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) 
 872          self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 
 873          return self 
  874   
 876          """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 
 877          self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) 
 878          self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 
 879          return self 
  880   
 882          """Define action to perform if parsing fails at this expression. 
 883             Fail action fn is a callable function that takes the arguments 
 884             C{fn(s,loc,expr,err)} where: 
 885              - s = string being parsed 
 886              - loc = location where expression match was attempted and failed 
 887              - expr = the parse expression that failed 
 888              - err = the exception thrown 
 889             The function returns no value.  It may throw C{ParseFatalException} 
 890             if it is desired to stop parsing immediately.""" 
 891          self.failAction = fn 
 892          return self 
  893   
 895          exprsFound = True 
 896          while exprsFound: 
 897              exprsFound = False 
 898              for e in self.ignoreExprs: 
 899                  try: 
 900                      while 1: 
 901                          loc,dummy = e._parse( instring, loc ) 
 902                          exprsFound = True 
 903                  except ParseException: 
 904                      pass 
 905          return loc 
  906   
 908          if self.ignoreExprs: 
 909              loc = self._skipIgnorables( instring, loc ) 
 910   
 911          if self.skipWhitespace: 
 912              wt = self.whiteChars 
 913              instrlen = len(instring) 
 914              while loc < instrlen and instring[loc] in wt: 
 915                  loc += 1 
 916   
 917          return loc 
  918   
 919 -    def parseImpl( self, instring, loc, doActions=True ): 
  921   
 922 -    def postParse( self, instring, loc, tokenlist ): 
  924   
 925       
 926 -    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): 
  927          debugging = ( self.debug )  
 928   
 929          if debugging or self.failAction: 
 930               
 931              if (self.debugActions[0] ): 
 932                  self.debugActions[0]( instring, loc, self ) 
 933              if callPreParse and self.callPreparse: 
 934                  preloc = self.preParse( instring, loc ) 
 935              else: 
 936                  preloc = loc 
 937              tokensStart = preloc 
 938              try: 
 939                  try: 
 940                      loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 941                  except IndexError: 
 942                      raise ParseException( instring, len(instring), self.errmsg, self ) 
 943              except ParseBaseException: 
 944                   
 945                  err = None 
 946                  if self.debugActions[2]: 
 947                      err = sys.exc_info()[1] 
 948                      self.debugActions[2]( instring, tokensStart, self, err ) 
 949                  if self.failAction: 
 950                      if err is None: 
 951                          err = sys.exc_info()[1] 
 952                      self.failAction( instring, tokensStart, self, err ) 
 953                  raise 
 954          else: 
 955              if callPreParse and self.callPreparse: 
 956                  preloc = self.preParse( instring, loc ) 
 957              else: 
 958                  preloc = loc 
 959              tokensStart = preloc 
 960              if self.mayIndexError or loc >= len(instring): 
 961                  try: 
 962                      loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 963                  except IndexError: 
 964                      raise ParseException( instring, len(instring), self.errmsg, self ) 
 965              else: 
 966                  loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 967   
 968          tokens = self.postParse( instring, loc, tokens ) 
 969   
 970          retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) 
 971          if self.parseAction and (doActions or self.callDuringTry): 
 972              if debugging: 
 973                  try: 
 974                      for fn in self.parseAction: 
 975                          tokens = fn( instring, tokensStart, retTokens ) 
 976                          if tokens is not None: 
 977                              retTokens = ParseResults( tokens, 
 978                                                        self.resultsName, 
 979                                                        asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
 980                                                        modal=self.modalResults ) 
 981                  except ParseBaseException: 
 982                       
 983                      if (self.debugActions[2] ): 
 984                          err = sys.exc_info()[1] 
 985                          self.debugActions[2]( instring, tokensStart, self, err ) 
 986                      raise 
 987              else: 
 988                  for fn in self.parseAction: 
 989                      tokens = fn( instring, tokensStart, retTokens ) 
 990                      if tokens is not None: 
 991                          retTokens = ParseResults( tokens, 
 992                                                    self.resultsName, 
 993                                                    asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
 994                                                    modal=self.modalResults ) 
 995   
 996          if debugging: 
 997               
 998              if (self.debugActions[1] ): 
 999                  self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) 
1000   
1001          return loc, retTokens 
 1002   
1008   
1009       
1010       
1011 -    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): 
 1027   
1028      _parse = _parseNoCache 
1029   
1030       
1031      _exprArgCache = {} 
1034      resetCache = staticmethod(resetCache) 
1035   
1036      _packratEnabled = False 
1038          """Enables "packrat" parsing, which adds memoizing to the parsing logic. 
1039             Repeated parse attempts at the same string location (which happens 
1040             often in many complex grammars) can immediately return a cached value, 
1041             instead of re-executing parsing/validating code.  Memoizing is done of 
1042             both valid results and parsing exceptions. 
1043   
1044             This speedup may break existing programs that use parse actions that 
1045             have side-effects.  For this reason, packrat parsing is disabled when 
1046             you first import pyparsing.  To activate the packrat feature, your 
1047             program must call the class method C{ParserElement.enablePackrat()}.  If 
1048             your program uses C{psyco} to "compile as you go", you must call 
1049             C{enablePackrat} before calling C{psyco.full()}.  If you do not do this, 
1050             Python will crash.  For best results, call C{enablePackrat()} immediately 
1051             after importing pyparsing. 
1052          """ 
1053          if not ParserElement._packratEnabled: 
1054              ParserElement._packratEnabled = True 
1055              ParserElement._parse = ParserElement._parseCache 
 1056      enablePackrat = staticmethod(enablePackrat) 
1057   
1059          """Execute the parse expression with the given string. 
1060             This is the main interface to the client code, once the complete 
1061             expression has been built. 
1062   
1063             If you want the grammar to require that the entire input string be 
1064             successfully parsed, then set C{parseAll} to True (equivalent to ending 
1065             the grammar with C{StringEnd()}). 
1066   
1067             Note: C{parseString} implicitly calls C{expandtabs()} on the input string, 
1068             in order to report proper column numbers in parse actions. 
1069             If the input string contains tabs and 
1070             the grammar uses parse actions that use the C{loc} argument to index into the 
1071             string being parsed, you can ensure you have a consistent view of the input 
1072             string by: 
1073              - calling C{parseWithTabs} on your grammar before calling C{parseString} 
1074                (see L{I{parseWithTabs}<parseWithTabs>}) 
1075              - define your parse action using the full C{(s,loc,toks)} signature, and 
1076                reference the input string using the parse action's C{s} argument 
1077              - explicitly expand the tabs in your input string before calling 
1078                C{parseString} 
1079          """ 
1080          ParserElement.resetCache() 
1081          if not self.streamlined: 
1082              self.streamline() 
1083               
1084          for e in self.ignoreExprs: 
1085              e.streamline() 
1086          if not self.keepTabs: 
1087              instring = instring.expandtabs() 
1088          try: 
1089              loc, tokens = self._parse( instring, 0 ) 
1090              if parseAll: 
1091                   
1092                  se = StringEnd() 
1093                  se._parse( instring, loc ) 
1094          except ParseBaseException: 
1095              if ParserElement.verbose_stacktrace: 
1096                  raise 
1097              else: 
1098                   
1099                  exc = sys.exc_info()[1] 
1100                  raise exc 
1101          else: 
1102              return tokens 
 1103   
1105          """Scan the input string for expression matches.  Each match will return the 
1106             matching tokens, start location, and end location.  May be called with optional 
1107             C{maxMatches} argument, to clip scanning after 'n' matches are found. 
1108   
1109             Note that the start and end locations are reported relative to the string 
1110             being parsed.  See L{I{parseString}<parseString>} for more information on parsing 
1111             strings with embedded tabs.""" 
1112          if not self.streamlined: 
1113              self.streamline() 
1114          for e in self.ignoreExprs: 
1115              e.streamline() 
1116   
1117          if not self.keepTabs: 
1118              instring = _ustr(instring).expandtabs() 
1119          instrlen = len(instring) 
1120          loc = 0 
1121          preparseFn = self.preParse 
1122          parseFn = self._parse 
1123          ParserElement.resetCache() 
1124          matches = 0 
1125          try: 
1126              while loc <= instrlen and matches < maxMatches: 
1127                  try: 
1128                      preloc = preparseFn( instring, loc ) 
1129                      nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) 
1130                  except ParseException: 
1131                      loc = preloc+1 
1132                  else: 
1133                      if nextLoc > loc: 
1134                          matches += 1 
1135                          yield tokens, preloc, nextLoc 
1136                          loc = nextLoc 
1137                      else: 
1138                          loc = preloc+1 
1139          except ParseBaseException: 
1140              if ParserElement.verbose_stacktrace: 
1141                  raise 
1142              else: 
1143                   
1144                  exc = sys.exc_info()[1] 
1145                  raise exc 
 1146   
1179   
1181          """Another extension to C{scanString}, simplifying the access to the tokens found 
1182             to match the given parse expression.  May be called with optional 
1183             C{maxMatches} argument, to clip searching after 'n' matches are found. 
1184          """ 
1185          try: 
1186              return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) 
1187          except ParseBaseException: 
1188              if ParserElement.verbose_stacktrace: 
1189                  raise 
1190              else: 
1191                   
1192                  exc = sys.exc_info()[1] 
1193                  raise exc 
 1194   
1196          """Implementation of + operator - returns And""" 
1197          if isinstance( other, basestring ): 
1198              other = Literal( other ) 
1199          if not isinstance( other, ParserElement ): 
1200              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1201                      SyntaxWarning, stacklevel=2) 
1202              return None 
1203          return And( [ self, other ] ) 
 1204   
1206          """Implementation of + operator when left operand is not a C{ParserElement}""" 
1207          if isinstance( other, basestring ): 
1208              other = Literal( other ) 
1209          if not isinstance( other, ParserElement ): 
1210              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1211                      SyntaxWarning, stacklevel=2) 
1212              return None 
1213          return other + self 
 1214   
1216          """Implementation of - operator, returns C{And} with error stop""" 
1217          if isinstance( other, basestring ): 
1218              other = Literal( other ) 
1219          if not isinstance( other, ParserElement ): 
1220              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1221                      SyntaxWarning, stacklevel=2) 
1222              return None 
1223          return And( [ self, And._ErrorStop(), other ] ) 
 1224   
1226          """Implementation of - operator when left operand is not a C{ParserElement}""" 
1227          if isinstance( other, basestring ): 
1228              other = Literal( other ) 
1229          if not isinstance( other, ParserElement ): 
1230              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1231                      SyntaxWarning, stacklevel=2) 
1232              return None 
1233          return other - self 
 1234   
1236          """Implementation of * operator, allows use of C{expr * 3} in place of 
1237             C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer 
1238             tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples 
1239             may also include C{None} as in: 
1240              - C{expr*(n,None)} or C{expr*(n,)} is equivalent 
1241                to C{expr*n + ZeroOrMore(expr)} 
1242                (read as "at least n instances of C{expr}") 
1243              - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 
1244                (read as "0 to n instances of C{expr}") 
1245              - C{expr*(None,None)} is equivalent to C{ZeroOrMore(expr)} 
1246              - C{expr*(1,None)} is equivalent to C{OneOrMore(expr)} 
1247   
1248             Note that C{expr*(None,n)} does not raise an exception if 
1249             more than n exprs exist in the input stream; that is, 
1250             C{expr*(None,n)} does not enforce a maximum number of expr 
1251             occurrences.  If this behavior is desired, then write 
1252             C{expr*(None,n) + ~expr} 
1253   
1254          """ 
1255          if isinstance(other,int): 
1256              minElements, optElements = other,0 
1257          elif isinstance(other,tuple): 
1258              other = (other + (None, None))[:2] 
1259              if other[0] is None: 
1260                  other = (0, other[1]) 
1261              if isinstance(other[0],int) and other[1] is None: 
1262                  if other[0] == 0: 
1263                      return ZeroOrMore(self) 
1264                  if other[0] == 1: 
1265                      return OneOrMore(self) 
1266                  else: 
1267                      return self*other[0] + ZeroOrMore(self) 
1268              elif isinstance(other[0],int) and isinstance(other[1],int): 
1269                  minElements, optElements = other 
1270                  optElements -= minElements 
1271              else: 
1272                  raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 
1273          else: 
1274              raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 
1275   
1276          if minElements < 0: 
1277              raise ValueError("cannot multiply ParserElement by negative value") 
1278          if optElements < 0: 
1279              raise ValueError("second tuple value must be greater or equal to first tuple value") 
1280          if minElements == optElements == 0: 
1281              raise ValueError("cannot multiply ParserElement by 0 or (0,0)") 
1282   
1283          if (optElements): 
1284              def makeOptionalList(n): 
1285                  if n>1: 
1286                      return Optional(self + makeOptionalList(n-1)) 
1287                  else: 
1288                      return Optional(self) 
 1289              if minElements: 
1290                  if minElements == 1: 
1291                      ret = self + makeOptionalList(optElements) 
1292                  else: 
1293                      ret = And([self]*minElements) + makeOptionalList(optElements) 
1294              else: 
1295                  ret = makeOptionalList(optElements) 
1296          else: 
1297              if minElements == 1: 
1298                  ret = self 
1299              else: 
1300                  ret = And([self]*minElements) 
1301          return ret 
1302   
1305   
1307          """Implementation of | operator - returns C{MatchFirst}""" 
1308          if isinstance( other, basestring ): 
1309              other = Literal( other ) 
1310          if not isinstance( other, ParserElement ): 
1311              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1312                      SyntaxWarning, stacklevel=2) 
1313              return None 
1314          return MatchFirst( [ self, other ] ) 
 1315   
1317          """Implementation of | operator when left operand is not a C{ParserElement}""" 
1318          if isinstance( other, basestring ): 
1319              other = Literal( other ) 
1320          if not isinstance( other, ParserElement ): 
1321              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1322                      SyntaxWarning, stacklevel=2) 
1323              return None 
1324          return other | self 
 1325   
1327          """Implementation of ^ operator - returns C{Or}""" 
1328          if isinstance( other, basestring ): 
1329              other = Literal( other ) 
1330          if not isinstance( other, ParserElement ): 
1331              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1332                      SyntaxWarning, stacklevel=2) 
1333              return None 
1334          return Or( [ self, other ] ) 
 1335   
1337          """Implementation of ^ operator when left operand is not a C{ParserElement}""" 
1338          if isinstance( other, basestring ): 
1339              other = Literal( other ) 
1340          if not isinstance( other, ParserElement ): 
1341              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1342                      SyntaxWarning, stacklevel=2) 
1343              return None 
1344          return other ^ self 
 1345   
1347          """Implementation of & operator - returns C{Each}""" 
1348          if isinstance( other, basestring ): 
1349              other = Literal( other ) 
1350          if not isinstance( other, ParserElement ): 
1351              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1352                      SyntaxWarning, stacklevel=2) 
1353              return None 
1354          return Each( [ self, other ] ) 
 1355   
1357          """Implementation of & operator when left operand is not a C{ParserElement}""" 
1358          if isinstance( other, basestring ): 
1359              other = Literal( other ) 
1360          if not isinstance( other, ParserElement ): 
1361              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1362                      SyntaxWarning, stacklevel=2) 
1363              return None 
1364          return other & self 
 1365   
1367          """Implementation of ~ operator - returns C{NotAny}""" 
1368          return NotAny( self ) 
 1369   
1371          """Shortcut for C{setResultsName}, with C{listAllMatches=default}:: 
1372               userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 
1373             could be written as:: 
1374               userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 
1375             """ 
1376          return self.setResultsName(name) 
 1377   
1379          """Suppresses the output of this C{ParserElement}; useful to keep punctuation from 
1380             cluttering up returned output. 
1381          """ 
1382          return Suppress( self ) 
 1383   
1385          """Disables the skipping of whitespace before matching the characters in the 
1386             C{ParserElement}'s defined pattern.  This is normally only used internally by 
1387             the pyparsing module, but may be needed in some whitespace-sensitive grammars. 
1388          """ 
1389          self.skipWhitespace = False 
1390          return self 
 1391   
1393          """Overrides the default whitespace chars 
1394          """ 
1395          self.skipWhitespace = True 
1396          self.whiteChars = chars 
1397          self.copyDefaultWhiteChars = False 
1398          return self 
 1399   
1401          """Overrides default behavior to expand <TAB>s to spaces before parsing the input string. 
1402             Must be called before C{parseString} when the input grammar contains elements that 
1403             match <TAB> characters.""" 
1404          self.keepTabs = True 
1405          return self 
 1406   
1408          """Define expression to be ignored (e.g., comments) while doing pattern 
1409             matching; may be called repeatedly, to define multiple comment or other 
1410             ignorable patterns. 
1411          """ 
1412          if isinstance( other, Suppress ): 
1413              if other not in self.ignoreExprs: 
1414                  self.ignoreExprs.append( other.copy() ) 
1415          else: 
1416              self.ignoreExprs.append( Suppress( other.copy() ) ) 
1417          return self 
 1418   
1419 -    def setDebugActions( self, startAction, successAction, exceptionAction ): 
 1420          """Enable display of debugging messages while doing pattern matching.""" 
1421          self.debugActions = (startAction or _defaultStartDebugAction, 
1422                               successAction or _defaultSuccessDebugAction, 
1423                               exceptionAction or _defaultExceptionDebugAction) 
1424          self.debug = True 
1425          return self 
 1426   
1428          """Enable display of debugging messages while doing pattern matching. 
1429             Set C{flag} to True to enable, False to disable.""" 
1430          if flag: 
1431              self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) 
1432          else: 
1433              self.debug = False 
1434          return self 
 1435   
1438   
1441   
1443          self.streamlined = True 
1444          self.strRepr = None 
1445          return self 
 1446   
1449   
1450 -    def validate( self, validateTrace=[] ): 
 1451          """Check defined expressions for valid structure, check for infinite recursive definitions.""" 
1452          self.checkRecursion( [] ) 
 1453   
1454 -    def parseFile( self, file_or_filename, parseAll=False ): 
 1455          """Execute the parse expression on the given file or filename. 
1456             If a filename is specified (instead of a file object), 
1457             the entire file is opened, read, and closed before parsing. 
1458          """ 
1459          try: 
1460              file_contents = file_or_filename.read() 
1461          except AttributeError: 
1462              f = open(file_or_filename, "rb") 
1463              file_contents = f.read() 
1464              f.close() 
1465          try: 
1466              return self.parseString(file_contents, parseAll) 
1467          except ParseBaseException: 
1468               
1469              exc = sys.exc_info()[1] 
1470              raise exc 
 1471   
1474   
1476          if aname == "myException": 
1477              self.myException = ret = self.getException(); 
1478              return ret; 
1479          else: 
1480              raise AttributeError("no such attribute " + aname) 
 1481   
1483          if isinstance(other, ParserElement): 
1484              return self is other or self.__dict__ == other.__dict__ 
1485          elif isinstance(other, basestring): 
1486              try: 
1487                  self.parseString(_ustr(other), parseAll=True) 
1488                  return True 
1489              except ParseBaseException: 
1490                  return False 
1491          else: 
1492              return super(ParserElement,self)==other 
 1493   
1495          return not (self == other) 
 1496   
1498          return hash(id(self)) 
 1499   
1501          return self == other 
 1502   
1504          return not (self == other) 
 1505   
1506   
1507 -class Token(ParserElement): 
 1508      """Abstract C{ParserElement} subclass, for defining atomic matching patterns.""" 
1511           
1512   
1514          s = super(Token,self).setName(name) 
1515          self.errmsg = "Expected " + self.name 
1516           
1517          return s 
  1518   
1519   
1521      """An empty token, will always match.""" 
1523          super(Empty,self).__init__() 
1524          self.name = "Empty" 
1525          self.mayReturnEmpty = True 
1526          self.mayIndexError = False 
  1527   
1528   
1530      """A token that will never match.""" 
1532          super(NoMatch,self).__init__() 
1533          self.name = "NoMatch" 
1534          self.mayReturnEmpty = True 
1535          self.mayIndexError = False 
1536          self.errmsg = "Unmatchable token" 
 1537           
1538   
1539 -    def parseImpl( self, instring, loc, doActions=True ): 
 1540          exc = self.myException 
1541          exc.loc = loc 
1542          exc.pstr = instring 
1543          raise exc 
  1544   
1545   
1547      """Token to exactly match a specified string.""" 
1549          super(Literal,self).__init__() 
1550          self.match = matchString 
1551          self.matchLen = len(matchString) 
1552          try: 
1553              self.firstMatchChar = matchString[0] 
1554          except IndexError: 
1555              warnings.warn("null string passed to Literal; use Empty() instead", 
1556                              SyntaxWarning, stacklevel=2) 
1557              self.__class__ = Empty 
1558          self.name = '"%s"' % _ustr(self.match) 
1559          self.errmsg = "Expected " + self.name 
1560          self.mayReturnEmpty = False 
1561           
1562          self.mayIndexError = False 
 1563   
1564       
1565       
1566       
1567       
1568 -    def parseImpl( self, instring, loc, doActions=True ): 
 1569          if (instring[loc] == self.firstMatchChar and 
1570              (self.matchLen==1 or instring.startswith(self.match,loc)) ): 
1571              return loc+self.matchLen, self.match 
1572           
1573          exc = self.myException 
1574          exc.loc = loc 
1575          exc.pstr = instring 
1576          raise exc 
  1577  _L = Literal 
1578   
1580      """Token to exactly match a specified string as a keyword, that is, it must be 
1581         immediately followed by a non-keyword character.  Compare with C{Literal}:: 
1582           Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. 
1583           Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)' 
1584         Accepts two optional constructor arguments in addition to the keyword string: 
1585         C{identChars} is a string of characters that would be valid identifier characters, 
1586         defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive 
1587         matching, default is False. 
1588      """ 
1589      DEFAULT_KEYWORD_CHARS = alphanums+"_$" 
1590   
1592          super(Keyword,self).__init__() 
1593          self.match = matchString 
1594          self.matchLen = len(matchString) 
1595          try: 
1596              self.firstMatchChar = matchString[0] 
1597          except IndexError: 
1598              warnings.warn("null string passed to Keyword; use Empty() instead", 
1599                              SyntaxWarning, stacklevel=2) 
1600          self.name = '"%s"' % self.match 
1601          self.errmsg = "Expected " + self.name 
1602          self.mayReturnEmpty = False 
1603           
1604          self.mayIndexError = False 
1605          self.caseless = caseless 
1606          if caseless: 
1607              self.caselessmatch = matchString.upper() 
1608              identChars = identChars.upper() 
1609          self.identChars = _str2dict(identChars) 
 1610   
1611 -    def parseImpl( self, instring, loc, doActions=True ): 
 1612          if self.caseless: 
1613              if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
1614                   (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and 
1615                   (loc == 0 or instring[loc-1].upper() not in self.identChars) ): 
1616                  return loc+self.matchLen, self.match 
1617          else: 
1618              if (instring[loc] == self.firstMatchChar and 
1619                  (self.matchLen==1 or instring.startswith(self.match,loc)) and 
1620                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 
1621                  (loc == 0 or instring[loc-1] not in self.identChars) ): 
1622                  return loc+self.matchLen, self.match 
1623           
1624          exc = self.myException 
1625          exc.loc = loc 
1626          exc.pstr = instring 
1627          raise exc 
 1628   
1633   
1638      setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) 
 1639   
1641      """Token to match a specified string, ignoring case of letters. 
1642         Note: the matched results will always be in the case of the given 
1643         match string, NOT the case of the input text. 
1644      """ 
1646          super(CaselessLiteral,self).__init__( matchString.upper() ) 
1647           
1648          self.returnString = matchString 
1649          self.name = "'%s'" % self.returnString 
1650          self.errmsg = "Expected " + self.name 
 1651           
1652   
1653 -    def parseImpl( self, instring, loc, doActions=True ): 
 1654          if instring[ loc:loc+self.matchLen ].upper() == self.match: 
1655              return loc+self.matchLen, self.returnString 
1656           
1657          exc = self.myException 
1658          exc.loc = loc 
1659          exc.pstr = instring 
1660          raise exc 
  1661   
1665   
1666 -    def parseImpl( self, instring, loc, doActions=True ): 
 1667          if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
1668               (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): 
1669              return loc+self.matchLen, self.match 
1670           
1671          exc = self.myException 
1672          exc.loc = loc 
1673          exc.pstr = instring 
1674          raise exc 
  1675   
1677      """Token for matching words composed of allowed character sets. 
1678         Defined with string containing all allowed initial characters, 
1679         an optional string containing allowed body characters (if omitted, 
1680         defaults to the initial character set), and an optional minimum, 
1681         maximum, and/or exact length.  The default value for C{min} is 1 (a 
1682         minimum value < 1 is not valid); the default values for C{max} and C{exact} 
1683         are 0, meaning no maximum or exact length restriction. 
1684      """ 
1685 -    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): 
 1686          super(Word,self).__init__() 
1687          self.initCharsOrig = initChars 
1688          self.initChars = _str2dict(initChars) 
1689          if bodyChars : 
1690              self.bodyCharsOrig = bodyChars 
1691              self.bodyChars = _str2dict(bodyChars) 
1692          else: 
1693              self.bodyCharsOrig = initChars 
1694              self.bodyChars = _str2dict(initChars) 
1695   
1696          self.maxSpecified = max > 0 
1697   
1698          if min < 1: 
1699              raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 
1700   
1701          self.minLen = min 
1702   
1703          if max > 0: 
1704              self.maxLen = max 
1705          else: 
1706              self.maxLen = _MAX_INT 
1707   
1708          if exact > 0: 
1709              self.maxLen = exact 
1710              self.minLen = exact 
1711   
1712          self.name = _ustr(self) 
1713          self.errmsg = "Expected " + self.name 
1714           
1715          self.mayIndexError = False 
1716          self.asKeyword = asKeyword 
1717   
1718          if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 
1719              if self.bodyCharsOrig == self.initCharsOrig: 
1720                  self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 
1721              elif len(self.bodyCharsOrig) == 1: 
1722                  self.reString = "%s[%s]*" % \ 
1723                                        (re.escape(self.initCharsOrig), 
1724                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
1725              else: 
1726                  self.reString = "[%s][%s]*" % \ 
1727                                        (_escapeRegexRangeChars(self.initCharsOrig), 
1728                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
1729              if self.asKeyword: 
1730                  self.reString = r"\b"+self.reString+r"\b" 
1731              try: 
1732                  self.re = re.compile( self.reString ) 
1733              except: 
1734                  self.re = None 
 1735   
1736 -    def parseImpl( self, instring, loc, doActions=True ): 
 1737          if self.re: 
1738              result = self.re.match(instring,loc) 
1739              if not result: 
1740                  exc = self.myException 
1741                  exc.loc = loc 
1742                  exc.pstr = instring 
1743                  raise exc 
1744   
1745              loc = result.end() 
1746              return loc,result.group() 
1747   
1748          if not(instring[ loc ] in self.initChars): 
1749               
1750              exc = self.myException 
1751              exc.loc = loc 
1752              exc.pstr = instring 
1753              raise exc 
1754          start = loc 
1755          loc += 1 
1756          instrlen = len(instring) 
1757          bodychars = self.bodyChars 
1758          maxloc = start + self.maxLen 
1759          maxloc = min( maxloc, instrlen ) 
1760          while loc < maxloc and instring[loc] in bodychars: 
1761              loc += 1 
1762   
1763          throwException = False 
1764          if loc - start < self.minLen: 
1765              throwException = True 
1766          if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 
1767              throwException = True 
1768          if self.asKeyword: 
1769              if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 
1770                  throwException = True 
1771   
1772          if throwException: 
1773               
1774              exc = self.myException 
1775              exc.loc = loc 
1776              exc.pstr = instring 
1777              raise exc 
1778   
1779          return loc, instring[start:loc] 
 1780   
1782          try: 
1783              return super(Word,self).__str__() 
1784          except: 
1785              pass 
1786   
1787   
1788          if self.strRepr is None: 
1789   
1790              def charsAsStr(s): 
1791                  if len(s)>4: 
1792                      return s[:4]+"..." 
1793                  else: 
1794                      return s 
 1795   
1796              if ( self.initCharsOrig != self.bodyCharsOrig ): 
1797                  self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) 
1798              else: 
1799                  self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) 
1800   
1801          return self.strRepr 
 1802   
1803   
1805      """Token for matching strings that match a given regular expression. 
1806         Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. 
1807      """ 
1808      compiledREtype = type(re.compile("[A-Z]")) 
1809 -    def __init__( self, pattern, flags=0): 
 1810          """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.""" 
1811          super(Regex,self).__init__() 
1812   
1813          if isinstance(pattern, basestring): 
1814                  if len(pattern) == 0: 
1815                      warnings.warn("null string passed to Regex; use Empty() instead", 
1816                              SyntaxWarning, stacklevel=2) 
1817           
1818                  self.pattern = pattern 
1819                  self.flags = flags 
1820           
1821                  try: 
1822                      self.re = re.compile(self.pattern, self.flags) 
1823                      self.reString = self.pattern 
1824                  except sre_constants.error: 
1825                      warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
1826                          SyntaxWarning, stacklevel=2) 
1827                      raise 
1828   
1829          elif isinstance(pattern, Regex.compiledREtype): 
1830              self.re = pattern 
1831              self.pattern = \ 
1832              self.reString = str(pattern) 
1833              self.flags = flags 
1834               
1835          else: 
1836              raise ValueError("Regex may only be constructed with a string or a compiled RE object") 
1837   
1838          self.name = _ustr(self) 
1839          self.errmsg = "Expected " + self.name 
1840           
1841          self.mayIndexError = False 
1842          self.mayReturnEmpty = True 
 1843   
1844 -    def parseImpl( self, instring, loc, doActions=True ): 
 1845          result = self.re.match(instring,loc) 
1846          if not result: 
1847              exc = self.myException 
1848              exc.loc = loc 
1849              exc.pstr = instring 
1850              raise exc 
1851   
1852          loc = result.end() 
1853          d = result.groupdict() 
1854          ret = ParseResults(result.group()) 
1855          if d: 
1856              for k in d: 
1857                  ret[k] = d[k] 
1858          return loc,ret 
 1859   
1861          try: 
1862              return super(Regex,self).__str__() 
1863          except: 
1864              pass 
1865   
1866          if self.strRepr is None: 
1867              self.strRepr = "Re:(%s)" % repr(self.pattern) 
1868   
1869          return self.strRepr 
  1870   
1871   
1873      """Token for matching strings that are delimited by quoting characters. 
1874      """ 
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after issuing a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # surrounding whitespace is not part of the delimiter
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # The pattern is assembled in pieces:
        #   <open quote> (?: alternative | alternative | ... )* <end quote>
        # The first alternative is any single character other than the first
        # end-quote character and the escape character; when not multiline,
        # newline and carriage return are excluded as well.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        if len(self.endQuoteChar) > 1:
            # multi-character end quote: also accept each proper prefix of it
            # (longest first) when followed by a character that does not
            # continue the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            # the escaped-quote sequence is accepted as content
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            # escape character followed by any one character is accepted as content
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to strip the escape character when unquoting
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        # close the repeated group and require the full end quote
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name

        self.mayIndexError = False
        self.mayReturnEmpty = True
 1948   
1949 -    def parseImpl( self, instring, loc, doActions=True ): 
 1950          result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 
1951          if not result: 
1952              exc = self.myException 
1953              exc.loc = loc 
1954              exc.pstr = instring 
1955              raise exc 
1956   
1957          loc = result.end() 
1958          ret = result.group() 
1959   
1960          if self.unquoteResults: 
1961   
1962               
1963              ret = ret[self.quoteCharLen:-self.endQuoteCharLen] 
1964   
1965              if isinstance(ret,basestring): 
1966                   
1967                  if self.escChar: 
1968                      ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) 
1969   
1970                   
1971                  if self.escQuote: 
1972                      ret = ret.replace(self.escQuote, self.endQuoteChar) 
1973   
1974          return loc, ret 
 1975   
1977          try: 
1978              return super(QuotedString,self).__str__() 
1979          except: 
1980              pass 
1981   
1982          if self.strRepr is None: 
1983              self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) 
1984   
1985          return self.strRepr 
  1986   
1987   
1989      """Token for matching words composed of characters *not* in a given set. 
1990         Defined with string containing all disallowed characters, and an optional 
1991         minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a 
1992         minimum value < 1 is not valid); the default values for C{max} and C{exact} 
1993         are 0, meaning no maximum or exact length restriction. 
1994      """ 
1995 -    def __init__( self, notChars, min=1, max=0, exact=0 ): 
 1996          super(CharsNotIn,self).__init__() 
1997          self.skipWhitespace = False 
1998          self.notChars = notChars 
1999   
2000          if min < 1: 
2001              raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") 
2002   
2003          self.minLen = min 
2004   
2005          if max > 0: 
2006              self.maxLen = max 
2007          else: 
2008              self.maxLen = _MAX_INT 
2009   
2010          if exact > 0: 
2011              self.maxLen = exact 
2012              self.minLen = exact 
2013   
2014          self.name = _ustr(self) 
2015          self.errmsg = "Expected " + self.name 
2016          self.mayReturnEmpty = ( self.minLen == 0 ) 
2017           
2018          self.mayIndexError = False 
 2019   
2020 -    def parseImpl( self, instring, loc, doActions=True ): 
 2021          if instring[loc] in self.notChars: 
2022               
2023              exc = self.myException 
2024              exc.loc = loc 
2025              exc.pstr = instring 
2026              raise exc 
2027   
2028          start = loc 
2029          loc += 1 
2030          notchars = self.notChars 
2031          maxlen = min( start+self.maxLen, len(instring) ) 
2032          while loc < maxlen and \ 
2033                (instring[loc] not in notchars): 
2034              loc += 1 
2035   
2036          if loc - start < self.minLen: 
2037               
2038              exc = self.myException 
2039              exc.loc = loc 
2040              exc.pstr = instring 
2041              raise exc 
2042   
2043          return loc, instring[start:loc] 
 2044   
2046          try: 
2047              return super(CharsNotIn, self).__str__() 
2048          except: 
2049              pass 
2050   
2051          if self.strRepr is None: 
2052              if len(self.notChars) > 4: 
2053                  self.strRepr = "!W:(%s...)" % self.notChars[:4] 
2054              else: 
2055                  self.strRepr = "!W:(%s)" % self.notChars 
2056   
2057          return self.strRepr 
  2058   
2060      """Special matching class for matching whitespace.  Normally, whitespace is ignored 
2061         by pyparsing grammars.  This class is included when some whitespace structures 
2062         are significant.  Define with a string containing the whitespace characters to be 
2063         matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments, 
2064         as defined for the C{Word} class.""" 
2065      whiteStrs = { 
2066          " " : "<SPC>", 
2067          "\t": "<TAB>", 
2068          "\n": "<LF>", 
2069          "\r": "<CR>", 
2070          "\f": "<FF>", 
2071          } 
2072 -    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 
 2073          super(White,self).__init__() 
2074          self.matchWhite = ws 
2075          self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) ) 
2076           
2077          self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) 
2078          self.mayReturnEmpty = True 
2079          self.errmsg = "Expected " + self.name 
2080           
2081   
2082          self.minLen = min 
2083   
2084          if max > 0: 
2085              self.maxLen = max 
2086          else: 
2087              self.maxLen = _MAX_INT 
2088   
2089          if exact > 0: 
2090              self.maxLen = exact 
2091              self.minLen = exact 
 2092   
2093 -    def parseImpl( self, instring, loc, doActions=True ): 
 2094          if not(instring[ loc ] in self.matchWhite): 
2095               
2096              exc = self.myException 
2097              exc.loc = loc 
2098              exc.pstr = instring 
2099              raise exc 
2100          start = loc 
2101          loc += 1 
2102          maxloc = start + self.maxLen 
2103          maxloc = min( maxloc, len(instring) ) 
2104          while loc < maxloc and instring[loc] in self.matchWhite: 
2105              loc += 1 
2106   
2107          if loc - start < self.minLen: 
2108               
2109              exc = self.myException 
2110              exc.loc = loc 
2111              exc.pstr = instring 
2112              raise exc 
2113   
2114          return loc, instring[start:loc] 
  2115   
2116   
2119          super(_PositionToken,self).__init__() 
2120          self.name=self.__class__.__name__ 
2121          self.mayReturnEmpty = True 
2122          self.mayIndexError = False 
 2125      """Token to advance to a specific column of input text; useful for tabular report scraping.""" 
2129   
2131          if col(loc,instring) != self.col: 
2132              instrlen = len(instring) 
2133              if self.ignoreExprs: 
2134                  loc = self._skipIgnorables( instring, loc ) 
2135              while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : 
2136                  loc += 1 
2137          return loc 
 2138   
2139 -    def parseImpl( self, instring, loc, doActions=True ): 
 2140          thiscol = col( loc, instring ) 
2141          if thiscol > self.col: 
2142              raise ParseException( instring, loc, "Text not in expected column", self ) 
2143          newloc = loc + self.col - thiscol 
2144          ret = instring[ loc: newloc ] 
2145          return newloc, ret 
  2146   
2148      """Matches if current position is at the beginning of a line within the parse string""" 
2153           
2154   
2156          preloc = super(LineStart,self).preParse(instring,loc) 
2157          if instring[preloc] == "\n": 
2158              loc += 1 
2159          return loc 
 2160   
2161 -    def parseImpl( self, instring, loc, doActions=True ): 
 2162          if not( loc==0 or 
2163              (loc == self.preParse( instring, 0 )) or 
2164              (instring[loc-1] == "\n") ):  
2165               
2166              exc = self.myException 
2167              exc.loc = loc 
2168              exc.pstr = instring 
2169              raise exc 
2170          return loc, [] 
  2171   
2173      """Matches if current position is at the end of a line within the parse string""" 
2178           
2179   
2180 -    def parseImpl( self, instring, loc, doActions=True ): 
 2181          if loc<len(instring): 
2182              if instring[loc] == "\n": 
2183                  return loc+1, "\n" 
2184              else: 
2185                   
2186                  exc = self.myException 
2187                  exc.loc = loc 
2188                  exc.pstr = instring 
2189                  raise exc 
2190          elif loc == len(instring): 
2191              return loc+1, [] 
2192          else: 
2193              exc = self.myException 
2194              exc.loc = loc 
2195              exc.pstr = instring 
2196              raise exc 
  2197   
2199      """Matches if current position is at the beginning of the parse string""" 
2203           
2204   
2205 -    def parseImpl( self, instring, loc, doActions=True ): 
 2206          if loc != 0: 
2207               
2208              if loc != self.preParse( instring, 0 ): 
2209                   
2210                  exc = self.myException 
2211                  exc.loc = loc 
2212                  exc.pstr = instring 
2213                  raise exc 
2214          return loc, [] 
  2215   
2217      """Matches if current position is at the end of the parse string""" 
2221           
2222   
2223 -    def parseImpl( self, instring, loc, doActions=True ): 
 2224          if loc < len(instring): 
2225               
2226              exc = self.myException 
2227              exc.loc = loc 
2228              exc.pstr = instring 
2229              raise exc 
2230          elif loc == len(instring): 
2231              return loc+1, [] 
2232          elif loc > len(instring): 
2233              return loc, [] 
2234          else: 
2235              exc = self.myException 
2236              exc.loc = loc 
2237              exc.pstr = instring 
2238              raise exc 
  2239   
2241      """Matches if the current position is at the beginning of a Word, and 
2242         is not preceded by any character in a given set of wordChars 
2243         (default=C{printables}). To emulate the C{\\b} behavior of regular expressions, 
2244         use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of 
2245         the string being parsed, or at the beginning of a line. 
2246      """ 
2248          super(WordStart,self).__init__() 
2249          self.wordChars = _str2dict(wordChars) 
2250          self.errmsg = "Not at the start of a word" 
 2251   
2252 -    def parseImpl(self, instring, loc, doActions=True ): 
 2253          if loc != 0: 
2254              if (instring[loc-1] in self.wordChars or 
2255                  instring[loc] not in self.wordChars): 
2256                  exc = self.myException 
2257                  exc.loc = loc 
2258                  exc.pstr = instring 
2259                  raise exc 
2260          return loc, [] 
  2261   
2263      """Matches if the current position is at the end of a Word, and 
2264         is not followed by any character in a given set of wordChars 
2265         (default=C{printables}). To emulate the C{\\b} behavior of regular expressions, 
2266         use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of 
2267         the string being parsed, or at the end of a line. 
2268      """ 
2270          super(WordEnd,self).__init__() 
2271          self.wordChars = _str2dict(wordChars) 
2272          self.skipWhitespace = False 
2273          self.errmsg = "Not at the end of a word" 
 2274   
2275 -    def parseImpl(self, instring, loc, doActions=True ): 
 2276          instrlen = len(instring) 
2277          if instrlen>0 and loc<instrlen: 
2278              if (instring[loc] in self.wordChars or 
2279                  instring[loc-1] not in self.wordChars): 
2280                   
2281                  exc = self.myException 
2282                  exc.loc = loc 
2283                  exc.pstr = instring 
2284                  raise exc 
2285          return loc, [] 
  2286   
2287   
2289      """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 
2290 -    def __init__( self, exprs, savelist = False ): 
 2291          super(ParseExpression,self).__init__(savelist) 
2292          if isinstance( exprs, list ): 
2293              self.exprs = exprs 
2294          elif isinstance( exprs, basestring ): 
2295              self.exprs = [ Literal( exprs ) ] 
2296          else: 
2297              try: 
2298                  self.exprs = list( exprs ) 
2299              except TypeError: 
2300                  self.exprs = [ exprs ] 
2301          self.callPreparse = False 
 2302   
2304          return self.exprs[i] 
 2305   
2307          self.exprs.append( other ) 
2308          self.strRepr = None 
2309          return self 
 2310   
2312          """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on 
2313             all contained expressions.""" 
2314          self.skipWhitespace = False 
2315          self.exprs = [ e.copy() for e in self.exprs ] 
2316          for e in self.exprs: 
2317              e.leaveWhitespace() 
2318          return self 
 2319   
2321          if isinstance( other, Suppress ): 
2322              if other not in self.ignoreExprs: 
2323                  super( ParseExpression, self).ignore( other ) 
2324                  for e in self.exprs: 
2325                      e.ignore( self.ignoreExprs[-1] ) 
2326          else: 
2327              super( ParseExpression, self).ignore( other ) 
2328              for e in self.exprs: 
2329                  e.ignore( self.ignoreExprs[-1] ) 
2330          return self 
 2331   
2333          try: 
2334              return super(ParseExpression,self).__str__() 
2335          except: 
2336              pass 
2337   
2338          if self.strRepr is None: 
2339              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) 
2340          return self.strRepr 
 2341   
2343          super(ParseExpression,self).streamline() 
2344   
2345          for e in self.exprs: 
2346              e.streamline() 
2347   
2348           
2349           
2350           
2351          if ( len(self.exprs) == 2 ): 
2352              other = self.exprs[0] 
2353              if ( isinstance( other, self.__class__ ) and 
2354                    not(other.parseAction) and 
2355                    other.resultsName is None and 
2356                    not other.debug ): 
2357                  self.exprs = other.exprs[:] + [ self.exprs[1] ] 
2358                  self.strRepr = None 
2359                  self.mayReturnEmpty |= other.mayReturnEmpty 
2360                  self.mayIndexError  |= other.mayIndexError 
2361   
2362              other = self.exprs[-1] 
2363              if ( isinstance( other, self.__class__ ) and 
2364                    not(other.parseAction) and 
2365                    other.resultsName is None and 
2366                    not other.debug ): 
2367                  self.exprs = self.exprs[:-1] + other.exprs[:] 
2368                  self.strRepr = None 
2369                  self.mayReturnEmpty |= other.mayReturnEmpty 
2370                  self.mayIndexError  |= other.mayIndexError 
2371   
2372          return self 
 2373   
2377   
2378 -    def validate( self, validateTrace=[] ): 
 2379          tmp = validateTrace[:]+[self] 
2380          for e in self.exprs: 
2381              e.validate(tmp) 
2382          self.checkRecursion( [] ) 
  2383   
2384 -class And(ParseExpression): 
 2385      """Requires all given C{ParseExpressions} to be found in the given order. 
2386         Expressions may be separated by whitespace. 
2387         May be constructed using the '+' operator. 
2388      """ 
2389   
2394   
2395 -    def __init__( self, exprs, savelist = True ): 
 2396          super(And,self).__init__(exprs, savelist) 
2397          self.mayReturnEmpty = True 
2398          for e in self.exprs: 
2399              if not e.mayReturnEmpty: 
2400                  self.mayReturnEmpty = False 
2401                  break 
2402          self.setWhitespaceChars( exprs[0].whiteChars ) 
2403          self.skipWhitespace = exprs[0].skipWhitespace 
2404          self.callPreparse = True 
 2405   
2406 -    def parseImpl( self, instring, loc, doActions=True ): 
 2407           
2408           
2409          loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False ) 
2410          errorStop = False 
2411          for e in self.exprs[1:]: 
2412              if isinstance(e, And._ErrorStop): 
2413                  errorStop = True 
2414                  continue 
2415              if errorStop: 
2416                  try: 
2417                      loc, exprtokens = e._parse( instring, loc, doActions ) 
2418                  except ParseSyntaxException: 
2419                      raise 
2420                  except ParseBaseException: 
2421                      pe = sys.exc_info()[1] 
2422                      raise ParseSyntaxException(pe) 
2423                  except IndexError: 
2424                      raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) ) 
2425              else: 
2426                  loc, exprtokens = e._parse( instring, loc, doActions ) 
2427              if exprtokens or exprtokens.keys(): 
2428                  resultlist += exprtokens 
2429          return loc, resultlist 
 2430   
2432          if isinstance( other, basestring ): 
2433              other = Literal( other ) 
2434          return self.append( other )  
 2435   
2437          subRecCheckList = parseElementList[:] + [ self ] 
2438          for e in self.exprs: 
2439              e.checkRecursion( subRecCheckList ) 
2440              if not e.mayReturnEmpty: 
2441                  break 
 2442   
2444          if hasattr(self,"name"): 
2445              return self.name 
2446   
2447          if self.strRepr is None: 
2448              self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2449   
2450          return self.strRepr 
  2451   
2452   
2453 -class Or(ParseExpression): 
 2454      """Requires that at least one C{ParseExpression} is found. 
2455         If two expressions match, the expression that matches the longest string will be used. 
2456         May be constructed using the '^' operator. 
2457      """ 
def __init__( self, exprs, savelist = False ):
    """Build an C{Or} from the alternatives in C{exprs}.

    C{mayReturnEmpty} is true as soon as any one alternative may return
    empty, and false otherwise (including when there are no alternatives).
    """
    super(Or,self).__init__(exprs, savelist)
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
    else:
        self.mayReturnEmpty = False
 2465   
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative at C{loc} and parse with the one that reaches furthest.

    Each alternative is first probed with C{tryParse}; the alternative with
    the greatest end location (the earliest one on a tie) is then parsed
    for real with C{_parse}.  If none matches, the exception with the
    greatest C{loc} is raised, or a "no defined alternatives to match"
    C{ParseException} when no exception was recorded at all.
    """
    furthestErrLoc = -1
    furthestErr = None
    bestEnd = -1
    for alt in self.exprs:
        try:
            endLoc = alt.tryParse( instring, loc )
        except ParseException:
            caught = sys.exc_info()[1]
            if caught.loc > furthestErrLoc:
                furthestErr = caught
                furthestErrLoc = caught.loc
        except IndexError:
            # running off the end of the input counts as failing at len(instring)
            if len(instring) > furthestErrLoc:
                furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                furthestErrLoc = len(instring)
        else:
            if endLoc > bestEnd:
                bestEnd = endLoc
                bestExpr = alt

    if bestEnd < 0:
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise furthestErr

    return bestExpr._parse( instring, loc, doActions )
 2494   
2496          if isinstance( other, basestring ): 
2497              other = Literal( other ) 
2498          return self.append( other )  
 2499   
2501          if hasattr(self,"name"): 
2502              return self.name 
2503   
2504          if self.strRepr is None: 
2505              self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2506   
2507          return self.strRepr 
 2508   
2510          subRecCheckList = parseElementList[:] + [ self ] 
2511          for e in self.exprs: 
2512              e.checkRecursion( subRecCheckList ) 
  2513   
2514   
2516      """Requires that at least one C{ParseExpression} is found. 
2517         If two expressions match, the first one listed is the one that will match. 
2518         May be constructed using the '|' operator. 
2519      """ 
def __init__( self, exprs, savelist = False ):
    """Build a C{MatchFirst} from the alternatives in C{exprs}.

    With no alternatives C{mayReturnEmpty} is true; otherwise it is true
    exactly when at least one alternative may return empty.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not exprs:
        self.mayReturnEmpty = True
        return
    canBeEmpty = False
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            canBeEmpty = True
            break
    self.mayReturnEmpty = canBeEmpty
 2530   
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses at C{loc}.

    Alternatives are tried in order with C{_parse}; the first success is
    returned immediately.  If every alternative fails, the recorded
    exception with the greatest C{loc} is raised; when nothing was
    recorded (no alternatives), a "no defined alternatives to match"
    C{ParseException} is raised instead.
    """
    maxExcLoc = -1
    maxException = None
    for e in self.exprs:
        try:
            return e._parse( instring, loc, doActions )
        except ParseException:
            # fetch the exception through sys.exc_info(), as the other
            # parseImpl methods do: "except X, err" is a syntax error on Python 3
            err = sys.exc_info()[1]
            if err.loc > maxExcLoc:
                maxException = err
                maxExcLoc = err.loc
        except IndexError:
            # running off the end of the input counts as failing at len(instring)
            if len(instring) > maxExcLoc:
                maxException = ParseException(instring,len(instring),e.errmsg,self)
                maxExcLoc = len(instring)

    # only reached when no alternative matched
    if maxException is not None:
        raise maxException
    raise ParseException(instring, loc, "no defined alternatives to match", self)
 2553   
2555          if isinstance( other, basestring ): 
2556              other = Literal( other ) 
2557          return self.append( other )  
 2558   
2560          if hasattr(self,"name"): 
2561              return self.name 
2562   
2563          if self.strRepr is None: 
2564              self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2565   
2566          return self.strRepr 
 2567   
2569          subRecCheckList = parseElementList[:] + [ self ] 
2570          for e in self.exprs: 
2571              e.checkRecursion( subRecCheckList ) 
  2572   
2573   
2574 -class Each(ParseExpression): 
 2575      """Requires all given C{ParseExpressions} to be found, but in any order. 
2576         Expressions may be separated by whitespace. 
2577         May be constructed using the '&' operator. 
2578      """ 
def __init__( self, exprs, savelist = True ):
    """Build an C{Each} from the expressions in C{exprs}.

    C{mayReturnEmpty} is true only when every sub-expression may return
    empty.  C{initExprGroups} is set so that C{parseImpl} sorts the
    sub-expressions into groups on its first call.
    """
    super(Each,self).__init__(exprs, savelist)
    for subexpr in self.exprs:
        if not subexpr.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    else:
        self.mayReturnEmpty = True
    self.skipWhitespace = True
    self.initExprGroups = True
 2588   
def parseImpl( self, instring, loc, doActions=True ):
    """Match all sub-expressions in whatever order they occur in the input.

    Works in three phases: repeated C{tryParse} passes discover the order
    in which the expressions match; the expressions are then parsed for
    real, in that order, with C{_parse}; finally the individual results
    are merged into one C{ParseResults}, concatenating the values of any
    results name that occurs more than once.

    Raises C{ParseException} if a required expression never matched.
    """
    if self.initExprGroups:
        # one-time classification of the sub-expressions, cached on self
        unwrappedOptionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
        otherEmptyable = [ e for e in self.exprs if e.mayReturnEmpty and e not in unwrappedOptionals ]
        self.optionals = unwrappedOptionals + otherEmptyable
        self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False

    scanLoc = loc
    pendingRequired = self.required[:]
    pendingOptional = self.optionals[:]
    matchOrder = []

    # keep making passes until a pass in which no candidate matches
    while True:
        candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
        failCount = 0
        for cand in candidates:
            try:
                scanLoc = cand.tryParse( instring, scanLoc )
            except ParseException:
                failCount += 1
            else:
                matchOrder.append(cand)
                if cand in pendingRequired:
                    pendingRequired.remove(cand)
                elif cand in pendingOptional:
                    pendingOptional.remove(cand)
        if failCount == len(candidates):
            break

    if pendingRequired:
        missing = ", ".join( [ _ustr(e) for e in pendingRequired ] )
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # also parse the Optionals whose expression never matched, so that
    # Optional.parseImpl gets the chance to supply its default value
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOptional]

    # second phase: the real parse, in the discovered order
    parsedPieces = []
    for matched in matchOrder:
        loc,piece = matched._parse(instring,loc,doActions)
        parsedPieces.append(piece)

    # third phase: merge, concatenating values of repeated results names
    finalResults = ParseResults([])
    for piece in parsedPieces:
        repeated = {}
        for key in piece.keys():
            if key in finalResults.keys():
                merged = ParseResults(finalResults[key])
                merged += ParseResults(piece[key])
                repeated[key] = merged
        finalResults += ParseResults(piece)
        for key,value in repeated.items():
            finalResults[key] = value
    return loc, finalResults
 2646   
2648          if hasattr(self,"name"): 
2649              return self.name 
2650   
2651          if self.strRepr is None: 
2652              self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 
2653   
2654          return self.strRepr 
 2655   
2657          subRecCheckList = parseElementList[:] + [ self ] 
2658          for e in self.exprs: 
2659              e.checkRecursion( subRecCheckList ) 
  2660   
2661   
2663      """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.""" 
def __init__( self, expr, savelist=False ):
    """Wrap the expression C{expr}.

    A string is converted to a C{Literal}.  When C{expr} is not C{None},
    its parsing attributes (index-error and empty-match flags, whitespace
    settings, C{saveAsList}, C{callPreparse}) are copied onto this
    element and its ignore expressions are appended to this element's.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        # nothing to inherit from
        return
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
 2678   
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression.

    Returns whatever C{self.expr._parse} returns (called with
    C{callPreParse=False}); raises C{ParseException} when no expression
    has been set.
    """
    wrapped = self.expr
    if wrapped is None:
        raise ParseException("",loc,self.errmsg,self)
    return wrapped._parse( instring, loc, doActions, callPreParse=False )
 2684   
2686          self.skipWhitespace = False 
2687          self.expr = self.expr.copy() 
2688          if self.expr is not None: 
2689              self.expr.leaveWhitespace() 
2690          return self 
 2691   
2693          if isinstance( other, Suppress ): 
2694              if other not in self.ignoreExprs: 
2695                  super( ParseElementEnhance, self).ignore( other ) 
2696                  if self.expr is not None: 
2697                      self.expr.ignore( self.ignoreExprs[-1] ) 
2698          else: 
2699              super( ParseElementEnhance, self).ignore( other ) 
2700              if self.expr is not None: 
2701                  self.expr.ignore( self.ignoreExprs[-1] ) 
2702          return self 
 2703   
2709   
2711          if self in parseElementList: 
2712              raise RecursiveGrammarException( parseElementList+[self] ) 
2713          subRecCheckList = parseElementList[:] + [ self ] 
2714          if self.expr is not None: 
2715              self.expr.checkRecursion( subRecCheckList ) 
 2716   
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression, then check this element for recursion.

    C{validateTrace} is never modified: a copy extended with C{self} is
    what gets passed down to C{self.expr.validate}.
    """
    trace = validateTrace[:]+[self]
    wrapped = self.expr
    if wrapped is not None:
        wrapped.validate(trace)
    self.checkRecursion( [] )
 2722   
2724          try: 
2725              return super(ParseElementEnhance,self).__str__() 
2726          except: 
2727              pass 
2728   
2729          if self.strRepr is None and self.expr is not None: 
2730              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) 
2731          return self.strRepr 
  2732   
2733   
2735      """Lookahead matching of the given parse expression.  C{FollowedBy} 
2736      does *not* advance the parsing position within the input string, it only 
2737      verifies that the specified parse expression matches at the current 
2738      position.  C{FollowedBy} always returns a null token list.""" 
2742   
def parseImpl( self, instring, loc, doActions=True ):
    """Check that C{self.expr} matches at C{loc} without consuming input.

    Any exception raised by C{tryParse} propagates to the caller.  On
    success the location is returned unchanged with an empty token list.
    """
    # the end location reported by tryParse is deliberately discarded
    self.expr.tryParse( instring, loc )
    noTokens = []
    return loc, noTokens
  2746   
2747   
2748 -class NotAny(ParseElementEnhance): 
 2749      """Lookahead to disallow matching with the given parse expression.  C{NotAny} 
2750      does *not* advance the parsing position within the input string, it only 
2751      verifies that the specified parse expression does *not* match at the current 
2752      position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny} 
2753      always returns a null token list.  May be constructed using the '~' operator.""" 
2755          super(NotAny,self).__init__(expr) 
2756           
2757          self.skipWhitespace = False   
2758          self.mayReturnEmpty = True 
2759          self.errmsg = "Found unwanted token, "+_ustr(self.expr) 
 2760           
2761   
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, only if C{self.expr} does not match at C{loc}.

    When C{tryParse} fails with C{ParseException} or C{IndexError} the
    location is returned unchanged with an empty token list.  When it
    succeeds, C{self.myException} is updated with the current location
    and input string and raised.
    """
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        # the unwanted expression is absent: that is the success case
        return loc, []
    unwanted = self.myException
    unwanted.loc = loc
    unwanted.pstr = instring
    raise unwanted
 2774   
2776          if hasattr(self,"name"): 
2777              return self.name 
2778   
2779          if self.strRepr is None: 
2780              self.strRepr = "~{" + _ustr(self.expr) + "}" 
2781   
2782          return self.strRepr 
  2783   
2784   
2786      """Optional repetition of zero or more of the given expression.""" 
2790   
def parseImpl( self, instring, loc, doActions=True ):
    """Match C{self.expr} as many times as possible, possibly not at all.

    Repetition stops at the first C{ParseException} or C{IndexError};
    that exception is swallowed and the tokens gathered so far are
    returned with the location after the last successful match.
    """
    tokens = []
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        skipIgnorables = len(self.ignoreExprs) > 0
        while True:
            nextLoc = loc
            if skipIgnorables:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            # keep results that have tokens or that carry named results only
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        # the only way out of the loop: no further repetition matches
        pass

    return loc, tokens
 2808   
2810          if hasattr(self,"name"): 
2811              return self.name 
2812   
2813          if self.strRepr is None: 
2814              self.strRepr = "[" + _ustr(self.expr) + "]..." 
2815   
2816          return self.strRepr 
 2817   
 2822   
2823   
2825      """Repetition of one or more of the given expression.""" 
def parseImpl( self, instring, loc, doActions=True ):
    """Match C{self.expr} one or more times.

    The first match is made outside the C{try} block, so its failure
    propagates to the caller.  Further repetitions stop silently at the
    first C{ParseException} or C{IndexError}.
    """
    loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    try:
        skipIgnorables = len(self.ignoreExprs) > 0
        while True:
            nextLoc = loc
            if skipIgnorables:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            # keep results that have tokens or that carry named results only
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        # the only way out of the loop: no further repetition matches
        pass

    return loc, tokens
 2843   
2845          if hasattr(self,"name"): 
2846              return self.name 
2847   
2848          if self.strRepr is None: 
2849              self.strRepr = "{" + _ustr(self.expr) + "}..." 
2850   
2851          return self.strRepr 
 2852   
 2857   
2864   
2865  _optionalNotMatched = _NullToken() 
2867      """Optional matching of the given expression. 
2868         A default return string can also be specified, if the optional expression 
2869         is not found. 
2870      """ 
2872          super(Optional,self).__init__( exprs, savelist=False ) 
2873          self.defaultValue = default 
2874          self.mayReturnEmpty = True 
 2875   
def parseImpl( self, instring, loc, doActions=True ):
    """Match C{self.expr} if possible, otherwise fall back to the default.

    On C{ParseException} or C{IndexError} the location is left unchanged
    and the tokens are: an empty list when no default was given; the
    default value alone; or, when the wrapped expression has a results
    name, a C{ParseResults} holding the default under that name as well.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            # sentinel: the caller supplied no default at all
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ fallback ])
            tokens[self.expr.resultsName] = fallback
        else:
            tokens = [ fallback ]
    return loc, tokens
 2889   
2891          if hasattr(self,"name"): 
2892              return self.name 
2893   
2894          if self.strRepr is None: 
2895              self.strRepr = "[" + _ustr(self.expr) + "]" 
2896   
2897          return self.strRepr 
  2898   
2899   
2900 -class SkipTo(ParseElementEnhance): 
 2901      """Token for skipping over all undefined text until the matched expression is found. 
2902         If C{include} is set to true, the matched expression is also parsed (the skipped text 
2903         and matched expression are returned as a 2-element list).  The C{ignore} 
2904         argument is used to define grammars (typically quoted strings and comments) that 
2905         might contain false matches. 
2906      """ 
def __init__( self, other, include=False, ignore=None, failOn=None ):
    """Create a C{SkipTo} that scans ahead for the expression C{other}.

    C{include} is stored as C{includeMatch} and C{ignore} as
    C{ignoreExpr}.  A string C{failOn} is converted to a C{Literal};
    any other value is stored unchanged.
    """
    super( SkipTo, self ).__init__( other )
    self.ignoreExpr = ignore
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.includeMatch = include
    self.asList = False
    # None is not a string, so it falls through and is stored as-is
    if isinstance(failOn, basestring):
        failOn = Literal(failOn)
    self.failOn = failOn
    self.errmsg = "No match found for "+_ustr(self.expr)
 2919           
2920   
def parseImpl( self, instring, loc, doActions=True ):
    """Advance through the input until C{self.expr} matches.

    At each candidate position: if C{failOn} is set and matches there, a
    C{ParseException} is raised and propagated; any run of C{ignoreExpr}
    matches is stepped over; then the target is tried with parse actions
    disabled.  On success the text from the starting location up to the
    match is returned; with C{includeMatch} the target is parsed again
    (honouring C{doActions}) and its tokens are added to the result.  On
    failure the position moves forward by one character.  When the scan
    passes the end of the input, C{self.myException} is raised.
    """
    startLoc = loc
    instrlen = len(instring)
    target = self.expr
    abortScan = False
    while loc <= instrlen:
        try:
            if self.failOn:
                try:
                    self.failOn.tryParse(instring, loc)
                except ParseBaseException:
                    pass
                else:
                    # flag tells the outer handler to re-raise, not to advance
                    abortScan = True
                    raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                abortScan = False
            if self.ignoreExpr is not None:
                # step over consecutive ignorable matches
                while True:
                    try:
                        loc = self.ignoreExpr.tryParse(instring,loc)
                    except ParseBaseException:
                        break
            # probe only: parse actions are suppressed here
            target._parse( instring, loc, doActions=False, callPreParse=False )
            skipText = instring[startLoc:loc]
            if not self.includeMatch:
                return loc, [ skipText ]
            loc,mat = target._parse(instring,loc,doActions,callPreParse=False)
            if not mat:
                return loc, [ skipText ]
            skipRes = ParseResults( skipText )
            skipRes += mat
            return loc, [ skipRes ]
        except (ParseException,IndexError):
            if abortScan:
                raise
            loc += 1
    notFound = self.myException
    notFound.loc = loc
    notFound.pstr = instring
    raise notFound
  2965   
2966 -class Forward(ParseElementEnhance): 
 2967      """Forward declaration of an expression to be defined later - 
2968         used for recursive grammars, such as algebraic infix notation. 
2969         When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. 
2970   
2971         Note: take care when assigning to C{Forward} not to overlook precedence of operators. 
2972         Specifically, '|' has a lower precedence than '<<', so that:: 
2973            fwdExpr << a | b | c 
2974         will actually be evaluated as:: 
2975            (fwdExpr << a) | b | c 
2976         thereby leaving b and c out as parseable alternatives.  It is recommended that you 
2977         explicitly group the values inserted into the C{Forward}:: 
2978            fwdExpr << (a | b | c) 
2979      """ 
2982   
2984          if isinstance( other, basestring ): 
2985              other = Literal(other) 
2986          self.expr = other 
2987          self.mayReturnEmpty = other.mayReturnEmpty 
2988          self.strRepr = None 
2989          self.mayIndexError = self.expr.mayIndexError 
2990          self.mayReturnEmpty = self.expr.mayReturnEmpty 
2991          self.setWhitespaceChars( self.expr.whiteChars ) 
2992          self.skipWhitespace = self.expr.skipWhitespace 
2993          self.saveAsList = self.expr.saveAsList 
2994          self.ignoreExprs.extend(self.expr.ignoreExprs) 
2995          return None 
 2996   
2998          self.skipWhitespace = False 
2999          return self 
 3000   
3002          if not self.streamlined: 
3003              self.streamlined = True 
3004              if self.expr is not None: 
3005                  self.expr.streamline() 
3006          return self 
 3007   
def validate( self, validateTrace=[] ):
    """Validate the assigned expression, then check this element for recursion.

    The expression is validated only when C{self} is not already in
    C{validateTrace}.  C{validateTrace} is never modified; a copy
    extended with C{self} is passed down.
    """
    if self not in validateTrace:
        trace = validateTrace[:]+[self]
        target = self.expr
        if target is not None:
            target.validate(trace)
    self.checkRecursion([])
 3014   
3016          if hasattr(self,"name"): 
3017              return self.name 
3018   
3019          self._revertClass = self.__class__ 
3020          self.__class__ = _ForwardNoRecurse 
3021          try: 
3022              if self.expr is not None: 
3023                  retString = _ustr(self.expr) 
3024              else: 
3025                  retString = "None" 
3026          finally: 
3027              self.__class__ = self._revertClass 
3028          return self.__class__.__name__ + ": " + retString 
 3029   
3031          if self.expr is not None: 
3032              return super(Forward,self).copy() 
3033          else: 
3034              ret = Forward() 
3035              ret << self 
3036              return ret 
  3037   
3041   
3043      """Abstract subclass of ParseExpression, for converting parsed results.""" 
3044 -    def __init__( self, expr, savelist=False ): 
  3047   
3048 -class Upcase(TokenConverter): 
 3049      """Converter to upper case all matching tokens.""" 
3051          super(Upcase,self).__init__(*args) 
3052          warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 
3053                         DeprecationWarning,stacklevel=2) 
 3054   
def postParse( self, instring, loc, tokenlist ):
    """Return a new list holding each token of C{tokenlist} in upper case.

    C{instring} and C{loc} are not used.
    """
    # call the upper() method of each token: the string-module function
    # string.upper() only exists on Python 2
    return [ tok.upper() for tok in tokenlist ]
  3057   
3058   
3060      """Converter to concatenate all matching tokens to a single string. 
3061         By default, the matching patterns must also be contiguous in the input string; 
3062         this can be disabled by specifying C{'adjacent=False'} in the constructor. 
3063      """ 
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap C{expr} so that its tokens are joined into one string.

    With C{adjacent} true, C{leaveWhitespace()} is called first;
    C{skipWhitespace} is then set to true on this element in either case.
    C{joinString} and C{adjacent} are stored unchanged.
    """
    super(Combine,self).__init__( expr )
    if adjacent:
        self.leaveWhitespace()
    # must come after leaveWhitespace(): this element itself is left
    # with skipWhitespace switched on whatever that call did
    self.skipWhitespace = True
    self.adjacent = adjacent
    self.joinString = joinString
    self.callPreparse = True
 3073   
3080   
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with a single concatenated string token.

    The result is built from a copy of C{tokenlist} whose tokens have
    been deleted.  It is wrapped in a one-element list when this element
    has a results name and the result has keys.
    """
    # NOTE(review): copy-then-clear presumably keeps the named results
    # while dropping the tokens - confirm against ParseResults
    combined = tokenlist.copy()
    del combined[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and len(combined.keys())>0:
        return [ combined ]
    return combined
  3090   
3091 -class Group(TokenConverter): 
 3092      """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions.""" 
3094          super(Group,self).__init__( expr ) 
3095          self.saveAsList = True 
 3096   
def postParse( self, instring, loc, tokenlist ):
    """Return C{tokenlist} nested inside a new one-element list.

    C{instring} and C{loc} are not used; C{tokenlist} itself is not copied.
    """
    grouped = [ tokenlist ]
    return grouped
  3099   
3100 -class Dict(TokenConverter): 
 3101      """Converter to return a repetitive expression as a list, but also as a dictionary. 
3102         Each element can also be referenced using the first token in the expression as its key. 
3103         Useful for tabular report scraping when the first column can be used as an item key. 
3104      """ 
3106          super(Dict,self).__init__( exprs ) 
3107          self.saveAsList = True 
 3108   
def postParse( self, instring, loc, tokenlist ):
    """Add a keyed entry to C{tokenlist} for each non-empty token group.

    The key is the first token of the group (converted to a stripped
    string when it is an C{int}).  The value is C{""} for a one-token
    group, the second token for a two-token group whose second token is
    not a C{ParseResults}, and otherwise a copy of the group without its
    first token - unwrapped to its single element when exactly one
    element remains and the copy has no keys.  Each value is stored
    together with the group's index in C{tokenlist}.

    Returns C{tokenlist}, wrapped in a one-element list when this
    element has a results name.
    """
    for pos,entry in enumerate(tokenlist):
        if len(entry) == 0:
            continue
        key = entry[0]
        if isinstance(key,int):
            key = _ustr(entry[0]).strip()
        if len(entry)==1:
            value = ""
        elif len(entry)==2 and not isinstance(entry[1],ParseResults):
            value = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            if len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                value = remainder
            else:
                value = remainder[0]
        tokenlist[key] = _ParseResultsWithOffset(value,pos)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
  3132   
3133   
3135      """Converter for ignoring the results of a parsed expression.""" 
3136 -    def postParse( self, instring, loc, tokenlist ): 
 3138   
 3141   
3142   
3144      """Wrapper for parse actions, to ensure they are only called once.""" 
3146          self.callable = ParserElement._normalizeParseActionArgs(methodCall) 
3147          self.called = False 
 3149          if not self.called: 
3150              results = self.callable(s,l,t) 
3151              self.called = True 
3152              return results 
3153          raise ParseException(s,l,"") 
  3156   
3158      """Decorator for debugging parse actions.""" 
3159      f = ParserElement._normalizeParseActionArgs(f) 
3160      def z(*paArgs): 
3161          thisFunc = f.func_name 
3162          s,l,t = paArgs[-3:] 
3163          if len(paArgs)>3: 
3164              thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc 
3165          sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) 
3166          try: 
3167              ret = f(*paArgs) 
3168          except Exception: 
3169              exc = sys.exc_info()[1] 
3170              sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) ) 
3171              raise 
3172          sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) ) 
3173          return ret 
 3174      try: 
3175          z.__name__ = f.__name__ 
3176      except AttributeError: 
3177          pass 
3178      return z 
3179   
3180   
3181   
3182   
3184      """Helper to define a delimited list of expressions - the delimiter defaults to ','. 
3185         By default, the list elements and delimiters can have intervening whitespace, and 
3186         comments, but this can be overridden by passing C{combine=True} in the constructor. 
3187         If C{combine} is set to True, the matching tokens are returned as a single token 
3188         string, with the delimiters included; otherwise, the matching tokens are returned 
3189         as a list of tokens, with the delimiters suppressed. 
3190      """ 
3191      dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." 
3192      if combine: 
3193          return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) 
3194      else: 
3195          return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) 
 3196   
3198      """Helper to define a counted list of expressions. 
3199         This helper defines a pattern of the form:: 
3200             integer expr expr expr... 
3201         where the leading integer tells how many expr expressions follow. 
3202         The matched tokens are returned as a list of expr tokens - the leading count token is suppressed. 
3203      """ 
3204      arrayExpr = Forward() 
3205      def countFieldParseAction(s,l,t): 
3206          n = int(t[0]) 
3207          arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) 
3208          return [] 
 3209      return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr ) 
3210   
3212      if type(L) is not list: return [L] 
3213      if L == []: return L 
3214      return _flatten(L[0]) + _flatten(L[1:]) 
 3215   
3217      """Helper to define an expression that is indirectly defined from 
3218         the tokens matched in a previous expression, that is, it looks 
3219         for a 'repeat' of a previous expression.  For example:: 
3220             first = Word(nums) 
3221             second = matchPreviousLiteral(first) 
3222             matchExpr = first + ":" + second 
3223         will match C{"1:1"}, but not C{"1:2"}.  Because this matches a 
3224         previous literal, will also match the leading C{"1:1"} in C{"1:10"}. 
3225         If this is not desired, use C{matchPreviousExpr}. 
3226         Do *not* use with packrat parsing enabled. 
3227      """ 
3228      rep = Forward() 
3229      def copyTokenToRepeater(s,l,t): 
3230          if t: 
3231              if len(t) == 1: 
3232                  rep << t[0] 
3233              else: 
3234                   
3235                  tflat = _flatten(t.asList()) 
3236                  rep << And( [ Literal(tt) for tt in tflat ] ) 
3237          else: 
3238              rep << Empty() 
 3239      expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 
3240      return rep 
3241   
3243      """Helper to define an expression that is indirectly defined from 
3244         the tokens matched in a previous expression, that is, it looks 
3245         for a 'repeat' of a previous expression.  For example:: 
3246             first = Word(nums) 
3247             second = matchPreviousExpr(first) 
3248             matchExpr = first + ":" + second 
3249         will match C{"1:1"}, but not C{"1:2"}.  Because this matches by 
3250         expressions, will *not* match the leading C{"1:1"} in C{"1:10"}; 
3251         the expressions are evaluated first, and then compared, so 
3252         C{"1"} is compared with C{"10"}. 
3253         Do *not* use with packrat parsing enabled. 
3254      """ 
3255      rep = Forward() 
3256      e2 = expr.copy() 
3257      rep << e2 
3258      def copyTokenToRepeater(s,l,t): 
3259          matchTokens = _flatten(t.asList()) 
3260          def mustMatchTheseTokens(s,l,t): 
3261              theseTokens = _flatten(t.asList()) 
3262              if  theseTokens != matchTokens: 
3263                  raise ParseException("",0,"") 
 3264          rep.setParseAction( mustMatchTheseTokens, callDuringTry=True ) 
3265      expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 
3266      return rep 
3267   
3269       
3270      for c in r"\^-]": 
3271          s = s.replace(c,_bslash+c) 
3272      s = s.replace("\n",r"\n") 
3273      s = s.replace("\t",r"\t") 
3274      return _ustr(s) 
 3275   
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{MatchFirst} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list or tuple of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       If C{strs} is neither a string nor a list/tuple, a C{SyntaxWarning} is issued and
       a C{MatchFirst} with no alternatives is returned.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # work on a copy so the caller's sequence is never reordered
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no alternatives instead of failing on an unbound name
        symbols = []

    # Remove duplicates and make sure that no symbol is tested before a longer
    # symbol that it is a prefix of.  After every change the scan restarts at
    # the same index i, so the (possibly new) symbol at i is re-checked
    # against everything after it.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                # exact duplicate of cur - drop the later copy
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                # cur is a prefix of other - move other in front of cur
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            # nothing after position i conflicts with it
            i += 1

    # The Regex shortcut is only taken for case-sensitive matching, and only
    # when there is something to match ("[]" is not a valid pattern).
    if symbols and not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character - a character class will do
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # fall through to the MatchFirst form below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
 3334   
3336      """Helper to easily and clearly define a dictionary by specifying the respective patterns 
3337         for the key and value.  Takes care of defining the C{Dict}, C{ZeroOrMore}, and C{Group} tokens 
3338         in the proper order.  The key pattern can include delimiting markers or punctuation, 
3339         as long as they are suppressed, thereby leaving the significant key text.  The value 
3340         pattern can include named results, so that the C{Dict} results can include named token 
3341         fields. 
3342      """ 
3343      return Dict( ZeroOrMore( Group ( key + value ) ) ) 
 3344   
def originalTextFor(expr, asString=True):
    """Helper that wraps C{expr} so that a match yields the original, untokenized
       slice of the input string instead of the individual tokens.  Useful to turn
       the parsed fields of an HTML start tag back into the raw tag text, or to
       rejoin tokens that were separated by whitespace in the input.  Simpler to
       use than the parse action C{keepOriginalText}, and does not need the
       inspect module to walk the call stack.

       Parameters:
        - expr - the expression whose matched text is wanted
        - asString - (default=True) - when True, the result is the matched text
          as a plain string.  When False, the result is a C{ParseResults} holding
          a single token (the matched text) together with any results names that
          were set inside C{expr}; pass False if those results names must be
          preserved.
    """
    # markers whose parse action reports the location at which they matched
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    # presumably disabled so that nothing is skipped before the end location
    # is recorded - TODO confirm against ParserElement's pre-parse handling
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    def sliceAsString(s,l,t):
        # the returned slice replaces all tokens
        return s[t._original_start:t._original_end]

    def sliceKeepingNames(s,l,t):
        # swap the token list for the raw text, then remove only the two
        # bookkeeping names so that the user's own results names remain
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        wrapped.setParseAction(sliceAsString)
    else:
        wrapped.setParseAction(sliceKeepingNames)
    return wrapped
3373       
3374   
# convenience constants: one pre-built, named instance of each position/empty matcher
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Private grammar for regex-style "[...]" character-set strings.
# a backslash followed by one of the listed punctuation characters; the parse
# action keeps only the second character of the two-character match
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# despite the name, both the backslash and ']' are excluded from this set
_printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ])
# backslash + "0x" + hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# one character of the set, tried in this order: escaped punctuation, hex escape, octal escape, plain character
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "x-y" range; the dash is suppressed and the two endpoints are grouped
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# whole bracket expression: "[", optional "^" (named "negate"), one or more ranges or single characters (named "body"), "]"
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# Expands a grouped range (a ParseResults pair) into every character from the
# first endpoint to the second, inclusive; anything else is returned unchanged.
# Because of the and/or idiom, a range that expands to an empty string also
# falls through and returns p itself.
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3390   
3392      r"""Helper to easily define string ranges for use in Word construction.  Borrows 
3393         syntax from regexp '[]' string range definitions:: 
3394            srange("[0-9]")   -> "0123456789" 
3395            srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz" 
3396            srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" 
3397         The input string must be enclosed in []'s, and the returned string is the expanded 
3398         character set joined into a single string. 
3399         The values enclosed in the []'s may be:: 
3400            a single character 
3401            an escaped character with a leading backslash (such as \- or \]) 
3402            an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) 
3403            an escaped octal character with a leading '\0' (\041, which is a '!' character) 
3404            a range of any of the above, separated by a dash ('a-z', etc.) 
3405            any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) 
3406      """ 
3407      try: 
3408          return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) 
3409      except: 
3410          return "" 
 3411   
3413      """Helper method for defining parse actions that require matching at a specific 
3414         column in the input text. 
3415      """ 
3416      def verifyCol(strg,locn,toks): 
3417          if col(locn,strg) != n: 
3418              raise ParseException(strg,locn,"matched token not at column %d" % n) 
 3419      return verifyCol 
3420   
3422      """Helper method for common parse actions that simply return a literal value.  Especially 
3423         useful when used with C{transformString()}. 
3424      """ 
3425      def _replFunc(*args): 
3426          return [replStr] 
 3427      return _replFunc 
3428   
3430      """Helper parse action for removing quotation marks from parsed quoted strings. 
3431         To use, add this parse action to quoted string using:: 
3432           quotedString.setParseAction( removeQuotes ) 
3433      """ 
3434      return t[0][1:-1] 
 3435   
3437      """Helper parse action to convert tokens to upper case.""" 
3438      return [ tt.upper() for tt in map(_ustr,t) ] 
 3439   
3441      """Helper parse action to convert tokens to lower case.""" 
3442      return [ tt.lower() for tt in map(_ustr,t) ] 
 3443   
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{originalTextFor}.
       Parse action that throws away the tokens produced so far (including the
       output of nested parse actions) and puts back the slice of the input
       string running from C{startLoc} to the end location reported by
       C{getTokensEndLoc}.

       Parameters:
        - s - the string being parsed
        - startLoc - location in C{s} at which the match began
        - t - the matched tokens; emptied and refilled in place

       Returns C{t}, now holding only the original text.
       Raises C{ParseFatalException} if C{getTokensEndLoc} raises a
       C{ParseException}."""
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    matchedText = s[startLoc:stopLoc]
    # t is changed in place rather than replaced with a new object
    del t[:]
    t += ParseResults(matchedText)
    return t
 3455   
3457      """Method to be called from within a parse action to determine the end 
3458         location of the parsed tokens.""" 
3459      import inspect 
3460      fstack = inspect.stack() 
3461      try: 
3462           
3463          for f in fstack[2:]: 
3464              if f[3] == "_parseNoCache": 
3465                  endloc = f[0].f_locals["loc"] 
3466                  return endloc 
3467          else: 
3468              raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") 
3469      finally: 
3470          del fstack 
 3471   
3500   
3504   
3508   
3510      """Helper to create a validating parse action to be used with start tags created 
3511         with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag 
3512         with a required attribute value, to avoid false matches on common tags such as 
3513         <TD> or <DIV>. 
3514   
3515         Call withAttribute with a series of attribute names and values. Specify the list 
3516         of filter attributes names and values as: 
3517          - keyword arguments, as in (class="Customer",align="right"), or 
3518          - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) 
3519         For attribute names with a namespace prefix, you must use the second form.  Attribute 
3520         names are matched insensitive to upper/lower case. 
3521   
3522         To verify that the attribute exists, but without specifying a value, pass 
3523         withAttribute.ANY_VALUE as the value. 
3524         """ 
3525      if args: 
3526          attrs = args[:] 
3527      else: 
3528          attrs = attrDict.items() 
3529      attrs = [(k,v) for k,v in attrs] 
3530      def pa(s,l,tokens): 
3531          for attrName,attrValue in attrs: 
3532              if attrName not in tokens: 
3533                  raise ParseException(s,l,"no matching attribute " + attrName) 
3534              if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: 
3535                  raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % 
3536                                              (attrName, tokens[attrName], attrValue)) 
 3537      return pa 
# unique sentinel; the withAttribute parse action skips the value comparison
# for any attribute whose required value is this object
withAttribute.ANY_VALUE = object()

# associativity markers; compared against the rightLeftAssoc member of each
# operator definition when building an operator-precedence grammar
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3543   
3545      """Helper method for constructing grammars of expressions made up of 
3546         operators working in a precedence hierarchy.  Operators may be unary or 
3547         binary, left- or right-associative.  Parse actions can also be attached 
3548         to operator expressions. 
3549   
3550         Parameters: 
3551          - baseExpr - expression representing the most basic element for the nested 
3552          - opList - list of tuples, one for each operator precedence level in the 
3553            expression grammar; each tuple is of the form 
3554            (opExpr, numTerms, rightLeftAssoc, parseAction), where: 
3555             - opExpr is the pyparsing expression for the operator; 
3556                may also be a string, which will be converted to a Literal; 
3557                if numTerms is 3, opExpr is a tuple of two expressions, for the 
3558                two operators separating the 3 terms 
3559             - numTerms is the number of terms for this operator (must 
3560                be 1, 2, or 3) 
3561             - rightLeftAssoc is the indicator whether the operator is 
3562                right or left associative, using the pyparsing-defined 
3563                constants opAssoc.RIGHT and opAssoc.LEFT. 
3564             - parseAction is the parse action to be associated with 
3565                expressions matching this operator expression (the 
3566                parse action tuple member may be omitted) 
3567      """ 
3568      ret = Forward() 
3569      lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) 
3570      for i,operDef in enumerate(opList): 
3571          opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] 
3572          if arity == 3: 
3573              if opExpr is None or len(opExpr) != 2: 
3574                  raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 
3575              opExpr1, opExpr2 = opExpr 
3576          thisExpr = Forward() 
3577          if rightLeftAssoc == opAssoc.LEFT: 
3578              if arity == 1: 
3579                  matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) 
3580              elif arity == 2: 
3581                  if opExpr is not None: 
3582                      matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) 
3583                  else: 
3584                      matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) 
3585              elif arity == 3: 
3586                  matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ 
3587                              Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) 
3588              else: 
3589                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 
3590          elif rightLeftAssoc == opAssoc.RIGHT: 
3591              if arity == 1: 
3592                   
3593                  if not isinstance(opExpr, Optional): 
3594                      opExpr = Optional(opExpr) 
3595                  matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 
3596              elif arity == 2: 
3597                  if opExpr is not None: 
3598                      matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) 
3599                  else: 
3600                      matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) 
3601              elif arity == 3: 
3602                  matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ 
3603                              Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) 
3604              else: 
3605                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 
3606          else: 
3607              raise ValueError("operator must indicate right or left associativity") 
3608          if pa: 
3609              matchExpr.setParseAction( pa ) 
3610          thisExpr << ( matchExpr | lastExpr ) 
3611          lastExpr = thisExpr 
3612      ret << lastExpr 
3613      return ret 
 3614   
# Quoted-string expressions.  Between the quotes each pattern accepts: any
# character other than the quote itself, newline, carriage return or backslash;
# a doubled quote character; a \x hex escape; or a backslash followed by any
# single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# the two patterns above combined as alternatives in a single Regex
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u'; a copy of quotedString is used here
unicodeString = Combine(_L('u') + quotedString.copy())
3619   
3621      """Helper method for defining nested lists enclosed in opening and closing 
3622         delimiters ("(" and ")" are the default). 
3623   
3624         Parameters: 
3625          - opener - opening character for a nested list (default="("); can also be a pyparsing expression 
3626          - closer - closing character for a nested list (default=")"); can also be a pyparsing expression 
3627          - content - expression for items within the nested lists (default=None) 
3628          - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) 
3629   
3630         If an expression is not provided for the content argument, the nested 
3631         expression will capture all whitespace-delimited content between delimiters 
3632         as a list of separate values. 
3633   
3634         Use the ignoreExpr argument to define expressions that may contain 
3635         opening or closing characters that should not be treated as opening 
3636         or closing characters for nesting, such as quotedString or a comment 
3637         expression.  Specify multiple expressions using an Or or MatchFirst. 
3638         The default is quotedString, but if no expressions are to be ignored, 
3639         then pass None for this argument. 
3640      """ 
3641      if opener == closer: 
3642          raise ValueError("opening and closing strings cannot be the same") 
3643      if content is None: 
3644          if isinstance(opener,basestring) and isinstance(closer,basestring): 
3645              if len(opener) == 1 and len(closer)==1: 
3646                  if ignoreExpr is not None: 
3647                      content = (Combine(OneOrMore(~ignoreExpr + 
3648                                      CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
3649                                  ).setParseAction(lambda t:t[0].strip())) 
3650                  else: 
3651                      content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS 
3652                                  ).setParseAction(lambda t:t[0].strip())) 
3653              else: 
3654                  if ignoreExpr is not None: 
3655                      content = (Combine(OneOrMore(~ignoreExpr +  
3656                                      ~Literal(opener) + ~Literal(closer) + 
3657                                      CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
3658                                  ).setParseAction(lambda t:t[0].strip())) 
3659                  else: 
3660                      content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + 
3661                                      CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 
3662                                  ).setParseAction(lambda t:t[0].strip())) 
3663          else: 
3664              raise ValueError("opening and closing arguments must be strings if no content expression is given") 
3665      ret = Forward() 
3666      if ignoreExpr is not None: 
3667          ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) 
3668      else: 
3669          ret << Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) ) 
3670      return ret 
 3671   
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       The column on top of C{indentStack} is treated as the indentation of the
       block currently being parsed: it is pushed when a deeper block opens and
       popped when that block closes.  C{blockStatementExpr} is modified in place
       to ignore a backslash followed by a line end.
    """
    def checkPeerIndent(s,l,t):
        # nothing to verify once the end of the input has been reached
        if l >= len(s):
            return
        thisCol = col(l,s)
        blockCol = indentStack[-1]
        if thisCol == blockCol:
            return
        if thisCol > blockCol:
            # deeper than the current block without a block having been opened
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        thisCol = col(l,s)
        if thisCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # entering a more deeply indented block
        indentStack.append( thisCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        thisCol = col(l,s)
        # the block only closes if this column is left of the current level
        # and no deeper than the enclosing one
        if not indentStack or thisCol >= indentStack[-1] or thisCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more line ends, with only tabs and spaces skipped as whitespace
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    # the repeated part: statements that all sit at the current block's column
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    # kept after the grammar has been assembled, matching the original statement order
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3723   
# character sets built from code-point ranges:
# 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# 0xa1-0xbf plus the two code points skipped above (0xd7 and 0xf7)
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for a tag name that starts with a letter and
# continues with letters, digits, "_" or ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&name;" for the five listed entity names; the name is stored under the results name "entity"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# entity name -> replacement character, paired by position (nbsp maps to a plain space)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: returns the mapped character, or None when the entity name is not in the map
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# "/* ... */"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# "<!-- ... -->"; [\s\S] lets the non-greedy body span line breaks
htmlComment = Regex(r"<!--[\s\S]*?-->")
# whatever remains on the current line; leading whitespace is not skipped
restOfLine = Regex(r".*").leaveWhitespace()
# "//" comment; a backslash-newline pair is consumed as part of the comment
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either the "/* ... */" form or the "//" form, as one pattern
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# alias: the very same expression object as cppStyleComment, not a copy
javaStyleComment = cppStyleComment
# "#" through the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# every printable character except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one list item: runs of non-comma characters, optionally joined by spaces/tabs
# as long as that whitespace is not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# delimited list of quoted strings or plain items; an absent item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3747   
3748   
if __name__ == "__main__":
    # Self-test: build a tiny "select ... from ..." grammar and run it against
    # a handful of valid and deliberately invalid statements.

    def test( teststring ):
        """Parse teststring with simpleSQL and print the outcome.

           On success prints the token list, the ParseResults, the "columns"
           and "tables" named results, and the XML rendering.  On a
           ParseBaseException prints the offending line with a caret under the
           reported column, followed by the exception itself.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->"   + str(tokenlist))
            print ("tokens = "         + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = "  + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() instead of "except ... as err" keeps the syntax
            # valid across Python versions
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # blank separator line; a bare print() would print "()" under Python 2
        print ("")

    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    # same statements, in the same order, as the original sequence of test() calls
    for sample in (
            "SELECT * from XYZZY, ABC",
            "select * from SYS.XYZZY",
            "Select A from Sys.dual",
            "Select AA,BB,CC from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Xelect A, B, C from Sys.dual",
            "Select A, B, C frox Sys.dual",
            "Select",
            "Select ^^^ frox Sys.dual",
            "Select A, B, C from Sys.dual, Table2   ",
            ):
        test( sample )
3792