Annotated source for src/blib2to3/pgen2/pgen.py

src/blib2to3/pgen2/pgen.py
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | # Licensed to PSF under a Contributor Agreement.
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | import os
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | from typing import (
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     IO,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     Any,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     Dict,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     Iterator,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     List,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     NoReturn,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     Optional,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     Sequence,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     Tuple,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- |     Union,
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | )
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | from blib2to3.pgen2 import grammar, token, tokenize
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | from blib2to3.pgen2.tokenize import GoodTokenInfo
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | Path = Union[str, "os.PathLike[str]"]
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- 01 -- --- --- --- --- --- --- ------- ------- ------- | class PgenGrammar(grammar.Grammar):
---- ---- ---- ---- ---- ---- ---- 01 -- --- --- --- --- --- --- ------- ------- ------- |     pass
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | class ParserGenerator:
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- |     filename: Path
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- |     stream: IO[str]
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- |     generator: Iterator[GoodTokenInfo]
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- |     first: Dict[str, Optional[Dict[str, int]]]
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |     def __init__(self, filename: Path, stream: Optional[IO[str]] = None) -> None:
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         close_stream = None
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         if stream is None:
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |             stream = open(filename, encoding="utf-8")
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |             close_stream = stream.close
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         self.filename = filename
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         self.stream = stream
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         self.generator = tokenize.generate_tokens(stream.readline)
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         self.gettoken()  # Initialize lookahead
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         self.dfas, self.startsymbol = self.parse()
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         if close_stream is not None:
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |             close_stream()
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         self.first = {}  # map from symbol name to set of tokens
0014 0014 0002 0000 0000 0000 05 03 002 003 002 004 005 006 0013.93 0018.58 0001.33 |         self.addfirstsets()
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |     def make_grammar(self) -> PgenGrammar:
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         c = PgenGrammar()
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         names = list(self.dfas.keys())
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         names.sort()
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         names.remove(self.startsymbol)
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         names.insert(0, self.startsymbol)
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         for name in names:
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             i = 256 + len(c.symbol2number)
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             c.symbol2number[name] = i
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             c.number2symbol[i] = name
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         for name in names:
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             dfa = self.dfas[name]
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             states = []
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             for state in dfa:
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |                 arcs = []
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |                 for label, next in sorted(state.arcs.items()):
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |                     arcs.append((self.make_label(c, label), dfa.index(next)))
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |                 if state.isfinal:
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |                     arcs.append((0, dfa.index(state)))
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |                 states.append(arcs)
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             c.states.append(states)
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name))
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         c.start = c.symbol2number[self.startsymbol]
0024 0024 0000 0000 0000 0000 05 06 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         return c
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |     def make_first(self, c: PgenGrammar, name: str) -> Dict[int, int]:
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         rawfirst = self.first[name]
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         assert rawfirst is not None
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         first = {}
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         for label in sorted(rawfirst):
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             ilabel = self.make_label(c, label)
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             ##assert ilabel not in first # XXX failed on <> ... !=
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             first[ilabel] = 1
0008 0008 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         return first
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |     def make_label(self, c: PgenGrammar, label: str) -> int:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |         # XXX Maybe this should be a method on a subclass of converter?
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |         ilabel = len(c.labels)
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |         if label[0].isalpha():
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |             # Either a symbol name or a named token
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |             if label in c.symbol2number:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 # A symbol name (a non-terminal)
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 if label in c.symbol2label:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     return c.symbol2label[label]
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 else:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     c.labels.append((c.symbol2number[label], None))
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     c.symbol2label[label] = ilabel
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     return ilabel
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |             else:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 # A named token (NAME, NUMBER, STRING)
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 itoken = getattr(token, label, None)
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 assert isinstance(itoken, int), label
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 assert itoken in token.tok_name, label
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 if itoken in c.tokens:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     return c.tokens[itoken]
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 else:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     c.labels.append((itoken, None))
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     c.tokens[itoken] = ilabel
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     return ilabel
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |         else:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |             # Either a keyword or an operator
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |             assert label[0] in ('"', "'"), label
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |             value = eval(label)
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |             if value[0].isalpha():
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 if label[0] == '"':
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     keywords = c.soft_keywords
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 else:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     keywords = c.keywords
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 | 
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 # A keyword
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 if value in keywords:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     return keywords[value]
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 else:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     c.labels.append((token.NAME, value))
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     keywords[value] = ilabel
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     return ilabel
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |             else:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 # An operator (any non-numeric token)
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 itoken = grammar.opmap[value]  # Fails if unknown token
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 if itoken in c.tokens:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     return c.tokens[itoken]
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                 else:
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     c.labels.append((itoken, None))
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     c.tokens[itoken] = ilabel
0042 0042 0008 0000 0001 0007 05 09 002 012 008 016 014 024 0091.38 0121.84 0001.33 |                     return ilabel
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0006 0006 0001 0000 0000 0001 05 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |     def addfirstsets(self) -> None:
0006 0006 0001 0000 0000 0001 05 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         names = list(self.dfas.keys())
0006 0006 0001 0000 0000 0001 05 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         names.sort()
0006 0006 0001 0000 0000 0001 05 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         for name in names:
0006 0006 0001 0000 0000 0001 05 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             if name not in self.first:
0006 0006 0001 0000 0000 0001 05 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |                 self.calcfirst(name)
0006 0006 0001 0000 0000 0001 05 -- --- --- --- --- --- --- ------- ------- ------- |             # print name, self.first[name].keys()
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |     def calcfirst(self, name: str) -> None:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         dfa = self.dfas[name]
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         self.first[name] = None  # dummy to detect left recursion
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         state = dfa[0]
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         totalset: Dict[str, int] = {}
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         overlapcheck = {}
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         for label in state.arcs:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |             if label in self.dfas:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                 if label in self.first:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                     fset = self.first[label]
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                     if fset is None:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                         raise ValueError("recursion for rule %r" % name)
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                 else:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                     self.calcfirst(label)
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                     fset = self.first[label]
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                     assert fset is not None
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                 totalset.update(fset)
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                 overlapcheck[label] = fset
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |             else:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                 totalset[label] = 1
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                 overlapcheck[label] = {label: 1}
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         inverse: Dict[str, str] = {}
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         for label, itsfirst in overlapcheck.items():
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |             for symbol in itsfirst:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                 if symbol in inverse:
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                     raise ValueError(
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                         "rule %s is ambiguous; %s is in the first sets of %s as well"
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                         " as %s" % (name, symbol, label, inverse[symbol])
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                     )
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |                 inverse[symbol] = label
0031 0031 0001 0000 0000 0000 05 08 004 011 007 014 015 021 0082.04 0208.84 0002.55 |         self.first[name] = totalset
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |     def parse(self) -> Tuple[Dict[str, List["DFAState"]], str]:
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |         dfas = {}
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |         startsymbol: Optional[str] = None
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |         # MSTART: (NEWLINE | RULE)* ENDMARKER
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |         while self.type != token.ENDMARKER:
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             while self.type == token.NEWLINE:
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |                 self.gettoken()
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             # RULE: NAME ':' RHS NEWLINE
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             name = self.expect(token.NAME)
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             self.expect(token.OP, ":")
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             a, z = self.parse_rhs()
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             self.expect(token.NEWLINE)
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             # self.dump_nfa(name, a, z)
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             dfa = self.make_dfa(a, z)
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             # self.dump_dfa(name, dfa)
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             # oldlen = len(dfa)
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             self.simplify_dfa(dfa)
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             # newlen = len(dfa)
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             dfas[name] = dfa
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             # print name, oldlen, newlen
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |             if startsymbol is None:
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |                 startsymbol = name
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |         assert startsymbol is not None
0018 0017 0007 0000 0000 0007 05 04 004 005 004 008 009 012 0038.04 0121.73 0003.20 |         return dfas, startsymbol
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |     def make_dfa(self, start: "NFAState", finish: "NFAState") -> List["DFAState"]:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         # To turn an NFA into a DFA, we define the states of the DFA
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         # to correspond to *sets* of states of the NFA.  Then do some
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         # state reduction.  Let's represent sets as dicts with 1 for
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         # values.
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         assert isinstance(start, NFAState)
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         assert isinstance(finish, NFAState)
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 | 
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         def closure(state: NFAState) -> Dict[NFAState, int]:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             base: Dict[NFAState, int] = {}
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             addclosure(state, base)
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             return base
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 | 
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         def addclosure(state: NFAState, base: Dict[NFAState, int]) -> None:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             assert isinstance(state, NFAState)
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             if state in base:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                 return
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             base[state] = 1
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             for label, next in state.arcs:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                 if label is None:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                     addclosure(next, base)
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 | 
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         states = [DFAState(closure(start), finish)]
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         for state in states:  # NB states grows while we're iterating
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             arcs: Dict[str, Dict[NFAState, int]] = {}
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             for nfastate in state.nfaset:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                 for label, next in nfastate.arcs:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                     if label is not None:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                         addclosure(next, arcs.setdefault(label, {}))
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |             for label, nfaset in sorted(arcs.items()):
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                 for st in states:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                     if st.nfaset == nfaset:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                         break
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                 else:
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                     st = DFAState(nfaset, finish)
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                     states.append(st)
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |                 state.addarc(st, label)
0033 0031 0006 0000 0003 0004 05 09 004 007 004 008 011 012 0041.51 0094.89 0002.29 |         return states  # List of DFAState instances; first one is start
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |     def dump_nfa(self, name: str, start: "NFAState", finish: "NFAState") -> None:
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |         print("Dump of NFA for", name)
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |         todo = [start]
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |         for i, state in enumerate(todo):
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |             print("  State", i, state is finish and "(final)" or "")
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |             for label, next in state.arcs:
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                 if next in todo:
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                     j = todo.index(next)
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                 else:
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                     j = len(todo)
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                     todo.append(next)
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                 if label is None:
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                     print("    -> %d" % j)
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                 else:
0015 0015 0000 0000 0000 0000 05 07 005 014 007 014 019 021 0089.21 0223.02 0002.50 |                     print("    %s -> %d" % (label, j))
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0006 0006 0000 0000 0000 0000 05 05 003 006 003 006 009 009 0028.53 0042.79 0001.50 |     def dump_dfa(self, name: str, dfa: Sequence["DFAState"]) -> None:
0006 0006 0000 0000 0000 0000 05 05 003 006 003 006 009 009 0028.53 0042.79 0001.50 |         print("Dump of DFA for", name)
0006 0006 0000 0000 0000 0000 05 05 003 006 003 006 009 009 0028.53 0042.79 0001.50 |         for i, state in enumerate(dfa):
0006 0006 0000 0000 0000 0000 05 05 003 006 003 006 009 009 0028.53 0042.79 0001.50 |             print("  State", i, state.isfinal and "(final)" or "")
0006 0006 0000 0000 0000 0000 05 05 003 006 003 006 009 009 0028.53 0042.79 0001.50 |             for label, next in sorted(state.arcs.items()):
0006 0006 0000 0000 0000 0000 05 05 003 006 003 006 009 009 0028.53 0042.79 0001.50 |                 print("    %s -> %d" % (label, dfa.index(next)))
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |     def simplify_dfa(self, dfa: List["DFAState"]) -> None:
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         # This is not theoretically optimal, but works well enough.
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         # Algorithm: repeatedly look for two states that have the same
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         # set of arcs (same labels pointing to the same nodes) and
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         # unify them, until things stop changing.
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 | 
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         # dfa is a list of DFAState instances
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         changes = True
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         while changes:
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             changes = False
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             for i, state_i in enumerate(dfa):
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                 for j in range(i + 1, len(dfa)):
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                     state_j = dfa[j]
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                     if state_i == state_j:
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                         # print "  unify", i, j
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                         del dfa[j]
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                         for state in dfa:
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                             state.unifystate(state_j, state_i)
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                         changes = True
0013 0013 0006 0000 0001 0006 05 06 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                         break
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |     def parse_rhs(self) -> Tuple["NFAState", "NFAState"]:
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |         # RHS: ALT ('|' ALT)*
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |         a, z = self.parse_alt()
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |         if self.value != "|":
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |             return a, z
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |         else:
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |             aa = NFAState()
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |             zz = NFAState()
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |             aa.addarc(a)
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |             z.addarc(zz)
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |             while self.value == "|":
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |                 self.gettoken()
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |                 a, z = self.parse_alt()
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |                 aa.addarc(a)
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |                 z.addarc(zz)
0015 0015 0001 0000 0000 0001 05 03 002 002 002 004 004 006 0012.00 0024.00 0002.00 |             return aa, zz
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0007 0007 0001 0000 0000 0001 05 03 002 006 003 006 008 009 0027.00 0027.00 0001.00 |     def parse_alt(self) -> Tuple["NFAState", "NFAState"]:
0007 0007 0001 0000 0000 0001 05 03 002 006 003 006 008 009 0027.00 0027.00 0001.00 |         # ALT: ITEM+
0007 0007 0001 0000 0000 0001 05 03 002 006 003 006 008 009 0027.00 0027.00 0001.00 |         a, b = self.parse_item()
0007 0007 0001 0000 0000 0001 05 03 002 006 003 006 008 009 0027.00 0027.00 0001.00 |         while self.value in ("(", "[") or self.type in (token.NAME, token.STRING):
0007 0007 0001 0000 0000 0001 05 03 002 006 003 006 008 009 0027.00 0027.00 0001.00 |             c, d = self.parse_item()
0007 0007 0001 0000 0000 0001 05 03 002 006 003 006 008 009 0027.00 0027.00 0001.00 |             b.addarc(c)
0007 0007 0001 0000 0000 0001 05 03 002 006 003 006 008 009 0027.00 0027.00 0001.00 |             b = d
0007 0007 0001 0000 0000 0001 05 03 002 006 003 006 008 009 0027.00 0027.00 0001.00 |         return a, b
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |     def parse_item(self) -> Tuple["NFAState", "NFAState"]:
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |         # ITEM: '[' RHS ']' | ATOM ['+' | '*']
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |         if self.value == "[":
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             self.gettoken()
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             a, z = self.parse_rhs()
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             self.expect(token.OP, "]")
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             a.addarc(z)
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             return a, z
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |         else:
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             a, z = self.parse_atom()
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             value = self.value
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             if value not in ("+", "*"):
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |                 return a, z
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             self.gettoken()
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             z.addarc(a)
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             if value == "+":
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |                 return a, z
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |             else:
0018 0018 0001 0000 0000 0001 05 04 002 004 003 006 006 009 0023.26 0034.90 0001.50 |                 return a, a
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |     def parse_atom(self) -> Tuple["NFAState", "NFAState"]:
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         # ATOM: '(' RHS ')' | NAME | STRING
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         if self.value == "(":
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             self.gettoken()
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             a, z = self.parse_rhs()
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             self.expect(token.OP, ")")
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             return a, z
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         elif self.type in (token.NAME, token.STRING):
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             a = NFAState()
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             z = NFAState()
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             a.addarc(z, self.value)
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             self.gettoken()
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             return a, z
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         else:
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             self.raise_error(
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                 "expected (...) or NAME or STRING, got %s/%s", self.type, self.value
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             )
0015 0017 0001 0000 0000 0001 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             raise AssertionError
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0006 0008 0000 0000 0000 0000 05 04 004 007 005 010 011 015 0051.89 0148.26 0002.86 |     def expect(self, type: int, value: Optional[Any] = None) -> str:
0006 0008 0000 0000 0000 0000 05 04 004 007 005 010 011 015 0051.89 0148.26 0002.86 |         if self.type != type or (value is not None and self.value != value):
0006 0008 0000 0000 0000 0000 05 04 004 007 005 010 011 015 0051.89 0148.26 0002.86 |             self.raise_error(
0006 0008 0000 0000 0000 0000 05 04 004 007 005 010 011 015 0051.89 0148.26 0002.86 |                 "expected %s/%s, got %s/%s", type, value, self.type, self.value
0006 0008 0000 0000 0000 0000 05 04 004 007 005 010 011 015 0051.89 0148.26 0002.86 |             )
0006 0008 0000 0000 0000 0000 05 04 004 007 005 010 011 015 0051.89 0148.26 0002.86 |         value = self.value
0006 0008 0000 0000 0000 0000 05 04 004 007 005 010 011 015 0051.89 0148.26 0002.86 |         self.gettoken()
0006 0008 0000 0000 0000 0000 05 04 004 007 005 010 011 015 0051.89 0148.26 0002.86 |         return value
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0005 0005 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |     def gettoken(self) -> None:
0005 0005 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         tup = next(self.generator)
0005 0005 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         while tup[0] in (tokenize.COMMENT, tokenize.NL):
0005 0005 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             tup = next(self.generator)
0005 0005 0001 0000 0000 0001 05 02 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         self.type, self.value, self.begin, self.end, self.line = tup
0005 0005 0001 0000 0000 0001 05 -- --- --- --- --- --- --- ------- ------- ------- |         # print token.tok_name[self.type], repr(self.value)
---- ---- ---- ---- ---- ---- ---- 05 -- --- --- --- --- --- --- ------- ------- ------- | 
0007 0007 0000 0000 0000 0000 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |     def raise_error(self, msg: str, *args: Any) -> NoReturn:
0007 0007 0000 0000 0000 0000 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         if args:
0007 0007 0000 0000 0000 0000 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             try:
0007 0007 0000 0000 0000 0000 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                 msg = msg % args
0007 0007 0000 0000 0000 0000 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |             except Exception:
0007 0007 0000 0000 0000 0000 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |                 msg = " ".join([msg] + list(map(str, args)))
0007 0007 0000 0000 0000 0000 05 03 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         raise SyntaxError(msg, (self.filename, self.end[0], self.end[1], self.line))
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- 02 -- --- --- --- --- --- --- ------- ------- ------- | class NFAState:
---- ---- ---- ---- ---- ---- ---- 02 -- --- --- --- --- --- --- ------- ------- ------- |     arcs: List[Tuple[Optional[str], "NFAState"]]
---- ---- ---- ---- ---- ---- ---- 02 -- --- --- --- --- --- --- ------- ------- ------- | 
0002 0002 0001 0000 0000 0000 02 01 000 000 000 000 000 000 0000.00 0000.00 0000.00 |     def __init__(self) -> None:
0002 0002 0001 0000 0000 0000 02 01 000 000 000 000 000 000 0000.00 0000.00 0000.00 |         self.arcs = []  # list of (label, NFAState) pairs
---- ---- ---- ---- ---- ---- ---- 02 -- --- --- --- --- --- --- ------- ------- ------- | 
0004 0004 0000 0000 0000 0000 02 01 002 004 002 004 006 006 0015.51 0015.51 0001.00 |     def addarc(self, next: "NFAState", label: Optional[str] = None) -> None:
0004 0004 0000 0000 0000 0000 02 01 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         assert label is None or isinstance(label, str)
0004 0004 0000 0000 0000 0000 02 01 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         assert isinstance(next, NFAState)
0004 0004 0000 0000 0000 0000 02 01 002 004 002 004 006 006 0015.51 0015.51 0001.00 |         self.arcs.append((label, next))
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- | class DFAState:
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- |     nfaset: Dict[NFAState, Any]
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- |     isfinal: bool
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- |     arcs: Dict[str, "DFAState"]
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- | 
0007 0007 0001 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |     def __init__(self, nfaset: Dict[NFAState, Any], final: NFAState) -> None:
0007 0007 0001 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         assert isinstance(nfaset, dict)
0007 0007 0001 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         assert isinstance(next(iter(nfaset)), NFAState)
0007 0007 0001 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         assert isinstance(final, NFAState)
0007 0007 0001 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         self.nfaset = nfaset
0007 0007 0001 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         self.isfinal = final in nfaset
0007 0007 0001 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         self.arcs = {}  # map from label to DFAState
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- | 
0005 0005 0000 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |     def addarc(self, next: "DFAState", label: str) -> None:
0005 0005 0000 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         assert isinstance(label, str)
0005 0005 0000 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         assert label not in self.arcs
0005 0005 0000 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         assert isinstance(next, DFAState)
0005 0005 0000 0000 0000 0000 03 01 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         self.arcs[label] = next
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- | 
0004 0004 0000 0000 0000 0000 03 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |     def unifystate(self, old: "DFAState", new: "DFAState") -> None:
0004 0004 0000 0000 0000 0000 03 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |         for label, next in self.arcs.items():
0004 0004 0000 0000 0000 0000 03 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |             if next is old:
0004 0004 0000 0000 0000 0000 03 03 001 002 001 002 003 003 0004.75 0002.38 0000.50 |                 self.arcs[label] = new
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- | 
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |     def __eq__(self, other: Any) -> bool:
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |         # Equality test -- ignore the nfaset instance variable
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |         assert isinstance(other, DFAState)
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |         if self.isfinal != other.isfinal:
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |             return False
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |         # Can't just return self.arcs == other.arcs, because that
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |         # would invoke this method recursively, with cycles...
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |         if len(self.arcs) != len(other.arcs):
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |             return False
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |         for label, next in self.arcs.items():
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |             if next is not other.arcs.get(label):
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |                 return False
0010 0010 0003 0000 0000 0003 03 05 002 005 003 006 007 009 0025.27 0030.32 0001.20 |         return True
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- 03 -- --- --- --- --- --- --- ------- ------- ------- |     __hash__: Any = None  # For Py3 compatibility.
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
---- ---- ---- ---- ---- ---- ---- -- -- --- --- --- --- --- --- ------- ------- ------- | 
0003 0003 0000 0000 0000 0000 -- 01 000 000 000 000 000 000 0000.00 0000.00 0000.00 | def generate_grammar(filename: Path = "Grammar.txt") -> PgenGrammar:
0003 0003 0000 0000 0000 0000 -- 01 000 000 000 000 000 000 0000.00 0000.00 0000.00 |     p = ParserGenerator(filename)
0003 0003 0000 0000 0000 0000 -- 01 000 000 000 000 000 000 0000.00 0000.00 0000.00 |     return p.make_grammar()