src/blib2to3/pgen2/parse.py
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""
from contextlib import contextmanager
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterator,
    List,
    Optional,
    Set,
    Tuple,
    Union,
    cast,
)

from blib2to3.pgen2.grammar import Grammar
from blib2to3.pytree import NL, Context, Leaf, Node, RawNode, convert

# Local imports
from . import grammar, token, tokenize

if TYPE_CHECKING:
    from blib2to3.pgen2.driver import TokenProxy


Results = Dict[str, NL]
Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
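# A hypothetical sketch of the shapes above: a DFA is indexed by state, each
# state holding (label, next_state) arcs, and a DFAS pairs the state table
# with its first set (keyed by the labels that may start the nonterminal):
#
#   states: DFA = [[(1, 1)], [(0, 1)]]  # label 1 advances; state 1 accepts
#   dfas: DFAS = (states, {1: 1})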


def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    assert node[3] is not None
    return Node(type=node[0], children=node[3], context=node[2])


# A placeholder node, used when parser is backtracking.
DUMMY_NODE = (-1, None, None, None)
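# Its shape mirrors RawNode (type, value, context, children) while carrying no
# data, letting the backtracking variants of shift/push/pop rewrite stack
# entries without building real nodes.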


def stack_copy(
    stack: List[Tuple[DFAS, int, RawNode]]
) -> List[Tuple[DFAS, int, RawNode]]:
    """Nodeless stack copy."""
    return [(dfa, label, DUMMY_NODE) for dfa, label, _ in stack]


class Recorder:
    def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
        self.parser = parser
        self._ilabels = ilabels
        self.context = context  # the exact context doesn't really matter here

        self._dead_ilabels: Set[int] = set()
        self._start_point = self.parser.stack
        self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}

    @property
    def ilabels(self) -> Set[int]:
        return self._dead_ilabels.symmetric_difference(self._ilabels)

    @contextmanager
    def switch_to(self, ilabel: int) -> Iterator[None]:
        with self.backtrack():
            self.parser.stack = self._points[ilabel]
            try:
                yield
            except ParseError:
                self._dead_ilabels.add(ilabel)
            finally:
                self.parser.stack = self._start_point

    @contextmanager
    def backtrack(self) -> Iterator[None]:
        """
        Switch the basic parsing operations (push/pop/shift) to their
        node-invariant variants. These still operate on the stack, but they
        won't create any new nodes or modify the contents of existing ones.

        This saves us a ton of time when we are backtracking, since we
        want to restore the initial state as quickly as possible, which
        can only be done by performing as few mutations as possible.
        """
        is_backtracking = self.parser.is_backtracking
        try:
            self.parser.is_backtracking = True
            yield
        finally:
            self.parser.is_backtracking = is_backtracking

    def add_token(self, tok_type: int, tok_val: str, raw: bool = False) -> None:
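        """Feed the token to every still-alive candidate stack.

        With raw=True the token is routed through Parser._addtoken (which
        also needs the ilabel); otherwise the public Parser.addtoken is used.
        """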
        func: Callable[..., Any]
        if raw:
            func = self.parser._addtoken
        else:
            func = self.parser.addtoken

        for ilabel in self.ilabels:
            with self.switch_to(ilabel):
                args = [tok_type, tok_val, self.context]
                if raw:
                    args.insert(0, ilabel)
                func(*args)

    def determine_route(
        self, value: Optional[str] = None, force: bool = False
    ) -> Optional[int]:
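        """Pick the winning ilabel, or return None while the race is undecided.

        If every candidate ilabel has died, raise ParseError for the most
        recently failed one. With force=True, return the first surviving
        ilabel even if several are still alive.
        """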
        alive_ilabels = self.ilabels
        if len(alive_ilabels) == 0:
            *_, most_successful_ilabel = self._dead_ilabels
            raise ParseError("bad input", most_successful_ilabel, value, self.context)

        ilabel, *rest = alive_ilabels
        if force or not rest:
            return ilabel
        else:
            return None


class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(
        self, msg: str, type: Optional[int], value: Optional[str], context: Context
    ) -> None:
        Exception.__init__(
            self, f"{msg}: type={type!r}, value={value!r}, context={context!r}"
        )
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context


class Parser:
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter]) # create instance
    p.setup([start]) # prepare for parsing
    <for each input token>:
        if p.addtoken(...): # parse a token; may raise ParseError
            break
    root = p.rootnode # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, addtoken() raises
    the ParseError exception. There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """
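    # A minimal sketch of the sequence above (the token triples and `proxy`
    # are hypothetical; real callers go through pgen2.driver, which owns
    # tokenization and the TokenProxy):
    #
    #   p = Parser(grammar)
    #   p.setup(proxy)
    #   for type, value, context in tokens:
    #       if p.addtoken(type, value, context):
    #           break
    #   tree = p.rootnode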

    def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        **post-note: the convert argument is ignored since for Black's
        usage, convert will always be blib2to3.pytree.convert. Allowing
        this to be dynamic hurts mypyc's ability to use early binding.
        These docs are left for historical and informational value.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
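        # A hypothetical sketch of the raw node tuples described above:
        #   token node:  (token.NAME, "x", (1, 0), None)    # leaf, no children
        #   symbol node: (257, None, (1, 0), [child, ...])  # 257 = a symbol number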
        self.grammar = grammar
        # See note in docstring above. TL;DR this is ignored.
        self.convert = convert or lam_sub
        self.is_backtracking = False
        self.last_token: Optional[int] = None

    def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode: RawNode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry]
        self.rootnode: Optional[NL] = None
        self.used_names: Set[str] = set()
        self.proxy = proxy
        self.last_token = None

    def addtoken(self, type: int, value: str, context: Context) -> bool:
        """Add a token; return True iff this is the end of the program."""
        # Map from token to label
        ilabels = self.classify(type, value, context)
        assert len(ilabels) >= 1

        # If we have only one state to advance, we'll directly
        # take it as is.
        if len(ilabels) == 1:
            [ilabel] = ilabels
            return self._addtoken(ilabel, type, value, context)

        # If there are multiple states we can advance to (this only
        # happens with soft keywords), then we will try all of them
        # in parallel, and as soon as one state can reach further than
        # the rest, we'll choose that one. This is a pretty hacky
        # and hopefully temporary algorithm.
        #
        # For a more detailed explanation, check out this post:
        # https://tree.science/what-the-backtracking.html
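        # For example, on input like `match x:` the NAME token "match" is
        # classified with two ilabels (plain NAME and the soft keyword), so
        # both candidate stacks are fed the upcoming tokens until a single
        # route survives.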

        with self.proxy.release() as proxy:
            counter, force = 0, False
            recorder = Recorder(self, ilabels, context)
            recorder.add_token(type, value, raw=True)

            next_token_value = value
            while recorder.determine_route(next_token_value) is None:
                if not proxy.can_advance(counter):
                    force = True
                    break

                next_token_type, next_token_value, *_ = proxy.eat(counter)
                if next_token_type in (tokenize.COMMENT, tokenize.NL):
                    counter += 1
                    continue

                if next_token_type == tokenize.OP:
                    next_token_type = grammar.opmap[next_token_value]

                recorder.add_token(next_token_type, next_token_value)
                counter += 1

            ilabel = cast(int, recorder.determine_route(next_token_value, force=force))
            assert ilabel is not None

        return self._addtoken(ilabel, type, value, context)

    def _addtoken(self, ilabel: int, type: int, value: str, context: Context) -> bool:
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t = self.grammar.labels[i][0]
                if t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, itsdfa, newstate, context)
                        break  # To continue the outer while loop

                elif ilabel == i:
                    # Look it up in the list of labels
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    self.last_token = type
                    return False

            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input", type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type: int, value: str, context: Context) -> List[int]:
        """Turn a token into a label. (Internal)

        Depending on whether the value is a soft-keyword or not,
        this function may return multiple labels to choose from."""
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            if value in self.grammar.keywords:
                return [self.grammar.keywords[value]]
            elif value in self.grammar.soft_keywords:
                assert type in self.grammar.tokens
                # Current soft keywords (match, case, type) can only appear at the
                # beginning of a statement. So as a shortcut, don't try to treat them
                # like keywords in any other context.
                # ('_' is also a soft keyword in the real grammar, but for our grammar
                # it's just an expression, so we don't need to treat it specially.)
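                # For example, in `x = match`, "match" follows `=` rather than
                # a statement boundary, so it is classified only as a plain
                # NAME and the soft-keyword label is never considered.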
                if self.last_token not in (
                    None,
                    token.INDENT,
                    token.DEDENT,
                    token.NEWLINE,
                    token.SEMI,
                    token.COLON,
                ):
                    return [self.grammar.tokens[type]]
                return [
                    self.grammar.tokens[type],
                    self.grammar.soft_keywords[value],
                ]

        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return [ilabel]

    def shift(self, type: int, value: str, newstate: int, context: Context) -> None:
        """Shift a token. (Internal)"""
        if self.is_backtracking:
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
        else:
            dfa, state, node = self.stack[-1]
            rawnode: RawNode = (type, value, context, None)
            newnode = convert(self.grammar, rawnode)
            assert node[-1] is not None
            node[-1].append(newnode)
            self.stack[-1] = (dfa, newstate, node)

    def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
        """Push a nonterminal. (Internal)"""
        if self.is_backtracking:
            dfa, state, _ = self.stack[-1]
            self.stack[-1] = (dfa, newstate, DUMMY_NODE)
            self.stack.append((newdfa, 0, DUMMY_NODE))
        else:
            dfa, state, node = self.stack[-1]
            newnode: RawNode = (type, None, context, [])
            self.stack[-1] = (dfa, newstate, node)
            self.stack.append((newdfa, 0, newnode))

    def pop(self) -> None:
        """Pop a nonterminal. (Internal)"""
        if self.is_backtracking:
            self.stack.pop()
        else:
            popdfa, popstate, popnode = self.stack.pop()
            newnode = convert(self.grammar, popnode)
            if self.stack:
                dfa, state, node = self.stack[-1]
                assert node[-1] is not None
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                self.rootnode.used_names = self.used_names