X Tutup
Skip to content

Commit b6dee24

Browse files
committed
First step towards managing control flow decoding
1 parent 4f83a87 commit b6dee24

File tree

5 files changed

+79
-15
lines changed

5 files changed

+79
-15
lines changed

pytest/test_grammar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def check_tokens(tokens, opcode_set):
3131
assert expect_right_recursive == right_recursive
3232
s = get_scanner(PYTHON_VERSION, IS_PYPY)
3333
ignore_set = set(
34-
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM
34+
"""JUMP_BACK CONTINUE RETURN_END_IF COME_FROM COME_FROM_EXCEPT
3535
LOAD_GENEXPR LOAD_ASSERT LOAD_SETCOMP LOAD_DICTCOMP
3636
LAMBDA_MARKER RETURN_LAST
3737
""".split())

uncompyle6/parsers/parse3.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def p_grammar(self, args):
169169
# COME_FROM targets from the wrong places
170170
171171
trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
172-
try_middle _come_from
172+
try_middle opt_come_from_except
173173
174174
# this is nested inside a trystmt
175175
tryfinallystmt ::= SETUP_FINALLY suite_stmts_opt
@@ -187,8 +187,14 @@ def p_grammar(self, args):
187187
188188
try_middle ::= jmp_abs COME_FROM except_stmts
189189
END_FINALLY
190+
try_middle ::= jmp_abs COME_FROM_EXCEPT except_stmts
191+
END_FINALLY
192+
193+
# FIXME: remove this
190194
try_middle ::= JUMP_FORWARD COME_FROM except_stmts
191195
END_FINALLY COME_FROM
196+
try_middle ::= JUMP_FORWARD COME_FROM except_stmts
197+
END_FINALLY COME_FROM_EXCEPT
192198
193199
except_stmts ::= except_stmts except_stmt
194200
except_stmts ::= except_stmt
@@ -242,17 +248,25 @@ def p_grammar(self, args):
242248

243249
def p_misc3(self, args):
244250
"""
245-
try_middle ::= JUMP_FORWARD COME_FROM except_stmts END_FINALLY NOP COME_FROM
251+
try_middle ::= JUMP_FORWARD COME_FROM_EXCEPT except_stmts END_FINALLY COME_FROM
252+
246253
for_block ::= l_stmts
247254
iflaststmtl ::= testexpr c_stmts_opt
248255
iflaststmt ::= testexpr c_stmts_opt34
249256
c_stmts_opt34 ::= JUMP_BACK JUMP_ABSOLUTE c_stmts_opt
250257
"""
251258

252-
def p_jump3(self, args):
259+
def p_come_from3(self, args):
253260
"""
261+
opt_come_from_except ::= COME_FROM_EXCEPT
262+
opt_come_from_except ::= come_froms
263+
254264
come_froms ::= come_froms COME_FROM
255-
come_froms ::= COME_FROM
265+
come_froms ::=
266+
"""
267+
268+
def p_jump3(self, args):
269+
"""
256270
jmp_false ::= POP_JUMP_IF_FALSE
257271
jmp_true ::= POP_JUMP_IF_TRUE
258272

uncompyle6/scanners/controlflow.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""
2+
Detect control flow as much as possible.
3+
The basic idea here is to put in explicit end instructions that make
4+
grammar parsing simpler and more precise.
5+
"""
6+
7+
from collections import namedtuple
8+
from xdis.bytecode import Bytecode
9+
10+
control_flow_start = namedtuple('control_flow_start', ['name', 'type', 'offset'])
11+
control_flow_end = namedtuple('control_flow_end', ['name', 'type', 'offset'])
12+
control_flow_pair = namedtuple('control_flow_pair', ['name', 'start_offset', 'end_offset'])
13+
14+
15+
class ControlFlow():
16+
def __init__(self, scanner):
17+
self.scanner = scanner
18+
self.opc = self.scanner.opc
19+
self.setup_ops = self.scanner.setup_ops
20+
self.op_range = self.scanner.op_range
21+
22+
# Control-flow nesting
23+
self.offset_action = {}
24+
self.cf_end = []
25+
26+
def detect_control_flow(self, co):
27+
self.bytecode = Bytecode(co, self.opc)
28+
for inst in self.bytecode:
29+
if inst.opcode in self.setup_ops:
30+
# Use the part of the opcode name that follows the SETUP_ prefix
31+
name = inst.opname[len('SETUP_'):]
32+
self.offset_action[inst.offset] = control_flow_start(name, 'start', inst.offset)
33+
self.offset_action[inst.argval] = control_flow_end(name, 'end', inst.offset)
34+
pass
35+
pass
36+
# import pprint
37+
# pp = pprint.PrettyPrinter(indent=4)
38+
# pp.pprint(self.offset_action)
39+
40+
return self.offset_action

uncompyle6/scanners/scanner3.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from xdis.code import iscode
3030
from xdis.bytecode import Bytecode
3131
from uncompyle6.scanner import Token, parse_fn_counts
32+
from uncompyle6.scanners.controlflow import ControlFlow
3233

3334
# Get all the opcodes into globals
3435
import xdis.opcodes.opcode_33 as op3
@@ -102,6 +103,11 @@ def __init__(self, version, show_asm=None, is_pypy=False):
102103
varargs_ops.add(self.opc.CALL_METHOD)
103104
self.varargs_ops = frozenset(varargs_ops)
104105

106+
self.setup_ops = frozenset([
107+
self.opc.SETUP_LOOP,
108+
self.opc.SETUP_EXCEPT, self.opc.SETUP_FINALLY,
109+
self.opc.SETUP_WITH])
110+
105111
# Not really a set, but still classification-like
106112
self.statement_opcode_sequences = [
107113
(self.opc.POP_JUMP_IF_FALSE, self.opc.JUMP_FORWARD),
@@ -127,7 +133,7 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
127133
"""
128134

129135
show_asm = self.show_asm if not show_asm else show_asm
130-
# show_asm = 'both'
136+
# show_asm = 'after'
131137
if show_asm in ('both', 'before'):
132138
bytecode = Bytecode(co, self.opc)
133139
for instr in bytecode.get_instructions(co):
@@ -179,13 +185,24 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None):
179185
# Format: {target offset: [jump offsets]}
180186
jump_targets = self.find_jump_targets()
181187

188+
offset_action = ControlFlow(self).detect_control_flow(co)
182189
for inst in bytecode:
183190

184191
argval = inst.argval
185192
if inst.offset in jump_targets:
186193
jump_idx = 0
187194
for jump_offset in jump_targets[inst.offset]:
188-
tokens.append(Token('COME_FROM', None, repr(jump_offset),
195+
come_from_name = 'COME_FROM'
196+
if (inst.offset in offset_action
197+
and offset_action[inst.offset].type == 'end'
198+
# FIXME: adjust the grammar, then remove the name restriction below
199+
and offset_action[inst.offset].name in ['EXCEPT']
200+
):
201+
come_from_name = '%s_%s' % (
202+
(come_from_name, offset_action[inst.offset].name))
203+
pass
204+
tokens.append(Token(come_from_name,
205+
None, repr(jump_offset),
189206
offset='%s_%s' % (inst.offset, jump_idx),
190207
has_arg = True, opc=self.opc))
191208
jump_idx += 1

uncompyle6/scanners/tok.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,14 +61,7 @@ def __str__(self):
6161
if self.pattr:
6262
pattr = self.pattr
6363
if self.opc:
64-
if self.op in self.opc.hasjrel:
65-
pattr = "to " + self.pattr
66-
elif self.op in self.opc.hasjabs:
67-
self.pattr= str(self.pattr)
68-
if not self.pattr.startswith('to '):
69-
pattr = "to " + str(self.pattr)
70-
pass
71-
elif self.op in self.opc.hascompare:
64+
if self.op in self.opc.hascompare:
7265
if isinstance(self.attr, int):
7366
pattr = self.opc.cmp_op[self.attr]
7467
# And so on. See xdis/bytecode.py get_instructions_bytes

0 commit comments

Comments
 (0)
X Tutup