X Tutup
Skip to content

Commit bdd7df6

Browse files
committed
Python 2 loop scanner detection in Python 3
scanner*.py: Make scanner27 and scanner3 more aligned. Makefile: we can run py.test on Python 3.5. HISTORY.md: grammar changes.
1 parent 134b67d commit bdd7df6

File tree

6 files changed

+155
-60
lines changed

6 files changed

+155
-60
lines changed

HISTORY.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ supported.
6969
Next we get to ["uncompyle" and
7070
PyPI](https://pypi.python.org/pypi/uncompyle/1.1) and the era of
7171
public version control. (Dan's code, although not public, used
72-
[darcs](http://darcs.net/) for version control.
72+
[darcs](http://darcs.net/) for version control.)
7373

7474
In contrast to _decompyle_, _uncompyle_, at least in its final versions,
7575
runs only on Python 2.7. However it accepts bytecode back to Python
@@ -98,7 +98,7 @@ actively, if briefly, worked on. Also starting around 2012 is Dark
9898
Fenx's uncompyle3 which I used for inspiration for Python3.
9999

100100
I started working on this late 2015, mostly to add fragment support.
101-
In that decided to make this runnable on Python 3.2+ and Python 2.6+
101+
In that, I decided to make this runnable on Python 3.2+ and Python 2.6+
102102
while handling Python bytecodes from Python versions 2.5+ and
103103
3.2+. (I think I could go back further, but I'd consider doing that
104104
only after code is better cleaned up and supports Python 3 better.)
@@ -116,7 +116,7 @@ Hartmut a decade and a half ago:
116116
NB. This is not a masterpiece of software, but became more like a hack.
117117
Probably a complete rewrite would be sensefull. hG/2000-12-27
118118

119-
This project deparses using a Early-algorithm parse with lots of
119+
This project deparses using an Earley-algorithm parser with lots of
120120
massaging of tokens and the grammar in the scanner
121121
phase. Earley-algorithm parsers are context free and tend to be linear
122122
if the grammar is LR or left recursive.

Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ check:
2323
@PYTHON_VERSION=`$(PYTHON) -V 2>&1 | cut -d ' ' -f 2 | cut -d'.' -f1,2`; \
2424
$(MAKE) check-$$PYTHON_VERSION
2525

26-
#: Tests for Python 2.7, 3.3 and 3.4
27-
check-2.7 check-3.3 check-3.4: pytest
26+
#: Tests for Python 2.7, 3.3, 3.4 and 3.5
27+
check-2.7 check-3.3 check-3.4 check-3.5: pytest
2828
$(MAKE) -C test $@
2929

30-
#: Tests for Python 3.5 - pytest doesn't work here
31-
check-3.2 check-3.5:
30+
#: Tests for Python 3.2 - pytest doesn't work here
31+
check-3.2:
3232
$(MAKE) -C test $@
3333

3434
#:Tests for Python 2.6 (doesn't have pytest)

uncompyle6/parsers/parse3.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def p_grammar(self, args):
243243
testtrue ::= expr jmp_true
244244
245245
_ifstmts_jump ::= return_if_stmts
246-
_ifstmts_jump ::= c_stmts_opt
246+
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
247247
248248
iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
249249
@@ -594,7 +594,6 @@ def p_32(self, args):
594594
"""
595595
# Store locals is only used in Python 3.2
596596
designator ::= STORE_LOCALS
597-
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
598597
"""
599598

600599
class Python34Parser(Python3Parser):
@@ -614,6 +613,11 @@ def p_35on(self, args):
614613
withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
615614
POP_BLOCK LOAD_CONST COME_FROM
616615
WITH_CLEANUP_START WITH_CLEANUP_FINISH END_FINALLY
616+
617+
# Python 3.5 has more loop optimization that removes
618+
# JUMP_FORWARD in some cases, and hence we also don't
619+
# see COME_FROM
620+
_ifstmts_jump ::= c_stmts_opt
617621
"""
618622

619623
class Python3ParserSingle(Python3Parser, PythonParserSingle):

uncompyle6/scanner.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
# Copyright (c) 1999 John Aycock
2-
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
1+
# Copyright (c) 2016 by Rocky Bernstein
32
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
3+
# Copyright (c) 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
4+
# Copyright (c) 1999 John Aycock
45
#
56
# See LICENSE
67
#

uncompyle6/scanners/scanner27.py

Lines changed: 71 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -44,43 +44,11 @@ def disassemble(self, co, classname=None, code_objects={}):
4444

4545
customize = {}
4646
Token = self.Token # shortcut
47-
self.code = array('B', co.co_code)
48-
49-
for i in self.op_range(0, len(self.code)):
50-
if self.code[i] in (RETURN_VALUE, END_FINALLY):
51-
n = i + 1
52-
self.code = array('B', co.co_code[:n])
53-
54-
self.prev = [0]
55-
# mapping addresses of instruction & argument
56-
for i in self.op_range(0, n):
57-
op = self.code[i]
58-
self.prev.append(i)
59-
if op >= HAVE_ARGUMENT:
60-
self.prev.append(i)
61-
self.prev.append(i)
62-
63-
self.lines = []
64-
linetuple = namedtuple('linetuple', ['l_no', 'next'])
65-
66-
j = 0
6747

68-
# linestarts is a tuple of (offset, line number).
69-
# Turn that in a has that we can index
70-
linestarts = list(dis.findlinestarts(co))
71-
linestartoffsets = {}
72-
for offset, lineno in linestarts:
73-
linestartoffsets[offset] = lineno
48+
n = self.setup_code(co)
49+
self.build_lines_data(co, n)
50+
self.build_prev_op(n)
7451

75-
(prev_start_byte, prev_line_no) = linestarts[0]
76-
for (start_byte, line_no) in linestarts[1:]:
77-
while j < start_byte:
78-
self.lines.append(linetuple(prev_line_no, start_byte))
79-
j += 1
80-
prev_line_no = start_byte
81-
while j < n:
82-
self.lines.append(linetuple(prev_line_no, n))
83-
j+=1
8452
# self.lines contains (block,addrLastInstr)
8553
if classname:
8654
classname = '_' + classname.lstrip('_') + '__'
@@ -104,7 +72,7 @@ def unmangle(name):
10472
if names[self.get_argument(i+3)] == 'AssertionError':
10573
self.load_asserts.add(i+3)
10674

107-
cf = self.find_jump_targets(self.code)
75+
cf = self.find_jump_targets()
10876
# contains (code, [addrRefToCode])
10977
last_stmt = self.next_stmt[0]
11078
i = self.next_stmt[last_stmt]
@@ -213,8 +181,8 @@ def unmangle(name):
213181
if offset in self.return_end_ifs:
214182
op_name = 'RETURN_END_IF'
215183

216-
if offset in linestartoffsets:
217-
linestart = linestartoffsets[offset]
184+
if offset in self.linestartoffsets:
185+
linestart = self.linestartoffsets[offset]
218186
else:
219187
linestart = None
220188

@@ -224,6 +192,63 @@ def unmangle(name):
224192
tokens.append(Token(replace[offset], oparg, pattr, offset, linestart))
225193
return tokens, customize
226194

195+
def setup_code(self, co):
196+
"""
197+
Creates Python-independent bytecode structure (byte array) in
198+
self.code, truncated just after the last RETURN_VALUE or END_FINALLY.
199+
The size of self.code is returned
200+
"""
201+
self.code = array('B', co.co_code)
202+
203+
n = -1
204+
for i in self.op_range(0, len(self.code)):
205+
if self.code[i] in (RETURN_VALUE, END_FINALLY):
206+
n = i + 1
207+
pass
208+
pass
209+
assert n > -1, "Didn't find RETURN_VALUE or END_FINALLY FINALLY"
210+
self.code = array('B', co.co_code[:n])
211+
212+
return n
213+
214+
def build_prev_op(self, n):
215+
self.prev = [0]
216+
# mapping addresses of instruction & argument
217+
for i in self.op_range(0, n):
218+
op = self.code[i]
219+
self.prev.append(i)
220+
if op >= HAVE_ARGUMENT:
221+
self.prev.append(i)
222+
self.prev.append(i)
223+
pass
224+
pass
225+
226+
def build_lines_data(self, co, n):
227+
"""
228+
Initializes self.lines and self.linestartoffsets
229+
"""
230+
self.lines = []
231+
linetuple = namedtuple('linetuple', ['l_no', 'next'])
232+
233+
# linestarts is a list of (offset, line number) tuples.
234+
# Turn that into a hash that we can index
235+
linestarts = list(dis.findlinestarts(co))
236+
self.linestartoffsets = {}
237+
for offset, lineno in linestarts:
238+
self.linestartoffsets[offset] = lineno
239+
240+
j = 0
241+
(prev_start_byte, prev_line_no) = linestarts[0]
242+
for (start_byte, line_no) in linestarts[1:]:
243+
while j < start_byte:
244+
self.lines.append(linetuple(prev_line_no, start_byte))
245+
j += 1
246+
prev_line_no = start_byte
247+
while j < n:
248+
self.lines.append(linetuple(prev_line_no, n))
249+
j+=1
250+
return
251+
227252
def build_stmt_indices(self):
228253
code = self.code
229254
start = 0
@@ -585,7 +610,7 @@ def detect_structure(self, pos, op=None):
585610
target = self.get_target(pos, op)
586611
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
587612

588-
def find_jump_targets(self, code):
613+
def find_jump_targets(self):
589614
'''
590615
Detect all offsets in a byte code which are jump targets.
591616
@@ -595,28 +620,30 @@ def find_jump_targets(self, code):
595620
for each target the number of jumps are counted.
596621
'''
597622

598-
n = len(code)
623+
n = len(self.code)
599624
self.structs = [{'type': 'root',
600625
'start': 0,
601626
'end': n-1}]
602627
self.loops = [] # All loop entry points
603628
self.fixed_jumps = {} # Map fixed jumps to their real destination
604629
self.ignore_if = set()
605630
self.build_stmt_indices()
631+
632+
# Containers filled by detect_structure()
606633
self.not_continue = set()
607634
self.return_end_ifs = set()
608635

609636
targets = {}
610637
for i in self.op_range(0, n):
611-
op = code[i]
638+
op = self.code[i]
612639

613640
# Determine structures and fix jumps in Python versions
614641
# since 2.3
615642
self.detect_structure(i, op)
616643

617644
if op >= HAVE_ARGUMENT:
618645
label = self.fixed_jumps.get(i)
619-
oparg = code[i+1] + code[i+2] * 256
646+
oparg = self.code[i+1] + self.code[i+2] * 256
620647
if label is None:
621648
if op in hasjrel and op != FOR_ITER:
622649
label = i + 3 + oparg
@@ -634,7 +661,8 @@ def find_jump_targets(self, code):
634661

635662
if __name__ == "__main__":
636663
co = inspect.currentframe().f_code
637-
tokens, customize = Scanner27().disassemble(co)
664+
from uncompyle6 import PYTHON_VERSION
665+
tokens, customize = Scanner27(PYTHON_VERSION).disassemble(co)
638666
for t in tokens:
639667
print(t)
640668
pass

uncompyle6/scanners/scanner3.py

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ def unmangle(name):
394394
#
395395
# We may however want to consider whether we do
396396
# this in 3.5 or not.
397-
if oparg == 0 and self.version != 3.4:
397+
if oparg == 0 and self.version >= 3.5:
398398
tokens.append(Token('NOP', oparg, pattr, offset, linestart))
399399
continue
400400
elif op_name == 'LOAD_GLOBAL':
@@ -481,23 +481,25 @@ def find_jump_targets(self):
481481
for each target the number of jumps is counted.
482482
"""
483483
code = self.code
484-
codelen = len(code)
484+
n = len(code)
485485
self.structs = [{'type': 'root',
486486
'start': 0,
487-
'end': codelen-1}]
487+
'end': n-1}]
488488

489489
# All loop entry points
490-
# self.loops = []
490+
self.loops = []
491+
491492
# Map fixed jumps to their real destination
492493
self.fixed_jumps = {}
493494
self.ignore_if = set()
494495
self.build_statement_indices()
496+
495497
# Containers filled by detect_structure()
496498
self.not_continue = set()
497499
self.return_end_ifs = set()
498500

499501
targets = {}
500-
for offset in self.op_range(0, codelen):
502+
for offset in self.op_range(0, n):
501503
op = code[offset]
502504

503505
# Determine structures and fix jumps in Python versions
@@ -656,7 +658,67 @@ def detect_structure(self, offset):
656658
end = curent_end
657659
parent = struct
658660

659-
if op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
661+
if op == SETUP_LOOP:
662+
start = offset+3
663+
target = self.get_target(offset)
664+
end = self.restrict_to_parent(target, parent)
665+
666+
if target != end:
667+
self.fixed_jumps[offset] = end
668+
(line_no, next_line_byte) = self.lines[offset]
669+
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE,
670+
next_line_byte, False)
671+
672+
if jump_back and jump_back != self.prev_op[end] and code[jump_back+3] in (JUMP_ABSOLUTE, JUMP_FORWARD):
673+
if code[self.prev_op[end]] == RETURN_VALUE or \
674+
(code[self.prev_op[end]] == POP_BLOCK and code[self.prev_op[self.prev_op[end]]] == RETURN_VALUE):
675+
jump_back = None
676+
if not jump_back: # loop suite ends in return. wtf right?
677+
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
678+
if not jump_back:
679+
return
680+
if code[self.prev_op[next_line_byte]] not in (PJIF, PJIT):
681+
loop_type = 'for'
682+
else:
683+
loop_type = 'while'
684+
self.ignore_if.add(self.prev_op[next_line_byte])
685+
target = next_line_byte
686+
end = jump_back + 3
687+
else:
688+
if self.get_target(jump_back) >= next_line_byte:
689+
jump_back = self.last_instr(start, end, JUMP_ABSOLUTE, start, False)
690+
if end > jump_back+4 and code[end] in (JUMP_FORWARD, JUMP_ABSOLUTE):
691+
if code[jump_back+4] in (JUMP_ABSOLUTE, JUMP_FORWARD):
692+
if self.get_target(jump_back+4) == self.get_target(end):
693+
self.fixed_jumps[offset] = jump_back+4
694+
end = jump_back+4
695+
elif target < offset:
696+
self.fixed_jumps[offset] = jump_back+4
697+
end = jump_back+4
698+
target = self.get_target(jump_back)
699+
700+
if code[target] in (FOR_ITER, GET_ITER):
701+
loop_type = 'for'
702+
else:
703+
loop_type = 'while'
704+
test = self.prev_op[next_line_byte]
705+
if test == offset:
706+
loop_type = 'while 1'
707+
elif self.code[test] in op3.hasjabs+op3.hasjrel:
708+
self.ignore_if.add(test)
709+
test_target = self.get_target(test)
710+
if test_target > (jump_back+3):
711+
jump_back = test_target
712+
self.not_continue.add(jump_back)
713+
self.loops.append(target)
714+
self.structs.append({'type': loop_type + '-loop',
715+
'start': target,
716+
'end': jump_back})
717+
if jump_back+3 != end:
718+
self.structs.append({'type': loop_type + '-else',
719+
'start': jump_back+3,
720+
'end': end})
721+
elif op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
660722
start = offset + self.op_size(op)
661723
target = self.get_target(offset)
662724
rtarget = self.restrict_to_parent(target, parent)

0 commit comments

Comments
 (0)
X Tutup