From 7410ccf808298b80d6da73e235f61a4a68361efc Mon Sep 17 00:00:00 2001
From: Eric Wieser
Date: Fri, 25 Nov 2016 10:42:24 +0000
Subject: [PATCH] Add ConcreteBytecode.{instr_at_code_offset, index_at_code_offset}

---
 bytecode/concrete.py            | 59 ++++++++++++++++++++++++++++++
 bytecode/tests/test_concrete.py | 67 +++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/bytecode/concrete.py b/bytecode/concrete.py
index 318703d3..c36ba016 100644
--- a/bytecode/concrete.py
+++ b/bytecode/concrete.py
@@ -377,5 +377,64 @@ def to_bytecode(self):
         bytecode.extend(instructions)
         return bytecode
+
+    def index_at_code_offset(self, offset):
+        """
+        Return the index `i`, for use in slicing, such that:
+        `self[i:]` is the decoded version of `self.to_code()[offset:]`
+        `self[:i]` is the decoded version of `self.to_code()[:offset]`
+
+        Items that are not a ConcreteInstr (e.g. SetLineno) occupy no bytes,
+        so when several items start at `offset` the lowest index is
+        returned.
+
+        For getting a single instruction at an offset, use
+        instr_at_code_offset
+
+        Raises IndexError if `offset` is negative, if
+        `offset` > len(self.to_code()), or if `offset` lies midway through
+        an instruction.
+        """
+        if offset < 0:
+            raise IndexError('Offset {} is out of range'.format(offset))
+
+        # `at` is the code offset at which item #i starts
+        at = 0
+        for i, instr in enumerate(self):
+            if offset == at:
+                return i
+            if isinstance(instr, ConcreteInstr):
+                at += instr.size
+                if offset < at:
+                    # checked right after advancing, so that the message
+                    # names the instruction that contains the offset
+                    raise IndexError(
+                        'Offset {} lies within instruction #{}, {}'
+                        .format(offset, i, instr))
+
+        # returning the length of the array is ok; use len(self) rather than
+        # `i + 1` because `i` is unbound when there are no items at all
+        if offset == at:
+            return len(self)
+
+        raise IndexError(
+            'Offset {} is out of range for code of length {}'
+            .format(offset, at))
+
+    def instr_at_code_offset(self, offset):
+        """
+        Return the instruction starting at `offset` within `self.to_code()`
+
+        Raises IndexError under the same conditions as index_at_code_offset,
+        and also when `offset` == len(self.to_code()), where no instruction
+        starts.
+        """
+        i = self.index_at_code_offset(offset)
+        # skip zero-size items that share this offset with the instruction
+        for j in range(i, len(self)):
+            if isinstance(self[j], ConcreteInstr):
+                return self[j]
+        raise IndexError(
+            'Instruction at offset {} is out of range'.format(offset))
 
 
 class _ConvertBytecodeToConcrete:
diff --git a/bytecode/tests/test_concrete.py b/bytecode/tests/test_concrete.py
index efcae06b..0b7e6d65 100644
--- a/bytecode/tests/test_concrete.py
+++ b/bytecode/tests/test_concrete.py
@@ -335,6 +335,73 @@ def test_load_classderef(self):
         self.assertEqual(
             code.co_code,
             b'\x94\x01\x89\x01' if WORDCODE else b'\x94\x01\x00\x89\x01\x00')
+
+    def test_offset_index(self):
+        concrete = ConcreteBytecode()
+        concrete[:] = [
+            ConcreteInstr('LOAD_FAST', 0),
+            ConcreteInstr('LOAD_FAST', 1),
+            SetLineno(2),
+            ConcreteInstr('BINARY_ADD'),
+            ConcreteInstr('RETURN_VALUE')
+        ]
+        # derive the offsets from the instruction sizes, which differ
+        # between the WORDCODE and pre-WORDCODE encodings
+        load1 = concrete[0].size
+        add = load1 + concrete[1].size
+        ret = add + concrete[3].size
+        end = ret + concrete[4].size
+
+        # simple cases
+        self.assertEqual(concrete.index_at_code_offset(0), 0)
+        self.assertEqual(concrete.instr_at_code_offset(0), concrete[0])
+        self.assertEqual(concrete.index_at_code_offset(load1), 1)
+        self.assertEqual(concrete.instr_at_code_offset(load1), concrete[1])
+        self.assertEqual(concrete.index_at_code_offset(ret), 4)
+        self.assertEqual(concrete.instr_at_code_offset(ret), concrete[4])
+
+        # these indices are deliberately different
+        # the index returns the lower bound, the SetLineno
+        # the instruction returns the actual instruction
+        self.assertEqual(concrete.index_at_code_offset(add), 2)
+        self.assertEqual(concrete.instr_at_code_offset(add), concrete[3])
+
+        # asking for the index at the end is OK, but not the instruction
+        self.assertEqual(concrete.index_at_code_offset(end), 5)
+        self.assertRaisesRegex(IndexError, 'out of range',
+                               concrete.instr_at_code_offset, end)
+
+        # other disallowed things
+        self.assertRaisesRegex(IndexError, 'within instruction #0',
+                               concrete.index_at_code_offset, 1)
+        self.assertRaisesRegex(IndexError, 'within instruction #0',
+                               concrete.instr_at_code_offset, 1)
+        self.assertRaisesRegex(IndexError, 'within instruction #1',
+                               concrete.index_at_code_offset, load1 + 1)
+        self.assertRaisesRegex(IndexError, 'within instruction #1',
+                               concrete.instr_at_code_offset, load1 + 1)
+        self.assertRaisesRegex(IndexError, 'out of range',
+                               concrete.index_at_code_offset, -1)
+        self.assertRaisesRegex(IndexError, 'out of range',
+                               concrete.instr_at_code_offset, -1)
+        self.assertRaisesRegex(IndexError, 'out of range',
+                               concrete.index_at_code_offset, end + 1)
+        self.assertRaisesRegex(IndexError, 'out of range',
+                               concrete.instr_at_code_offset, end + 1)
+
+        # an offset inside the last instruction is "within", not past the end
+        single = ConcreteBytecode()
+        single[:] = [ConcreteInstr('LOAD_FAST', 0)]
+        self.assertRaisesRegex(IndexError, 'within instruction #0',
+                               single.index_at_code_offset, 1)
+
+        # empty bytecode: only offset 0 is valid, and only as an index
+        empty = ConcreteBytecode()
+        self.assertEqual(empty.index_at_code_offset(0), 0)
+        self.assertRaisesRegex(IndexError, 'out of range',
+                               empty.instr_at_code_offset, 0)
+        self.assertRaisesRegex(IndexError, 'out of range',
+                               empty.index_at_code_offset, 1)
 
 
 class ConcreteFromCodeTests(TestCase):