-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
176 lines (129 loc) · 5.04 KB
/
parser.py
File metadata and controls
176 lines (129 loc) · 5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import sys
from isa import Command, Opcodes, Registers, max_int_22, max_int_32, min_int_22, min_int_32, write_code
from tokenizer import TokenNames, TokenTypes, tokenize
class UnknownTypeOfCommandError(Exception):
    """Raised when the upcoming tokens match none of the command forms the parser knows."""
def is_register(token):
    """Return True when the enum attached to the token's type is a ``Registers`` member."""
    kind = token.type.enum
    return isinstance(kind, Registers)
def is_number(token):
    """Return True when the token's type equals the NUMBER entry of ``TokenTypes``."""
    actual_type = token.type
    return actual_type == TokenTypes[TokenNames.NUMBER]
def is_label(token):
    """Return True when the token's type equals the IDENT entry of ``TokenTypes``."""
    actual_type = token.type
    return actual_type == TokenTypes[TokenNames.IDENT]
def parse_second_reg(operand, command):
    """Fill the second-operand fields of ``command`` from the token ``operand``.

    * register token: ``command.r2`` is set to that register;
    * number token: ``command.r2`` is set to ``Registers.CR`` and the integer
      value is stored in ``command.data``;
    * label (identifier) token: ``command.r2`` is set to ``Registers.CR`` and
      the identifier text is stored in ``command.label``.

    An operand that is none of these leaves ``command`` unchanged.

    Raises:
        AssertionError: a numeric operand lies outside
            ``[min_int_22, max_int_22]``.
    """
    if is_register(operand):
        command.r2 = operand.type.enum

    if is_number(operand):
        command.r2 = Registers.CR
        number = int(operand.text)
        # Raised explicitly instead of using ``assert`` so the range check
        # still runs when Python is started with -O (which strips asserts).
        if not min_int_22 <= number <= max_int_22:
            raise AssertionError(f"Number {number} is too large for 22 bits.")
        command.data = number

    if is_label(operand):
        command.r2 = Registers.CR
        command.label = operand.text
class Parser:
    """Turn a list of tokens into a list of ``Command`` objects.

    Tokens are consumed from the front of ``self.tokens``. Every emitted
    command or data word is appended to ``self.code``. Label definitions are
    recorded in ``self.labels`` and resolved once all tokens are consumed.

    Validation failures keep raising ``AssertionError`` (as the original
    ``assert`` statements did), but are raised explicitly so they are not
    stripped when Python runs with -O.
    """

    def __init__(self, tokens):
        self.tokens = tokens  # remaining input; consumed from index 0
        self.labels = {}  # label name -> index into self.code
        self.code = []  # emitted Command objects, in order

    def on_top(self, *type_names):
        """Return True when the next tokens have exactly these types, in order.

        Returns False when fewer tokens remain than names were given.
        """
        if len(self.tokens) < len(type_names):
            return False
        return not any(
            token.type != TokenTypes[name]
            for token, name in zip(self.tokens, type_names)
        )

    def on_top_operation(self, operands):
        """Return True when an opcode taking ``operands`` operands can be read.

        The next token must be an opcode whose type declares exactly
        ``operands`` operands, and the opcode plus all its operand tokens must
        still be available.
        """
        # The opcode token itself counts too, so ``operands + 1`` tokens are
        # needed. Without this a truncated final instruction would pass the
        # check and then fail with IndexError while popping its operands.
        if len(self.tokens) <= operands:
            return False
        token_type = self.tokens[0].type
        return isinstance(token_type.enum, Opcodes) and token_type.operands == operands

    def parse_code(self):
        """Consume every token, resolve label references and return ``self.code``."""
        while self.tokens:
            self.parse_code_line()
        self.fill_label_addr()
        return self.code

    def parse_code_line(self):
        """Consume one construct from the front of the token list.

        Recognised constructs, tried in this order: label definition, number
        literal, string literal, array allocation, operation with operands.

        Raises:
            AssertionError: a label is defined twice, or a number literal
                lies outside ``[min_int_32, max_int_32]``.
            UnknownTypeOfCommandError: the next tokens match no construct.
        """
        pc = len(self.code)

        # <label>: ...
        if self.on_top(TokenNames.IDENT, TokenNames.COLON):
            label = self.tokens.pop(0).text
            if label in self.labels:
                raise AssertionError(f"Redefinition of label: {label}")
            self.labels[label] = pc
            self.tokens.pop(0)  # :
            return

        # number mem alloc
        if self.on_top(TokenNames.NUMBER):
            number = int(self.tokens.pop(0).text)
            if not min_int_32 <= number <= max_int_32:
                raise AssertionError(f"Number {number} is too large for 32 bits.")
            self.code.append(Command(data=number))
            return

        # string mem alloc: one word holding the length, then one word per character
        if self.on_top(TokenNames.STRING):
            text = self.tokens.pop(0).text
            # The first and last characters are skipped (presumably the
            # surrounding quotes -- confirm against the tokenizer).
            self.code.append(Command(data=len(text) - 2))
            self.code.extend(Command(data=ord(char)) for char in text[1:-1])
            return

        # alloc array: the count sits between the first and last characters of the token
        if self.on_top(TokenNames.ALLOC):
            length = int(self.tokens.pop(0).text[1:-1])
            self.code.extend(Command(data=0) for _ in range(length))
            return

        # op <...> <...>
        if not self.try_parse_operation_command():
            if self.tokens:
                head = self.tokens[0]
                detail = f" at token {getattr(head, 'text', head)!r}"
            else:
                detail = ""
            raise UnknownTypeOfCommandError(f"Unknown type of command{detail}")

    def try_parse_operation_command(self):
        """Parse an operation with 0, 1 or 2 operands; return whether one was found."""
        handlers = (
            (0, self.parse_void_operation),  # op
            (1, self.parse_unary_operation),  # op ...
            (2, self.parse_bin_operation),  # op ... ...
        )
        for operands, handler in handlers:
            if self.on_top_operation(operands):
                handler()
                return True
        return False

    def parse_void_operation(self):
        """Consume an operand-less opcode and emit its command."""
        token_type = self.tokens.pop(0).type
        self.code.append(Command(token_type.enum))

    def parse_unary_operation(self):
        """Consume an opcode with one operand and emit its command.

        Raises:
            AssertionError: the opcode is ``POP`` and the operand is not a
                register, or the operand fails ``parse_second_reg`` checks.
        """
        operation_type = self.tokens.pop(0).type
        operand = self.tokens.pop(0)
        command = Command(operation_type.enum)
        parse_second_reg(operand, command)
        # parse_second_reg sets r2 to CR for number and label operands.
        if operation_type.enum == Opcodes.POP and command.r2 == Registers.CR:
            raise AssertionError("Can pop only to register.")
        self.code.append(command)

    def parse_bin_operation(self):
        """Consume an opcode with two operands and emit its command.

        Raises:
            AssertionError: the first operand is not a register, or the
                second operand fails ``parse_second_reg`` checks.
        """
        operation_type = self.tokens.pop(0).type
        first = self.tokens.pop(0)
        if not is_register(first):
            raise AssertionError("First operand should be a register.")
        second = self.tokens.pop(0)
        command = Command(operation_type.enum)
        command.r1 = first.type.enum
        parse_second_reg(second, command)
        self.code.append(command)

    def fill_label_addr(self):
        """Write each referenced label's recorded index into ``command.data``.

        Raises:
            AssertionError: a command with a label does not have ``r2`` set
                to ``Registers.CR``, or the label was never defined.
        """
        for command in self.code:
            if command.label is None:
                continue
            if command.r2 != Registers.CR:
                raise AssertionError("Use label with CR.")
            if command.label not in self.labels:
                raise AssertionError(f"Unknown label '{command.label}'")
            command.data = self.labels[command.label]
def main(source, target):
    """Parse the file at path ``source`` and write the resulting code to ``target``.

    The file is read as UTF-8, tokenized and parsed; the parsed code is passed
    to ``write_code`` together with ``target``. Finally the number of source
    lines and the number of produced code entries are printed.
    """
    with open(source, encoding="utf-8") as src_file:
        text = src_file.read()

    code = Parser(tokenize(text)).parse_code()
    write_code(target, code)

    line_count = text.count("\n") + 1
    print("source LoC:", line_count, "code instr:", len(code))
if __name__ == "__main__":
    # Checked explicitly rather than with ``assert``: asserts are stripped
    # under ``python -O``, which would turn bad usage into an opaque unpacking
    # ValueError. sys.exit with a string prints it to stderr and exits with 1.
    if len(sys.argv) != 3:
        sys.exit("Wrong arguments: parser.py <input_file> <target_file>")
    _, source, target = sys.argv
    main(source, target)