-
Notifications
You must be signed in to change notification settings - Fork 0
/
uCInterpreter.py
628 lines (525 loc) · 20.7 KB
/
uCInterpreter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
# ---------------------------------------------------------------------------------
# uc: uc_interpreter.py
#
# uCInterpreter class: A simple interpreter for the uC intermediate representation
# see https://github.com/iviarcio/mc921
#
# Copyright (c) 2019-2020, Marcio M Pereira All rights reserved.
#
# This software is provided by the author, "as is" without any warranties
# Redistribution and use in source form with or without modification are
# permitted but the source code must retain the above copyright notice.
# ---------------------------------------------------------------------------------
'''
Second and Third Projects: Interpreter for uCIR intermediate code based on uC.
uCIR: check SecondProject.ipynb in 'Notebooks' submodule.
Subject:
MC921 - Construction of Compilers
Modified by:
Victor Ferreira Ferrari - RA 187890
Vinicius Couto Espindola - RA 188115
Modifications from source:
Changed class name;
Added generator and test method;
Compatibility change for labels.
University of Campinas - UNICAMP - 2020
Last Modified: 09/07/2020.
'''
import sys
from os.path import exists
class uCIRInterpreter(object):
"""
Runs an interpreter on the uC intermediate code generated for
uC compiler. The implementation idea is as follows. Given
a sequence of instruction tuples such as:
code = [
('literal_int', 1, '%1'),
('literal_int', 2, '%2'),
('add_int', '%1', '%2, '%3')
('print_int', '%3')
...
]
The class executes methods self.run_opcode(args). For example:
self.run_literal_int(1, '%1')
self.run_literal_int(2, '%2')
self.run_add_int('%1', '%2', '%3')
self.run_print_int('%3')
Instructions for use:
1. Instantiate an object of the Interpreter class
2. Call the run method of this object passing the produced
code as a parameter
"""
def __init__(self, generator):
global inputline, M
inputline = []
M = 10000 * [None] # Memory for global & local vars
self.globals = {} # Dictionary of address of global vars & constants
self.vars = {} # Dictionary of address of local vars relative to sp
self.offset = 0 # offset (index) of local & global vars. Note that
# each instance of var has absolute address in Memory
self.stack = [] # Stack to save address of vars between calls
self.sp = [] # Stack to save & restore the last offset
self.params = [] # List of parameters from caller (address)
self.result = None # Result Value (address) from the callee
self.registers = [] # Stack of register names (in the caller) to return value
self.returns = [] # Stack of return addresses (program counters)
self.pc = 0 # Program Counter
self.start = 0 # PC of the main function
self.code = None
self.generator = generator
def test(self, data, quiet=False):
self.generator.front_end.parser.lexer.reset_line_num()
# Scan and parse
if exists(data):
with open(data, 'r') as content_file :
data = content_file.read()
# Generate IR.
self.generator.generate(data)
if not quiet:
self.generator.print_code()
# Run IR code.
self.run(self.generator.code)
def _extract_operation(self, source):
_modifier = {}
_aux = source.split('_')
if _aux[0] not in {'fptosi', 'sitofp', 'label', 'jump', 'cbranch',
'define', 'call'}:
_opcode = _aux[0] + '_' + _aux[1]
for i, _val in enumerate(_aux[2:]):
if _val.isdigit():
_modifier['dim' + str(i)] = _val
elif _val == '*':
_modifier['ptr' + str(i)] = _val
else:
_opcode = _aux[0]
return (_opcode, _modifier)
def _copy_data(self, address, size, value):
if isinstance(value, str):
_value = list(value)
elif any(isinstance(item, list) for item in value):
_value = [item for sublist in value for item in sublist]
else:
_value = value
M[address:address+size] = _value
def run(self, ircode):
"""
Run intermediate code in the interpreter. ircode is a list
of instruction tuples. Each instruction (opcode, *args) is
dispatched to a method self.run_opcode(*args)
"""
# First, store the global vars & constants
# Also, set the start pc to the main function entry
self.code = ircode
self.pc = 0
self.offset = 0
while True:
try:
op = ircode[self.pc]
except IndexError:
break
if len(op) > 1: # that is, not label
opcode, modifier = self._extract_operation(op[0])
if opcode.startswith('global'):
self.globals[op[1]] = self.offset
# get the size of global var
if not modifier:
# size equals 1 or is a constant, so we use only
# one slot in the memory to make it simple.
if len(op) == 3:
M[self.offset] = op[2]
self.offset += 1
else:
_len = 1
for args in modifier.values():
if args.isdigit():
_len *= int(args)
if len(op) == 3:
self._copy_data(self.offset, _len, op[2])
self.offset += _len
elif opcode.startswith('define'):
self.globals[op[1]] = self.offset
M[self.offset] = self.pc
self.offset += 1
if op[1] == '@main':
self.start = self.pc
self.pc += 1
# Now, running the program starting from the main function
self.pc = self.start
while True:
try:
op = ircode[self.pc]
except IndexError:
break
self.pc += 1
if len(op) > 1 or op[0] == 'return_void':
opcode, modifier = self._extract_operation(op[0])
if hasattr(self, "run_" + opcode):
if not modifier:
getattr(self, "run_" + opcode)(*op[1:])
else:
getattr(self, "run_" + opcode + '_')(*op[1:], **modifier)
else:
print("Warning: No run_" + opcode + "() method", flush=True)
#
# Auxiliary methods
#
def _alloc_labels(self):
# Alloc labels for current function definition. Due to the uCIR and due to
# the chosen memory model, this is done every time we enter a function.
_lpc = self.pc
while True:
try:
_op = self.code[_lpc]
_opcode = _op[0]
_lpc += 1
if _opcode.startswith('define'):
break
elif len(_op) == 1 and _opcode != 'return_void':
# labels don't go to memory, just store the pc on dictionary
# labels appears as name:, so we need to extract just the name
self.vars['%' + _opcode] = _lpc
except IndexError:
break
def _alloc_reg(self, target):
# Alloc space in memory and save the offset in the dictionary
# for new vars or temporaries, only.
if target not in self.vars:
self.vars[target] = self.offset
self.offset += 1
def _get_address(self, source):
if source.startswith('@'):
return self.globals[source]
else:
return self.vars[source]
def _get_input(self):
global inputline
while True:
if len(inputline) > 0:
break
inputline = sys.stdin.readline()
if not inputline:
print("Unexpected end of input file.", flush=True)
inputline = inputline[:-1].strip().split()
def _get_value(self, source):
if source.startswith('@'):
return M[self.globals[source]]
else:
return M[self.vars[source]]
def _load_multiple_values(self, size, varname, target):
self.vars[target] = self.offset
self.offset += size
self._store_multiple_values(size, target, varname)
def _push(self, locs):
# save the addresses of the vars from caller & their last offset
self.stack.append(self.vars)
self.sp.append(self.offset)
# clear the dictionary of caller local vars and their offsets in memory
# and copy the parameters passed to the callee in their local vars.
# Finally, cleanup the parameters list used to transfer these vars
self.vars = {}
for idx, val in enumerate(self.params):
# Note that arrays (size >=1) are passed by reference only.
self.vars[locs[idx]] = self.offset
M[self.offset] = M[val]
self.offset += 1
self.params = []
self._alloc_labels()
def _pop(self, target):
if self.returns:
# get the return value
_value = M[target]
# restore the vars of the caller
self.vars = self.stack.pop()
# store in the caller return register the _value
M[self.vars[self.registers.pop()]] = _value
# restore the last offset from the caller
self.offset = self.sp.pop()
# jump to the return point in the caller
self.pc = self.returns.pop()
else:
# We reach the end of main function, so return to system
# with the code returned by main in the return register.
print(end="", flush=True)
if target is None:
# void main () was defined, so exit with value 0
sys.exit(0)
else:
sys.exit(M[target])
def _store_deref(self, target, value):
if target.startswith('@'):
M[M[self.globals[target]]] = value
else:
M[M[self.vars[target]]] = value
def _store_multiple_values(self, dim, target, value):
_left = self._get_address(target)
_right = self._get_address(value)
if value.startswith('@'):
if isinstance(M[_right], str):
_value = list(M[_right])
M[_left:_left+dim] = _value
return
M[_left:_left+dim] = M[_right:_right+dim]
def _store_value(self, target, value):
if target.startswith('@'):
M[self.globals[target]] = value
else:
M[self.vars[target]] = value
#
# Run Operations, except Binary, Relational & Cast
#
def run_alloc_int(self, varname):
self._alloc_reg(varname)
M[self.vars[varname]] = 0
run_alloc_float = run_alloc_int
run_alloc_char = run_alloc_int
def run_alloc_int_(self, varname, **kwargs):
_dim = 1
for arg in kwargs.values():
if arg.isdigit():
_dim *= int(arg)
self.vars[varname] = self.offset
M[self.offset:self.offset + _dim] = _dim * [0]
self.offset += _dim
run_alloc_float_ = run_alloc_int_
run_alloc_char_ = run_alloc_int_
def run_call(self, source, target):
# alloc register to return and append it to register stack
self._alloc_reg(target)
self.registers.append(target)
# save the return pc in the return stack
self.returns.append(self.pc)
# jump to the calle function
if source.startswith('@'):
self.pc = M[self.globals[source]]
else:
self.pc = M[self.vars[source]]
def run_cbranch(self, expr_test, true_target, false_target):
if M[self.vars[expr_test]]:
self.pc = self.vars[true_target]
else:
self.pc = self.vars[false_target]
# Enter the function
def run_define(self, source, args):
if source == '@main':
# alloc register to the return value but not initialize it.
# We use the "None" value to check if main function returns void.
self._alloc_reg('%0')
# alloc the labels with respective pc's
self._alloc_labels()
else:
# extract the location names of function args
_locs = [el[1] for el in args]
self._push(_locs)
def run_elem_int(self, source, index, target):
self._alloc_reg(target)
_aux = self._get_address(source)
_idx = self._get_value(index)
_address = _aux + _idx
self._store_value(target, _address)
run_elem_float = run_elem_int
run_elem_char = run_elem_int
def run_get_int(self, source, target):
# We never generate this code without * (ref) but we need to define it
pass
def run_get_int_(self, source, target, **kwargs):
# kwargs always contain * (ref), so we ignore it.
self._store_value(target, self._get_address(source))
run_get_float_ = run_get_int_
run_get_char_ = run_get_int_
def run_jump(self, target):
self.pc = self.vars[target]
# load literals into registers
def run_literal_int(self, value, target):
self._alloc_reg(target)
M[self.vars[target]] = value
run_literal_float = run_literal_int
run_literal_char = run_literal_int
# Load/stores
def run_load_int(self, varname, target):
self._alloc_reg(target)
M[self.vars[target]] = self._get_value(varname)
run_load_float = run_load_int
run_load_char = run_load_int
run_load_bool = run_load_int
def run_load_int_(self, varname, target, **kwargs):
_ref = 0
_dim = 1
for arg in kwargs.values():
if arg.isdigit():
_dim *= int(arg)
elif arg == '*':
_ref += 1
if _ref == 0:
self._load_multiple_values(_dim, varname, target)
elif _dim == 1 and _ref == 1:
self._alloc_reg(target)
M[self.vars[target]] = M[self._get_value(varname)]
run_load_float_ = run_load_int_
run_load_char_ = run_load_int_
def run_param_int(self, source):
self.params.append(self.vars[source])
run_param_float = run_param_int
run_param_char = run_param_int
def run_print_string(self, source):
_res = list(self._get_value((source)))
for c in _res:
print(c, end="", flush=True)
def run_print_int(self, source):
print(self._get_value(source), end="", flush=True)
run_print_float = run_print_int
run_print_char = run_print_int
run_print_bool = run_print_int
def run_print_void(self):
print(end="\n", flush=True)
def _read_int(self):
global inputline
self._get_input()
try:
v1 = inputline[0]
inputline = inputline[1:]
try:
v2 = int(v1)
except:
v2 = v1
except:
print("Illegal input value.", flush=True)
return v2
def _read_float(self):
global inputline
self._get_input()
try:
v1 = inputline[0]
inputline = inputline[1:]
try:
v2 = float(v1)
except:
v2 = v1
except:
print("Illegal input value.", flush=True)
return v2
def run_read_int(self, source):
_value = self._read_int()
self._store_value(source, _value)
def run_read_int_(self, source, **kwargs):
_value = self._read_int()
self._store_deref(source, _value)
def run_read_float(self, source):
_value = self._read_float()
self._store_value(source, _value)
def run_read_float_(self, source, **kwargs):
_value = self._read_float()
self._store_deref(source, _value)
def run_read_char(self, source):
global inputline
self._get_input()
v1 = inputline[0]
inputline = inputline[1:]
self._alloc_reg(source)
self._store_value(source, v1)
def run_read_char_(self, source, **kwargs):
global inputline
self._get_input()
v1 = inputline[0]
inputline = inputline[1:]
self._store_deref(source, v1)
def run_return_int(self, target):
self._pop(self.vars[target])
run_return_float = run_return_int
run_return_char = run_return_int
def run_return_void(self):
self._pop(M[self.vars['%0']])
def run_store_int(self, source, target):
self._store_value(target, self._get_value(source))
run_store_float = run_store_int
run_store_char = run_store_int
run_store_bool = run_store_int
def run_store_int_(self, source, target, **kwargs):
_ref = 0
_dim = 1
for arg in kwargs.values():
if arg.isdigit():
_dim *= int(arg)
elif arg == '*':
_ref += 1
if _ref == 0:
self._store_multiple_values(_dim, target, source)
elif _dim == 1 and _ref == 1:
self._store_deref(target, self._get_value(source))
run_store_float_ = run_store_int_
run_store_char_ = run_store_int_
#
# perform binary, relational & cast operations
#
def run_add_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] + M[self.vars[right]]
def run_sub_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] - M[self.vars[right]]
def run_mul_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] * M[self.vars[right]]
def run_mod_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] % M[self.vars[right]]
def run_div_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] // M[self.vars[right]]
def run_div_float(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] / M[self.vars[right]]
# Floating point ops (same as int)
run_add_float = run_add_int
run_sub_float = run_sub_int
run_mul_float = run_mul_int
# Integer comparisons
def run_lt_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] < M[self.vars[right]]
def run_le_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] <= M[self.vars[right]]
def run_gt_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] > M[self.vars[right]]
def run_ge_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] >= M[self.vars[right]]
def run_eq_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] == M[self.vars[right]]
def run_ne_int(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] != M[self.vars[right]]
# Float comparisons
run_lt_float = run_lt_int
run_le_float = run_le_int
run_gt_float = run_gt_int
run_ge_float = run_ge_int
run_eq_float = run_eq_int
run_ne_float = run_ne_int
# String comparisons
run_lt_char = run_lt_int
run_le_char = run_le_int
run_gt_char = run_gt_int
run_ge_char = run_ge_int
run_eq_char = run_eq_int
run_ne_char = run_ne_int
# Bool comparisons
run_eq_bool = run_eq_int
run_ne_bool = run_ne_int
def run_and_bool(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] and M[self.vars[right]]
def run_or_bool(self, left, right, target):
self._alloc_reg(target)
M[self.vars[target]] = M[self.vars[left]] or M[self.vars[right]]
def run_not_bool(self, source, target):
self._alloc_reg(target)
M[self.vars[target]] = not self._get_value(source)
def run_sitofp(self, source, target):
self._alloc_reg(target)
M[self.vars[target]] = float(self._get_value(source))
def run_fptosi(self, source, target):
self._alloc_reg(target)
M[self.vars[target]] = int(self._get_value(source))