Skip to content

Commit e5c36d1

Browse files
committed
polishing
1 parent d1b07aa commit e5c36d1

File tree

7 files changed

+81
-94
lines changed

7 files changed

+81
-94
lines changed

analyzer.py

Lines changed: 26 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,75 @@
11
from syntree import *
22
from symtable import *
33

4-
class LabelFactory: # this is a suffix to add to all function names
5-
counter = 0 # in particular, it is useful for function overloading
6-
@staticmethod # it is also useful for different goto labels (loops, conditional statements etc) in assembly code
7-
def new_label():
8-
LabelFactory.counter += 1
9-
return "uniqstr%d" % LabelFactory.counter
10-
11-
def build_symtable(ast):
4+
def decorate(ast):
125
if not isinstance(ast, Function) or ast.name != 'main' or ast.deco['type'] != Type.VOID or len(ast.args)>0:
136
raise Exception('Cannot find a valid entry point')
147
symtable = SymbolTable()
158
symtable.add_fun(ast.name, [], ast.deco)
16-
ast.deco['label'] = ast.name + '_' + LabelFactory.new_label() # unique label
17-
ast.deco['strings'] = [] # collection of constant strings from the program
9+
ast.deco['strings'] = set() # collection of constant strings from the program
10+
process_scope(ast, symtable)
1811
process_scope(ast, symtable)
1912
ast.deco['scope_cnt'] = symtable.scope_cnt # total number of functions, necessary for the static scope display table allocation
2013

2114
def process_scope(fun, symtable):
22-
fun.deco['local'] = [] # set of local variable names: len*4 is the memory necessary on the stack, the names are here to be put in comments
2315
symtable.push_scope(fun.deco)
2416
for v in fun.args: # process function arguments
25-
symtable.add_var(*v)
17+
symtable.add_var(v.name, v.deco)
2618
for v in fun.var: # process local variables
27-
symtable.add_var(*v)
28-
fun.deco['local'].append(v[0])
19+
symtable.add_var(v.name, v.deco)
2920
for f in fun.fun: # process nested functions: first add function symbols to the table
30-
symtable.add_fun(f.name, [d['type'] for v,d in f.args], f.deco)
31-
f.deco['label'] = f.name + '_' + LabelFactory.new_label() # still need unique labels
21+
symtable.add_fun(f.name, [v.deco['type'] for v in f.args], f.deco)
3222
for f in fun.fun: # then process nested function bodies
3323
process_scope(f, symtable)
3424
for s in fun.body: # process the list of statements
35-
process_stat(s, symtable)
25+
process_instruction(s, symtable)
3626
symtable.pop_scope()
3727

38-
def process_stat(n, symtable): # process "statement" syntax tree nodes
28+
def process_instruction(n, symtable):
3929
match n:
4030
case Print(): # no type checking is necessary
41-
process_expr(n.expr, symtable)
31+
process_instruction(n.expr, symtable)
4232
case Return():
43-
if n.expr is None: return
44-
process_expr(n.expr, symtable)
33+
if n.expr is None: return # TODO semantic check for return; in non-void functions
34+
process_instruction(n.expr, symtable)
4535
if symtable.ret_stack[-1]['type'] != n.expr.deco['type']:
4636
raise Exception('Incompatible types in return statement, line %s', n.deco['lineno'])
4737
case Assign():
48-
process_expr(n.expr, symtable)
49-
deco = symtable.find_var(n.name)
50-
n.deco |= { 'scope':deco['scope'], 'offset':deco['offset'], 'type':deco['type'] }
38+
process_instruction(n.expr, symtable)
39+
n.deco |= symtable.find_var(n.name)
5140
if n.deco['type'] != n.expr.deco['type']:
5241
raise Exception('Incompatible types in assignment statement, line %s', n.deco['lineno'])
53-
case FunCall(): # no type checking is necessary
54-
process_expr(n, symtable)
5542
case While():
56-
process_expr(n.expr, symtable)
43+
process_instruction(n.expr, symtable)
5744
if n.expr.deco['type'] != Type.BOOL:
5845
raise Exception('Non-boolean expression in while statement, line %s', n.deco['lineno'])
5946
for s in n.body:
60-
process_stat(s, symtable)
47+
process_instruction(s, symtable)
6148
case IfThenElse():
62-
process_expr(n.expr, symtable)
49+
process_instruction(n.expr, symtable)
6350
if n.expr.deco['type'] != Type.BOOL:
6451
raise Exception('Non-boolean expression in if statement, line %s', n.deco['lineno'])
6552
for s in n.ibody + n.ebody:
66-
process_stat(s, symtable)
67-
case other: raise Exception('Unknown statement type')
68-
69-
def process_expr(n, symtable): # process "expression" syntax tree nodes
70-
match n:
53+
process_instruction(s, symtable)
7154
case ArithOp():
72-
process_expr(n.left, symtable)
73-
process_expr(n.right, symtable)
55+
process_instruction(n.left, symtable)
56+
process_instruction(n.right, symtable)
7457
if n.left.deco['type'] != Type.INT or n.right.deco['type'] != Type.INT:
7558
raise Exception('Arithmetic operation over non-integer type in line %s', n.deco['lineno'])
7659
case LogicOp():
77-
process_expr(n.left, symtable)
78-
process_expr(n.right, symtable)
60+
process_instruction(n.left, symtable)
61+
process_instruction(n.right, symtable)
7962
if (n.left.deco['type'] != n.right.deco['type']) or \
8063
(n.op in ['<=', '<', '>=', '>'] and n.left.deco['type'] != Type.INT) or \
8164
(n.op in ['&&', '||'] and n.left.deco['type'] != Type.BOOL):
8265
raise Exception('Boolean operation over incompatible types in line %s', n.deco['lineno'])
8366
case Var(): # no type checking is necessary
84-
deco = symtable.find_var(n.name)
85-
n.deco |= { 'scope':deco['scope'], 'offset':deco['offset'], 'type':deco['type'] }
67+
n.deco |= symtable.find_var(n.name)
8668
case FunCall():
8769
for s in n.args:
88-
process_expr(s, symtable)
89-
deco = symtable.find_fun(n.name, [a.deco['type'] for a in n.args])
90-
n.deco['fundeco'] = deco # save the function symbol, useful for overloading and for stack preparation
91-
n.deco['type'] = deco['type']
70+
process_instruction(s, symtable)
71+
n.deco |= symtable.find_fun(n.name, [ a.deco['type'] for a in n.args ])
9272
case String(): # no type checking is necessary
93-
n.deco['label'] = LabelFactory.new_label() # unique label for assembly code
94-
symtable.ret_stack[1]['strings'].append((n.deco['label'], n.value))
73+
symtable.ret_stack[1]['strings'].add((n.deco['label'], n.value))
9574
case Integer() | Boolean(): pass # no type checking is necessary
96-
case other: raise Exception('Unknown expression type', n)
75+
case other: raise Exception('Unknown instruction', n)

compiler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
import io, sys
22
from lexer import WendLexer
33
from parser import WendParser
4-
from analyzer import *
5-
from transasm import *
4+
from analyzer import decorate
5+
from transasm import transasm
66

77
if len(sys.argv)!=2:
88
sys.exit('Usage: compiler.py path/source.wend')
99
try:
1010
f = open(sys.argv[1], 'r')
1111
tokens = WendLexer().tokenize(f.read())
1212
ast = WendParser().parse(tokens)
13-
build_symtable(ast)
13+
decorate(ast)
1414
print(transasm(ast))
1515
except Exception as e:
1616
print(e)

lexer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def tokenize(self, text):
5555
if sym1 != '"' or accum and accum[-1]=='\\': # if not quote mark (or if escaped quote mark),
5656
accum += sym1 # continue the scan
5757
else:
58-
yield Token('STRING', accum, lineno) # otherwise emit the token
58+
yield Token('STRING', accum, lineno) # otherwise emit the token
5959
state, accum = 0, '' # start new scan
6060
if sym1 == '\n':
6161
lineno += 1

parser.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def __eq__(self, other):
1919
class WendParser: # the grammar is a list of triplets [nonterminal, production rule, AST node constructor]
2020
grammar = [['fun', ['fun_type', 'ID', 'LPAREN', 'param_list', 'RPAREN', 'BEGIN', 'var_list', 'fun_list', 'statement_list', 'END'],
2121
lambda p: Function(p[1].value, p[3], p[6], p[7], p[8], {'type':p[0], 'lineno':p[1].lineno})],
22-
['var', ['TYPE', 'ID'], lambda p: (p[1].value, {'type':Type.INT if p[0].value=='int' else Type.BOOL, 'lineno':p[0].lineno})],
22+
['var', ['TYPE', 'ID'], lambda p: Var(p[1].value, {'type':Type.INT if p[0].value=='int' else Type.BOOL, 'lineno':p[0].lineno})],
2323
['param_list', ['var'], lambda p: p],
2424
['param_list', [], lambda p: p],
2525
['param_list', ['param_list', 'COMMA', 'var'], lambda p: p[0] + [ p[2] ]],
@@ -49,7 +49,7 @@ class WendParser: # the grammar is a list of triplets [nonterminal, production r
4949
['conjunction', ['literal'], lambda p: p[0]],
5050
['conjunction', ['conjunction', 'AND', 'literal'], lambda p: LogicOp(p[1].value, p[0], p[2], {'lineno':p[1].lineno})],
5151
['literal', ['comparand'], lambda p: p[0]],
52-
['literal', ['NOT', 'comparand'], lambda p: LogicOp('==', Boolean(False), p[1], {'lineno':p[0].lineno})],
52+
['literal', ['NOT', 'comparand'], lambda p: LogicOp('==', Boolean(False, {}), p[1], {'lineno':p[0].lineno})],
5353
['comparand', ['addend'], lambda p: p[0]],
5454
['comparand', ['addend', 'COMP', 'addend'], lambda p: LogicOp(p[1].value, p[0], p[2], {'lineno':p[1].lineno})],
5555
['addend', ['term'], lambda p: p[0]],
@@ -61,7 +61,7 @@ class WendParser: # the grammar is a list of triplets [nonterminal, production r
6161
['term', ['term', 'TIMES', 'factor'], lambda p: ArithOp(p[1].value, p[0], p[2], {'lineno':p[1].lineno})],
6262
['factor', ['atom'], lambda p: p[0]],
6363
['factor', ['PLUS', 'atom'], lambda p: p[1]],
64-
['factor', ['MINUS', 'atom'], lambda p: ArithOp('-', Integer(0), p[1], {'lineno':p[0].lineno})],
64+
['factor', ['MINUS', 'atom'], lambda p: ArithOp('-', Integer(0, {}), p[1], {'lineno':p[0].lineno})],
6565
['atom', ['BOOLEAN'], lambda p: Boolean(p[0].value=='true', {'lineno':p[0].lineno})],
6666
['atom', ['INTEGER'], lambda p: Integer(int(p[0].value), {'lineno':p[0].lineno})],
6767
['atom', ['ID', 'LPAREN', 'arg_list', 'RPAREN'], lambda p: FunCall(p[0].value, p[2], {'lineno':p[0].lineno})],

symtable.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ def __init__(self):
44
self.functions = [{}] # stack of function symbol tables
55
self.ret_stack = [ None ] # stack of enclosing function symbols, useful for return statements
66
self.scope_cnt = 0 # global scope counter for the display table allocation
7-
self.var_cnt = 0 # per scope variable counter, serves as an id in a stack frame
87

98
def add_fun(self, name, argtypes, deco): # a function can be identified by its name and a list of argument types, e.g.
109
signature = (name, *argtypes) # fun foo(x:bool, y:int) : int {...} has ('foo',Type.BOOL,Type.INT) signature
@@ -18,15 +17,15 @@ def add_var(self, name, deco):
1817
if name in self.variables[-1]:
1918
raise Exception('Double declaration of the variable %s' % name)
2019
self.variables[-1][name] = deco
21-
deco['scope'] = self.ret_stack[-1]['scope'] # pointer to the display entry
22-
deco['offset'] = self.var_cnt # id of the variable in the corresponding stack frame
23-
self.var_cnt += 1
20+
deco['scope'] = self.ret_stack[-1]['scope'] # pointer to the display entry
21+
deco['offset'] = self.ret_stack[-1]['var_cnt'] # id of the variable in the corresponding stack frame
22+
self.ret_stack[-1]['var_cnt'] += 1
2423

2524
def push_scope(self, deco):
2625
self.variables.append({})
2726
self.functions.append({})
2827
self.ret_stack.append(deco)
29-
self.var_cnt = 0 # reset the per scope variable counter
28+
deco['var_cnt'] = 0 # reset the per scope variable counter
3029

3130
def pop_scope(self):
3231
self.variables.pop()

syntree.py

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,73 @@
1+
class LabelFactory: # this is a suffix to add to all function names
2+
counter = 0 # in particular, it is useful for function overloading
3+
@staticmethod # it is also useful for different goto labels (loops, conditional statements etc) in assembly code
4+
def cur_label():
5+
return "uniqstr%d" % LabelFactory.counter
6+
def new_label():
7+
LabelFactory.counter += 1
8+
return "uniqstr%d" % LabelFactory.counter
9+
110
class Type:
211
VOID = 0
312
INT = 1
413
BOOL = 2
514
STRING = 3
615

716
class Function:
8-
def __init__(self, name, args, var, fun, body, deco=None):
9-
self.name = name # function name, string
10-
self.args = args # function arguments, list of tuples (name, type)
11-
self.var = var # local variables, list of tuples (name, type)
12-
self.fun = fun # nested functions, list of Function nodes
13-
self.body = body # function body, list of statement nodes (Print/Return/Assign/While/IfThenElse/FunCall)
14-
self.deco = deco or {} # decoration dictionary to be filled by the parser (line number) and by the semantic analyzer (return type, scope id etc)
17+
def __init__(self, name, args, var, fun, body, deco):
18+
self.name = name # function name, string
19+
self.args = args # function arguments, list of tuples (name, type)
20+
self.var = var # local variables, list of tuples (name, type)
21+
self.fun = fun # nested functions, list of Function nodes
22+
self.body = body # function body, list of statement nodes (Print/Return/Assign/While/IfThenElse/FunCall)
23+
self.deco = deco | {'label' : name+'_'+LabelFactory.new_label()} # decoration dictionary to be filled by the parser (line number) and by the semantic analyzer (return type, scope id etc)
1524

1625
# statements
1726
class Print:
18-
def __init__(self, expr, newline, deco=None):
19-
self.expr, self.newline, self.deco = expr, newline, deco or {}
27+
def __init__(self, expr, newline, deco):
28+
self.expr, self.newline, self.deco = expr, newline, deco
2029

2130
class Return:
22-
def __init__(self, expr, deco=None):
23-
self.expr, self.deco = expr, deco or {}
31+
def __init__(self, expr, deco):
32+
self.expr, self.deco = expr, deco
2433

2534
class Assign:
26-
def __init__(self, name, expr, deco=None):
27-
self.name, self.expr, self.deco = name, expr, deco or {}
35+
def __init__(self, name, expr, deco):
36+
self.name, self.expr, self.deco = name, expr, deco
2837

2938
class While:
30-
def __init__(self, expr, body, deco=None):
31-
self.expr, self.body, self.deco = expr, body, deco or {}
39+
def __init__(self, expr, body, deco):
40+
self.expr, self.body, self.deco = expr, body, deco
3241

3342
class IfThenElse:
34-
def __init__(self, expr, ibody, ebody, deco=None):
35-
self.expr, self.ibody, self.ebody, self.deco = expr, ibody, ebody, deco or {}
43+
def __init__(self, expr, ibody, ebody, deco):
44+
self.expr, self.ibody, self.ebody, self.deco = expr, ibody, ebody, deco
3645

3746
# expressions
3847
class ArithOp:
39-
def __init__(self, op, left, right, deco=None):
40-
self.op, self.left, self.right, self.deco = op, left, right, (deco or {}) | {'type':Type.INT}
48+
def __init__(self, op, left, right, deco):
49+
self.op, self.left, self.right, self.deco = op, left, right, deco | {'type' : Type.INT}
4150

4251
class LogicOp:
43-
def __init__(self, op, left, right, deco=None):
44-
self.op, self.left, self.right, self.deco = op, left, right, (deco or {}) | {'type':Type.BOOL}
52+
def __init__(self, op, left, right, deco):
53+
self.op, self.left, self.right, self.deco = op, left, right, deco | {'type' : Type.BOOL}
4554

4655
class Integer:
47-
def __init__(self, value, deco=None):
48-
self.value, self.deco = value, (deco or {}) | {'type':Type.INT}
56+
def __init__(self, value, deco):
57+
self.value, self.deco = value, deco | {'type' : Type.INT}
4958

5059
class Boolean:
51-
def __init__(self, value, deco=None):
52-
self.value, self.deco = value, (deco or {}) | {'type':Type.BOOL}
60+
def __init__(self, value, deco):
61+
self.value, self.deco = value, deco | {'type' : Type.BOOL}
5362

5463
class String:
55-
def __init__(self, value, deco=None):
56-
self.value, self.deco = value, (deco or {}) | {'type':Type.STRING}
64+
def __init__(self, value, deco):
65+
self.value, self.deco = value, deco | {'type' : Type.STRING, 'label' : LabelFactory.new_label() }
5766

5867
class Var:
59-
def __init__(self, name, deco=None):
60-
self.name, self.deco = name, deco or {}
68+
def __init__(self, name, deco):
69+
self.name, self.deco = name, deco
6170

6271
class FunCall: # depending on the context, a function call can be a statement or an expression
63-
def __init__(self, name, args, deco=None):
64-
self.name, self.args, self.deco = name, args, deco or {}
72+
def __init__(self, name, args, deco):
73+
self.name, self.args, self.deco = name, args, deco

transasm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ def expr(n): # convention: all expressions save their results to eax
7070
return templates['var'].format(scope = n.deco['scope']*4, variable = n.deco['offset']*4)
7171
case FunCall():
7272
return templates['funcall'].format(allocargs = ''.join(['%s\tpushl %%eax\n' % expr(a) for a in n.args]),
73-
varsize = len(n.deco['fundeco']['local'])*4,
74-
disphead = len(n.deco['fundeco']['local'])*4 + len(n.args)*4 - 4,
75-
scope = n.deco['fundeco']['scope']*4,
76-
funlabel = n.deco['fundeco']['label'])
73+
varsize = n.deco['var_cnt']*4,
74+
disphead = n.deco['var_cnt']*4 + len(n.args)*4 - 4,
75+
scope = n.deco['scope']*4,
76+
funlabel = n.deco['label'])
7777
case other: raise Exception('Unknown expression type', n)

0 commit comments

Comments
 (0)