LR(1) 语法分析 (Python)
作者:互联网
内容
- 生成 LR(1) 规范项集族
- 构造 LR(1) 分析表
- 自动机和分析表可视化
- 基于栈的自底向上分析器
- 二义性文法处理(本程序中遇冲突直接覆写,可自由调整)
# 项:(prod_id, dot_pos, ahead)
import copy
def closure(prods, ps, first):
    """Return the LR(1) closure of the item list ``ps``.

    An item is a tuple ``(prod_id, dot_pos, ahead)``.  Every production in
    ``prods`` is a list whose element 0 is the head, so the symbol right
    after the dot is ``prod[dot_pos + 1]``.

    ``first`` maps each grammar symbol to its FIRST list; a ``'$'`` inside
    the list of a nonterminal marks that nonterminal as nullable.  The end
    marker ``'$'`` itself always has FIRST ``['$']``; this is handled
    locally, so ``first`` does not need a ``'$'`` entry and is never
    modified.

    Args:
        prods: list of productions (``[head, sym1, sym2, ...]``).
        ps: list of seed items; it is copied, not modified.
        first: dict mapping symbol -> list of FIRST symbols.

    Returns:
        A new list that starts with the items of ``ps`` followed by every
        item added by the closure, in discovery order.

    Raises:
        KeyError: if a symbol following the dot has no entry in ``first``.
    """
    def first_of(symbol):
        # The end marker stands for itself, whatever the caller stored.
        return ['$'] if symbol == '$' else first[symbol]

    items = list(ps)
    # Parallel set gives O(1) duplicate checks; ``items`` keeps the order,
    # which callers compare and display.
    seen = {tuple(item) for item in items}

    pos = 0
    while pos < len(items):  # ``items`` grows while we walk it (worklist)
        prod_id, dot_pos, ahead = items[pos]
        pos += 1
        prod = prods[prod_id]
        if dot_pos + 1 >= len(prod):
            continue  # dot is at the end: nothing to expand
        cur = prod[dot_pos + 1]

        # Lookaheads for the new items: FIRST(beta ahead), where beta is
        # whatever follows ``cur`` in the production.
        lookaheads = []
        for symbol in prod[dot_pos + 2:] + [ahead]:
            symbol_first = first_of(symbol)
            for x in symbol_first:
                # '$' in a nonterminal's FIRST only means "nullable"; it is
                # a real lookahead only when the symbol is the end marker.
                if (x != '$' or symbol == '$') and x not in lookaheads:
                    lookaheads.append(x)
            if '$' not in symbol_first:
                break  # symbol is not nullable: later symbols are hidden

        for next_symbol in lookaheads:
            for ex_prod_id, ex_prod in enumerate(prods):
                if ex_prod[0] == cur:
                    new_item = (ex_prod_id, 0, next_symbol)
                    if new_item not in seen:
                        seen.add(new_item)
                        items.append(new_item)
    return items
def goto(prods, ps, a, first):
    """Return the LR(1) item set reached from ``ps`` on symbol ``a``.

    Every item of ``ps`` whose symbol right after the dot equals ``a`` is
    advanced by one position; the closure of those kernel items is the
    result.

    Args:
        prods: list of productions (``[head, sym1, sym2, ...]``).
        ps: list of items ``(prod_id, dot_pos, ahead)``.
        a: the grammar symbol to move over.
        first: FIRST table, passed through to ``closure``.

    Returns:
        Whatever ``closure`` returns for the kernel (called with an empty
        kernel when no item of ``ps`` has ``a`` after the dot).
    """
    kernel = []
    seen = set()  # O(1) duplicate check; ``kernel`` keeps the order
    for prod_id, dot_pos, ahead in ps:
        prod = prods[prod_id]
        if dot_pos + 1 < len(prod) and prod[dot_pos + 1] == a:
            moved = (prod_id, dot_pos + 1, ahead)
            if moved not in seen:
                seen.add(moved)
                kernel.append(moved)
    return closure(prods, kernel, first)
def prod2str(prod, dotpos):
    """Render a production with the item dot as ``head→sym·sym...``.

    Args:
        prod: production as a sequence ``[head, sym1, sym2, ...]``; it is
            not modified.
        dotpos: number of right-hand-side symbols already to the left of
            the dot (0 puts the dot before the first symbol).

    Returns:
        The head, an arrow, and the right-hand-side symbols concatenated
        without separators, with ``'·'`` inserted at the dot position.
    """
    # Work on a copy, padded with an empty symbol so the dot can also be
    # placed after the last right-hand-side symbol.
    symbols = list(prod) + [""]
    symbols[dotpos + 1] = '·' + symbols[dotpos + 1]
    return symbols[0] + '→' + ''.join(symbols[1:])
def make_lr1(prods):
    """Build and print the LR(1) item sets and parse table for ``prods``.

    Production 0 is used as the start item, and the head ``"S'"`` is
    treated as the augmented start symbol (its completed item yields the
    accept action).  ``'$'`` is the end-of-input marker and is also used
    inside FIRST lists to mark a nullable symbol.

    The item sets and their transitions are printed with ANSI colours,
    followed by the parse table.

    Args:
        prods: list of productions, each ``[head, sym1, sym2, ...]``.

    Returns:
        A list with one dict per state.  Keys are grammar symbols; values
        are ``'sN'`` (shift to state N), ``'rN'`` (reduce by production N),
        ``'acc'`` (accept) or ``'N'`` (goto state N on a nonterminal).
        When two actions compete for one cell, the later one overwrites
        the earlier one; shifts are written after reduces.
    """
    nonterminals = sorted({prod[0] for prod in prods})
    nonterminal_set = set(nonterminals)
    # The set union keeps '$' unique even if the grammar mentions it.
    terminal_set = {sym for prod in prods for sym in prod
                    if sym not in nonterminal_set} | {'$'}
    terminals = sorted(terminal_set)

    # FIRST sets, iterated to a fixed point.  "$" inside FIRST(X) means
    # "X can derive the empty string".
    first = {sym: [sym] for sym in terminals}
    for sym in nonterminals:
        first[sym] = []
    changed = True
    while changed:
        changed = False
        for prod in prods:
            left = prod[0]
            all_nullable = True
            for sym in prod[1:]:
                for x in first[sym]:
                    if x != "$" and x not in first[left]:
                        first[left].append(x)
                        changed = True
                if "$" not in first[sym]:
                    all_nullable = False
                    break
            if all_nullable and "$" not in first[left]:
                first[left].append("$")
                changed = True

    # Canonical collection.  ``cc`` keeps the states in discovery order;
    # ``state_of`` identifies a state by its item *set*, so the same items
    # reached in a different order map to the same state.
    I0 = closure(prods, [(0, 0, '$')], first)
    cc = [I0]
    state_of = {frozenset(I0): 0}
    table = [{}]
    i = 0
    while i < len(cc):  # ``cc`` grows while new states are discovered
        # Display: group the lookaheads of items that share a core.
        grouped = {}
        for prod_id, dot_pos, ahead in cc[i]:
            grouped.setdefault((prod_id, dot_pos), []).append(ahead)
        print("\033[35m[I%d]:\n \033[31m" % i,
              '\n \033[31m'.join(
                  prod2str(prods[prod_id], dot_pos) + ' \t\033[33m'
                  + '/'.join(aheads) + '\033[30m'
                  for (prod_id, dot_pos), aheads in grouped.items()),
              end="\n \033[32m")

        # Collect the symbols that can be moved over, and write the
        # reduce/accept actions for completed items.
        candiwords = []
        for prod_id, dot_pos, ahead in cc[i]:
            prod = prods[prod_id]
            after_dot = dot_pos + 1
            if after_dot < len(prod):
                if prod[after_dot] not in candiwords:
                    candiwords.append(prod[after_dot])
            elif after_dot == len(prod):
                if prod[0] != "S'":
                    table[i][ahead] = 'r%d' % prod_id
                else:
                    # Overwrite like every other conflict; concatenating
                    # would leave an action string nobody can interpret.
                    table[i]['$'] = 'acc'

        # Transitions; written after the reduces, so a shift wins a
        # shift/reduce conflict.
        for sym in candiwords:
            tmp = goto(prods, cc[i], sym, first)
            if len(tmp) == 0:
                continue
            key = frozenset(tmp)
            target = state_of.get(key)
            if target is None:
                target = len(cc)
                state_of[key] = target
                cc.append(tmp)
                table.append({})
            print(sym, target, sep=",", end=" ")
            if sym in terminal_set:
                table[i][sym] = "s%d" % target
            else:
                table[i][sym] = "%d" % target
        i += 1
        print("\033[0m")  # reset colours and end this state's line

    # Parse table: terminals first, then nonterminals without "S'".
    columns = terminals + [sym for sym in nonterminals if sym != "S'"]
    head = ['I'] + columns
    print('---'.join('-' * 5 for _ in head))
    print(' | '.join(cell.ljust(5) for cell in head))
    print('-+-'.join('-' * 5 for _ in head))
    for state_id, row in enumerate(table):
        cells = [str(state_id)] + [row.get(sym, "") for sym in columns]
        print(' | '.join(cell.ljust(5) for cell in cells))
    return table
# Load the grammar from prod.txt: one production per line, written as
# "HEAD -> SYM SYM ..." with single spaces; the second field (the arrow)
# is dropped.
with open("prod.txt", "r") as prod_file:
    prod_lines = prod_file.readlines()
# Strip only the newline (the last line may not have one), then at most
# one trailing space, and ignore blank lines.
prod_lines = [i.rstrip("\n") for i in prod_lines]
prod_lines = [i[:-1] if i.endswith(' ') else i for i in prod_lines]
prod_lines = [i for i in prod_lines if i]
prods = [i.split(" ")[:1] + i.split(" ")[2:] for i in prod_lines]
print(prods)
table = make_lr1(prods)

# Load the token stream from tokens.txt: space-separated fields per line;
# field 1 is used as the grammar symbol and field 2 as the token text.
with open("tokens.txt", "r") as token_file:
    token_lines = token_file.readlines()
token_lines = [i.rstrip("\n") for i in token_lines]
token_lines = [i[:-1] if i.endswith(' ') else i for i in token_lines]
token_lines = [i.split(' ') for i in token_lines if i]
tokens = [i[1] for i in token_lines]
tokens_str = [i[2] for i in token_lines]
tokens.append("$")  # end-of-input marker
tokens_str.append("")

# Table-driven shift/reduce loop with parallel symbol and state stacks.
stack_token = ['#']
stack_state = [0]
input_ptr = 0
while stack_token:
    cur_token = tokens[input_ptr]
    cur_state = stack_state[-1]
    action = table[cur_state].get(cur_token)
    if action is None:
        print("errorA")  # no action for this token in this state
        break
    if action == "acc":
        print("succeed")
        break
    if action[0] == 's':
        next_state = int(action[1:])
        stack_token.append(cur_token)
        stack_state.append(next_state)
        input_ptr += 1
        print("shift", cur_token)
    else:
        prod_id = int(action[1:])
        prod_len = len(prods[prod_id]) - 1
        print("reduce", prods[prod_id])
        # An empty right-hand side pops nothing; slicing with [:-0] would
        # wipe both stacks, so only pop when there is something to pop.
        if prod_len:
            del stack_token[-prod_len:]
            del stack_state[-prod_len:]
        cur_state = stack_state[-1]
        goto_nt = prods[prod_id][0]
        if goto_nt not in table[cur_state]:
            print("errorB")  # no goto entry for the reduced nonterminal
            break
        next_state = int(table[cur_state][goto_nt])
        stack_token.append(goto_nt)
        stack_state.append(next_state)
文法示例
S' -> L
L -> L S
L -> S
S -> S0 ;
S -> { L }
S0 -> Sa
Sa -> Id = E
S -> IfC S
S -> IfCEl S
IfCEl -> IfC S else
IfC -> if ( C )
C -> C1
C -> Ca C1
Ca -> C &&
C1 -> C2
C1 -> Co C2
Co -> C1 ||
C2 -> C3
C2 -> ! C3
C3 -> ( C )
C -> E Rop E
Rop -> ==
Rop -> !=
Rop -> <
Rop -> <=
Rop -> >
Rop -> >=
E -> E + Et
E -> E - Et
E -> Et
Et -> Et * Ef
Et -> Et / Ef
Et -> Et % Ef
Et -> Ef
Ef -> ( E )
Ef -> Id
E -> Const
项集族输出示例
分析表输出示例
分析输入示例
if (1 == 2) if (3 == 4) a = 1; else a = 2; else a = 3;
分析输出示例
标签:语法分析,Python,prods,token,LR,table,prod,append,first 来源: https://www.cnblogs.com/mollnn/p/15785388.html