编译原理-词法分析 python实现
作者:互联网
上学的时候,老师布置了词法分析的实验。看到实验环境是老旧的 VC++ 6.0,我心生厌恶,于是申请改用 Python 来写。
老师同意了,那么就有了我的代码。
我分享出来,希望供大家参考。
本文的词法分析 Python 代码参考了下面这份 C 语言版本的实现: https://www.cnblogs.com/zyrblog/p/6885922.html
# 参考C语言代码 : https://www.cnblogs.com/zyrblog/p/6885922.html
import re
import copy
class Scanner(object):
    """Lookup tables, character tests and a comment filter for a small C lexer.

    Token category codes used alongside this class:
        1 -- reserved word
        2 -- identifier
        3 -- unsigned integer
        4 -- operator
        5 -- delimiter
        6 -- error
    """

    # Reserved words -- category 1
    reserveWord = [
        "auto", "break", "case", "char", "const", "continue",
        "default", "do", "double", "else", "enum", "extern",
        "float", "for", "goto", "if", "int", "long",
        "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void",
        "volatile", "while"
    ]

    # Operators -- category 4
    operator = [
        "+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
        "!=", "&", "&&", "|", "||", "%", "<<", ">>", "+="
    ]

    # Delimiters -- category 5
    Delimiter = [
        ";", "(", ")", "^", ",", "\"", "\'", "[", "]", "{", "}"
    ]

    def searchReserve(self, reserveWord):
        """Return True if ``reserveWord`` is a reserved word, else False.

        A False result means the word should be treated as an identifier.
        """
        return reserveWord in self.reserveWord

    def IsLetter(self, letter):
        """Return True if ``letter`` starts with an ASCII letter or underscore."""
        # re.match anchors at the start, so only the first character matters.
        return bool(re.match(r'[a-zA-Z\_]', letter))

    def IsDigit(self, digit):
        """Return True if ``digit`` starts with a decimal digit."""
        return bool(re.match(r'\d', digit))

    def IsSign(self, Sign):
        """Classify ``Sign`` as a delimiter (5), an operator (4) or neither (-1)."""
        if Sign in self.Delimiter:
            return 5
        if Sign in self.operator:
            return 4
        return -1

    def filterResource(self, code):
        """Strip C comments and control whitespace from a list of source lines.

        Args:
            code: iterable of source lines (strings).

        Returns:
            A new list holding the cleaned lines; lines that end up empty
            are omitted.

        ``//`` discards the rest of the line, and ``/* ... */`` may span
        several lines.  Comment markers are consumed as whole two-character
        units, so the character right after ``*/`` is preserved and ``/*/``
        only opens a comment.  A ``*/`` met outside a comment is ordinary
        text.  Comment markers inside string literals are not treated
        specially.

        NOTE: tab, newline, vertical tab and carriage return are dropped,
        not replaced by a space, so tokens separated only by a tab end up
        joined together.
        """
        dropped = ('\t', '\n', '\v', '\r')
        in_comment = False  # carried across lines for multi-line /* ... */
        cleaned = []
        for s_line in code:
            kept = []
            pos = 0
            length = len(s_line)
            while pos < length:
                pair = s_line[pos:pos + 2]
                if in_comment:
                    if pair == '*/':
                        in_comment = False
                        pos += 2  # step past both characters of the marker
                    else:
                        pos += 1
                    continue
                if pair == '//':
                    break  # rest of the line is a single-line comment
                if pair == '/*':
                    in_comment = True
                    pos += 2
                    continue
                ch = s_line[pos]
                if ch not in dropped:
                    kept.append(ch)
                pos += 1
            text = ''.join(kept)
            if text != '':
                cleaned.append(text)
        return cleaned
def clear_number(number_list):
    """Reset a two-slot number accumulator in place.

    Both ``number_list[0]`` and ``number_list[1]`` are set to 0; any
    further elements are left untouched.  Returns None.
    """
    for slot in (0, 1):
        number_list[slot] = 0
def clear_sign(sign_list):
    """Reset a two-slot sign accumulator in place.

    ``sign_list[0]`` becomes the empty string and ``sign_list[1]``
    becomes 0.  Returns None.
    """
    sign_list[0], sign_list[1] = '', 0
def clear_word(word):
    """Return the cleared (empty) value for a word accumulator.

    Strings are immutable, so rebinding the ``word`` parameter inside this
    function can never change the caller's variable.  The empty string is
    therefore returned and the caller is expected to rebind it:
    ``word = clear_word(word)``.
    """
    return ''
# Main program: read a C source file, strip comments, then print one
# "( category ," text ")" line per recognised token.
import sys


def _ends_operand(tokens):
    """Return True if the last token in ``tokens`` can end an operand.

    Used to decide whether a following '+' or '-' is a binary operator:
    that is the case after an identifier (2), a number (3), ')' or ']'.
    """
    if not tokens:
        return False
    kind, text = tokens[-1]
    return kind in (2, 3) or (kind == 5 and text in (')', ']'))


def _scan_line(scanner, line):
    """Split one comment-free source line into ``(category, value)`` tokens.

    Categories: 1 reserved word, 2 identifier, 3 integer, 4 operator,
    5 delimiter, 6 error.  Tokens are separated by spaces or by the
    transition between sign and non-sign characters.
    """
    tokens = []
    length = len(line)
    pos = 0
    while pos < length:
        ch = line[pos]
        if ch == ' ':
            pos += 1
            continue

        # Word: a letter/underscore followed by letters, underscores, digits.
        if scanner.IsLetter(ch):
            end = pos + 1
            while end < length and (scanner.IsLetter(line[end])
                                    or scanner.IsDigit(line[end])):
                end += 1
            text = line[pos:end]
            tokens.append((1 if scanner.searchReserve(text) else 2, text))
            pos = end
            continue

        # A '+'/'-' glued to a digit is the number's sign unless an operand
        # precedes it, in which case it is a binary operator ("1+2", "a-1").
        signed_number = (
            ch in ('+', '-')
            and pos + 1 < length
            and scanner.IsDigit(line[pos + 1])
            and not _ends_operand(tokens)
        )
        if scanner.IsDigit(ch) or signed_number:
            end = pos + 1
            while end < length and scanner.IsDigit(line[end]):
                end += 1
            if end < length and scanner.IsLetter(line[end]):
                # Digits running straight into letters are malformed;
                # report the whole run as one error token.
                while end < length and (scanner.IsLetter(line[end])
                                        or scanner.IsDigit(line[end])):
                    end += 1
                tokens.append((6, line[pos:end]))
            else:
                tokens.append((3, int(line[pos:end])))
            pos = end
            continue

        # Operator / delimiter: extend while the longer text is still a sign.
        if scanner.IsSign(ch) > 0:
            end = pos + 1
            while end < length and scanner.IsSign(line[pos:end + 1]) > 0:
                end += 1
            text = line[pos:end]
            tokens.append((scanner.IsSign(text), text))
            pos = end
            continue

        # Anything else is unknown to the scanner tables: report it instead
        # of silently dropping it and merging the neighbouring tokens.
        tokens.append((6, ch))
        pos += 1
    return tokens


def _print_token(kind, value):
    """Print one token in the program's output format."""
    if kind == 6:
        print('Error : (', 6, ',"', value, '")')
    else:
        print('(', kind, ',"', value, '")')


scanner = Scanner()
# The input path may be given as the first command-line argument; the
# original hard-coded location remains the default.
source_path = sys.argv[1] if len(sys.argv) > 1 else 'D:/test.txt'
with open(source_path, 'r', encoding='UTF-8') as f:
    code = [line.strip() for line in f]
# Remove comments and control whitespace.
code = scanner.filterResource(code)
# Recognise and print the tokens line by line.
for line in code:
    for kind, value in _scan_line(scanner, line):
        _print_token(kind, value)
标签:word,scanner,python,list,number,sign,词法,编译,bit 来源: https://www.cnblogs.com/amtop/p/16436985.html