编程语言
首页 > 编程语言> > 语法分析器python版本

语法分析器python版本

作者:互联网

可以直接去我的gitee空间下载代码,参考博客理论理解

1、题目:

在这里插入图片描述在这里插入图片描述
在这里插入图片描述

1.2、理论

在这里插入图片描述
在这里插入图片描述在这里插入图片描述
请添加图片描述

请添加图片描述
最终实现的结果:
在这里插入图片描述

2、代码

实现间接左递归、直接左递归、消除回溯以及题目要求信息
文件结构如下图:
Stack是自己实现栈的结构,Grammar是定义文法,Parser是转换,连接Grammar和Main
Main是程序入口
在这里插入图片描述

2.1、Stack.py

class Stack:  # 栈的实现
    def __init__(self):
        self.items = []

    def push(self, item):
        self.items.append(item)

    def pop(self):
        if self.size() > 0:
            return self.items.pop()
        else:
            return None

    def peek(self):
        if len(self.items) > 0:
            return self.items[len(self.items) - 1]
        else:
            return None

    def size(self):
        return len(self.items)

2.2、Grammar.py

from pandas import DataFrame
from copy import deepcopy


class Grammar:
    """
    对消除左递归和回溯的产生式进行语法分析,得到对应的LL(1)分析表:DataFrame类型
    """
    Vn = []  # 非终结符集
    Vt = []  # 终结符集
    S = ''  # 文法开始符
    M = DataFrame()  # 分析表
    FIRST = {}  # First集 {char:[]}
    FOLLOW = {}  # Follow集 {char:[]}
    Production = {}  # 产生式 {char:[]}

    def __init__(self, filename) -> '输入原始的产生式':
        self.Vn = []  # 非终结符集
        self.Vt = []  # 终结符集
        self.M = DataFrame()  # 分析表
        self.FIRST = {}  # First集
        self.FOLLOW = {}  # Follow集
        self.S = ''  # 文法开始符
        #  手工将测试的表达式写入文本文件,每个表达式写一行,用";"表示结束
        #  读入文本文件中的表达式;
        is_SetHead = True
        with open(filename, 'r', encoding="utf-8") as f:
            for lineStr in f:
                # 清除无效字符
                lineStr = lineStr.replace(' ', '')
                lineStr = lineStr.replace('\t', '')
                lineStr = lineStr.replace('\r', '')
                lineStr = lineStr.replace('\n', '')
                if is_SetHead and len(lineStr) > 0:
                    is_SetHead = False
                    self.S = lineStr[:lineStr.index('->')]  # 设置文法开始符
                if lineStr != '':
                    self.__saveInfoFromFile(lineStr)  # 保存未处理的产生式Production、非终结符集Vn、终结符集Vt
        self.__modifyProduction()
        self.__saveProduction2File()

    def __saveInfoFromFile(self, line) -> '预处理后的line保存产生式Production,非终结符集Vn,终结符集Vt':
        if line.find("->") == -1:
            raise Exception('语法错误')
        # 获取产生式左侧
        key = line[:line.index('->')]
        if key == '':
            raise Exception('语法错误')
        self.Vn.append(key)  # 保存终结符集Vt

        tmpString = line[line.index('>') + 1:]
        if tmpString == '':
            raise Exception('语法错误')

        leavesList = tmpString.strip(';').split('|')

        self.Production[key] = deepcopy(leavesList)  # 保存产生式Production
        for i in range(len(tmpString)):  # 遍历产生式右侧
            ch = tmpString[i]
            if not ch.isupper() and ch not in ['|', ';', 'ε', '\'']:
                self.Vt.append(ch)  # 保存终结符集Vt

    # 1、消除左递归和消除回溯(即提取公因字符串再去除),主要实现在最后
    def __modifyProduction(self) -> '预处理产生式':
        # 理论知识参考<a href=https://blog.csdn.net/u010204038/article/details/41942035?utm_source=blogxgwz2></a>
        # 1、把间接左递归文法改写为直接左递归文法
        self.__removeIndirectLeftRecursion()
        # 2、然后用消除直接左递归的方法改写文法。
        self.__removeDirectLeftRecursion()
        # 3、消除回溯
        self.__removeSamePrefix()

    def getVn(self):
        return self.Vn

    def getVt(self):
        return self.Vt

    def getS(self):
        return self.S

    def getFirst(self):
        return self.FIRST

    def getFollow(self):
        return self.FOLLOW

    def getTableM(self):
        return self.M

    def showProduction(self) -> '展示产生式':
        for key, value in self.Production.items():
            print(key, end='->')
            for i in range(len(value)):
                if i < len(value) - 1:
                    print(value[i], end="|")
                else:
                    print(value[i])

    def showFirst(self) -> '获取产生式中的First集合以字典类型':
        for key, value in self.FIRST.items():
            print('FIRST(%s' % key, end='):{')
            tmpList = list(value)
            for i in range(len(tmpList)):
                if i < len(tmpList) - 1:
                    print(tmpList[i], end=",")
                else:
                    print(tmpList[i], end='}\n')

    def showFollow(self) -> '获取产生式中的Follow集合以字典类型':
        for key, value in self.FOLLOW.items():
            print('FOLLOW(%s' % key, end='):{')
            tmpList = list(value)
            for i in range(len(tmpList)):
                if i < len(tmpList) - 1:
                    print(tmpList[i], end=",")
                else:
                    print(tmpList[i], end='}\n')

    def showTableM(self) -> '显示分析表':
        if len(self.M) == 0:
            self.setTableM()
        print(self.M)

    # 2、获取每个非终结符的First集合
    def setFirst(self):
        change = 1  # 如果first集合发生变化就继续,直到first集合不再变化
        # 非终结符的First集合
        while change:
            change = 0
            for i in self.Vn:  # 遍历所有的非终结符
                for j in self.Production[i]:  # 遍历所有的非终结符
                    empty_flag = 0  # 标志是否有空元素ε

                    for indexCh in range(len(j)):  # 对产生式的某一个叶子进行遍历
                        # 处理类似F'的'字符
                        if j[indexCh] == '\'':
                            continue
                        elif indexCh + 1 <= len(j) - 1 and j[indexCh + 1] == '\'':
                            nowIndex = indexCh
                            ch = j[indexCh] + j[indexCh + 1]
                            nowIndex += 1
                            while nowIndex + 1 <= len(j) - 1 and j[nowIndex + 1] == '\'':
                                ch = j[indexCh] + j[indexCh + 1]
                                nowIndex += 1
                        else:
                            ch = j[indexCh]
                        # 产生First的步骤1
                        if ch in self.Vt:  # 是否是终结符,是则直接加入First集合
                            if i not in self.FIRST.keys():
                                self.FIRST[i] = []
                            if ch not in self.FIRST[i]:
                                self.FIRST[i].append(ch)  # 是则直接加入First集合
                                change = 1
                            empty_flag = 0  # 没有空串ε
                        elif ch == 'ε':  # 产生First的步骤2
                            empty_flag = 1
                        elif ch in self.FIRST.keys():  # 产生First的步骤3,非终结符的First
                            for k in self.FIRST[ch]:  # ch已经建立好First集
                                if k == 'ε':
                                    empty_flag = 1
                                else:
                                    if i not in self.FIRST.keys():
                                        self.FIRST[i] = []
                                    if k not in self.FIRST[i]:
                                        self.FIRST[i].append(k)
                                        change = 1
                                    empty_flag = 0
                        if empty_flag == 0:  # 不含有空元素,不对同一个叶子继续遍历
                            break

                    if empty_flag == 1:  # 每个非终结符的First的集合都有空串
                        if i not in self.FIRST.keys():
                            self.FIRST[i] = []
                        if 'ε' not in self.FIRST[i]:
                            self.FIRST[i].append('ε')  # 是ε则直接加入First集合
                            change = 1
        # 终结符的First集合是它本身
        for i in self.Vt:
            self.FIRST[i] = [i]

    def getFirstByChar(self, ch) -> '获取单个字符的First集合':
        if ch in self.FIRST:
            return self.FIRST[ch]
        else:
            return []

    # 3、获取每个非终结符的Follow集合
    def setFollow(self):
        # 需要先求出First集合
        if len(self.FIRST) == 0:
            self.setFirst()
        # 语法规则1:
        self.FOLLOW[self.S] = ['#']
        change = 1
        while change:
            change = 0
            for i in self.Vn:  # 遍历所有的非终结符
                for j in self.Production[i]:  # 遍历非终结符对应的产生式的右侧的叶子们
                    for indexCh in range(len(j)):  # 对产生式的某一个叶子进行遍历
                        nowIndex = indexCh  # 代表当前字符下标

                        # 处理类似F'的'字符
                        if j[indexCh] == '\'':
                            continue
                        elif indexCh + 1 <= len(j) - 1 and j[indexCh + 1] == '\'':
                            ch = j[indexCh] + j[indexCh + 1]
                            nowIndex += 1
                            while nowIndex + 1 <= len(j) - 1 and j[nowIndex + 1] == '\'':
                                ch += j[nowIndex + 1]
                                nowIndex += 1
                        else:
                            ch = j[indexCh]

                        if ch in self.Vn:  # 非终结符
                            # 语法规则第2条
                            if nowIndex + 1 <= len(j) - 1:  # 形同A->aBb,取得子串b
                                s = j[nowIndex + 1]  # 需要考虑E''的情况

                                tmpIndex = nowIndex + 1  # 代表当前字符的后一个字符下标
                                while tmpIndex + 1 <= len(j) - 1 and j[tmpIndex + 1] == '\'':
                                    s += j[tmpIndex + 1]
                                    tmpIndex += 1

                                se = self.getFirstByChar(s)
                                # A->aBb,把子串b的First集的所有非空元素加入B的Follow集合中
                                for k in se:  # 遍历se,所有非空元素加入
                                    if k != 'ε':
                                        if ch not in self.FOLLOW.keys():
                                            self.FOLLOW[ch] = []
                                        if k not in self.FOLLOW[ch]:
                                            self.FOLLOW[ch].append(k)
                                            change = 1

                                if 'ε' in se:  # 语法规则第3条第2项,把i的Follow集加入B的Follow集合中
                                    if i in self.FOLLOW.keys():
                                        for k in self.FOLLOW[i]:  # 遍历i的Follow集
                                            if ch not in self.FOLLOW.keys():
                                                self.FOLLOW[ch] = []
                                            if k not in self.FOLLOW[ch]:
                                                self.FOLLOW[ch].append(k)
                                                change = 1
                                    else:
                                        self.FOLLOW[i] = []
                            else:  # 形同A->aB,语法规则第3条第1项,把i的Follow集加入B的Follow集合中
                                if i in self.FOLLOW.keys():
                                    for k in self.FOLLOW[i]:
                                        if ch not in self.FOLLOW.keys():
                                            self.FOLLOW[ch] = []
                                        if k not in self.FOLLOW[ch]:
                                            self.FOLLOW[ch].append(k)
                                            change = 1

    def getFollowByChar(self, ch) -> '获取单个字符的Follow集合':
        if ch in self.FIRST:
            return self.FIRST[ch]
        else:
            return []

    # 4、建立分析表
    def setTableM(self) -> '需要经过预处理--消除左递归和回溯,再调用这个函数返回分析表':
        if len(self.FOLLOW) == 0:
            self.setFollow()
        Columns = self.Vt + ['#']
        Index = self.Vn
        self.M = DataFrame(index=Index, columns=Columns)

        for i in range(len(Index)):  # 遍历非终结符
            A = Index[i]
            for j in self.Production[A]:  # 遍历非终结符对应的产生式的右侧的叶子们
                leaveString = A + "->" + j
                nowIndex = 0
                ch = j[0]
                # 语法规则1:需要对非终结符的First集处理
                if ch.isupper():  # 为非终结符,都是大写字母
                    while nowIndex + 1 <= len(j) - 1 and j[nowIndex + 1] == '\'':
                        ch += j[nowIndex + 1]
                        nowIndex += 1
                    for k in self.FIRST[ch]:  # 遍历非终结符的First集
                        if k == 'ε':
                            continue
                        y = Columns.index(k)
                        x = i
                        self.M.values[x, y] = leaveString
                elif ch in self.FIRST[A]:  # 语法规则2:对自己First集的每个终结符
                    if ch != 'ε':
                        y = Columns.index(ch)
                        x = i
                        self.M.values[x, y] = leaveString
                    else:  # 语法规则3:对任何属于Follow(A)的处理,注意:Follow集不可能有空串
                        for k in self.FOLLOW[A]:  # 遍历
                            y = Columns.index(k)
                            x = i
                            self.M.values[x, y] = leaveString

        self.M.fillna('ERROR', inplace=True)

    # 1.1、消除间接左递归(要求文法不存在A 经过一次或多次能推导出A和不存在ε产生式(形如A→ε))
    """
    前提:(1)文法中不含有回路(2)文法中不含有空串为右部的产生式
    步骤:
    (1)把非终结符整理成某个顺序
    A1→δ1 /δ2 /…/δk
    A2→A1r...
    A3→A2u|A1v...
    若多个非终结符都有间接左递归,其中排序排在最后的非终结符用作消除左递归   
    (2)如果一个文法不含有回路,即形如PP的推导,也不含有以ε为右部的产生式,那么就可以采用下述算法消除文法的所有左递归。
        for(i=1;i<=n;i++)
           for(j=1;j<=i-1;j++)
           {   把形如Ai→Ajγ的产生式改写成Ai→δ1γ /δ2γ /…/δkγ 
               其中Aj→δ1 /δ2 /…/δk是关于的Aj全部规则;
               再消除Ai规则中的直接左递归;
           }
    (3)化简得到的文法:去掉从开始符号S出发,在推导中无法出现的非终结符的产生式(去掉多余产生式)            
    """

    def __removeIndirectLeftRecursion(self):
        """
            测试案例:
            S->Qc|c;
            Q->Rb|b;
            R->Sa|a;
        """
        # 排序
        Vn = self.Vn  # 对Vn处理排序后的Vn副本
        i = len(Vn) - 2
        while i >= 0:
            ch0 = self.Vn[i]
            # 向后查看
            for j in range(i + 1, len(Vn)):
                ch1 = Vn[j]
                rightList = self.Production[ch1]
                for k in rightList:
                    if k[0] == ch0:
                        ch0Index = Vn.index(ch0)
                        ch1Index = Vn.index(ch1)
                        Vn[ch1Index], Vn[ch0Index] = Vn[ch0Index], Vn[ch1Index]
                        break
            i -= 1
        # 消除间接左递归
        for i in range(1, len(Vn)):  # 处理Ai->Ajγ等转换
            for j in range(i):
                isSame = False  # 在i的产生式右侧的候选项中出现Vn[j]
                ch = Vn[j]
                Right1 = self.Production[Vn[i]]  # Ai产生式的右侧
                Right2 = self.Production[Vn[j]]  # Aj产生式的右侧
                tmpList = []  # 存放缓冲
                for k1 in Right1:  # 遍历Ai产生式右侧子叶
                    if k1[0] == ch:  # Ai->Ajγ
                        isSame = True
                        for k2 in Right2:  # 遍历Aj产生式右侧子叶
                            if k2 != 'ε':
                                tmpList.append(k2 + k1[1:])
                if isSame:  # 前面产生式右侧出现本产生式的左侧字母
                    Right1Size = len(Right1)
                    for _ in range(Right1Size):  # 遍历Ai产生式右侧子叶
                        if Right1[0][0] == ch:
                            del self.Production[Vn[i]][0]
                        else:
                            tmpList.append(Right1[0])
                            del self.Production[Vn[i]][0]
                    self.Production[Vn[i]].extend(set(tmpList))
        saveSet = {self.S}
        saveSet = self._get_simple_Production(self.S, saveSet)
        for item in self.Vn:
            if item not in saveSet:
                del self.Production[item]
        self.Vn = [i for i in self.Production.keys()]  # 可能增加Vn,这里需要更新处理

    def _get_simple_Production(self, ch, result):
        # 传入的是产生式左侧,在消除间接左递归的最后检查是否有可以删去的多余产生式
        tmpResult = set()
        tmpDict = self.Production[ch]
        tmpSet = set(self.Vn) - {ch}
        for item in tmpSet:
            for rightItem in tmpDict:
                if item in rightItem and item not in result:
                    tmpResult.update(item)
                    break
        for item in tmpResult:
            result.add(item)
            result.update(self._get_simple_Production(item, result))
        return result

    # 1.2、消除直接左递归
    """
    1、把所有产生式写成候选式形式。如A→Aa1|Aa2……|Aan|b1|b2……|bm。其中每个a都不等于ε,而第个b都不以A开头。
   2、变换候选式成如下形式:
      A→b1A’|b2A’……|bmA’
      A’ →a1A’|a2A’……|anA’|ε
    """

    def __removeDirectLeftRecursion(self):
        for ch in list(self.Production.keys()):
            newCh = ch + '\''
            sameFlag = False
            while newCh in self.Production.keys():
                newCh += '\''
            for k in self.Production[ch]:
                if k[0] == ch:
                    sameFlag = True
                    self.Production[newCh] = []
                    break
            if not sameFlag:  # 没有出现和产生式左侧相同的,不需要继续
                continue
            tmpList = []  # 存放待替换原来的右侧的列表
            # 删除原来右侧,增加新产生式右侧
            oldList = self.Production[ch]
            while len(oldList) > 0:
                if oldList[0][0] == ch:
                    tmpStr = oldList[0][1:]
                    self.Production[newCh].append(tmpStr + newCh)
                else:
                    tmpStr = oldList[0]
                    tmpList.append(tmpStr + newCh)
                del oldList[0]
            self.Production[newCh].append('ε')  # 加入空串
            # 重进原来的右侧
            self.Production[ch].extend(tmpList)
            self.Production = dict(sorted(self.Production.items(), key=lambda x: x[0]))
        self.Vn = [i for i in self.Production.keys()]  # 可能增加Vn,这里需要更新处理

    # 1.3、消除回溯
    def __removeSamePrefix(self):
        samplesCopy = deepcopy(self.Production)
        samples = {}
        change = True
        while change:
            change = False
            for oldLeftKey, rightList in samplesCopy.items():
                # 判断是不是需要合并
                if len(rightList) == 0:
                    continue
                dictTmp = {}
                i = 0
                while i < len(rightList):
                    if rightList[i][0] in dictTmp.keys():
                        i += 1
                        continue
                    oldStartChar = rightList[i][0]
                    dictTmp[oldStartChar] = [rightList[i]]
                    j = i + 1
                    newStartChar = oldLeftKey + '\''
                    while newStartChar in dictTmp.keys() or newStartChar in samples.keys():
                        newStartChar += '\''
                    while j < len(rightList):  # 遍历右侧列表
                        if rightList[j][0] == oldStartChar:
                            change = True
                            if newStartChar not in dictTmp.keys():  # 遇到后面首字母重复的考虑是否已经在字典中建立key
                                dictTmp[newStartChar] = []
                            if oldStartChar + newStartChar not in dictTmp[oldStartChar]:  # 判断是否修改原理对应的字典key的valuelist
                                dictTmp[newStartChar].append(dictTmp[oldStartChar][0][1:])
                                dictTmp[oldStartChar] = [oldStartChar + newStartChar]
                            if rightList[j][1:] != '':
                                dictTmp[newStartChar].append(rightList[j][1:])  # 在新的字典key对应的valuelist中加入新字符串
                            else:
                                dictTmp[newStartChar].append('ε')
                        j += 1
                    i = i + 1
                # 将本次得到的dictTmp更新到全局变量字典类型的samples
                samples[oldLeftKey] = []
                for key, valueList in dictTmp.items():
                    if len(valueList) == 1:
                        samples[oldLeftKey].extend(valueList)
                    else:  # 可能还需要继续
                        samples.update({key: valueList})
            samplesCopy = deepcopy(samples)  # 更新当前全局变量到副本,再进入一次循环,看是否可以退出循环
        self.Production = dict(sorted(samples.items(), key=lambda x: x[0]))
        self.Vn = [i for i in self.Production.keys()]  # 可能增加Vn,这里需要更新处理

    # 1.4、保存产生式
    def __saveProduction2File(self):
        fileName = r'ModifiedRule.txt'
        if len(self.Production) == 0:
            return
        with open(fileName, 'w') as f:
            for key, valueList in self.Production.items():
                f.write(key + '->' + valueList[0])
                for i in range(1, len(valueList)):
                    f.write("|" + valueList[i])
                f.write(';\n')

2.3、Parser.py

# 语法分析的主类
from pandas import DataFrame
from copy import deepcopy
from Stack import Stack


class Parser:
    # M: 分析表
    # inputString: 输入串
    # vt: 终结符集合
    # vn: 非终结符集合
    __M, __inputString, __vt, __vn = DataFrame(), "", [], []
    __stack = Stack()  # 分析栈

    def __init__(self, M, inputString, vt, vn):
        """
        初始化语法分析器
        :param M: 分析表:DataFrame类型
        :param inputString: 输入串,以#结尾:str类型
        :param vt: 终结符集合:list
        :param vn: 非终结符集合:list,第一个是文法开始符S
        ----
        Example:parser = Parser(DataFrame(), " ", [], ['S'])
        """
        if len(inputString) == 0:
            raise Exception("输入字符串不能为空")
        if len(vn) == 0:
            raise Exception("Vn是非空有限集的非终结符集,第一个元素是文法开始符,Vn不能为空")
        self.__M, self.__inputString, self.__vt, self.__vn = deepcopy(M), deepcopy(inputString), deepcopy(vt), deepcopy(
            vn)
        self.__stack = Stack()
        self.__stack.push('#')
        self.__stack.push(self.__vn[0])

    def setM(self, M):
        self.__M = deepcopy(M)

    def setInputString(self, inputString):
        self.__inputString = inputString

    def isVt(self, c):
        """
        判断是否为终结符
        :param c: 输入字符
        :return: boolean结果
        """
        return c in self.__vt

    def isVn(self, c):
        """
        判断是否为非终结符
        :param c: 输入字符
        :return: boolean结果
        """
        return c in self.__vn

    def __showStack(self, resultStack):
        tmp = ''
        for ch in self.__stack.items:
            # print(ch, end="")
            tmp += ch
        # print("\t\t\t", end="")
        resultStack.push(tmp)

    """
    输出其分析过程,并在最后显示判断输入串是否为该文法的句子,并返回记录列表
    """
    def parse(self):
        if len(self.__inputString) == 0:
            raise Exception("输入字符串不能为空")
        resultStack = Stack()  # 保留记录,可以显示在前端
        # 输入的第一个字符a
        a = self.__inputString[0]
        # 栈顶元素
        x = self.__stack.peek()
        # 定义元素对应的下标
        indexVt = -1  # 终结符对应下标
        indexVn = -1  # 非终结符对应的下标
        indexInput = 0  # 字符串当前指针对应的下标
        # LL(1)的说明
        # express = 'None'
        # print('\033[31m符号栈', ' ' * 12, '当前输入符号', ' ' * 7, '输入串', ' ' * 7, '所用表达式\033[0m')
        resultStack.push('符号栈')
        resultStack.push('当前输入符号')
        resultStack.push('输入串')
        resultStack.push('所用表达式')
        cycleMark = True  # 循环是否继续
        while cycleMark:
            # 输出栈元素,传入resultStack,是为了保存值到resultStack,而不是遍历resultStack
            self.__showStack(resultStack)
            # 输出当前输入字符
            # print('\t', a, "\t\t\t\t\t", end="")
            resultStack.push(a)
            # 输入串
            # print(self.__inputString[indexInput + 1:], '\t\t\t\t', end="")
            resultStack.push(self.__inputString[indexInput + 1:])

            express = 'None'
            x = self.__stack.pop()  # 栈顶弹出
            if self.isVt(x):  # 判断是否为终结字符
                if x == a:
                    if a == '#':
                        # 输出说明
                        express = '分析成功'
                        # print(express)
                        resultStack.push(express)
                        break  # 成功,可以停止分析
                    # 弹出栈顶,字符串指针后移,对下个字符分析
                    indexInput += 1
                    a = self.__inputString[indexInput]
            elif self.isVn(x):  # 是非终结符
                indexVn = self.__vn.index(x)
                indexVt = self.__vt.index(a)
                express = self.__M.values[indexVn][indexVt]
                if '>' not in express:
                    raise Exception('语法错误')
                # 产生式右侧分割成数组
                tmpString = express[express.index('>') + 1:]
                charArray = []
                for i in range(len(tmpString)):
                    if tmpString[i] == '\'':
                        charArray[i - 1] += tmpString[i]
                    else:
                        charArray.append(tmpString[i])
                # 判断首个字符是否为空串,是则跳过
                if charArray[0] == 'ε':
                    # print(express)
                    resultStack.push(express)
                    continue
                # 逆序把产生式右侧加入栈
                for i in range(len(charArray) - 1, -1, -1):
                    self.__stack.push(charArray[i])
            else:
                raise Exception('未定义字符')
            # 输出说明
            # print(express)
            resultStack.push(express)
            if x == '#':
                cycleMark = False
        return resultStack

    def __is_Chinese(self, word):
        for ch in word:
            if '\u4e00' <= ch <= '\u9fff':
                return True
        return False

    def showResult(self):
        try:
            stack = self.parse()
            for i in range(stack.size()):
                if i % 4 == 0:
                    print()
                if not self.__is_Chinese(stack.items[i]):
                    print("%10s" % stack.items[i], end=" " * 4)
                else:
                    print('{0:{1}>6}'.format(stack.items[i], chr(12288)), end=' ' * 4)
            return stack
        except Exception as e:
            print('\033[31m' + str(e) + '\033[0m')
            return Stack()

2.4、Main.py

from Parser import Parser
from Grammar import Grammar


def main():
    g = Grammar(r'PureRule.txt')
    g.setTableM()
    testM = g.getTableM()
    Headers = testM.columns.values.tolist()
    Index = testM.index.values.tolist()
    # 终结符集
    testVt = Headers
    # 非终结符集
    testVn = Index
    # 输入字符串
    testInputString = 'i+i#'
    parser = Parser(testM, testInputString, testVt, testVn)
    # 获得
    try:
        parser.showResult()
    except Exception as e:
        print('\033[31m'+str(e)+'\033[0m')


if __name__ == '__main__':
    main()

""" 
    测试案例
    # from pandas import DataFrame
    # Index = ['E', 'E\'', 'T', 'T\'', 'F']
    # Headers = ['i', '+', '*', '(', ')', '#']
    # Data = [['E->TE\'', 'ERROR', 'ERROR', 'E->TE\'', 'ERROR', 'ERROR'],
    #         ['ERROR', 'E\'->+TE\'', 'ERROR', 'ERROR', 'E\'->ε', 'E\'->ε'],
    #         ['T->FT\'', 'ERROR', 'ERROR', 'T->FT\'', 'ERROR', 'ERROR'],
    #         ['ERROR', 'T\'->ε', 'T\'->*FT\'', 'ERROR', 'T\'->ε', 'T\'->ε'],
    #         ['F->i', 'ERROR', 'ERROR', 'F->(E)', 'ERROR', 'ERROR']]
    # # 分析表,包含产生式的关系
    # testM = DataFrame(Data, index=Index, columns=Headers)
"""

2.5、PureRule.txt

E->E+T|T;
T->T*F|F;
F->(E)|i;

对应生成的规则:
在这里插入图片描述

标签:ch,语法分析,python,self,Vn,Production,版本,终结符,def
来源: https://blog.csdn.net/weixin_44001521/article/details/112002614