其他分享
首页 > 其他分享 > 模糊匹配——基于difflib

模糊匹配——基于difflib

作者:互联网

import difflib
# Module-level Differ instance; find_all passes it to fuzzy_match as the
# object whose compare() produces the character-level diff.
DIFF =difflib.Differ()

def fuzzy_match(seq:str,sub_seq:str ,difflib=None):
    """Locate the span of ``seq`` that approximately matches ``sub_seq``.

    The two strings are diffed element by element with a
    ``difflib.Differ``-style object.  The reported span runs from the first
    to the last element of ``seq`` that the diff marks as common to both
    inputs.  Scanning stops early once a run of unmatched ``seq`` elements
    at least half as long as ``sub_seq`` has been seen and fewer than two
    common elements remain, so a lone stray match far from the main span
    does not stretch the result.

    Args:
        seq: Text to search in.
        sub_seq: Text to look for.
        difflib: Object exposing ``compare(a, b)`` that yields
            ``Differ``-formatted entries (the tag is the first character of
            each entry).  Despite its name this is a differ instance, not
            the module.  When ``None`` a fresh ``difflib.Differ()`` is used.

    Returns:
        ``(start, stop)`` such that ``seq[start:stop]`` is the matched span,
        or ``-1`` when the first and last common positions coincide (no
        common element at all, or only a single one).

    Side effects:
        Prints ``seq[start:end + 1]`` to stdout on every call, including
        calls that return ``-1``.
    """
    if difflib is None:
        # The parameter shadows the module name inside this function, so the
        # class is imported locally under its own name.
        from difflib import Differ
        differ = Differ()
    else:
        differ = difflib

    entries = list(differ.compare(seq, sub_seq))
    # Counted once up front so the early-exit test below does not have to
    # rescan the tail of the diff on every unmatched element.
    total_common = sum(1 for entry in entries if entry[0] == " ")
    gap_limit = len(sub_seq) * 0.5

    start = end = 0
    found = False      # becomes True at the first common element
    common_seen = 0    # common elements consumed so far
    position = 0       # index into ``seq`` of the current diff entry
    gap = 0            # unmatched ``seq`` elements since the last common one
    for entry in entries:
        tag = entry[0]
        if tag == " ":
            gap = 0
            end = position
            common_seen += 1
            if not found:
                found = True
                start = position
        # Only common (" ") and deleted ("-") entries correspond to an
        # element of ``seq``; "+" entries belong to ``sub_seq`` and "?"
        # entries are Differ hint lines.
        if tag in (" ", "-"):
            position += 1
        if found and tag == "-":
            if gap >= gap_limit and total_common - common_seen < 2:
                break
            gap += 1

    # Echo of the matched text, kept from the original implementation.
    print(seq[start:end + 1])
    if start != end:
        return start, end + 1
    return -1

def find_all(s, sub):
    """Find where ``sub`` occurs in ``s``, exactly if possible, else fuzzily.

    ASCII commas are removed from ``sub`` before searching; ``s`` is used
    as given.

    Args:
        s: Text to search in.
        sub: Text to look for.

    Returns:
        ``(start, stop)`` of the first exact occurrence of the comma-free
        ``sub`` in ``s``; when there is none, whatever
        ``fuzzy_match(s, sub, DIFF)`` returns.
    """
    needle = sub.replace(",", "")
    position = s.find(needle)  # exact match
    if position == -1:
        # No exact occurrence: fall back to the diff-based search.
        return fuzzy_match(s, needle, DIFF)
    return position, position + len(needle)

# Demo input: the date is written differently in the sentence and in the
# query, so the exact search in find_all fails and the fuzzy path is taken.
s1="哪种产品是苏州市的东方洗涤剂厂在2015年7月4号生产的"
s2="2015-07-04"

# Alternative sample pair (disabled):
# s1 = '你知道上海浦东新区的那个气象局缺几位预报员吗'
# s2 = '上海市浦东新区气象局'
# The return value is discarded; the visible output is the matched substring
# that fuzzy_match prints.
find_all(s1,s2)

一个简单的模糊匹配,分享给大家

标签:index,匹配,sub,seq,模糊,difflib,flag,num
来源: https://blog.csdn.net/qq_33540705/article/details/123113649