模糊匹配——基于difflib
作者:互联网
import difflib
# Shared module-level Differ instance; find_all passes it to fuzzy_match.
DIFF =difflib.Differ()
def fuzzy_match(seq: str, sub_seq: str, difflib=None):
    """Fuzzy-locate ``sub_seq`` inside ``seq`` using a character-level diff.

    The two strings are diffed character by character and the span of
    ``seq`` running from the first to the last character the diff reports
    as common to both strings is taken as the match.

    Args:
        seq: The text to search in.
        sub_seq: The pattern to look for.
        difflib: An object exposing ``compare(a, b)`` that yields
            ``difflib.Differ``-style lines (``"  x"``, ``"- x"``, ``"+ x"``).
            Note that this parameter shadows the ``difflib`` module inside
            the function. When ``None``, a fresh ``difflib.Differ`` is used.

    Returns:
        ``(start, end)`` as a half-open slice into ``seq`` when the matched
        span covers more than one position, otherwise the integer ``-1``
        (this includes both "nothing in common" and "only a single common
        position").

    Side effects:
        Prints the matched slice of ``seq`` to stdout.
    """
    if difflib is None:
        # The parameter hides the module name, so import the class directly.
        from difflib import Differ
        difflib = Differ()

    def match_(seq: str, sub_seq: str):
        """Return inclusive (first, last) indices in ``seq`` of common chars."""
        diff_lines = list(difflib.compare(seq, sub_seq))
        # A gap of this many seq-only characters makes us consider stopping.
        gap_limit = len(sub_seq) * 0.5
        start, end = 0, 0
        searching = True  # True until the first common character is seen
        index = 0         # current position inside ``seq``
        gap = 0           # seq-only chars seen since the last common char
        for i, line in enumerate(diff_lines):
            tag = line[0]
            if tag == " ":
                gap = 0
                end = index
                if searching:
                    searching = False
                    start = index
            if tag != "+":
                # "+" lines exist only in ``sub_seq`` and consume no ``seq`` char.
                index += 1
            if not searching and tag == "-":
                if gap >= gap_limit:
                    # Stop extending the span when the gap is already long and
                    # fewer than two common characters remain in the diff.
                    remaining = sum(
                        1 for later in diff_lines[i:] if later[0] == " "
                    )
                    if remaining < 2:
                        break
                gap += 1
        return start, end

    start, end = match_(seq, sub_seq)
    # Console output of the matched span; retained so existing output is unchanged.
    print(seq[start:end + 1])
    if start != end:
        return start, end + 1
    return -1
def find_all(s, sub):
    """Locate ``sub`` in ``s``: exact search first, fuzzy search as fallback.

    ASCII commas are removed from ``sub`` before any searching is done.

    Returns:
        ``(start, end)`` as a half-open slice into ``s`` when the
        comma-stripped pattern occurs verbatim; otherwise whatever
        ``fuzzy_match(s, <stripped pattern>, DIFF)`` returns.
    """
    needle = sub.replace(",", "")
    position = s.find(needle)  # exact match
    if position == -1:
        return fuzzy_match(s, needle, DIFF)
    return position, position + len(needle)
# Demo input: s2 does not occur verbatim in s1, so find_all takes the fuzzy path.
s1="哪种产品是苏州市的东方洗涤剂厂在2015年7月4号生产的"
s2="2015-07-04"
# Alternative example pair (disabled):
# s1 = '你知道上海浦东新区的那个气象局缺几位预报员吗'
# s2 = '上海市浦东新区气象局'
# Runs at import time; the return value is discarded.
find_all(s1,s2)
一个基于 difflib 的简单模糊匹配实现，分享给大家。
标签:index,匹配,sub,seq,模糊,difflib,flag,num 来源: https://blog.csdn.net/qq_33540705/article/details/123113649