【Python 学习】fuzzywuzzy
作者:互联网
我想找到两个相似的字符串。
示例:
# Compare fuzzywuzzy's partial_ratio with difflib's SequenceMatcher ratio on
# three related strings. Each "# >" line is the output reported for the
# print() call directly above it.
import difflib  # required by the SequenceMatcher calls below

from fuzzywuzzy import fuzz

string1 = 'Green apple'
string2 = 'Apple, green'
string3 = 'Green apples - grow on trees'

# Test with Fuzzy Wuzzy
print(fuzz.partial_ratio(string1, string2))
# > 50
print(fuzz.partial_ratio(string1, string3))
# > 100
print(fuzz.partial_ratio(string2, string3))
# > 58

# Testing with DiffLib SequenceMatcher
print(difflib.SequenceMatcher(None, string1, string2).ratio())
# > 0.34782608695652173
print(difflib.SequenceMatcher(None, string1, string3).ratio())
# > 0.5641025641025641
print(difflib.SequenceMatcher(None, string2, string3).ratio())
# > 0.45
在 fuzzywuzzy 中还有另一个方法叫做 partial_token_set_ratio。我想这能解决你的问题:
# Interactive-session style walkthrough of fuzz.partial_token_set_ratio.
# The bare number under each call is the score echoed for that call.
from fuzzywuzzy import fuzz
string1 = 'Green apple'
string2 = 'Apple, green'
string3 = 'Green apples - grow on trees'
# string1 against the longer sentence that contains both of its words.
fuzz.partial_token_set_ratio(string1,string3)
100
# Same two words in a different order, case and punctuation.
fuzz.partial_token_set_ratio(string1,string2)
100
# A single word taken from string1; argument order does not change the score here.
string4="apple"
fuzz.partial_token_set_ratio(string1,string4)
100
fuzz.partial_token_set_ratio(string4,string1)
100
# A prefix of a word in string1 still scores 100.
string4="app"
fuzz.partial_token_set_ratio(string4,string1)
100
# A misspelling ("appld") lowers the score.
string4="appld"
fuzz.partial_token_set_ratio(string4,string1)
80
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Maps a state / territory / military-region name to its two-letter code.
# The keys are the candidate strings that the fuzzy lookups below match against.
state_to_code = {
    "VERMONT": "VT", "GEORGIA": "GA", "IOWA": "IA", "Armed Forces Pacific": "AP", "GUAM": "GU",
    "KANSAS": "KS", "FLORIDA": "FL", "AMERICAN SAMOA": "AS", "NORTH CAROLINA": "NC", "HAWAII": "HI",
    "NEW YORK": "NY", "CALIFORNIA": "CA", "ALABAMA": "AL", "IDAHO": "ID",
    "FEDERATED STATES OF MICRONESIA": "FM",
    "Armed Forces Americas": "AA", "DELAWARE": "DE", "ALASKA": "AK", "ILLINOIS": "IL",
    "Armed Forces Africa": "AE", "SOUTH DAKOTA": "SD", "CONNECTICUT": "CT", "MONTANA": "MT",
    "MASSACHUSETTS": "MA",
    "PUERTO RICO": "PR", "Armed Forces Canada": "AE", "NEW HAMPSHIRE": "NH", "MARYLAND": "MD",
    "NEW MEXICO": "NM",
    "MISSISSIPPI": "MS", "TENNESSEE": "TN", "PALAU": "PW", "COLORADO": "CO",
    "Armed Forces Middle East": "AE",
    "NEW JERSEY": "NJ", "UTAH": "UT", "MICHIGAN": "MI", "WEST VIRGINIA": "WV", "WASHINGTON": "WA",
    "MINNESOTA": "MN", "OREGON": "OR", "VIRGINIA": "VA", "VIRGIN ISLANDS": "VI", "MARSHALL ISLANDS": "MH",
    "WYOMING": "WY", "OHIO": "OH", "SOUTH CAROLINA": "SC", "INDIANA": "IN", "NEVADA": "NV",
    "LOUISIANA": "LA",
    "NORTHERN MARIANA ISLANDS": "MP", "NEBRASKA": "NE", "ARIZONA": "AZ", "WISCONSIN": "WI",
    "NORTH DAKOTA": "ND",
    "Armed Forces Europe": "AE", "PENNSYLVANIA": "PA", "OKLAHOMA": "OK", "KENTUCKY": "KY",
    "RHODE ISLAND": "RI",
    "DISTRICT OF COLUMBIA": "DC", "ARKANSAS": "AR", "MISSOURI": "MO", "TEXAS": "TX", "MAINE": "ME"
}


def studyfuzzy():
    """Try ``process.extractOne`` on misspelled state names.

    Every call matches a misspelled query against the keys of
    ``state_to_code``. The results are discarded; the function exists only
    as a scratch pad for stepping through the calls, and returns ``None``.
    """
    # One extra "t": reported best match is ('MINNESOTA', 95).
    process.extractOne("Minnesotta", choices=state_to_code.keys())
    # A cutoff below that score keeps the same match.
    process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=80)
    # A cutoff above that score: no match is reported for this call.
    process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=96)

    # Inspect the dict. keys()/values()/items() work on both Python 2 and
    # Python 3; the viewkeys()/viewvalues()/viewitems() variants exist only on
    # Python 2.7 and raise AttributeError on Python 3.
    state_to_code.keys()
    state_to_code.values()
    state_to_code.items()

    # Heavily mangled query: with a cutoff of 80 no match is reported;
    # without a cutoff the reported best match is ('ALABAMA', 78).
    process.extractOne("AlaBAMMazzz", choices=state_to_code.keys(), score_cutoff=80)
    process.extractOne("AlaBAMMazzz", choices=state_to_code.keys())
In[6]: from fuzzywuzzy import fuzz
In[7]: from fuzzywuzzy import process
In[8]: state_to_code = {"VERMONT": "VT", "GEORGIA": "GA", "IOWA": "IA", "Armed Forces Pacific": "AP", "GUAM": "GU",
"KANSAS": "KS", "FLORIDA": "FL", "AMERICAN SAMOA": "AS", "NORTH CAROLINA": "NC", "HAWAII": "HI",
"NEW YORK": "NY", "CALIFORNIA": "CA", "ALABAMA": "AL", "IDAHO": "ID",
"FEDERATED STATES OF MICRONESIA": "FM",
"Armed Forces Americas": "AA", "DELAWARE": "DE", "ALASKA": "AK", "ILLINOIS": "IL",
"Armed Forces Africa": "AE", "SOUTH DAKOTA": "SD", "CONNECTICUT": "CT", "MONTANA": "MT",
"MASSACHUSETTS": "MA",
"PUERTO RICO": "PR", "Armed Forces Canada": "AE", "NEW HAMPSHIRE": "NH", "MARYLAND": "MD",
"NEW MEXICO": "NM",
"MISSISSIPPI": "MS", "TENNESSEE": "TN", "PALAU": "PW", "COLORADO": "CO",
"Armed Forces Middle East": "AE",
"NEW JERSEY": "NJ", "UTAH": "UT", "MICHIGAN": "MI", "WEST VIRGINIA": "WV", "WASHINGTON": "WA",
"MINNESOTA": "MN", "OREGON": "OR", "VIRGINIA": "VA", "VIRGIN ISLANDS": "VI", "MARSHALL ISLANDS": "MH",
"WYOMING": "WY", "OHIO": "OH", "SOUTH CAROLINA": "SC", "INDIANA": "IN", "NEVADA": "NV",
"LOUISIANA": "LA",
"NORTHERN MARIANA ISLANDS": "MP", "NEBRASKA": "NE", "ARIZONA": "AZ", "WISCONSIN": "WI",
"NORTH DAKOTA": "ND",
"Armed Forces Europe": "AE", "PENNSYLVANIA": "PA", "OKLAHOMA": "OK", "KENTUCKY": "KY",
"RHODE ISLAND": "RI",
"DISTRICT OF COLUMBIA": "DC", "ARKANSAS": "AR", "MISSOURI": "MO", "TEXAS": "TX", "MAINE": "ME"
}
In[19]: process.extractOne("Minnesotta", choices=state_to_code.keys())
Out[19]: ('MINNESOTA', 95)
In[20]: process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=80)
Out[20]: ('MINNESOTA', 95)
In[21]: process.extractOne("Minnesotta", choices=state_to_code.keys(), score_cutoff=96)
In[22]: process.extractOne("AlaBAMMazzz", choices=state_to_code.keys(), score_cutoff=80)
In[23]: process.extractOne("AlaBAMMazzz",choices=state_to_code.keys())
Out[23]: ('ALABAMA', 78)
转载:https://www.cnpython.com/qa/522980
标签:fuzzywuzzy,ratio,Python,学习,state,code,Forces,fuzz,string1 来源: https://www.cnblogs.com/yidianling/p/16078907.html