# Python: character frequency statistics (字频统计)
# Author: collected from the Internet (作者:互联网)
import re
import time
import os.path
# Directory that result files are written to: the current working directory
# with a trailing path separator, so a file name can be appended directly.
# os.path.join(..., "") appends the platform's own separator instead of a
# hard-coded Windows backslash.
work_dir = os.path.join(os.getcwd(), "")
# Timestamp (yymmddHHMMSS) computed once at import time; it is part of the
# result file name built in writeTxtFile().
file_time = time.strftime("%y%m%d%H%M%S")
# Path of the file being analysed; the empty string means "terminal input".
file_name = ""
# Raw text typed at the terminal; echoed into the result file.
terminal_words = ""
# Minimum frequency entered by the user; overwritten by getFrequency().
frequency_number = 0
def nameMark():
    """Print the program banner: the title flanked by two runs of 20 asterisks."""
    stars = "*" * 20
    print(f" {stars} 字频统计 {stars}")
def separatorLine():
    """Print a horizontal rule made of 50 asterisks."""
    rule_width = 50
    print("*" * rule_width)
def processTxt(words):
    """Return ``words`` with spaces and all ignored characters removed.

    Args:
        words: The text to clean.

    Returns:
        The text without spaces and without any character listed in
        ``ignored`` below.
    """
    ignored = ",.。,??1234567890ABCD"  # characters that are not counted
    ignored_run = r"[%s]+" % ignored
    without_spaces = words.replace(" ", "")
    return re.sub(ignored_run, "", without_spaces)
def getTerminalInput(ter_words):
    """Clean text typed at the terminal and report that it was read.

    Args:
        ter_words: Raw text entered by the user.

    Returns:
        The text after it has been passed through processTxt().
    """
    cleaned = processTxt(ter_words)
    print("The terminal has been read.")
    separatorLine()
    return cleaned
def isRulePath(file_path):
    """Check ``file_path`` against a Windows-style absolute path pattern.

    The pattern requires an optional drive letter, a leading backslash, at
    least one directory component, and a file name with an extension.

    Args:
        file_path: The path string to validate.

    Returns:
        The matched path string when ``file_path`` fits the pattern;
        otherwise ``False`` (after printing "Invalid path").
    """
    pattern = (
        r'^(?P<path>(?:[a-zA-Z]:)?\\(?:[^\\\?\/\*\|<>:"]+\\)+)'
        r'(?P<filename>(?P<name>[^\\\?\/\*\|<>:"]+?)\.'
        r'(?P<ext>[^.\\\?\/\*\|<>:"]+))$'
    )
    matched = re.search(pattern, file_path)
    if not matched:
        print("Invalid path")
        return False
    return matched.group()
def getFilesInput(rule_path):
    """Read a UTF-8 text file and return its cleaned content for counting.

    The entire file is read, not just its first line. Line breaks are
    removed before the text is passed through processTxt().

    Args:
        rule_path: Path of the file to read.

    Returns:
        The file's text with line breaks removed and processTxt() applied.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(rule_path, 'r', encoding='utf-8') as file:
        text = file.read()
    # Line breaks are layout, not content: drop them so they do not show up
    # as a counted "character" in the frequency table.
    words = processTxt(text.replace("\n", ""))
    print()
    print(f"{rule_path} content has been read!")
    separatorLine()
    return words
def getFrequency():
    """Prompt until the user enters a whole number and return it.

    Every entry, valid or not, is stored in the module-level
    ``frequency_number``; the loop ends on the first entry made only of
    decimal digits.

    Returns:
        The accepted entry as a string (callers convert it with int()).
    """
    global frequency_number
    prompt = ("Please input number : \n Note: words with frequency less than input times will not "
              "be recorded! \n :-->")
    while True:
        frequency_number = input(prompt)
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() later rejects.
        if frequency_number.isdecimal():
            separatorLine()
            return frequency_number
def analyseText(words, number):
    """Count how often each character occurs and keep the frequent ones.

    Args:
        words: The text (or any iterable of hashable items) to analyse.
        number: Minimum count an item needs to be kept; any value that
            int() accepts, e.g. ``3`` or ``"3"``.

    Returns:
        A list of ``(item, count)`` tuples sorted by count, highest first.
        Items with equal counts stay in order of first appearance.

    Raises:
        ValueError: If ``number`` cannot be converted by int().
    """
    from collections import Counter

    # Convert the threshold once instead of once per distinct item.
    threshold = int(number)
    counts = Counter(words)
    frequent = [(item, count) for item, count in counts.items() if count >= threshold]
    # list.sort is stable, also with reverse=True, so ties keep their order.
    frequent.sort(key=lambda pair: pair[1], reverse=True)
    return frequent
def writeTxtFile(result_file):
    """Write the analysis report to ``result_<file_time>.txt`` in ``work_dir``.

    The header depends on the module-level ``file_name``: when it is not
    empty the report names the source file and the frequency threshold;
    otherwise it echoes the module-level ``terminal_words``. The header is
    followed by the whole result list on one line and then one entry per
    line.

    Args:
        result_file: The list of entries to report, as returned by
            analyseText().
    """
    rule = "+" * 50
    gap = "\n" * 2

    if file_name != "":
        header = [
            "Analyzing source files:" + file_name, gap,
            f"PS:This file records only {frequency_number} and above!", gap,
            rule, gap,
        ]
    else:
        header = [
            "The terminal input is as follows:", gap,
            rule, gap,
            terminal_words, gap,
            rule, gap,
        ]

    summary = [str(result_file), gap, rule, gap]
    rows = [str(entry) + "\n" for entry in result_file]

    target = work_dir + 'result_' + file_time + '.txt'
    with open(target, 'w', encoding="UTF-8") as file:
        file.writelines(header + summary + rows + [rule])
    print(f"Completed and saved!\n {work_dir}result_{file_time}.txt")
# Interactive main menu: analyse text typed at the terminal (T), analyse a
# text file (F), or quit (q). Each analysis is saved by writeTxtFile().
while True:
    nameMark()
    flag = input("File(F) or Terminal(T)[quit -q]: \n").lower()
    if flag == 'q':
        break

    if flag == "t":
        # writeTxtFile() picks its header from file_name, so clear any path
        # left over from an earlier file-mode run.
        file_name = ""
        terminal_words = input("input text: \n")
        t_words = processTxt(terminal_words)
        show_time = getFrequency()
        analysis_result = analyseText(t_words, show_time)
        writeTxtFile(analysis_result)
        separatorLine()
        if input('To exit? y/n \n').upper() == 'Y':
            print('Thank you for using!!!')
            break
    elif flag == "f":
        exit_requested = False
        while True:
            # Keep the raw entry separate from file_name so that "q" or an
            # invalid path never ends up in the report header.
            path_input = input("file path[quit -q]:-->> ")
            if path_input.lower() == 'q':
                break  # back to the main menu
            rule_path = isRulePath(path_input)
            if not rule_path:
                continue
            try:
                f_words = getFilesInput(rule_path)
            except (OSError, UnicodeError) as err:
                # A missing or unreadable file should prompt again, not end
                # the program with a traceback.
                print(f"Cannot read file: {err}")
                continue
            file_name = rule_path
            show_time = getFrequency()
            analysis_result = analyseText(f_words, show_time)
            writeTxtFile(analysis_result)
            separatorLine()
            if input('To exit? y/n \n').upper() == 'Y':
                print('Thank you for using!!!')
                exit_requested = True
                break
        if exit_requested:
            # Leave the outer menu loop as well, matching terminal mode.
            break
    else:
        print("Input error! \n <TXT File> --> F or <Terminal input> --> T ")
        separatorLine()
# Tags (标签): words, python, 字频, write, result, file, input, path, 统计
# Source (来源): https://www.cnblogs.com/annzi/p/15427694.html