编程语言
首页 > 编程语言 > python: 字频统计

python: 字频统计

作者:互联网

import re
import time
import os.path

# Output directory for result files, always ending in the platform's path
# separator. os.path.join(..., "") appends the correct separator instead of a
# hard-coded backslash, so the path is valid on every OS and is not doubled
# when the working directory is a drive root.
work_dir = os.path.join(os.getcwd(), "")
# Timestamp taken once at start-up; used in the result file name.
file_time = time.strftime("%y%m%d%H%M%S")
# Path of the file being analysed; empty string means terminal-input mode.
file_name = ""
# Raw text typed by the user in terminal-input mode.
terminal_words = ""
# Minimum frequency threshold; overwritten by getFrequency() with the
# user's input.
frequency_number = 0


def nameMark():
    """Print the program banner: the title framed by two runs of 20 asterisks."""
    stars = "*" * 20
    print(f" {stars} 字频统计 {stars}")


def separatorLine():
    """Print a horizontal rule made of 50 asterisks."""
    rule = "".ljust(50, "*")
    print(rule)


def processTxt(words):
    """Return *words* with spaces and all excluded characters removed.

    The excluded set is the literal list below (some punctuation, the ASCII
    digits and the capital letters A-D); every other character is kept.
    """
    excluded = ",.。,??1234567890ABCD"  # characters that are not counted
    stripper = re.compile(r"[%s]+" % excluded)
    without_spaces = "".join(words.split(" "))
    return stripper.sub("", without_spaces)


def getTerminalInput(ter_words):
    """Clean text typed at the terminal and announce that it has been read.

    Passes *ter_words* through processTxt, prints a confirmation followed by
    a separator line, and returns the cleaned text.
    """
    cleaned = processTxt(ter_words)
    print("The terminal has been read.")
    separatorLine()
    return cleaned


def isRulePath(file_path):
    """Check *file_path* against a backslash-separated path pattern.

    The pattern requires an optional drive letter, a leading backslash, at
    least one backslash-terminated directory component, and a file name
    with an extension.

    Returns the matched path string when the pattern matches; otherwise
    prints "Invalid path" and returns False.
    """
    pattern = (
        r'^(?P<path>(?:[a-zA-Z]:)?\\(?:[^\\\?\/\*\|<>:"]+\\)+)'
        r'(?P<filename>(?P<name>[^\\\?\/\*\|<>:"]+?)\.'
        r'(?P<ext>[^.\\\?\/\*\|<>:"]+))$'
    )
    matched = re.search(pattern, file_path)
    if not matched:
        print("Invalid path")
        return False
    return matched.group()


def getFilesInput(rule_path):
    """Read the UTF-8 text file at *rule_path* and return its cleaned content.

    The whole file is read and passed through processTxt. Line-break
    characters are not removed by processTxt, so they remain in the
    returned text. A confirmation message and a separator line are printed
    after the file has been read.

    Raises whatever ``open``/``read`` raise (e.g. OSError for a missing
    file, UnicodeDecodeError for non-UTF-8 content).
    """
    with open(rule_path, 'r', encoding='utf-8') as file:
        # read(), not readline(): readline() returned only the first line,
        # so every later line of the file was silently left out.
        words = processTxt(file.read())
    print()
    print(f"{rule_path} content has been read!")
    separatorLine()
    return words


def getFrequency():
    """Prompt until the user enters a non-negative whole number.

    The accepted input is stored in the module-level ``frequency_number``
    and returned as the string the user typed (callers convert it with
    ``int``). A separator line is printed once a valid value is entered.
    """
    global frequency_number
    prompt = ("Please input number : \n Note: words with frequency less than input times will not "
              "be recorded!   \n :-->")
    while True:
        frequency_number = input(prompt)
        # isdecimal() rather than isdigit(): isdigit() also accepts
        # characters such as superscript digits, which int() rejects.
        if frequency_number.isdecimal():
            separatorLine()
            return frequency_number


def analyseText(words, number):
    """Count how often each character occurs in *words*.

    Args:
        words: The text to analyse; it is iterated item by item.
        number: Minimum count (an int or a string accepted by ``int``);
            characters occurring fewer times are dropped.

    Returns:
        A list of ``(character, count)`` tuples sorted by count, highest
        first. Characters with equal counts keep first-appearance order.

    Raises:
        ValueError: If *number* cannot be converted to an int.
    """
    from collections import Counter

    # Convert once up front instead of once per distinct character.
    threshold = int(number)
    # Counter does a single pass; the previous per-character
    # words.count(i) call was unused and made the loop quadratic.
    counts = Counter(words)
    kept = [(char, count) for char, count in counts.items() if count >= threshold]
    # list.sort is stable, so ties stay in first-appearance order.
    kept.sort(key=lambda item: item[1], reverse=True)
    return kept


def writeTxtFile(result_file):
    """Write the analysis report to ``result_<file_time>.txt`` under ``work_dir``.

    The header depends on the module-level ``file_name``: when it is not the
    empty string, the source path and the ``frequency_number`` note are
    written; otherwise the raw ``terminal_words`` text is echoed. After the
    header come ``str(result_file)`` and then every entry of *result_file*
    on its own line. The output path is printed once the content is written.
    """
    rule = "+" * 50
    gap = "\n" * 2
    with open(work_dir + 'result_' + file_time + '.txt', 'w', encoding="UTF-8") as file:
        if file_name != "":
            chunks = [
                "Analyzing source files:" + file_name, gap,
                f"PS:This file records only {frequency_number} and above!", gap,
                rule, gap,
            ]
        else:
            chunks = [
                "The terminal input is as follows:", gap,
                rule, gap,
                terminal_words, gap,
                rule, gap,
            ]
        # Full result list on one line, then one entry per line.
        chunks += [str(result_file), gap, rule, gap]
        for entry in result_file:
            chunks += [str(entry), "\n"]
        chunks.append(rule)
        file.write("".join(chunks))
        print(f"Completed and saved!\n {work_dir}result_{file_time}.txt")


def _confirmExit():
    """Ask whether to quit; print the farewell and return True on 'y'/'Y'."""
    if input('To exit? y/n \n').upper() == 'Y':
        print('Thank you for using!!!')
        return True
    return False


def _runTerminalMode():
    """Analyse text typed at the terminal; return True if the user wants to exit."""
    global file_name, terminal_words
    # Clear any value left by an earlier file-mode run so that writeTxtFile
    # emits the terminal-input header instead of a stale file header.
    file_name = ""
    terminal_words = input("input text: \n")
    t_words = processTxt(terminal_words)
    show_time = getFrequency()
    writeTxtFile(analyseText(t_words, show_time))
    separatorLine()
    return _confirmExit()


def _runFileMode():
    """Analyse files until the user backs out; return True if the user wants to exit."""
    global file_name
    while True:
        entered = input("file path[quit -q]:-->> ")
        if entered.lower() == 'q':
            # Back to the main menu, not out of the program.
            return False
        rule_path = isRulePath(entered)
        if not rule_path:
            continue
        try:
            f_words = getFilesInput(rule_path)
        except (OSError, UnicodeDecodeError) as err:
            # A missing or non-UTF-8 file should not crash the session.
            print(f"Cannot read file: {err}")
            continue
        # Only a successfully read path is published to writeTxtFile.
        file_name = rule_path
        show_time = getFrequency()
        writeTxtFile(analyseText(f_words, show_time))
        separatorLine()
        if _confirmExit():
            return True


def main():
    """Run the interactive menu: file mode (F), terminal mode (T) or quit (q)."""
    while True:
        nameMark()
        flag = input("File(F) or Terminal(T)[quit -q]: \n").lower()
        if flag == 'q':
            break
        if flag == "t":
            if _runTerminalMode():
                break
        elif flag == "f":
            # Previously 'Y' here only left the inner loop and showed the
            # menu again; now it ends the program as in terminal mode.
            if _runFileMode():
                break
        else:
            print("Input error! \n <TXT File> --> F   or   <Terminal input> --> T ")
            separatorLine()


if __name__ == "__main__":
    main()


标签:words,python,字频,write,result,file,input,path,统计
来源: https://www.cnblogs.com/annzi/p/15427694.html