其他分享
首页 > 其他分享 > Unicode 汉字列表 使用代码自动生成文件

Unicode 汉字列表 使用代码自动生成文件

作者:互联网

 

 

使用下述代码生成文件:

 1 import os
 2 
 3 
 4 
 5 basic_chinese_characters = ("4E00", "9FA5", 20902, "basic_chinese_characters", "基本汉字")  # 基本汉字
 6 basic_chinese_characters_supplement = ("9FA6", "9FFF", 90, "basic_chinese_characters_supplement", "基本汉字补充")  # 基本汉字补充
 7 chinese_characters_extension_a = ("3400", "4DBF", 6592, "chinese_characters_extension_a", "汉字扩展A")  # 汉字扩展A
 8 chinese_characters_extension_b = ("20000", "2A6DF", 42720, "chinese_characters_extension_b", "汉字扩展B")  # 汉字扩展B
 9 chinese_characters_extension_c = ("2A700", "2B738", 4153, "chinese_characters_extension_c", "汉字扩展C")  # 汉字扩展C
10 chinese_characters_extension_d = ("2B740", "2B81D", 222, "chinese_characters_extension_d", "汉字扩展D")  # 汉字扩展D
11 chinese_characters_extension_e = ("2B820", "2CEA1", 5762, "chinese_characters_extension_e", "汉字扩展E")  # 汉字扩展E
12 chinese_characters_extension_f = ("2CEB0", "2EBE0", 7473, "chinese_characters_extension_f", "汉字扩展F")  # 汉字扩展F
13 chinese_characters_extension_g = ("30000", "3134A", 4939, "chinese_characters_extension_g", "汉字扩展G")  # 汉字扩展G
14 kangxi_radical = ("2F00", "2FD5", 214, "kangxi_radical", "康熙部首")  # 康熙部首
15 radical_extension = ("2E80", "2EF3", 115, "radical_extension", "部首扩展")  # 部首扩展
16 compatible_chinese_characters = ("F900", "FAD9", 477, "compatible_chinese_characters", "兼容汉字")  # 兼容汉字
17 compatible_extension = ("2F800", "2FA1D", 542, "compatible_extension", "兼容扩展")  # 兼容扩展
18 PUA_GBK_parts = ("E815", "E86F", 81, "PUA_GBK_parts", "PUA(GBK)部件")  # PUA(GBK)部件
19 parts_extension = ("E400", "E5E8", 452, "parts_extension", "部件扩展")  # 部件扩展
20 PUA_supplement = ("E600", "E6CF", 207, "PUA_supplement", "PUA增补")  # PUA增补
21 chinese_character_strokes = ("31C0", "31E3", 36, "chinese_character_strokes", "汉字笔画")  # 汉字笔画
22 chinese_character_structure = ("2FF0", "2FFB", 12, "chinese_character_structure", "汉字结构")  # 汉字结构
23 chinese_phonetic_phonetic = ("3105", "312F", 43, "chinese_phonetic_phonetic", "汉语注音")  # 汉语注音
24 phonetic_extension = ("31A0", "31BA", 22, "phonetic_extension", "注音扩展")  # 注音扩展
25 chinese_phonetic_zero = ("3007", "3007", 1, "chinese_phonetic_zero", "汉字〇")  # 汉字〇
26 
27 unicode_chinese_characters_all = [basic_chinese_characters,
28                                   basic_chinese_characters_supplement,
29                                   chinese_characters_extension_a,  # 汉字扩展A
30                                   chinese_characters_extension_b,  # 汉字扩展B
31                                   chinese_characters_extension_c,  # 汉字扩展C
32                                   chinese_characters_extension_d,  # 汉字扩展D
33                                   chinese_characters_extension_e,  # 汉字扩展E
34                                   chinese_characters_extension_f,  # 汉字扩展F
35                                   chinese_characters_extension_g,  # 汉字扩展G
36                                   kangxi_radical,  # 康熙部首
37                                   radical_extension,  # 部首扩展
38                                   compatible_chinese_characters,  # 兼容汉字
39                                   compatible_extension,  # 兼容扩展
40                                   PUA_GBK_parts,  # PUA(GBK)部件
41                                   parts_extension,  # 部件扩展
42                                   PUA_supplement,  # PUA增补
43                                   chinese_character_strokes,  # 汉字笔画
44                                   chinese_character_structure,  # 汉字结构
45                                   chinese_phonetic_phonetic,  # 汉语注音
46                                   phonetic_extension,  # 注音扩展
47                                   chinese_phonetic_zero,  # 汉字〇
48                                   ]
49 
50 with open(file="unicode_chinese.txt", mode="w+", encoding="utf-8") as f_demo:
51     for i in unicode_chinese_characters_all:
52         write_str = ""
53         node = 0
54         max_num = int(i[1], 16)
55         for num_ in range(int(i[0], 16), (int(i[1], 16) + 1)):
56             node = node + 1
57             if write_str:
58                 write_str = write_str + "'" + chr(num_) + "'"
59             else:
60                 write_str = "# " + i[4] + "\t【" + str(i[2]) + "个】\t(" + i[0] + "-" + i[1] + ")\n" + i[3] + " = [" + "'" + chr(num_) + "'"
61 
62             if num_ == max_num:
63                 write_str = write_str + "]\n\n"
64             else:
65                 write_str = write_str + ", "
66 
67             # if node > 75:
68             #     write_str = write_str + "\n\t\t\t"
69             #     node = 0
70             # else:
71             #     pass
72 
73         # write_str = write_str + "]\n\n"
74         f_demo.write(write_str)
75         f_demo.flush()

 

生成的文件如下图:

 

标签:Unicode,chinese,extension,扩展,汉字,列表,str,characters
来源: https://www.cnblogs.com/kaixin2018/p/15547128.html