首页 > 其他分享> > 【hust数据结构】huffman树及编码/解码实验

【hust数据结构】huffman树及编码/解码实验

2021-11-11 15:33:22 作者：互联网

做了好久...

最后调出来还是蛮有成就感的，总结一个博客出来吧2333

要求

一些细节

中文字符的处理

在头文件之后写：

#ifdef _WIN32
    #include<windows.h>//用来修控制台中文乱码
#endif

在main函数的最前面写：

#ifdef _WIN32
    SetConsoleOutputCP(65001);// switch the Windows console output code page to 65001 so Chinese text is not garbled
#endif

这样控制台的输出代码页就改成了65001（即UTF-8），可以正确显示中文字符。

在UTF-8编码下，常用中文字符占三个char（三个字节），英文（ASCII）字符仍然只占一个char。

map<struct,int>：重载struct的运算符

以struct作为键（下标）的map，需要为该struct重载小于运算符（operator<）。

// Map key for one input character: up to three raw bytes of its encoding.
// One-byte (ASCII) characters are stored with ch2 and ch3 set to ' '.
struct source_data{
    char ch1,ch2,ch3;
    // Strict weak ordering over ALL three bytes. Comparing only ch1 would make
    // every multi-byte character that shares a first byte the same map key.
    // Each byte is compared in descending order, so iterating the map in
    // reverse (rbegin..rend) visits the keys in ascending order.
    bool operator < (const source_data &a) const{
        if(ch1!=a.ch1)return ch1>a.ch1;
        if(ch2!=a.ch2)return ch2>a.ch2;
        return ch3>a.ch3;
    }
};
std::map<source_data,int>nod;// maps each character to its numeric id

代码

  1 /*
  2 author : qwerta
  3 date : 2021.11.04 - 11.11
  4 */
  5 #include<algorithm>
  6 #include<iostream>
  7 #include<cstdio>
  8 #include<cstring>
  9 #include<cstdlib>
 10 #include<queue>
 11 #include<stack>
 12 #include<map>
 13 #include<vector>
 14 #ifdef _WIN32
 15     #include<windows.h>//用来修控制台中文乱码
 16 #endif
 17 using namespace std;
 const int ALL_CHARACTER=1000+3;// capacity of the per-character tables below (ids start at 1)
 // Map key for one input character: up to three raw bytes of its encoding.
 // One-byte (ASCII) characters are stored with ch2 and ch3 set to ' '.
 struct source_data{
     char ch1,ch2,ch3;
     // Strict weak ordering over ALL three bytes. Comparing only ch1 would make
     // every multi-byte character that shares a first byte the same map key.
     // Each byte is compared in descending order, so iterating the map in
     // reverse (rbegin..rend) visits the keys in ascending order.
     bool operator < (const source_data &a) const{
         if(ch1!=a.ch1)return ch1>a.ch1;
         if(ch2!=a.ch2)return ch2>a.ch2;
         return ch3>a.ch3;
     }
 };
 std::map<source_data,int>nod;// maps each character to its numeric id
 int cont[ALL_CHARACTER];// cont[i]: number of occurrences of the character with id i
 int tot_char=0;// number of distinct characters seen so far; ids are handed out starting at 1
 int tot_node=0;// number of tree nodes created so far
 29 void read_source_txt()
 30 {
 31     freopen("a.in","r",stdin);
 32     char ch;
 33     while((ch=getchar())!=EOF)
 34     {
 35         if(ch>=0)//英文
 36         {
 37             if(nod.find((source_data){ch,' ',' '})==nod.end())
 38             {
 39                 nod[((source_data){ch,' ',' '})]=++tot_char;
 40                 cont[tot_char]=1;
 41             }
 42             else cont[nod[((source_data){ch,' ',' '})]]++;
 43             //putchar(ch);
 44         }
 45         else
 46         {
 47             //printf("汉字get\n");
 48             char ch0=getchar(),ch00=getchar();
 49             if(nod.find((source_data){ch,ch0,ch00})==nod.end())
 50             {
 51                 nod[((source_data){ch,ch0,ch00})]=++tot_char;
 52                 cont[tot_char]=1;
 53             }
 54             else cont[nod[((source_data){ch,ch0,ch00})]]++;
 55             //putchar(ch);putchar(ch0);putchar(ch00);
 56             //putchar(' ');
 57         }
 58     }
 59     fclose(stdin);
 60     cin.clear();
 61     //printf("\nread finished\n");
 62     return;
 63 }
 64 struct NODE{
 65     NODE *lson,*rson;//lson:0 rson:1
 66     NODE *fa;//父节点
 67     int node=0,tim;//当前点的编号 & 出现次数之和
 68     source_data raw;
 69     int huff;//huffman编码
 70 };
 71 struct cmp{
 72     bool operator() (const NODE *a,const NODE *b){
 73         return (a->tim) > (b->tim);
 74     }
 75 };
 76 priority_queue<NODE *,vector<NODE*>,cmp>q;
 77 NODE *s;//树根root
 78 void huffman_build()
 79 {
 80     map<source_data,int>::reverse_iterator iter;
 81     int i;
 82     for(iter = nod.rbegin(),i=1;iter != nod.rend();iter++,++i){
 83         source_data c = iter->first;        
 84         NODE *p = (NODE*)malloc(sizeof(NODE));
 85         p->node=i;
 86         p->tim=cont[nod[c]];
 87         p->lson=p->rson=nullptr;
 88         p->raw = c;
 89         q.push(p);
 90     }
 91     for(int i=1;i<=tot_char;++i){
 92 
 93     }
 94     tot_node=tot_char;
 95     while(q.size()>1){
 96         NODE *p = (NODE*)malloc(sizeof(NODE));
 97         NODE *c1=q.top();q.pop();
 98         NODE *c2=q.top();q.pop();
 99         p->lson=c1;p->rson=c2;//注意
100         c1->fa = c2->fa = p;
101         p->tim=(c1->tim) + (c2->tim);
102         p->node=++tot_node;
103         p->raw.ch1=0;
104         q.push(p);
105     }
106     s=q.top();q.pop();
107     //printf("huffman_build finished\n");
108     return;
109 }
110 int huffman_code[ALL_CHARACTER];//存编号为i的字符的huffman编码
111 void huffman_dfs(NODE *c,int now)//now:储存当前huffman编码（最高位之前，比实际huffman编码多加了一位1）
112 {
113     if(c->lson)huffman_dfs(c->lson,(now<<1));
114     //
115     if((c->raw).ch1)//到叶子节点了
116     {
117         huffman_code[nod[c->raw]]=c->huff=now;
118         //cout<<" "<<now<<endl;
119         return;
120     }
121     //
122     if(c->rson)huffman_dfs(c->rson,((now<<1)|1));
123     return;
124 }
125 stack<NODE *>st;
126 void huffman_with_stack()//非递归遍历huffman树
127 {
128     st.push(s);
129     s->huff=1;
130     while(!st.empty()){
131         NODE *c=st.top();st.pop();
132         if(c->lson){
133             (c->lson)->huff = (c->huff)<<1;
134             st.push(c->lson);
135         }
136         //
137         if((c->raw).ch1)huffman_code[c->node]=c->huff;
138         if(c->rson){
139             (c->rson)->huff = ((c->huff)<<1)|1;
140             st.push(c->rson);
141         }
142     }
143     return;
144 }
145 void print_char(source_data c){
146     if(c.ch1>0)putchar(c.ch1);
147     else{putchar(c.ch1);putchar(c.ch2);putchar(c.ch3);}
148     return;
149 }
// Prints the Huffman code stored in x as a string of '0'/'1' characters.
// x holds the code bits preceded by a single sentinel 1 bit; the sentinel is
// not printed. Values <= 1 carry no code bits and print nothing.
void print_huffman(int x){
    if(x<=1)return;
    // Find the sentinel by shifting x down instead of shifting 1 up:
    // (1<<tot) overflows a signed int as soon as x uses bit 30.
    int top=0;
    while((x>>(top+1))!=0)++top;
    for(int i=top-1;i>=0;--i)
        putchar(((x>>i)&1)?'1':'0');
    return;
}
162 void print_tim(){
163     printf("\n------------------字符出现次数-------------------\n\n");
164     map<source_data,int>::reverse_iterator iter;
165     int tot=0;
166     for(iter = nod.rbegin();iter != nod.rend();iter++){
167         source_data c = iter->first;
168         printf("--%d--//",tot++);
169         print_char(c);
170         printf("//，次数：%d",cont[nod[c]]);
171         putchar('\n');
172     }
173     return;
174 }
175 void print_tree_dfs(NODE *c)//采用递归遍历该huffman树
176 {
177     if(c->lson)print_tree_dfs(c->lson);
178     if(c->rson)print_tree_dfs(c->rson);
179     printf("序号:%d--W%d--",c->node,c->tim);
180     if(c!=s)printf("//P%d",c->fa->node);else printf("//P0");
181     if(c->lson)printf("//L%d",c->lson->node);else printf("//L0");
182     if(c->lson)printf("//R%d//",c->rson->node);else printf("//R0//");
183     if((c->raw).ch1){
184         print_char(c->raw);
185         printf("==>");
186         print_huffman(c->huff);
187     }
188     putchar('\n');
189     return;
190 }
std::vector<char>txt;// raw bytes of the text to encode, filled from b.in by read_target_txt (a hard-coded English sample paragraph that used to sit here as a commented-out initializer was dropped)
std::vector<bool>vec;// the encoded text, one element per bit
// Appends every byte of "b.in" to txt.
// The file is opened through its own FILE* instead of redirecting stdin, so
// stdin stays usable afterwards. If the file cannot be opened, a message is
// written to stderr and txt is left unchanged.
void read_target_txt()
{
    FILE *in=fopen("b.in","r");
    if(in==nullptr)
    {
        perror("b.in");
        return;
    }
    int byte;// int, not char: fgetc returns EOF outside the range of any byte value
    while((byte=fgetc(in))!=EOF)
    {
        txt.push_back(static_cast<char>(byte));
    }
    fclose(in);
    return;
}
205 void huffman_encode(){
206     for(int i=0;i<txt.size();++i){
207         int x = huffman_code[nod[(source_data){txt[i],' ',' '}]];
208         bool ans[103];
209         int tot=0;
210         for(;(1<<tot)<=x;++tot){
211             if((x>>tot)&1)ans[tot]=1;
212             else ans[tot]=0;
213         }
214         for(int i=tot-2;i>=0;--i)
215         vec.push_back(ans[i]);
216     }
217     //
218     printf("\n------------------对b.in编码-------------------\n");
219     printf("\nvec.size : %d \n",vec.size());
220     printf("txt encoded : ");
221     for(int i=0;i<vec.size();++i)
222         cout<<vec[i];
223     printf("\n\n");
224     return;
225 }
226 void huffman_decode(){
227     printf("vec decoded : ");
228     NODE *p = s;
229     for(int i=0;i<vec.size();++i){
230         if(!vec[i]){
231             p = p->lson;
232         }
233         else{
234             p = p->rson;
235         }
236         //
237         if((p->raw).ch1){
238             print_char(p->raw);
239             p = s;
240         }
241     }
242     return;
243 }
244 void print_compress_rate(){
245     double rate=(double)(vec.size())/(8*txt.size());
246     printf("\nCompress rate for the txt : \n%.6f",rate);
247     return;
248 }
249 int main()
250 {
251     #ifdef _WIN32
252         SetConsoleOutputCP(65001);//用来修控制台的中文乱码
253     #endif
254     //
255     read_source_txt();
256     huffman_build();
257     //
258     bool need_dfs = 1;
259     if(need_dfs){
260         huffman_dfs(s,1);//递归遍历
261         //huffman_with_stack();//非递归遍历
262     }
263     //
264     print_tim();
265     putchar('\n');
266     printf("\n------------------递归输出字符huffman编码-------------------\n\n");
267     print_tree_dfs(s);
268     //
269     bool need_encode = 1;
270     if(need_encode){
271         read_target_txt();
272         huffman_encode();
273         huffman_decode();
274         print_compress_rate();
275     }
276     return 0;
277 }

标签：NODE,int,hust,tot,source,printf,树及,huffman
来源： https://www.cnblogs.com/qwerta/p/15540165.html