【hust数据结构】huffman树及编码/解码实验
作者:互联网
做了好久...
最后调出来还是蛮有成就感的,总结一个博客出来吧2333
要求
一些细节
中文字符的处理
在头文件之后写:
#ifdef _WIN32 #include<windows.h>//用来修控制台中文乱码 #endif
在main函数的最前面写:
#ifdef _WIN32 SetConsoleOutputCP(65001);//用来修控制台的中文乱码 #endif
就可以改成65001编码了,能正确处理中文字符。
中文字符占三个char,英文字符还是一个char。
map<struct,int>:重载struct的运算符
以struct为下标的map,需要重载运算符。
struct source_data{ char ch1,ch2,ch3; bool operator < (const source_data &a) const{ return ch1>a.ch1;//升序 } }; map<source_data,int>nod;//存储字符对应的编号
代码
1 /* 2 author : qwerta 3 date : 2021.11.04 - 11.11 4 */ 5 #include<algorithm> 6 #include<iostream> 7 #include<cstdio> 8 #include<cstring> 9 #include<cstdlib> 10 #include<queue> 11 #include<stack> 12 #include<map> 13 #include<vector> 14 #ifdef _WIN32 15 #include<windows.h>//用来修控制台中文乱码 16 #endif 17 using namespace std; 18 const int ALL_CHARACTER=1000+3;//总字符数 19 struct source_data{ 20 char ch1,ch2,ch3; 21 bool operator < (const source_data &a) const{ 22 return ch1>a.ch1;//升序 23 } 24 }; 25 map<source_data,int>nod;//存储字符对应的编号 26 int cont[ALL_CHARACTER];//记录编号为i的字符的出现次数 27 int tot_char=0;//总字符数,从1开始存 28 int tot_node=0;//总节点数 29 void read_source_txt() 30 { 31 freopen("a.in","r",stdin); 32 char ch; 33 while((ch=getchar())!=EOF) 34 { 35 if(ch>=0)//英文 36 { 37 if(nod.find((source_data){ch,' ',' '})==nod.end()) 38 { 39 nod[((source_data){ch,' ',' '})]=++tot_char; 40 cont[tot_char]=1; 41 } 42 else cont[nod[((source_data){ch,' ',' '})]]++; 43 //putchar(ch); 44 } 45 else 46 { 47 //printf("汉字get\n"); 48 char ch0=getchar(),ch00=getchar(); 49 if(nod.find((source_data){ch,ch0,ch00})==nod.end()) 50 { 51 nod[((source_data){ch,ch0,ch00})]=++tot_char; 52 cont[tot_char]=1; 53 } 54 else cont[nod[((source_data){ch,ch0,ch00})]]++; 55 //putchar(ch);putchar(ch0);putchar(ch00); 56 //putchar(' '); 57 } 58 } 59 fclose(stdin); 60 cin.clear(); 61 //printf("\nread finished\n"); 62 return; 63 } 64 struct NODE{ 65 NODE *lson,*rson;//lson:0 rson:1 66 NODE *fa;//父节点 67 int node=0,tim;//当前点的编号 & 出现次数之和 68 source_data raw; 69 int huff;//huffman编码 70 }; 71 struct cmp{ 72 bool operator() (const NODE *a,const NODE *b){ 73 return (a->tim) > (b->tim); 74 } 75 }; 76 priority_queue<NODE *,vector<NODE*>,cmp>q; 77 NODE *s;//树根root 78 void huffman_build() 79 { 80 map<source_data,int>::reverse_iterator iter; 81 int i; 82 for(iter = nod.rbegin(),i=1;iter != nod.rend();iter++,++i){ 83 source_data c = iter->first; 84 NODE *p = (NODE*)malloc(sizeof(NODE)); 85 p->node=i; 86 p->tim=cont[nod[c]]; 87 p->lson=p->rson=nullptr; 88 p->raw = c; 89 q.push(p); 90 } 91 for(int i=1;i<=tot_char;++i){ 92 93 } 94 tot_node=tot_char; 95 while(q.size()>1){ 96 NODE *p = (NODE*)malloc(sizeof(NODE)); 97 NODE *c1=q.top();q.pop(); 98 NODE *c2=q.top();q.pop(); 99 p->lson=c1;p->rson=c2;//注意 100 c1->fa = c2->fa = p; 101 p->tim=(c1->tim) + (c2->tim); 102 p->node=++tot_node; 103 p->raw.ch1=0; 104 q.push(p); 105 } 106 s=q.top();q.pop(); 107 //printf("huffman_build finished\n"); 108 return; 109 } 110 int huffman_code[ALL_CHARACTER];//存编号为i的字符的huffman编码 111 void huffman_dfs(NODE *c,int now)//now:储存当前huffman编码(最高位之前,比实际huffman编码多加了一位1) 112 { 113 if(c->lson)huffman_dfs(c->lson,(now<<1)); 114 // 115 if((c->raw).ch1)//到叶子节点了 116 { 117 huffman_code[nod[c->raw]]=c->huff=now; 118 //cout<<" "<<now<<endl; 119 return; 120 } 121 // 122 if(c->rson)huffman_dfs(c->rson,((now<<1)|1)); 123 return; 124 } 125 stack<NODE *>st; 126 void huffman_with_stack()//非递归遍历huffman树 127 { 128 st.push(s); 129 s->huff=1; 130 while(!st.empty()){ 131 NODE *c=st.top();st.pop(); 132 if(c->lson){ 133 (c->lson)->huff = (c->huff)<<1; 134 st.push(c->lson); 135 } 136 // 137 if((c->raw).ch1)huffman_code[c->node]=c->huff; 138 if(c->rson){ 139 (c->rson)->huff = ((c->huff)<<1)|1; 140 st.push(c->rson); 141 } 142 } 143 return; 144 } 145 void print_char(source_data c){ 146 if(c.ch1>0)putchar(c.ch1); 147 else{putchar(c.ch1);putchar(c.ch2);putchar(c.ch3);} 148 return; 149 } 150 void print_huffman(int x){ 151 //printf("\n print %d now :",x); 152 bool ans[103]; 153 int tot=0; 154 for(;(1<<tot)<=x;++tot){ 155 if((x>>tot)&1)ans[tot]=1; 156 else ans[tot]=0; 157 } 158 for(int i=tot-2;i>=0;--i) 159 printf("%d",ans[i]); 160 return; 161 } 162 void print_tim(){ 163 printf("\n------------------字符出现次数-------------------\n\n"); 164 map<source_data,int>::reverse_iterator iter; 165 int tot=0; 166 for(iter = nod.rbegin();iter != nod.rend();iter++){ 167 source_data c = iter->first; 168 printf("--%d--//",tot++); 169 print_char(c); 170 printf("//,次数:%d",cont[nod[c]]); 171 putchar('\n'); 172 } 173 return; 174 } 175 void print_tree_dfs(NODE *c)//采用递归遍历该huffman树 176 { 177 if(c->lson)print_tree_dfs(c->lson); 178 if(c->rson)print_tree_dfs(c->rson); 179 printf("序号:%d--W%d--",c->node,c->tim); 180 if(c!=s)printf("//P%d",c->fa->node);else printf("//P0"); 181 if(c->lson)printf("//L%d",c->lson->node);else printf("//L0"); 182 if(c->lson)printf("//R%d//",c->rson->node);else printf("//R0//"); 183 if((c->raw).ch1){ 184 print_char(c->raw); 185 printf("==>"); 186 print_huffman(c->huff); 187 } 188 putchar('\n'); 189 return; 190 } 191 vector<char>txt;//="In the animation industry, cartoon videos are produced from hand drawings of expert animators using a complex and precise procedure. To draw each frame of an animation video manually would consume tremendous time, thus leading to a prohibitively high cost. In practice, the animation producers usually replicate one drawing two or three times to reduce the cost, which results in the actual low frame rate of animation videos. Therefore, it is highly desirable to develop computational algorithms to interpolate the intermediate animation frames automatically.In recent years, video interpolation has made great progress on natural videos. However, in animations, existing video interpolation methods are not able to produce satisfying in-between frames. An example from the film Children Who Chase Lost Voices is illustrated in Figure 1, where the current state-of-the-art methods fail to generate a piece of complete luggage due to the incorrect motion estimation, which is shown in the lower-left corner of the image. The challenges here stem from the two unique characteristics of animation videos: 1) First, cartoon images consist of explicit sketches and lines, which split the image into segments of smooth color pieces. Pixels in one segment are similar, which yields insufficient textures to match the corresponding pixels between two frames and hence increases the difficulty to predict accurate motions. 2) Second, cartoon animations use exaggerated expressions in pursuit of artistic effects, which result in non-linear and extremely large motions between adjacent frames. Two typical cases are depicted in Figure 2 (a) and (b) which illustrate these challenges respectively. Due to these difficulties mentioned above, video interpolation in animations remains a challenging task.";//待编码文件 192 vector<bool>vec;//存编码后的字符串 193 void read_target_txt() 194 { 195 freopen("b.in","r",stdin); 196 char ch; 197 while((ch=getchar())!=EOF) 198 { 199 txt.push_back(ch); 200 } 201 fclose(stdin); 202 cin.clear(); 203 return; 204 } 205 void huffman_encode(){ 206 for(int i=0;i<txt.size();++i){ 207 int x = huffman_code[nod[(source_data){txt[i],' ',' '}]]; 208 bool ans[103]; 209 int tot=0; 210 for(;(1<<tot)<=x;++tot){ 211 if((x>>tot)&1)ans[tot]=1; 212 else ans[tot]=0; 213 } 214 for(int i=tot-2;i>=0;--i) 215 vec.push_back(ans[i]); 216 } 217 // 218 printf("\n------------------对b.in编码-------------------\n"); 219 printf("\nvec.size : %d \n",vec.size()); 220 printf("txt encoded : "); 221 for(int i=0;i<vec.size();++i) 222 cout<<vec[i]; 223 printf("\n\n"); 224 return; 225 } 226 void huffman_decode(){ 227 printf("vec decoded : "); 228 NODE *p = s; 229 for(int i=0;i<vec.size();++i){ 230 if(!vec[i]){ 231 p = p->lson; 232 } 233 else{ 234 p = p->rson; 235 } 236 // 237 if((p->raw).ch1){ 238 print_char(p->raw); 239 p = s; 240 } 241 } 242 return; 243 } 244 void print_compress_rate(){ 245 double rate=(double)(vec.size())/(8*txt.size()); 246 printf("\nCompress rate for the txt : \n%.6f",rate); 247 return; 248 } 249 int main() 250 { 251 #ifdef _WIN32 252 SetConsoleOutputCP(65001);//用来修控制台的中文乱码 253 #endif 254 // 255 read_source_txt(); 256 huffman_build(); 257 // 258 bool need_dfs = 1; 259 if(need_dfs){ 260 huffman_dfs(s,1);//递归遍历 261 //huffman_with_stack();//非递归遍历 262 } 263 // 264 print_tim(); 265 putchar('\n'); 266 printf("\n------------------递归输出字符huffman编码-------------------\n\n"); 267 print_tree_dfs(s); 268 // 269 bool need_encode = 1; 270 if(need_encode){ 271 read_target_txt(); 272 huffman_encode(); 273 huffman_decode(); 274 print_compress_rate(); 275 } 276 return 0; 277 }
标签:NODE,int,hust,tot,source,printf,树及,huffman 来源: https://www.cnblogs.com/qwerta/p/15540165.html