#include "stdlib.h" #include "stdio.h" #include "string.h" #include "huffman_.h" // huffman编码的实现 #define DBG_WARN printf #define DBG_LOG printf typedef struct _huff_tree{ uint8_t data; uint8_t pos;// 位置,左为1,右为0 uint16_t count; struct _huff_tree *parant; struct _huff_tree *left; struct _huff_tree *right; }huff_tree; typedef struct{ huff_tree *tree; int index_table_index; huff_tree *index_table[256]; uint16_t count_table[256]; uint8_t *out; int out_len; uint8_t *in; int in_len; int in_bit_count; int arr_bit_index; }huffman_def; static int hm_calc_value_of_tree(huff_tree *t); static int hm_calc_deep_of_child(huff_tree* t); // 按出现频次排序 static void hm_sort_index_table(huff_tree **table,int num) { for(int i=0;ihm_calc_value_of_tree(item)) { table[i]=table[j]; table[j]=item; item=table[i]; } } } } // 打印index_table static void hm_index_table_print(huffman_def *h){ DBG_LOG("-----index_table-----\n"); for(int i=0;iindex_table_index;i++){ DBG_LOG("index:%d,data:%02x,count:%d\n",i,h->index_table[i]->data,h->index_table[i]->count); } } // 打印数据的编码 static void hm_data_code_print(huffman_def *h){ huff_tree *t; DBG_LOG("------data code------\n"); for(int i=0;iindex_table_index;i++){ t=h->index_table[i]; DBG_LOG("%c:",t->data); while(t->parant){ DBG_LOG("%d",t->pos); t=t->parant; } DBG_LOG("\n"); } } static void hm_calc_count(huffman_def *h,const uint8_t *d,const int d_len) { int num = d_len; int index; memset(h->count_table,0,256); // DBG_LOG("calc count_table\n"); for(int i=0;icount_table[d[i]]++; } // DBG_LOG("calc index_table\n"); for(int i=0;i<256;i++) { if(h->count_table[i]>0){ index=h->index_table_index; h->index_table[index]=calloc(1,sizeof(huff_tree)); h->index_table[index]->count=h->count_table[i]; h->index_table[index]->data=i; h->index_table_index++; } } // DBG_LOG("sort index_table\n"); hm_sort_index_table(h->index_table,h->index_table_index); // hm_index_table_print(h); } // 计算编码后的长度 // 需要先计算index_table和生成huffman树 static int hm_calc_encode_len(huffman_def* h) { // index_table_len(1byte)+index_data(index_table_len bytes) int sum =1+ h->index_table_index; int bit_count = 0; huff_tree* t; for (int i = 0; i < h->index_table_index; i++) { // 计数占用的字节数 t = h->index_table[i]; sum += t->count/255+1; // 压缩后占用的bit数 bit_count += hm_calc_deep_of_child(t) * t->count; } // 补零数目字节 sum += 1; sum += (bit_count + 7) / 8; DBG_LOG("data len for encode:%d\n", sum); return sum; } // 计算树的值 static int hm_calc_value_of_tree(huff_tree *t) { int sum=0; if(t->left&&t->right) sum=hm_calc_value_of_tree(t->left)+hm_calc_value_of_tree(t->right); else sum=t->count; // DBG_LOG("tree sum:%d\n",sum); return sum; } // 计算子节点的深度 static int hm_calc_deep_of_child(huff_tree* t) { int deep = 0; while (t->parant) { deep++; t = t->parant; } return deep; } // 打印huffman树 static void hm_tree_print(huff_tree *t) { if(t->left&&t->right){ DBG_LOG("point:,count:%d\n",hm_calc_value_of_tree(t)); hm_tree_print(t->left); hm_tree_print(t->right); }else{ DBG_LOG("data:%d,count:%d\n",t->data,t->count); } } // 建立huffman树 static void hm_creat_tree(huffman_def *h) { int tail=h->index_table_index; huff_tree *sub1,*sub2; huff_tree **table=calloc(tail,sizeof(huff_tree *)); for(int i=0;iindex_table[i]; } while(tail>1){ huff_tree *temp; sub1=table[tail-1]; sub2=table[tail-2]; // 大在左,小在右 temp=calloc(1,sizeof(huff_tree)); sub1->parant=temp; sub2->parant=temp; // 左为1,右为0 if(hm_calc_value_of_tree(sub1)>hm_calc_value_of_tree(sub2)){ temp->left=sub1; sub1->pos=1; temp->right=sub2; sub2->pos=0; }else{ temp->left=sub2; sub2->pos=1; temp->right=sub1; sub1->pos=0; } table[tail-2]=temp; tail--; hm_sort_index_table(table,tail); // DBG_LOG("-----table-----\n"); // for(int i=0;itree=table[0]; free(table); } // 删除树 static void hm_del_tree(huff_tree *t) { if(t->left&&t->right){ hm_del_tree(t->left); hm_del_tree(t->right); } free(t); } // 数据中添加一个bit static void hm_add_bit(uint8_t *d,int *d_len,int bit,int *index) { if(*index<(*d_len )*8){ uint8_t c = d[*d_len - 1]; c|=bit<<(*index%8); d[*d_len - 1] = c; }else{ d[*d_len] = bit; (*d_len)++; } (*index)++; } // 根据数据添加bit static int hm_encode_byte(huffman_def *h,uint8_t d) { huff_tree *t=0; // 这里默认一定能找到对应的值 for(int i=0;iindex_table_index;i++) { t=h->index_table[i]; if(t->data==d) break; } if(t->data!=d){ DBG_WARN("can not encode.\n"); exit(-1); } while(t->parant){ hm_add_bit(h->out,&h->out_len,t->pos,&h->arr_bit_index); t=t->parant; } // char *str=arr_string(h->out); // DBG_LOG("index:%d,out data:%s\n",h->arr_bit_index,str); // free(str); return 0; } // 生成索引 static int hm_creat_index_table(huffman_def *h,uint8_t *data,int *data_len) { int temp; int diff; int temp_num; data[*data_len] = h->index_table_index; (*data_len)++; hm_index_table_print(h); for(int i=0;iindex_table_index;i++) { data[*data_len] = h->index_table[i]->data; (*data_len)++; temp=h->index_table[i]->count; temp_num = temp / 255 + 1; for (int i = 0; i < temp_num; i++) { if (i < temp_num - 1) { data[*data_len] = 255; (*data_len)++; } else { data[*data_len] = temp % 255; (*data_len)++; } } } // 填充0个数 temp=8-(h->arr_bit_index%8); //DBG_LOG("fill with 0 by:%d\n", temp); data[*data_len] = temp; (*data_len)++; return 0; } // huffman编码 /* 压缩后数据格式 data[0]:索引表长度 data[1~n]:索引表,每个索引由值(1byte)和频次(1byte,小于255)(2byte,大于等于255,频次由两个字节相加) data[n+1]:数据中填充0个数 data[n+2~m]:压缩后的数据 */ int hm_encode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len) { int input_len = in_len; int output_len=0; int output_index = 0; huffman_def *h=calloc(1,sizeof(huffman_def)); hm_calc_count(h,in, input_len); hm_creat_tree(h); output_len = hm_calc_encode_len(h); (*out) = calloc(output_len + 1, sizeof(uint8_t)); hm_creat_index_table(h, *out, &output_index); DBG_LOG("output_len=%d\n", output_index); h->out = &(*out)[output_index]; for(int i=0;iarr_bit_index); (*out)[output_index-1] = h->out_len*8- h->arr_bit_index; DBG_LOG("fill with 0 by:%d\n", (*out)[output_index - 1]); (*out_len) = output_len; hm_del_tree(h->tree); DBG_LOG("lenth_in:%d,length_encode:%d\n",input_len, output_len); free(h); return 0; } // 读取编码表,返回数据开始的位置 static int hm_unpack_count(huffman_def *h,const uint8_t *d,int d_len) { int num = d[0]; int index=1; uint8_t temp; for(int i=0;iindex_table[i]=calloc(1,sizeof(huff_tree)); h->index_table[i]->data=d[index];index++; do{ temp= d[index];index++; h->index_table[i]->count+=temp; }while(temp==0xff); h->index_table_index++; } temp= d[index];index++; h->in_bit_count=(d_len -index)*8-temp; h->in=&d[index]; // hm_index_table_print(h); printf("bitcount:%d,\n",h->in_bit_count); return index; } // 获取指定index的bit值 static inline int hm_get_bit(const uint8_t *d,int index) { uint8_t t=d[index/8]; return t&(1<<(index%8))?1:0; } // 对比树节点,匹配返回bit数,不匹配返回0 static inline int hm_cmp_bits(huffman_def *h,huff_tree *t) { int count=0; // DBG_LOG("tree pos:",t->pos); while(t){ // DBG_LOG("%d",t->pos); if(hm_get_bit(h->in,h->arr_bit_index+count)!=t->pos){ // DBG_LOG(" |failed\n"); return 0; } else{ count++; t=t->parant; } } h->arr_bit_index+=count; // DBG_LOG(" |ok,\n"); return count; } static uint8_t hm_decode_byte(huffman_def *h) { huff_tree *t=h->tree; int bit; // DBG_LOG("decode:"); while(t->left&&t->right){ bit=hm_get_bit(h->in,h->arr_bit_index-1); // DBG_LOG("%d",bit); if(bit==t->left->pos) t=t->left; else t=t->right; h->arr_bit_index--; } // DBG_LOG(" | decode byte:%c\n",t->data); return t->data; } static int hm_calc_decode_len(huffman_def *h) { int sum=0; for(int i=0;iindex_table_index;i++){ sum+=h->index_table[i]->count; } DBG_LOG("data len for decode:%d\n",sum); return sum; } // huffman解码 /* */ int hm_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len) { int decode_len,decode_index; uint8_t *decode_data=0; uint8_t c; huffman_def *h=calloc(1,sizeof(huffman_def)); if (h == 0) { return -1; } hm_unpack_count(h,in,in_len); hm_creat_tree(h); // hm_data_code_print(h); // hm_tree_print(h->tree); decode_len=hm_calc_decode_len(h); decode_index=decode_len; decode_data=calloc(decode_len+1,sizeof(uint8_t)); h->arr_bit_index=h->in_bit_count; while(decode_index >0){ c=hm_decode_byte(h); decode_data[decode_index-1]=c; decode_index--; } hm_del_tree(h->tree); free(h); (*out) = decode_data; (*out_len) = decode_len; return 0; }