From c475f0e4f0a45a9563d33a600160fd9f9bef2a02 Mon Sep 17 00:00:00 2001
From: andy
Date: Wed, 22 Nov 2023 23:05:01 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=B8=8D=E4=BE=9D=E8=B5=96by?=
 =?UTF-8?q?tearray=E7=9A=84=E7=89=88=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore            |   5 +
 .vscode/settings.json |   3 +-
 ReadMe.txt            |   2 +
 huffman_.c            | 504 ++++++++++++++++++++++++++++++++++++++++++
 huffman_.h            |  16 ++
 5 files changed, 529 insertions(+), 1 deletion(-)
 create mode 100644 .gitignore
 create mode 100644 huffman_.c
 create mode 100644 huffman_.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f592887
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+
+.vs/
+*.exe
+
+
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 018b3b0..854061e 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,6 @@
 {
     "files.associations": {
-        "coder_lib.h": "c"
+        "coder_lib.h": "c",
+        "huffman_.h": "c"
     }
 }
\ No newline at end of file
diff --git a/ReadMe.txt b/ReadMe.txt
index af09aaa..f23f638 100644
--- a/ReadMe.txt
+++ b/ReadMe.txt
@@ -8,3 +8,5 @@ gcc hello.c -o hello
 2023.11.22
   huffman数据压缩算法验证成功
 
+2023.11.22
+  移除huffman对 bytearray的依赖
diff --git a/huffman_.c b/huffman_.c
new file mode 100644
index 0000000..72e486f
--- /dev/null
+++ b/huffman_.c
@@ -0,0 +1,504 @@
+
+#include "limits.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include "string.h"
+#include "huffman_.h"
+
+// Huffman coding of byte buffers, with no dependency on bytearray.
+
+#define DBG_WARN printf
+#define DBG_LOG printf
+
+// Number of distinct byte values, i.e. the maximum number of leaves.
+#define HM_SYMBOL_MAX 256
+
+// One tree node. A leaf carries a byte in `data` and its frequency in `count`;
+// an internal node has both children and caches its subtree weight in `count`.
+typedef struct _huff_tree{
+  uint8_t data;
+  uint8_t pos;// branch bit seen from the parent: left is 1, right is 0
+  uint32_t count;// 32 bits wide: a uint16_t wrapped after 65535 occurrences
+  struct _huff_tree *parant;
+  struct _huff_tree *left;
+  struct _huff_tree *right;
+}huff_tree;
+
+// Working state shared by the encoder and the decoder.
+typedef struct{
+  huff_tree *tree;// root, NULL when there are no symbols
+  int index_table_index;// number of leaves in index_table
+  huff_tree *index_table[HM_SYMBOL_MAX];// leaves, heaviest first
+  uint32_t count_table[HM_SYMBOL_MAX];// occurrences per byte value
+  uint8_t *out;// encoder: start of the bit payload
+  int out_len;// encoder: payload bytes used so far
+  const uint8_t *in;// decoder: start of the bit payload
+  int in_len;
+  int in_bit_count;// decoder: number of meaningful payload bits
+  int arr_bit_index;// bit cursor for writing (encode) or reading (decode)
+}huffman_def;
+
+
+static int hm_calc_value_of_tree(huff_tree *t);
+static int hm_calc_deep_of_child(huff_tree* t);
+
+// Sort by weight, heaviest first. The decoder rebuilds the tree with this
+// same routine, so the order given to ties is part of the stream format.
+static void hm_sort_index_table(huff_tree **table,int num)
+{
+  for(int i=0;i<num;i++)
+  {
+    huff_tree *item=table[i];
+    for(int j=i+1;j<num;j++)
+    {
+      if(hm_calc_value_of_tree(table[j])>hm_calc_value_of_tree(item))
+      {
+        table[i]=table[j];
+        table[j]=item;
+        item=table[i];
+      }
+    }
+  }
+}
+
+
+// Print index_table (debug helper).
+static void hm_index_table_print(huffman_def *h){
+  DBG_LOG("-----index_table-----\n");
+  for(int i=0;i<h->index_table_index;i++){
+    DBG_LOG("index:%d,data:%02x,count:%d\n",i,h->index_table[i]->data,(int)h->index_table[i]->count);
+  }
+}
+
+
+// Print the code of every symbol in leaf-to-root order (debug helper).
+static void hm_data_code_print(huffman_def *h){
+  huff_tree *t;
+  DBG_LOG("------data code------\n");
+  for(int i=0;i<h->index_table_index;i++){
+    t=h->index_table[i];
+    DBG_LOG("%c:",t->data);
+    while(t->parant){
+      DBG_LOG("%d",t->pos);
+      t=t->parant;
+    }
+    DBG_LOG("\n");
+  }
+}
+
+
+// Count every byte value of d and build the sorted leaf table.
+// Returns 0 on success, -1 when a leaf cannot be allocated.
+static int hm_calc_count(huffman_def *h,const uint8_t *d,const int d_len)
+{
+  // sizeof, not 256: the table holds 256 multi-byte counters
+  memset(h->count_table,0,sizeof(h->count_table));
+  for(int i=0;i<d_len;i++){
+    h->count_table[d[i]]++;
+  }
+  for(int i=0;i<HM_SYMBOL_MAX;i++)
+  {
+    if(h->count_table[i]>0){
+      huff_tree *leaf=calloc(1,sizeof(*leaf));
+      if(leaf==NULL)
+        return -1;
+      leaf->count=h->count_table[i];
+      leaf->data=(uint8_t)i;
+      h->index_table[h->index_table_index]=leaf;
+      h->index_table_index++;
+    }
+  }
+  hm_sort_index_table(h->index_table,h->index_table_index);
+  return 0;
+}
+
+// Size in bytes of the encoded stream, or -1 when it is too large for the
+// int bit cursor. Needs index_table and the tree.
+static int hm_calc_encode_len(huffman_def* h)
+{
+  // table length (1 byte) + one symbol byte per entry
+  long long sum =1+ h->index_table_index;
+  long long bit_count = 0;
+  huff_tree* t;
+  for (int i = 0; i < h->index_table_index; i++) {
+    t = h->index_table[i];
+    // bytes taken by the frequency
+    sum += t->count/255+1;
+    // payload bits taken by this symbol
+    bit_count += (long long)hm_calc_deep_of_child(t) * t->count;
+  }
+  // padding-count byte
+  sum += 1;
+  sum += (bit_count + 7) / 8;
+  if (bit_count > INT_MAX - 8 || sum >= INT_MAX)
+    return -1;
+  DBG_LOG("data len for encode:%d\n", (int)sum);
+  return (int)sum;
+}
+
+
+// Weight of a subtree. Internal nodes cache the sum of their children, so
+// this is O(1) instead of a walk over the subtree on every comparison.
+static int hm_calc_value_of_tree(huff_tree *t)
+{
+  return (int)t->count;
+}
+
+
+// Depth of a node, which is the code length of a leaf.
+static int hm_calc_deep_of_child(huff_tree* t)
+{
+  int deep = 0;
+  while (t->parant) {
+    deep++;
+    t = t->parant;
+  }
+  return deep;
+}
+
+
+// Print the tree in pre-order (debug helper).
+static void hm_tree_print(huff_tree *t)
+{
+  if(t->left&&t->right){
+    DBG_LOG("point:,count:%d\n",hm_calc_value_of_tree(t));
+    hm_tree_print(t->left);
+    hm_tree_print(t->right);
+  }else{
+    DBG_LOG("data:%d,count:%d\n",t->data,(int)t->count);
+  }
+}
+
+
+// Free a subtree; NULL is accepted.
+static void hm_del_tree(huff_tree *t)
+{
+  if(t==NULL)
+    return;
+  if(t->left&&t->right){
+    hm_del_tree(t->left);
+    hm_del_tree(t->right);
+  }
+  free(t);
+}
+
+
+// Build the tree from index_table into h->tree (NULL when there is no symbol).
+// Returns 0 on success, -1 on allocation failure.
+static int hm_creat_tree(huffman_def *h)
+{
+  int tail=h->index_table_index;
+  huff_tree *sub1,*sub2;
+  huff_tree **table;
+  h->tree=NULL;
+  // empty input: the old code read table[0] of a zero-length array here
+  if(tail==0)
+    return 0;
+  table=calloc(tail,sizeof(huff_tree *));
+  if(table==NULL)
+    return -1;
+  for(int i=0;i<tail;i++){
+    table[i]=h->index_table[i];
+  }
+  while(tail>1){
+    huff_tree *temp;
+    // the two lightest subtrees sit at the end of the table
+    sub1=table[tail-1];
+    sub2=table[tail-2];
+    temp=calloc(1,sizeof(huff_tree));
+    if(temp==NULL){
+      // every node hangs below some table entry; drop them all here so
+      // that the caller has nothing left to free
+      for(int i=0;i<tail;i++){
+        hm_del_tree(table[i]);
+      }
+      h->index_table_index=0;
+      free(table);
+      return -1;
+    }
+    temp->count=sub1->count+sub2->count;
+    sub1->parant=temp;
+    sub2->parant=temp;
+    // heavier on the left; left is 1, right is 0
+    if(hm_calc_value_of_tree(sub1)>hm_calc_value_of_tree(sub2)){
+      temp->left=sub1;
+      sub1->pos=1;
+      temp->right=sub2;
+      sub2->pos=0;
+    }else{
+      temp->left=sub2;
+      sub2->pos=1;
+      temp->right=sub1;
+      sub1->pos=0;
+    }
+    table[tail-2]=temp;
+    tail--;
+    hm_sort_index_table(table,tail);
+  }
+  h->tree=table[0];
+  free(table);
+  return 0;
+}
+
+
+// Release the coder state: the whole tree once it exists, else the bare leaves.
+static void hm_free(huffman_def *h)
+{
+  if(h->tree){
+    hm_del_tree(h->tree);
+  }else{
+    for(int i=0;i<h->index_table_index;i++){
+      free(h->index_table[i]);
+    }
+  }
+  free(h);
+}
+
+// Append one bit, least significant bit of each byte first.
+static void hm_add_bit(uint8_t *d,int *d_len,int bit,int *index)
+{
+  if(*index<(*d_len )*8){
+    uint8_t c = d[*d_len - 1];
+    c|=bit<<(*index%8);
+    d[*d_len - 1] = c;
+  }else{
+    d[*d_len] = bit;
+    (*d_len)++;
+  }
+  (*index)++;
+}
+
+
+// Append the code of d, written leaf-to-root; the decoder reads the stream
+// backwards and therefore meets the bits root-to-leaf.
+// Returns 0 on success, -1 when d has no leaf.
+static int hm_encode_byte(huffman_def *h,uint8_t d)
+{
+  huff_tree *t=NULL;
+  for(int i=0;i<h->index_table_index;i++)
+  {
+    if(h->index_table[i]->data==d){
+      t=h->index_table[i];
+      break;
+    }
+  }
+  if(t==NULL){
+    // report instead of exit(): a library must not end the process
+    DBG_WARN("can not encode.\n");
+    return -1;
+  }
+  while(t->parant){
+    hm_add_bit(h->out,&h->out_len,t->pos,&h->arr_bit_index);
+    t=t->parant;
+  }
+  return 0;
+}
+
+// Write the stream header (symbol table plus padding byte) at data[*data_len].
+static int hm_creat_index_table(huffman_def *h,uint8_t *data,int *data_len)
+{
+  // 256 entries do not fit in the length byte and are stored as 0; the
+  // decoder tells that apart from an empty stream by the stream size
+  data[*data_len] = (uint8_t)h->index_table_index; (*data_len)++;
+  hm_index_table_print(h);
+  for(int i=0;i<h->index_table_index;i++)
+  {
+    uint32_t count=h->index_table[i]->count;
+    data[*data_len] = h->index_table[i]->data; (*data_len)++;
+    // frequency: a run of 255 bytes closed by one byte below 255
+    while(count>=255){
+      data[*data_len] = 255; (*data_len)++;
+      count-=255;
+    }
+    data[*data_len] = (uint8_t)count; (*data_len)++;
+  }
+  // padding count, filled in once the payload has been written
+  data[*data_len] = 0; (*data_len)++;
+  return 0;
+}
+
+// Huffman encode.
+/*
+Stream layout
+data[0]:      number of table entries (256 is stored as 0)
+data[1~n]:    table; each entry is the byte value followed by its frequency,
+              stored as bytes that are summed up, the last one below 255
+data[n+1]:    number of padding bits at the end of the payload
+data[n+2~m]:  payload bits
+
+*out receives a calloc'd buffer owned by the caller, *out_len its size.
+Returns 0 on success, -1 on bad arguments, oversized input or out of memory.
+*/
+int hm_encode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
+{
+  int output_len;
+  int output_index = 0;
+  uint8_t *buf=NULL;
+  huffman_def *h;
+  if(out==NULL||out_len==NULL||in_len<0||(in==NULL&&in_len>0))
+    return -1;
+  *out=NULL;
+  *out_len=0;
+  h=calloc(1,sizeof(huffman_def));
+  if(h==NULL)
+    return -1;
+  if(hm_calc_count(h,in,in_len)!=0||hm_creat_tree(h)!=0)
+    goto fail;
+  output_len = hm_calc_encode_len(h);
+  if(output_len<0)
+    goto fail;
+  buf = calloc((size_t)output_len + 1, sizeof(uint8_t));
+  if(buf==NULL)
+    goto fail;
+  hm_creat_index_table(h, buf, &output_index);
+  DBG_LOG("output_len=%d\n", output_index);
+  h->out = &buf[output_index];
+  for(int i=0;i<in_len;i++){
+    if(hm_encode_byte(h,in[i])!=0)
+      goto fail;
+  }
+  DBG_LOG("bit count:%d\n", h->arr_bit_index);
+  buf[output_index-1] = (uint8_t)(h->out_len*8- h->arr_bit_index);
+  DBG_LOG("fill with 0 by:%d\n", buf[output_index - 1]);
+  DBG_LOG("lenth_in:%d,length_encode:%d\n",in_len, output_len);
+  hm_free(h);
+  (*out) = buf;
+  (*out_len) = output_len;
+  return 0;
+fail:
+  free(buf);
+  hm_free(h);
+  return -1;
+}
+
+
+// Read the stream header into index_table and locate the payload.
+// Returns the payload offset, or -1 when the header is truncated or invalid.
+static int hm_unpack_count(huffman_def *h,const uint8_t *d,int d_len)
+{
+  int num;
+  int index=1;
+  uint8_t temp;
+  // the shortest stream is the table length byte plus the padding byte
+  if(d_len<2)
+    return -1;
+  num=d[0];
+  // a longer stream with length byte 0 carries all 256 symbols
+  if(num==0&&d_len>2)
+    num=HM_SYMBOL_MAX;
+  for(int i=0;i<num;i++){
+    huff_tree *leaf;
+    if(index>=d_len)
+      return -1;
+    leaf=calloc(1,sizeof(*leaf));
+    if(leaf==NULL)
+      return -1;
+    h->index_table[i]=leaf;
+    h->index_table_index++;
+    leaf->data=d[index];index++;
+    do{
+      if(index>=d_len||leaf->count>(uint32_t)INT_MAX-0xff)
+        return -1;
+      temp= d[index];index++;
+      leaf->count+=temp;
+    }while(temp==0xff);
+  }
+  if(index>=d_len)
+    return -1;
+  temp= d[index];index++;
+  // the encoder pads with 0..7 bits, never more than the payload holds
+  if(temp>7||(d_len-index)>INT_MAX/8||(d_len-index)*8<temp)
+    return -1;
+  h->in_bit_count=(d_len -index)*8-temp;
+  h->in=&d[index];
+  printf("bitcount:%d,\n",h->in_bit_count);
+  return index;
+}
+
+
+// Value (0 or 1) of the bit at the given index, LSB of each byte first.
+static inline int hm_get_bit(const uint8_t *d,int index)
+{
+  uint8_t t=d[index/8];
+  return t&(1<<(index%8))?1:0;
+}
+
+
+// Decode one symbol, reading the payload backwards from the bit cursor.
+// Returns 0 and stores the byte in *out, or -1 when the payload runs out.
+static int hm_decode_byte(huffman_def *h,uint8_t *out)
+{
+  huff_tree *t=h->tree;
+  int bit;
+  while(t->left&&t->right){
+    if(h->arr_bit_index<=0)
+      return -1;
+    bit=hm_get_bit(h->in,h->arr_bit_index-1);
+    if(bit==t->left->pos)
+      t=t->left;
+    else
+      t=t->right;
+    h->arr_bit_index--;
+  }
+  *out=t->data;
+  return 0;
+}
+
+
+// Number of decoded bytes (sum of all frequencies), or -1 above INT_MAX.
+static int hm_calc_decode_len(huffman_def *h)
+{
+  long long sum=0;
+  for(int i=0;i<h->index_table_index;i++){
+    sum+=h->index_table[i]->count;
+  }
+  if(sum>=INT_MAX)
+    return -1;
+  DBG_LOG("data len for decode:%d\n",(int)sum);
+  return (int)sum;
+}
+
+
+// Huffman decode.
+/*
+Inverse of hm_encode.
+*out receives a calloc'd buffer owned by the caller, *out_len its size.
+Returns 0 on success, -1 on bad arguments, a damaged stream or out of memory.
+*/
+int hm_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
+{
+  int decode_len,decode_index;
+  uint8_t *decode_data=NULL;
+  huffman_def *h;
+  if(in==NULL||out==NULL||out_len==NULL)
+    return -1;
+  *out=NULL;
+  *out_len=0;
+  h=calloc(1,sizeof(huffman_def));
+  if (h == NULL) {
+    return -1;
+  }
+  if(hm_unpack_count(h,in,in_len)<0)
+    goto fail;
+  // validate the total before the weights are summed up in the tree
+  decode_len=hm_calc_decode_len(h);
+  if(decode_len<0||hm_creat_tree(h)!=0)
+    goto fail;
+  decode_data=calloc((size_t)decode_len+1,sizeof(uint8_t));
+  if(decode_data==NULL)
+    goto fail;
+  h->arr_bit_index=h->in_bit_count;
+  // symbols come out last-to-first, so fill the buffer from its end
+  for(decode_index=decode_len;decode_index>0;decode_index--){
+    if(hm_decode_byte(h,&decode_data[decode_index-1])!=0)
+      goto fail;
+  }
+  hm_free(h);
+  (*out) = decode_data;
+  (*out_len) = decode_len;
+  return 0;
+fail:
+  free(decode_data);
+  hm_free(h);
+  return -1;
+}
diff --git a/huffman_.h b/huffman_.h
new file mode 100644
index 0000000..7c60158
--- /dev/null
+++ b/huffman_.h
@@ -0,0 +1,16 @@
+
+#ifndef huffman_h__
+#define huffman_h__
+
+#include "stdint.h"
+
+// Huffman coding of byte buffers.
+
+// Compress in[0..in_len). On success *out is a calloc'd buffer that the caller
+// frees and *out_len its size. Returns 0 on success, -1 on failure.
+int hm_encode(const uint8_t *in,const int in_len,uint8_t **out,int *out_len);
+
+// Inverse of hm_encode; same ownership and return convention.
+int hm_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len);
+
+#endif