Files
c_progarm/huffman.c

425 lines
8.7 KiB
C
Raw Normal View History

2023-11-22 18:02:53 +08:00
#include "stdlib.h"
#include "stdio.h"
#include "bytearray.h"
#include "string.h"
// huffman编码的实现
#define DBG_WARN printf
#define DBG_LOG printf
typedef struct _huff_tree{
uint8_t data;
uint8_t pos;// 位置左为1右为0
uint16_t count;
struct _huff_tree *parant;
struct _huff_tree *left;
struct _huff_tree *right;
}huff_tree;
typedef struct{
huff_tree *tree;
int index_table_index;
huff_tree *index_table[256];
uint16_t count_table[256];
array_def *out;
array_def *in;
int in_bit_count;
int arr_bit_index;
}huffman_def;
static int hm_calc_value_of_tree(huff_tree *t);
// 按出现频次排序
static void hm_sort_index_table(huff_tree **table,int num)
{
for(int i=0;i<num;i++)
{
huff_tree *item=table[i];
for (int j=i;j<num;j++)
{
if(hm_calc_value_of_tree(table[j])>hm_calc_value_of_tree(item))
{
table[i]=table[j];
table[j]=item;
item=table[i];
}
}
}
}
// 打印index_table
static void hm_index_table_print(huffman_def *h){
DBG_LOG("-----index_table-----\n");
for(int i=0;i<h->index_table_index;i++){
DBG_LOG("index:%d,data:%02x,count:%d\n",i,h->index_table[i]->data,h->index_table[i]->count);
}
}
// 打印数据的编码
static void hm_data_code_print(huffman_def *h){
huff_tree *t;
DBG_LOG("------data code------\n");
for(int i=0;i<h->index_table_index;i++){
t=h->index_table[i];
DBG_LOG("%c:",t->data);
while(t->parant){
DBG_LOG("%d",t->pos);
t=t->parant;
}
DBG_LOG("\n");
}
}
static void hm_calc_count(huffman_def *h,array_def *d)
{
int num=arr_length(d);
int index;
memset(h->count_table,0,256);
// DBG_LOG("calc count_table\n");
for(int i=0;i<num;i++)
{
h->count_table[arr_get(d,i)]++;
}
// DBG_LOG("calc index_table\n");
for(int i=0;i<256;i++)
{
if(h->count_table[i]>0){
index=h->index_table_index;
h->index_table[index]=calloc(1,sizeof(huff_tree));
h->index_table[index]->count=h->count_table[i];
h->index_table[index]->data=i;
h->index_table_index++;
}
}
// DBG_LOG("sort index_table\n");
hm_sort_index_table(h->index_table,h->index_table_index);
// hm_index_table_print(h);
}
// 计算树的值
static int hm_calc_value_of_tree(huff_tree *t)
{
int sum=0;
if(t->left&&t->right)
sum=hm_calc_value_of_tree(t->left)+hm_calc_value_of_tree(t->right);
else
sum=t->count;
// DBG_LOG("tree sum:%d\n",sum);
return sum;
}
// 打印huffman树
static void hm_tree_print(huff_tree *t)
{
if(t->left&&t->right){
DBG_LOG("point:,count:%d\n",hm_calc_value_of_tree(t));
hm_tree_print(t->left);
hm_tree_print(t->right);
}else{
DBG_LOG("data:%d,count:%d\n",t->data,t->count);
}
}
// 建立huffman树
static void hm_creat_tree(huffman_def *h)
{
int tail=h->index_table_index;
huff_tree *sub1,*sub2;
huff_tree **table=calloc(tail,sizeof(huff_tree *));
for(int i=0;i<tail;i++){
table[i]=h->index_table[i];
}
while(tail>1){
huff_tree *temp;
sub1=table[tail-1];
sub2=table[tail-2];
// 大在左,小在右
temp=calloc(1,sizeof(huff_tree));
sub1->parant=temp;
sub2->parant=temp;
// 左为1右为0
if(hm_calc_value_of_tree(sub1)>hm_calc_value_of_tree(sub2)){
temp->left=sub1;
sub1->pos=1;
temp->right=sub2;
sub2->pos=0;
}else{
temp->left=sub2;
sub2->pos=1;
temp->right=sub1;
sub1->pos=0;
}
table[tail-2]=temp;
tail--;
hm_sort_index_table(table,tail);
// DBG_LOG("-----table-----\n");
// for(int i=0;i<tail;i++){
// DBG_LOG("index:%d,count:%d\n",i,hm_calc_value_of_tree(table[i]));
// }
}
h->tree=table[0];
free(table);
}
// 删除树
static void hm_del_tree(huff_tree *t)
{
if(t->left&&t->right){
hm_del_tree(t->left);
hm_del_tree(t->right);
}
free(t);
}
// 数据中添加一个bit
static void hm_add_bit(array_def *d,int bit,int *index)
{
int len=arr_length(d);
if(*index<len*8){
uint8_t c=arr_get(d,-1);
c|=bit<<(*index%8);
arr_reset(d,c,-1);
}else{
arr_append(d,bit);
}
(*index)++;
}
// 根据数据添加bit
static int hm_encode_byte(huffman_def *h,uint8_t d)
{
huff_tree *t=0;
// 这里默认一定能找到对应的值
for(int i=0;i<h->index_table_index;i++)
{
t=h->index_table[i];
if(t->data==d)
break;
}
if(t->data!=d){
DBG_WARN("can not encode.\n");
exit(-1);
}
while(t->parant){
hm_add_bit(h->out,t->pos,&h->arr_bit_index);
t=t->parant;
}
// char *str=arr_string(h->out);
// DBG_LOG("index:%d,out data:%s\n",h->arr_bit_index,str);
// free(str);
return 0;
}
// 生成索引
static array_def *hm_creat_index_table(huffman_def *h)
{
array_def *a=arr_creat();
int temp;
int diff;
arr_append(a,h->index_table_index);
hm_index_table_print(h);
for(int i=0;i<h->index_table_index;i++)
{
arr_append(a,h->index_table[i]->data);
temp=h->index_table[i]->count;
while(temp>0){
if(temp>=255) diff=255;
else diff=temp;
arr_append(a,diff);
temp-=diff;
}
}
// 填充0个数
temp=arr_length(h->out)*8-h->arr_bit_index;
arr_append(a,temp);
// char *str=arr_string(a);
// DBG_LOG("arr index table:%s\n",str);
// free(str);
return arr_temp(a);
}
// huffman编码
/*
data[0]:
data[1~n]:(1byte)(1byte,255)(2byte,255,)
data[n+1]:0
data[n+2~m]:
*/
array_def *hm_encode(array_def *data)
{
int input_len=arr_length(data);
huffman_def *h=calloc(1,sizeof(huffman_def));
array_def *ret=0;
h->out=arr_creat();
// DBG_LOG("calc count\n");
hm_calc_count(h,data);
// DBG_LOG("creat tree\n");
hm_creat_tree(h);
// DBG_LOG("encode byte\n");
for(int i=0;i<input_len;i++)
{
hm_encode_byte(h,arr_get(data,i));
}
ret=hm_creat_index_table(h);
hm_del_tree(h->tree);
arr_appends_from(ret,h->out);
arr_delete(h->out);
free(h);
DBG_LOG("lenth_in:%d,length_encode:%d\n",input_len,arr_length(ret));
return arr_temp(ret);
}
// 读取编码表,返回数据开始的位置
static int hm_unpack_count(huffman_def *h,array_def *d)
{
int num=arr_get(d,0);
int index=1;
uint8_t temp;
for(int i=0;i<num;i++)
{
h->index_table[i]=calloc(1,sizeof(huff_tree));
h->index_table[i]->data=arr_get(d,index);index++;
do{
temp=arr_get(d,index);index++;
h->index_table[i]->count+=temp;
}while(temp==0xff);
h->index_table_index++;
}
temp=arr_get(d,index);index++;
h->in_bit_count=(arr_length(d)-index)*8-temp;
h->in=arr_mid(d,index,arr_length(d)-index);
// hm_index_table_print(h);
printf("bitcount:%d,\n",h->in_bit_count);
return index;
}
// 获取指定index的bit值
static inline int hm_get_bit(array_def *d,int index)
{
uint8_t t=arr_get(d,index/8);
return t&(1<<(index%8))?1:0;
}
// 对比树节点匹配返回bit数不匹配返回0
static inline int hm_cmp_bits(huffman_def *h,huff_tree *t)
{
int count=0;
// DBG_LOG("tree pos:",t->pos);
while(t){
// DBG_LOG("%d",t->pos);
if(hm_get_bit(h->in,h->arr_bit_index+count)!=t->pos){
// DBG_LOG(" |failed\n");
return 0;
}
else{
count++;
t=t->parant;
}
}
h->arr_bit_index+=count;
// DBG_LOG(" |ok,\n");
return count;
}
static uint8_t hm_decode_byte(huffman_def *h)
{
huff_tree *t=h->tree;
int bit;
// DBG_LOG("decode:");
while(t->left&&t->right){
bit=hm_get_bit(h->in,h->arr_bit_index-1);
// DBG_LOG("%d",bit);
if(bit==t->left->pos)
t=t->left;
else
t=t->right;
h->arr_bit_index--;
}
// DBG_LOG(" | decode byte:%c\n",t->data);
return t->data;
}
static int hm_calc_decode_len(huffman_def *h)
{
int sum=0;
for(int i=0;i<h->index_table_index;i++){
sum+=h->index_table[i]->count;
}
DBG_LOG("data len for decode:%d\n",sum);
return sum;
}
// huffman解码
/*
*/
array_def *hm_decode(array_def *data)
{
huffman_def *h=calloc(1,sizeof(huffman_def));
int decode_len,decode_index;
array_def *ret=arr_creat();
uint8_t *decode_data=0;
uint8_t c;
hm_unpack_count(h,data);
hm_creat_tree(h);
// hm_data_code_print(h);
// hm_tree_print(h->tree);
decode_len=hm_calc_decode_len(h);
decode_index=decode_len;
decode_data=calloc(decode_len+1,sizeof(uint8_t));
h->arr_bit_index=h->in_bit_count;
while(h->arr_bit_index>0){
c=hm_decode_byte(h);
decode_data[decode_index-1]=c;
decode_index--;
}
DBG_LOG("del tree\n");
hm_del_tree(h->tree);
DBG_LOG("del h->in\n");
//arr_delete(h->in);
DBG_LOG("free h\n");
free(h);
// arr_appends(ret,decode_data,decode_len);
DBG_LOG("decode:%s\n",decode_data);
free(decode_data);
return arr_temp(ret);
}