Files
c_progarm/other/huffman.ctt
2023-12-02 11:52:15 +08:00

425 lines
8.7 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "stdlib.h"
#include "stdio.h"
#include "bytearray.h"
#include "string.h"
// huffman编码的实现
#define DBG_WARN printf
#define DBG_LOG printf
typedef struct _huff_tree{
uint8_t data;
uint8_t pos;// 位置左为1右为0
uint16_t count;
struct _huff_tree *parant;
struct _huff_tree *left;
struct _huff_tree *right;
}huff_tree;
typedef struct{
huff_tree *tree;
int index_table_index;
huff_tree *index_table[256];
uint16_t count_table[256];
array_def *out;
array_def *in;
int in_bit_count;
int arr_bit_index;
}huffman_def;
static int hm_calc_value_of_tree(huff_tree *t);
// 按出现频次排序
static void hm_sort_index_table(huff_tree **table,int num)
{
for(int i=0;i<num;i++)
{
huff_tree *item=table[i];
for (int j=i;j<num;j++)
{
if(hm_calc_value_of_tree(table[j])>hm_calc_value_of_tree(item))
{
table[i]=table[j];
table[j]=item;
item=table[i];
}
}
}
}
// 打印index_table
static void hm_index_table_print(huffman_def *h){
DBG_LOG("-----index_table-----\n");
for(int i=0;i<h->index_table_index;i++){
DBG_LOG("index:%d,data:%02x,count:%d\n",i,h->index_table[i]->data,h->index_table[i]->count);
}
}
// 打印数据的编码
static void hm_data_code_print(huffman_def *h){
huff_tree *t;
DBG_LOG("------data code------\n");
for(int i=0;i<h->index_table_index;i++){
t=h->index_table[i];
DBG_LOG("%c:",t->data);
while(t->parant){
DBG_LOG("%d",t->pos);
t=t->parant;
}
DBG_LOG("\n");
}
}
static void hm_calc_count(huffman_def *h,array_def *d)
{
int num=arr_length(d);
int index;
memset(h->count_table,0,256);
// DBG_LOG("calc count_table\n");
for(int i=0;i<num;i++)
{
h->count_table[arr_get(d,i)]++;
}
// DBG_LOG("calc index_table\n");
for(int i=0;i<256;i++)
{
if(h->count_table[i]>0){
index=h->index_table_index;
h->index_table[index]=calloc(1,sizeof(huff_tree));
h->index_table[index]->count=h->count_table[i];
h->index_table[index]->data=i;
h->index_table_index++;
}
}
// DBG_LOG("sort index_table\n");
hm_sort_index_table(h->index_table,h->index_table_index);
// hm_index_table_print(h);
}
// 计算树的值
static int hm_calc_value_of_tree(huff_tree *t)
{
int sum=0;
if(t->left&&t->right)
sum=hm_calc_value_of_tree(t->left)+hm_calc_value_of_tree(t->right);
else
sum=t->count;
// DBG_LOG("tree sum:%d\n",sum);
return sum;
}
// 打印huffman树
static void hm_tree_print(huff_tree *t)
{
if(t->left&&t->right){
DBG_LOG("point:,count:%d\n",hm_calc_value_of_tree(t));
hm_tree_print(t->left);
hm_tree_print(t->right);
}else{
DBG_LOG("data:%d,count:%d\n",t->data,t->count);
}
}
// 建立huffman树
static void hm_creat_tree(huffman_def *h)
{
int tail=h->index_table_index;
huff_tree *sub1,*sub2;
huff_tree **table=calloc(tail,sizeof(huff_tree *));
for(int i=0;i<tail;i++){
table[i]=h->index_table[i];
}
while(tail>1){
huff_tree *temp;
sub1=table[tail-1];
sub2=table[tail-2];
// 大在左,小在右
temp=calloc(1,sizeof(huff_tree));
sub1->parant=temp;
sub2->parant=temp;
// 左为1右为0
if(hm_calc_value_of_tree(sub1)>hm_calc_value_of_tree(sub2)){
temp->left=sub1;
sub1->pos=1;
temp->right=sub2;
sub2->pos=0;
}else{
temp->left=sub2;
sub2->pos=1;
temp->right=sub1;
sub1->pos=0;
}
table[tail-2]=temp;
tail--;
hm_sort_index_table(table,tail);
// DBG_LOG("-----table-----\n");
// for(int i=0;i<tail;i++){
// DBG_LOG("index:%d,count:%d\n",i,hm_calc_value_of_tree(table[i]));
// }
}
h->tree=table[0];
free(table);
}
// 删除树
static void hm_del_tree(huff_tree *t)
{
if(t->left&&t->right){
hm_del_tree(t->left);
hm_del_tree(t->right);
}
free(t);
}
// 数据中添加一个bit
static void hm_add_bit(array_def *d,int bit,int *index)
{
int len=arr_length(d);
if(*index<len*8){
uint8_t c=arr_get(d,-1);
c|=bit<<(*index%8);
arr_reset(d,c,-1);
}else{
arr_append(d,bit);
}
(*index)++;
}
// 根据数据添加bit
static int hm_encode_byte(huffman_def *h,uint8_t d)
{
huff_tree *t=0;
// 这里默认一定能找到对应的值
for(int i=0;i<h->index_table_index;i++)
{
t=h->index_table[i];
if(t->data==d)
break;
}
if(t->data!=d){
DBG_WARN("can not encode.\n");
exit(-1);
}
while(t->parant){
hm_add_bit(h->out,t->pos,&h->arr_bit_index);
t=t->parant;
}
// char *str=arr_string(h->out);
// DBG_LOG("index:%d,out data:%s\n",h->arr_bit_index,str);
// free(str);
return 0;
}
// 生成索引
static array_def *hm_creat_index_table(huffman_def *h)
{
array_def *a=arr_creat();
int temp;
int diff;
arr_append(a,h->index_table_index);
hm_index_table_print(h);
for(int i=0;i<h->index_table_index;i++)
{
arr_append(a,h->index_table[i]->data);
temp=h->index_table[i]->count;
while(temp>0){
if(temp>=255) diff=255;
else diff=temp;
arr_append(a,diff);
temp-=diff;
}
}
// 填充0个数
temp=arr_length(h->out)*8-h->arr_bit_index;
arr_append(a,temp);
// char *str=arr_string(a);
// DBG_LOG("arr index table:%s\n",str);
// free(str);
return arr_temp(a);
}
// huffman编码
/*
压缩后数据格式
data[0]:索引表长度
data[1~n]:索引表,每个索引由值(1byte)和频次(1byte,小于255)(2byte,大于等于255,频次由两个字节相加)
data[n+1]:数据中填充0个数
data[n+2~m]:压缩后的数据
*/
array_def *hm_encode(array_def *data)
{
int input_len=arr_length(data);
huffman_def *h=calloc(1,sizeof(huffman_def));
array_def *ret=0;
h->out=arr_creat();
// DBG_LOG("calc count\n");
hm_calc_count(h,data);
// DBG_LOG("creat tree\n");
hm_creat_tree(h);
// DBG_LOG("encode byte\n");
for(int i=0;i<input_len;i++)
{
hm_encode_byte(h,arr_get(data,i));
}
ret=hm_creat_index_table(h);
hm_del_tree(h->tree);
arr_appends_from(ret,h->out);
arr_delete(h->out);
free(h);
DBG_LOG("lenth_in:%d,length_encode:%d\n",input_len,arr_length(ret));
return arr_temp(ret);
}
// 读取编码表,返回数据开始的位置
static int hm_unpack_count(huffman_def *h,array_def *d)
{
int num=arr_get(d,0);
int index=1;
uint8_t temp;
for(int i=0;i<num;i++)
{
h->index_table[i]=calloc(1,sizeof(huff_tree));
h->index_table[i]->data=arr_get(d,index);index++;
do{
temp=arr_get(d,index);index++;
h->index_table[i]->count+=temp;
}while(temp==0xff);
h->index_table_index++;
}
temp=arr_get(d,index);index++;
h->in_bit_count=(arr_length(d)-index)*8-temp;
h->in=arr_mid(d,index,arr_length(d)-index);
// hm_index_table_print(h);
printf("bitcount:%d,\n",h->in_bit_count);
return index;
}
// 获取指定index的bit值
static inline int hm_get_bit(array_def *d,int index)
{
uint8_t t=arr_get(d,index/8);
return t&(1<<(index%8))?1:0;
}
// 对比树节点匹配返回bit数不匹配返回0
static inline int hm_cmp_bits(huffman_def *h,huff_tree *t)
{
int count=0;
// DBG_LOG("tree pos:",t->pos);
while(t){
// DBG_LOG("%d",t->pos);
if(hm_get_bit(h->in,h->arr_bit_index+count)!=t->pos){
// DBG_LOG(" |failed\n");
return 0;
}
else{
count++;
t=t->parant;
}
}
h->arr_bit_index+=count;
// DBG_LOG(" |ok,\n");
return count;
}
static uint8_t hm_decode_byte(huffman_def *h)
{
huff_tree *t=h->tree;
int bit;
// DBG_LOG("decode:");
while(t->left&&t->right){
bit=hm_get_bit(h->in,h->arr_bit_index-1);
// DBG_LOG("%d",bit);
if(bit==t->left->pos)
t=t->left;
else
t=t->right;
h->arr_bit_index--;
}
// DBG_LOG(" | decode byte:%c\n",t->data);
return t->data;
}
static int hm_calc_decode_len(huffman_def *h)
{
int sum=0;
for(int i=0;i<h->index_table_index;i++){
sum+=h->index_table[i]->count;
}
DBG_LOG("data len for decode:%d\n",sum);
return sum;
}
// huffman解码
/*
*/
array_def *hm_decode(array_def *data)
{
huffman_def *h=calloc(1,sizeof(huffman_def));
int decode_len,decode_index;
array_def *ret=arr_creat();
uint8_t *decode_data=0;
uint8_t c;
hm_unpack_count(h,data);
hm_creat_tree(h);
// hm_data_code_print(h);
// hm_tree_print(h->tree);
decode_len=hm_calc_decode_len(h);
decode_index=decode_len;
decode_data=calloc(decode_len+1,sizeof(uint8_t));
h->arr_bit_index=h->in_bit_count;
while(h->arr_bit_index>0){
c=hm_decode_byte(h);
decode_data[decode_index-1]=c;
decode_index--;
}
DBG_LOG("del tree\n");
hm_del_tree(h->tree);
DBG_LOG("del h->in\n");
//arr_delete(h->in);
DBG_LOG("free h\n");
free(h);
// arr_appends(ret,decode_data,decode_len);
DBG_LOG("decode:%s\n",decode_data);
free(decode_data);
return arr_temp(ret);
}