Files
c_progarm/huffman/huffman_.c
2023-12-02 11:52:15 +08:00

482 lines
10 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "stdlib.h"
#include "stdio.h"
#include "string.h"
#include "huffman_.h"
#include <limits.h>
#include <stdlib.h>
// Huffman coding implementation
// Debug output hooks: warnings and log lines are both routed to printf.
#define DBG_WARN printf
#define DBG_LOG printf
/* One node of the Huffman tree. Leaves carry a symbol; internal nodes only link children. */
struct _huff_tree;
typedef struct _huff_tree huff_tree;

struct _huff_tree {
    uint8_t data;       /* symbol (byte value) stored in a leaf */
    uint8_t pos;        /* branch bit relative to the parent: left is 1, right is 0 */
    uint32_t count;     /* number of occurrences of the symbol */
    huff_tree *parant;  /* parent node, null for a node that has not been linked under one */
    huff_tree *left;    /* left child */
    huff_tree *right;   /* right child */
};
// Working state shared by the encoder and the decoder for one hm_encode()/hm_decode() call.
typedef struct{
huff_tree *tree;// root of the Huffman tree (set by hm_creat_tree)
uint32_t index_table_index;// number of entries in use in index_table
huff_tree *index_table[256];// leaf nodes, one per distinct byte value
uint32_t count_table[256];// occurrence count of every byte value (filled while encoding)
uint8_t *out;// encoder: start of the payload area inside the output buffer
uint32_t out_len;// encoder: number of payload bytes written so far
const uint8_t *in;// decoder: start of the payload inside the encoded input
uint32_t in_len;// NOTE(review): not referenced by the code visible in this file
uint32_t in_bit_count;// decoder: number of valid payload bits
uint32_t arr_bit_index;// bit cursor: next bit to write when encoding, bits still unread when decoding
/* The members below are used for debugging */
uint32_t tree_point_num;// number of tree nodes currently allocated
}huffman_def;
// Forward declarations: both helpers are called before their definitions further down.
static int hm_calc_value_of_tree(huff_tree *t);
static int hm_calc_deep_of_child(huff_tree* t);
// Allocate one zero-initialised tree node.
// The debug node counter in h is incremented before the allocation is attempted.
// Returns the new node, or the null pointer calloc returned on failure.
static huff_tree *hm_creat_tree_point(huffman_def *h)
{
    huff_tree *node;

    h->tree_point_num += 1;
    node = calloc(1, sizeof(huff_tree));
    return node;
}
// Release one tree node and decrement the debug node counter.
// Nothing is freed when the counter is already zero.
static void hm_del_tree_point(huffman_def *h,huff_tree *t)
{
    if (h->tree_point_num == 0) {
        return;
    }
    h->tree_point_num -= 1;
    free(t);
}
// Sort the first num entries of table by descending tree value (occurrence count).
// For each slot, every later entry with a strictly larger value is swapped into it.
static void hm_sort_index_table(huff_tree **table,int num)
{
    int slot = 0;

    while (slot < num) {
        huff_tree *best = table[slot];
        int probe;

        for (probe = slot; probe < num; ++probe) {
            huff_tree *candidate = table[probe];

            if (hm_calc_value_of_tree(candidate) <= hm_calc_value_of_tree(best)) {
                continue;
            }
            /* candidate is heavier: exchange it with the current slot holder */
            table[slot] = candidate;
            table[probe] = best;
            best = candidate;
        }
        ++slot;
    }
}
// Print every index_table entry: its position, byte value (hex) and occurrence count.
static void hm_index_table_print(huffman_def *h)
{
    int i = 0;

    DBG_LOG("-----index_table-----\n");
    while (i < h->index_table_index) {
        huff_tree *entry = h->index_table[i];

        DBG_LOG("index:%d,data:%02x,count:%d\n", i, entry->data, entry->count);
        i++;
    }
}
// Print the code of every symbol in index_table.
// The bits are listed in leaf-to-root order, i.e. the branch bit of each node
// met while climbing from the symbol's leaf towards the root.
static void hm_data_code_print(huffman_def *h)
{
    int i;

    DBG_LOG("------data code------\n");
    for (i = 0; i < h->index_table_index; i++) {
        huff_tree *node = h->index_table[i];

        DBG_LOG("%c:", node->data);
        for (; node->parant; node = node->parant) {
            DBG_LOG("%d", node->pos);
        }
        DBG_LOG("\n");
    }
}
// Count how often each byte value occurs in d[0..d_len-1] and build the index table.
// h     : codec state; count_table is rewritten, one leaf per distinct byte value is
//         appended to index_table, and index_table is then sorted by descending count
// d     : input bytes
// d_len : number of input bytes
static void hm_calc_count(huffman_def *h,const uint8_t *d,const int d_len)
{
    // Clear the whole array: the size is in bytes, not in elements.
    memset(h->count_table, 0, sizeof(h->count_table));
    for (int i = 0; i < d_len; i++) {
        h->count_table[d[i]]++;
    }
    // One leaf for every byte value that actually occurs.
    for (int value = 0; value < 256; value++) {
        huff_tree *leaf;

        if (h->count_table[value] == 0) {
            continue;
        }
        leaf = hm_creat_tree_point(h);
        leaf->count = h->count_table[value];
        leaf->data = (uint8_t)value;
        h->index_table[h->index_table_index] = leaf;
        h->index_table_index++;
    }
    hm_sort_index_table(h->index_table, h->index_table_index);
}
// Compute the size in bytes of the encoded output.
// The index table and the Huffman tree must already have been built.
static int hm_calc_encode_len(huffman_def* h)
{
    int payload_bits = 0;
    // table-length byte plus one symbol byte per table entry
    int total = 1 + h->index_table_index;
    int i = 0;

    while (i < h->index_table_index) {
        huff_tree *leaf = h->index_table[i];

        // bytes needed to store this symbol's occurrence count
        total += leaf->count / 255 + 1;
        // bits this symbol contributes to the compressed payload
        payload_bits += hm_calc_deep_of_child(leaf) * leaf->count;
        i++;
    }
    // one byte holding the number of padding bits
    total += 1;
    // payload, rounded up to whole bytes
    total += (payload_bits + 7) / 8;
    DBG_LOG("data len for encode:%d\n", total);
    return total;
}
// Weight of a (sub)tree: a node lacking either child reports its own count,
// a node with both children reports the sum of the children's weights.
static int hm_calc_value_of_tree(huff_tree *t)
{
    if (!(t->left && t->right)) {
        return t->count;
    }
    return hm_calc_value_of_tree(t->left) + hm_calc_value_of_tree(t->right);
}
// Depth of a node: the number of parent links between it and the root.
static int hm_calc_deep_of_child(huff_tree* t)
{
    int levels;

    for (levels = 0; t->parant; t = t->parant) {
        levels++;
    }
    return levels;
}
// Print the tree in pre-order: a node with both children prints its total weight
// and then its left and right subtrees; any other node prints its symbol and count.
static void hm_tree_print(huff_tree *t)
{
    if (!(t->left && t->right)) {
        DBG_LOG("data:%d,count:%d\n", t->data, t->count);
        return;
    }
    DBG_LOG("point:,count:%d\n", hm_calc_value_of_tree(t));
    hm_tree_print(t->left);
    hm_tree_print(t->right);
}
// Build the Huffman tree from the leaves in h->index_table and store its root in h->tree.
// The two lightest subtrees (the last two entries of a table kept sorted by descending
// weight) are merged repeatedly until one root remains.
// h->tree is set to NULL when the index table is empty or the scratch table cannot be
// allocated.
// NOTE(review): a failed allocation of a merge node inside the loop is still not handled.
static void hm_creat_tree(huffman_def *h)
{
    int tail = h->index_table_index;
    huff_tree *sub1, *sub2;
    huff_tree **table;

    h->tree = NULL;
    if (tail <= 0) {
        // Nothing to merge; reading table[0] of a zero-sized allocation would be invalid.
        return;
    }
    table = calloc(tail, sizeof(huff_tree *));
    if (table == NULL) {
        return;
    }
    for (int i = 0; i < tail; i++) {
        table[i] = h->index_table[i];
    }
    while (tail > 1) {
        huff_tree *temp;

        sub1 = table[tail - 1];
        sub2 = table[tail - 2];
        temp = hm_creat_tree_point(h);
        sub1->parant = temp;
        sub2->parant = temp;
        // The heavier subtree goes left (branch bit 1), the lighter one right (branch bit 0).
        if (hm_calc_value_of_tree(sub1) > hm_calc_value_of_tree(sub2)) {
            temp->left = sub1;
            sub1->pos = 1;
            temp->right = sub2;
            sub2->pos = 0;
        } else {
            temp->left = sub2;
            sub2->pos = 1;
            temp->right = sub1;
            sub1->pos = 0;
        }
        // The merged node replaces its two children; re-sort the shortened table.
        table[tail - 2] = temp;
        tail--;
        hm_sort_index_table(table, tail);
    }
    h->tree = table[0];
    free(table);
}
// Free a whole (sub)tree: children first, then the node itself.
// h : codec state whose debug node counter is updated for every freed node
// t : root of the subtree to free; NULL is accepted and ignored
static void hm_del_tree(huffman_def *h,huff_tree *t)
{
    if (t == NULL) {
        return;
    }
    if (t->left && t->right) {
        hm_del_tree(h, t->left);
        hm_del_tree(h, t->right);
    }
    hm_del_tree_point(h, t);
}
// Append one bit to a byte buffer, filling each byte from its least significant bit.
// d     : destination buffer (bytes not yet used are expected to be zero)
// d_len : number of bytes in use; incremented when a new byte is started
// bit   : bit value to append
// index : total number of bits written so far; incremented by one
static void hm_add_bit(uint8_t *d,int *d_len,int bit,int *index)
{
    const int used_bits = (*d_len) * 8;

    if (*index >= used_bits) {
        /* all used bytes are full: the bit opens a new byte */
        d[*d_len] = bit;
        *d_len += 1;
    } else {
        /* room left in the last used byte */
        uint8_t *last = &d[*d_len - 1];

        *last |= bit << (*index % 8);
    }
    *index += 1;
}
// Append the code of byte d to the output bit stream.
// The bits are emitted in leaf-to-root order (the branch bit of each node met while
// climbing from the symbol's leaf to the root).
// Terminates the process with exit(-1) when d has no entry in the index table.
// Always returns 0.
static int hm_encode_byte(huffman_def *h,uint8_t d)
{
    huff_tree *leaf = NULL;
    int out_len;
    int bit_index;

    for (uint32_t i = 0; i < h->index_table_index; i++) {
        if (h->index_table[i]->data == d) {
            leaf = h->index_table[i];
            break;
        }
    }
    // An empty table leaves leaf NULL, so test the pointer rather than its contents.
    if (leaf == NULL) {
        DBG_WARN("can not encode.\n");
        exit(-1);
    }
    // hm_add_bit works on int counters while the state stores uint32_t;
    // go through locals instead of passing pointers of the wrong type.
    out_len = (int)h->out_len;
    bit_index = (int)h->arr_bit_index;
    for (huff_tree *t = leaf; t->parant; t = t->parant) {
        hm_add_bit(h->out, &out_len, t->pos, &bit_index);
    }
    h->out_len = (uint32_t)out_len;
    h->arr_bit_index = (uint32_t)bit_index;
    return 0;
}
// Write the header of the encoded stream into data, advancing *data_len past every byte.
// Layout: table length, then for each entry the symbol byte followed by its count
// (a run of 255-valued bytes and one final remainder byte), then one padding-count byte.
// Always returns 0.
static int hm_creat_index_table(huffman_def *h,uint8_t *data,int *data_len)
{
    int entry;

    // number of table entries
    data[(*data_len)++] = h->index_table_index;
    for (entry = 0; entry < h->index_table_index; entry++) {
        const huff_tree *leaf = h->index_table[entry];
        int count = leaf->count;
        int chunks = count / 255 + 1;
        int k;

        data[(*data_len)++] = leaf->data;
        // every chunk but the last is 255; the last one carries the remainder
        for (k = 0; k < chunks; k++) {
            data[(*data_len)++] = (k < chunks - 1) ? 255 : count % 255;
        }
    }
    // padding-count byte, derived from the current bit cursor
    data[(*data_len)++] = 8 - (h->arr_bit_index % 8);
    return 0;
}
// huffman编码
/*
压缩后数据格式
data[0]:索引表长度
data[1~n]:索引表,每个索引由值(1byte)和频次(1byte,小于255)(2byte,大于等于255,频次由两个字节相加)
data[n+1]:数据中填充0个数
data[n+2~m]:压缩后的数据
*/
int hm_encode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
{
int input_len = in_len;
int output_len=0;
int output_index = 0;
huffman_def *h=calloc(1,sizeof(huffman_def));
hm_calc_count(h,in, input_len);
hm_creat_tree(h);
DBG_LOG("huffman tree point num:%d\n",h->tree_point_num);
output_len = hm_calc_encode_len(h);
(*out) = calloc(output_len + 1, sizeof(uint8_t));
hm_creat_index_table(h, *out, &output_index);
DBG_LOG("output_len=%d\n", output_index);
h->out = &(*out)[output_index];
for(int i=0;i<input_len;i++)
{
hm_encode_byte(h,in[i]);
}
DBG_LOG("bitcount:%d\n", h->arr_bit_index);
(*out)[output_index-1] = h->out_len*8- h->arr_bit_index;
DBG_LOG("fill with 0 by:%d\n", (*out)[output_index - 1]);
(*out_len) = output_len;
hm_del_tree(h,h->tree);
DBG_LOG("after del tree point num:%d\n",h->tree_point_num);
DBG_LOG("lenth_in:%d,length_encode:%d\n",input_len, output_len);
free(h);
return 0;
}
// Read the index table from the encoded buffer d and return the offset at which
// the payload starts.
// One leaf is created per table entry; its count is the sum of the count bytes, which
// run until the first byte different from 0xff. The padding byte that follows the
// table is used to set h->in_bit_count, and h->in is pointed at the payload.
// No bounds checks are made against d_len while the table is read.
static int hm_unpack_count(huffman_def *h,const uint8_t *d,int d_len)
{
    const int entries = d[0];
    int pos = 1;
    int n = 0;
    uint8_t byte;

    while (n < entries) {
        huff_tree *leaf = hm_creat_tree_point(h);

        h->index_table[n] = leaf;
        leaf->data = d[pos++];
        for (;;) {
            byte = d[pos++];
            leaf->count += byte;
            if (byte != 0xff) {
                break;
            }
        }
        h->index_table_index++;
        n++;
    }
    // padding-count byte
    byte = d[pos++];
    h->in_bit_count = (d_len - pos) * 8 - byte;
    h->in = &d[pos];
    printf("bitcount:%d,\n",h->in_bit_count);
    return pos;
}
// Return the bit (0 or 1) at position index of buffer d.
// Bit 0 is the least significant bit of d[0].
static inline int hm_get_bit(const uint8_t *d,int index)
{
    const int byte_pos = index / 8;
    const int bit_pos = index % 8;

    return (d[byte_pos] >> bit_pos) & 1;
}
// Compare the input bits starting at the current bit cursor with the branch bits on
// the path from node t up to and including the root.
// On a full match the cursor is advanced and the number of matched bits is returned;
// on the first mismatch 0 is returned and the cursor is left untouched.
// NOTE(review): no caller of this helper appears in the visible part of the file.
static inline int hm_cmp_bits(huffman_def *h,huff_tree *t)
{
    int matched = 0;

    for (; t; t = t->parant) {
        if (hm_get_bit(h->in, h->arr_bit_index + matched) != t->pos) {
            return 0;
        }
        matched++;
    }
    h->arr_bit_index += matched;
    return matched;
}
// Decode one symbol by walking from the root down to a leaf.
// Bits are consumed backwards: the bit just below the cursor h->arr_bit_index is read
// and the cursor is then decremented.
// Returns the symbol of the leaf that was reached. If the input bits run out before a
// leaf is reached, a warning is logged and the data field of the node reached so far
// is returned instead of reading in front of the buffer.
static uint8_t hm_decode_byte(huffman_def *h)
{
    huff_tree *t = h->tree;

    while (t->left && t->right) {
        int bit;

        if (h->arr_bit_index == 0) {
            // arr_bit_index - 1 would wrap around and yield a negative bit index.
            DBG_WARN("can not decode.\n");
            break;
        }
        bit = hm_get_bit(h->in, h->arr_bit_index - 1);
        if (bit == t->left->pos) {
            t = t->left;
        } else {
            t = t->right;
        }
        h->arr_bit_index--;
    }
    return t->data;
}
// Length of the decoded data: the sum of the occurrence counts of all table entries.
static int hm_calc_decode_len(huffman_def *h)
{
    int total = 0;
    int i = 0;

    while (i < h->index_table_index) {
        total += h->index_table[i]->count;
        i++;
    }
    DBG_LOG("data len for decode:%d\n",total);
    return total;
}
// huffman解码
/*
*/
int hm_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
{
int decode_len,decode_index;
uint8_t *decode_data=0;
uint8_t c;
huffman_def *h=calloc(1,sizeof(huffman_def));
if (h == 0) {
return -1;
}
hm_unpack_count(h,in,in_len);
hm_creat_tree(h);
// hm_data_code_print(h);
// hm_tree_print(h->tree);
DBG_LOG("huffman tree point num:%d\n",h->tree_point_num);
decode_len=hm_calc_decode_len(h);
decode_index=decode_len;
decode_data=calloc(decode_len+1,sizeof(uint8_t));
h->arr_bit_index=h->in_bit_count;
while(decode_index >0){
c=hm_decode_byte(h);
decode_data[decode_index-1]=c;
decode_index--;
}
hm_del_tree(h,h->tree);
DBG_LOG("after del tree point num:%d\n",h->tree_point_num);
free(h);
(*out) = decode_data;
(*out_len) = decode_len;
return 0;
}