Files
checker_slave/source/soft/huffman.c
2023-11-22 20:27:42 +08:00

309 lines
5.8 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "mystdlib.h"
#include "stdlib.h"
#include "bytearray.h"
//#include "list.h"
//#include "mystring.h"
#include "string.h"
#include "debug.h"
// huffman编码的实现
typedef struct _huff_tree{
uint8_t data;
uint8_t pos;// 位置左为1右为0
uint16_t count;
struct _huff_tree *parant;
struct _huff_tree *left;
struct _huff_tree *right;
}huff_tree;
typedef struct{
huff_tree *tree;
int index_table_index;
huff_tree *index_table[256];
uint16_t count_table[256];
array_def *out;
array_def *in;
int in_bit_count;
int arr_bit_index;
}huffman_def;
// 按出现频次排序
static void hm_sort_index_table(huffman_def *h)
{
for(int i=0;i<h->index_table_index;i++)
{
huff_tree *item=h->index_table[i];
for (int j=i;j<h->index_table_index;j++)
{
if(h->index_table[j]->count>item->count)
{
h->index_table[i]=h->index_table[j];
h->index_table[j]=item;
item=h->index_table[i];
}
}
}
}
static void hm_calc_count(huffman_def *h,array_def *d)
{
int num=arr_length(d);
memset(h->count_table,0,256);
for(int i=0;i<num;i++)
{
h->count_table[arr_get(d,i)]++;
}
for(int i=0;i<256;i++)
{
if(h->count_table[i]>0){
h->index_table[i]=calloc(1,sizeof(huff_tree));
h->index_table[i]->count=h->count_table[i];
h->index_table[i]->data=i;
h->index_table_index++;
}
}
hm_sort_index_table(h);
}
// 计算树的值
static int hm_calc_value_of_tree(huff_tree *t)
{
int sum=0;
if(t->left&&t->right)
sum=hm_calc_value_of_tree(t->left)+hm_calc_value_of_tree(t->right);
else
sum=t->count;
return sum;
}
// 建立huffman树
static void hm_creat_tree(huffman_def *h)
{
int tail=h->index_table_index;
huff_tree *sub=0;
h->tree=h->index_table[tail-1];
tail--;
while(tail>0){
sub=h->index_table[tail-1];
tail--;
// 大在左,小在右
huff_tree *temp=h->tree;
h->tree=calloc(1,sizeof(huff_tree));
sub->parant=h->tree;
temp->parant=h->tree;
// 左为1右为0
if(hm_calc_value_of_tree(sub)>hm_calc_value_of_tree(h->tree)){
h->tree->left=sub;
sub->pos=1;
h->tree->right=temp;
temp->pos=0;
}else{
h->tree->left=temp;
temp->pos=1;
h->tree->right=sub;
sub->pos=0;
}
}
}
// 删除树
static void hm_del_tree(huff_tree *t)
{
if(t->left&&t->right){
hm_del_tree(t->left);
hm_del_tree(t->right);
}
free(t);
}
// 数据中添加一个bit
static void hm_add_bit(array_def *d,int bit,int *index)
{
int len=arr_length(d);
if(*index<len*8){
uint8_t c=arr_get(d,-1);
c|=bit<<(*index%8);
arr_reset(d,c,-1);
}else{
arr_append(d,bit);
}
(*index)++;
}
// 根据数据添加bit
static int hm_encode_byte(huffman_def *h,uint8_t d)
{
huff_tree *t=0;
// 这里默认一定能找到对应的值
for(int i=0;i<h->index_table_index;i++)
{
t=h->index_table[i];
if(t->data==d)
break;
}
while(t){
hm_add_bit(h->out,t->pos,&h->arr_bit_index);
t=t->parant;
}
return 0;
}
// 生成索引
static array_def *hm_creat_index_table(huffman_def *h)
{
array_def *a=arr_creat();
int temp;
int diff;
arr_append(a,h->index_table_index);
for(int i=0;i<h->index_table_index;i++)
{
arr_append(a,h->index_table[i]->data);
temp=h->index_table[i]->count;
while(temp>0){
if(temp>=255) diff=255;
else diff=temp;
arr_append(a,diff);
temp-=diff;
}
}
// 填充0个数
temp=arr_length(h->out)*8-h->arr_bit_index;
arr_append(a,temp);
return arr_temp(a);
}
// huffman编码
/*
压缩后数据格式
data[0]:索引表长度
data[1~n]:索引表,每个索引由值(1byte)和频次(1byte,小于255)(2byte,大于等于255,频次由两个字节相加)
data[n+1]:数据中填充0个数
data[n+2~m]:压缩后的数据
*/
array_def *hm_encode(array_def *data)
{
int input_len=arr_length(data);
huffman_def *h=calloc(1,sizeof(huffman_def));
array_def *ret=0;
h->out=arr_creat();
hm_calc_count(h,data);
hm_creat_tree(h);
for(int i=0;i<input_len;i++)
{
hm_encode_byte(h,arr_get(data,i));
}
hm_del_tree(h->tree);
ret=hm_creat_index_table(h);
arr_appends_from(ret,h->out);
arr_delete(h->out);
free(h);
return arr_temp(ret);
}
// 读取编码表,返回数据开始的位置
static int hm_unpack_count(huffman_def *h,array_def *d)
{
int num=arr_get(d,0);
int index=1;
uint8_t temp;
for(int i=0;i<num;i++)
{
h->index_table[i]=calloc(1,sizeof(huff_tree));
h->index_table[i]->data=arr_get(d,index);index++;
do{
temp=arr_get(d,index);index++;
h->index_table[i]->count+=temp;
}while(temp==0xff);
h->index_table_index++;
}
temp=arr_get(d,index);index++;
h->in_bit_count=(arr_length(d)-index)*8-temp;
h->in=arr_mid(d,index,arr_length(d)-index);
return index;
}
// 获取指定index的bit值
static inline int hm_get_bit(array_def *d,int index)
{
uint8_t t=arr_get(d,index/8);
return t&(1<<(index%8))?1:0;
}
// 对比树节点匹配返回bit数不匹配返回0
static inline int hm_cmp_bits(huffman_def *h,huff_tree *t)
{
int count=0;
while(t){
if(hm_get_bit(h->in,h->arr_bit_index+count)!=t->pos)
return 0;
else{
count++;
t=t->parant;
}
}
h->arr_bit_index+=count;
return count;
}
static uint8_t hm_decode_byte(huffman_def *h)
{
huff_tree *t=0;
for(int i=0;i<h->index_table_index;i++){
t=h->index_table[i];
if(hm_cmp_bits(h,t)){
return t->data;
}
}
DBG_WARN("can not decode byte");
return 0;
}
// huffman解码
/*
*/
array_def *hm_decode(array_def *data)
{
huffman_def *h=calloc(1,sizeof(huffman_def));
array_def *ret=arr_creat();
uint8_t c;
hm_unpack_count(h,data);
hm_creat_tree(h);
while(h->arr_bit_index<h->in_bit_count){
c=hm_decode_byte(h);
arr_append(ret,c);
}
hm_del_tree(h->tree);
arr_delete(h->in);
free(h);
return arr_temp(ret);
}