添加lzw压缩算法
This commit is contained in:
15
huffman/Makefile
Normal file
15
huffman/Makefile
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
|
||||
# Toolchain and inputs: every .c file in this directory is compiled together.
CC = gcc
SRCS = $(wildcard *.c)
# Example of $(subst): expands to "to your heart". No rule below uses it.
STR = $(subst from,to,from your heart)

# Neither target names a real file, so always run their recipes.
.PHONY: all clean

# Build the demo executable from all sources in a single compiler invocation.
all:
	$(CC) $(SRCS) -o hello

# Remove build outputs: *.exe (Windows toolchains) and hello (everything else).
clean:
	rm -rf *.exe
	rm -f hello
|
104
huffman/hello.c
Normal file
104
huffman/hello.c
Normal file
@@ -0,0 +1,104 @@
|
||||
|
||||
#include "stdio.h"
#include "string.h"
// #include "coder_lib.h"
// #include "huffman.h"
#include "huffman_.h"
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
|
||||
|
||||
|
||||
// 验证管壳码算法
|
||||
// int main(int num,char *str[]){
|
||||
|
||||
|
||||
// uint8_t d_[12]={0};
|
||||
// char uid[20]={0};
|
||||
|
||||
|
||||
// coder_shell_to_uid("2023","5830628A00004",uid);
|
||||
// printf("uid=%s\r\n",uid);
|
||||
// coder_uid_to_save(uid,d_);
|
||||
|
||||
// printf("slave:, uid_pw=%02X %02X %02X %02X %02X %02X %02X %02X "
|
||||
// "%02X %02X %02X %02X ",d_[0],d_[1],d_[2],d_[3],d_[4],d_[5],d_[6],d_[7],
|
||||
// d_[8],d_[9],d_[10],d_[11]);
|
||||
|
||||
|
||||
// return 0;
|
||||
// }
|
||||
|
||||
|
||||
|
||||
// 验证huffman算法
|
||||
|
||||
// int main(int argc, char *argv[])
|
||||
// {
|
||||
// uint8_t str_in[]="2023 5830628A000005830628A000015830628A000025830628A000035830628A000045830628A000055830628A000065830628A000075830628A000085830628A00009";
|
||||
// array_def *a=arr_creat();
|
||||
// arr_appends(a,str_in,strlen(str_in));
|
||||
// array_def *out=hm_encode(a);
|
||||
// printf("endode:%s\n",arr_string(out));
|
||||
// array_def *de=hm_decode(out);
|
||||
// printf("decde:%s\n",arr_data(de));
|
||||
// return 0;
|
||||
// }
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Return the size in bytes of an open stream, leaving the stream positioned
 * where it was on entry.
 *
 * The size is obtained by seeking to the end and reading the offset there.
 * Any ftell()/fseek() failure is reported on stdout together with the
 * strerror() text; the function then returns -1.
 */
long calc_file_size(FILE *stream)
{
    long result = -1;
    const long origin = ftell(stream);   /* position to restore afterwards */

    if (origin == -1) {
        printf("ftell failed :%s\n", strerror(errno));
    } else if (fseek(stream, 0, SEEK_END) != 0) {
        printf("fseek failed: %s\n", strerror(errno));
    } else {
        /* The offset at end-of-file is the file size. */
        const long end = ftell(stream);

        if (end == -1) {
            printf("ftell failed :%s\n", strerror(errno));
        }
        /* Restore the caller's position even if the size query failed. */
        if (fseek(stream, origin, SEEK_SET) != 0) {
            printf("fseek failed: %s\n", strerror(errno));
        } else {
            result = end;
        }
    }
    return result;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Read a whole file into memory and run it through hm_encode().
 *
 * The encoded buffer is released again before returning; nothing is written
 * back to disk, so this function only exercises the encoder.
 *
 * filename: path of the file to read (opened in binary mode).
 * Returns 0 when the file was read and encoded, -1 on any failure
 * (NULL name, open/size/read error, empty or oversized file, allocation
 * failure, or a non-zero result from hm_encode()).
 */
int encode_file(const char *filename)
{
    int rc = -1;
    FILE *f = NULL;
    uint8_t *file_data = NULL;
    uint8_t *encode_data = NULL;
    int encode_size = 0;
    long file_size;

    if (filename == NULL) {
        printf("encode_file: no input file given\n");
        return -1;
    }

    f = fopen(filename, "rb");
    if (f == NULL) {
        printf("fopen failed: %s\n", strerror(errno));
        return -1;
    }

    file_size = calc_file_size(f);
    /* hm_encode() takes the length as int and needs at least one byte. */
    if (file_size <= 0 || file_size > INT_MAX - 1) {
        printf("encode_file: unsupported file size %ld\n", file_size);
        goto out;
    }

    file_data = calloc((size_t)file_size + 1, sizeof *file_data);
    if (file_data == NULL) {
        printf("encode_file: out of memory\n");
        goto out;
    }

    if (fread(file_data, 1, (size_t)file_size, f) != (size_t)file_size) {
        printf("fread failed: short read\n");
        goto out;
    }

    if (hm_encode(file_data, (int)file_size, &encode_data, &encode_size) != 0) {
        printf("encode_file: hm_encode failed\n");
        goto out;
    }
    rc = 0;

out:
    free(encode_data);
    free(file_data);
    fclose(f);
    return rc;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Program entry point: Huffman-encode the file named by the first
 * command-line argument.
 *
 * Returns 1 when no file argument was supplied, otherwise 0.
 */
int main(int argc, char *argv[])
{
    /* argv[1] is only a usable path when an argument was supplied. */
    if (argc < 2 || argv[1] == NULL) {
        printf("usage: %s <file>\n", (argc > 0 && argv[0] != NULL) ? argv[0] : "hello");
        return 1;
    }

    encode_file(argv[1]);
    // const uint8_t file_data[]="2023 5830628A000005830628A000015830628A000025830628A000035830628A000045830628A000055830628A000065830628A000075830628A000085830628A00009";
    // uint8_t *encode_data=0;
    // int encode_size;
    // hm_encode(file_data,sizeof(file_data),&encode_data,&encode_size);
    return 0;
}
|
||||
|
||||
|
||||
|
||||
|
481
huffman/huffman_.c
Normal file
481
huffman/huffman_.c
Normal file
@@ -0,0 +1,481 @@
|
||||
|
||||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
#include "string.h"
|
||||
#include "huffman_.h"
|
||||
#include <stdlib.h>
|
||||
// huffman编码的实现
|
||||
|
||||
/* Diagnostic output hooks; both currently expand to plain printf. */
#define DBG_WARN printf
#define DBG_LOG  printf
|
||||
|
||||
/*
 * One node of the Huffman tree.
 *
 * Leaves carry a byte value and its occurrence count; a node is treated as
 * internal when both child pointers are set.
 */
typedef struct _huff_tree {
    uint8_t data;               /* byte value represented by a leaf */
    uint8_t pos;                /* branch taken from the parent: 1 = left, 0 = right */
    uint32_t count;             /* number of occurrences of `data` */
    struct _huff_tree *parant;  /* parent node; NULL for a node without parent */
    struct _huff_tree *left;    /* left child */
    struct _huff_tree *right;   /* right child */
} huff_tree;
|
||||
|
||||
|
||||
|
||||
typedef struct{
|
||||
huff_tree *tree;
|
||||
uint32_t index_table_index;
|
||||
huff_tree *index_table[256];
|
||||
uint32_t count_table[256];
|
||||
uint8_t *out;
|
||||
uint32_t out_len;
|
||||
const uint8_t *in;
|
||||
uint32_t in_len;
|
||||
uint32_t in_bit_count;
|
||||
uint32_t arr_bit_index;
|
||||
/* 以下成员调试时使用 */
|
||||
uint32_t tree_point_num;// 使用的树节点个数
|
||||
}huffman_def;
|
||||
|
||||
|
||||
/* Forward declarations for helpers that are used before their definitions. */
static int hm_calc_value_of_tree(huff_tree *t);
static int hm_calc_deep_of_child(huff_tree *t);
|
||||
|
||||
|
||||
|
||||
// 生成一个树节点
|
||||
static huff_tree *hm_creat_tree_point(huffman_def *h)
|
||||
{
|
||||
h->tree_point_num++;
|
||||
return calloc(1,sizeof(huff_tree));
|
||||
}
|
||||
|
||||
// 删除一个树节点
|
||||
static void hm_del_tree_point(huffman_def *h,huff_tree *t)
|
||||
{
|
||||
if(h->tree_point_num>0){
|
||||
h->tree_point_num--;
|
||||
free(t);
|
||||
}
|
||||
}
|
||||
|
||||
// 按出现频次排序
|
||||
static void hm_sort_index_table(huff_tree **table,int num)
|
||||
{
|
||||
for(int i=0;i<num;i++)
|
||||
{
|
||||
huff_tree *item=table[i];
|
||||
for (int j=i;j<num;j++)
|
||||
{
|
||||
if(hm_calc_value_of_tree(table[j])>hm_calc_value_of_tree(item))
|
||||
{
|
||||
table[i]=table[j];
|
||||
table[j]=item;
|
||||
item=table[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// 打印index_table
|
||||
static void hm_index_table_print(huffman_def *h){
|
||||
DBG_LOG("-----index_table-----\n");
|
||||
for(int i=0;i<h->index_table_index;i++){
|
||||
DBG_LOG("index:%d,data:%02x,count:%d\n",i,h->index_table[i]->data,h->index_table[i]->count);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// 打印数据的编码
|
||||
static void hm_data_code_print(huffman_def *h){
|
||||
huff_tree *t;
|
||||
DBG_LOG("------data code------\n");
|
||||
for(int i=0;i<h->index_table_index;i++){
|
||||
t=h->index_table[i];
|
||||
DBG_LOG("%c:",t->data);
|
||||
while(t->parant){
|
||||
DBG_LOG("%d",t->pos);
|
||||
t=t->parant;
|
||||
}
|
||||
DBG_LOG("\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Count how often each byte value occurs in the input and build the leaf
 * table from it.
 *
 * h:     coder state; count_table is cleared and refilled, and one leaf per
 *        occurring byte value is appended to index_table.
 * d:     input bytes.
 * d_len: number of input bytes.
 *
 * On return index_table is sorted by count, most frequent first. Running out
 * of memory while creating a leaf terminates the process, like the other
 * unrecoverable error in this file.
 */
static void hm_calc_count(huffman_def *h, const uint8_t *d, const int d_len)
{
    /* Clear the whole table: 256 entries of uint32_t, not just 256 bytes. */
    memset(h->count_table, 0, sizeof h->count_table);

    for (int i = 0; i < d_len; i++) {
        h->count_table[d[i]]++;
    }

    for (int value = 0; value < 256; value++) {
        huff_tree *leaf;

        if (h->count_table[value] == 0) {
            continue;
        }
        leaf = hm_creat_tree_point(h);
        if (leaf == NULL) {
            DBG_WARN("out of memory.\n");
            exit(-1);
        }
        leaf->count = h->count_table[value];
        leaf->data = (uint8_t)value;
        h->index_table[h->index_table_index] = leaf;
        h->index_table_index++;
    }

    hm_sort_index_table(h->index_table, h->index_table_index);
    // hm_index_table_print(h);
}
|
||||
|
||||
// 计算编码后的长度
|
||||
// 需要先计算index_table和生成huffman树
|
||||
static int hm_calc_encode_len(huffman_def* h)
|
||||
{
|
||||
// index_table_len(1byte)+index_data(index_table_len bytes)
|
||||
int sum =1+ h->index_table_index;
|
||||
int bit_count = 0;
|
||||
huff_tree* t;
|
||||
for (int i = 0; i < h->index_table_index; i++) {
|
||||
// 计数占用的字节数
|
||||
t = h->index_table[i];
|
||||
sum += t->count/255+1;
|
||||
// 压缩后占用的bit数
|
||||
bit_count += hm_calc_deep_of_child(t) * t->count;
|
||||
}
|
||||
// 补零数目字节
|
||||
sum += 1;
|
||||
sum += (bit_count + 7) / 8;
|
||||
DBG_LOG("data len for encode:%d\n", sum);
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
// 计算树的值
|
||||
static int hm_calc_value_of_tree(huff_tree *t)
|
||||
{
|
||||
int sum=0;
|
||||
if(t->left&&t->right)
|
||||
sum=hm_calc_value_of_tree(t->left)+hm_calc_value_of_tree(t->right);
|
||||
else
|
||||
sum=t->count;
|
||||
// DBG_LOG("tree sum:%d\n",sum);
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
// 计算子节点的深度
|
||||
static int hm_calc_deep_of_child(huff_tree* t)
|
||||
{
|
||||
int deep = 0;
|
||||
while (t->parant) {
|
||||
deep++;
|
||||
t = t->parant;
|
||||
}
|
||||
return deep;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// 打印huffman树
|
||||
static void hm_tree_print(huff_tree *t)
|
||||
{
|
||||
if(t->left&&t->right){
|
||||
DBG_LOG("point:,count:%d\n",hm_calc_value_of_tree(t));
|
||||
hm_tree_print(t->left);
|
||||
hm_tree_print(t->right);
|
||||
}else{
|
||||
DBG_LOG("data:%d,count:%d\n",t->data,t->count);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// 建立huffman树
|
||||
static void hm_creat_tree(huffman_def *h)
|
||||
{
|
||||
int tail=h->index_table_index;
|
||||
huff_tree *sub1,*sub2;
|
||||
huff_tree **table=calloc(tail,sizeof(huff_tree *));
|
||||
for(int i=0;i<tail;i++){
|
||||
table[i]=h->index_table[i];
|
||||
}
|
||||
while(tail>1){
|
||||
huff_tree *temp;
|
||||
sub1=table[tail-1];
|
||||
sub2=table[tail-2];
|
||||
// 大在左,小在右
|
||||
temp=hm_creat_tree_point(h);
|
||||
sub1->parant=temp;
|
||||
sub2->parant=temp;
|
||||
// 左为1,右为0
|
||||
if(hm_calc_value_of_tree(sub1)>hm_calc_value_of_tree(sub2)){
|
||||
temp->left=sub1;
|
||||
sub1->pos=1;
|
||||
temp->right=sub2;
|
||||
sub2->pos=0;
|
||||
}else{
|
||||
temp->left=sub2;
|
||||
sub2->pos=1;
|
||||
temp->right=sub1;
|
||||
sub1->pos=0;
|
||||
}
|
||||
table[tail-2]=temp;
|
||||
tail--;
|
||||
hm_sort_index_table(table,tail);
|
||||
// DBG_LOG("-----table-----\n");
|
||||
// for(int i=0;i<tail;i++){
|
||||
// DBG_LOG("index:%d,count:%d\n",i,hm_calc_value_of_tree(table[i]));
|
||||
// }
|
||||
}
|
||||
h->tree=table[0];
|
||||
free(table);
|
||||
}
|
||||
|
||||
|
||||
// 删除树
|
||||
static void hm_del_tree(huffman_def *h,huff_tree *t)
|
||||
{
|
||||
if(t->left&&t->right){
|
||||
hm_del_tree(h,t->left);
|
||||
hm_del_tree(h,t->right);
|
||||
}
|
||||
hm_del_tree_point(h,t);
|
||||
}
|
||||
|
||||
/*
 * Append a single bit to a byte buffer.
 *
 * d:     destination buffer; bytes not yet started must be zero.
 * d_len: number of bytes in use, incremented when a new byte is started.
 * bit:   value to append (0 or 1).
 * index: running bit position, incremented on every call.
 *
 * Bits fill each byte starting at the least significant bit. No capacity
 * check is done here; the caller must supply a large enough buffer.
 */
static void hm_add_bit(uint8_t *d, int *d_len, int bit, int *index)
{
    const int pos = *index;
    const int used = *d_len;

    if (pos >= used * 8) {
        /* All started bytes are full: begin a new byte with this bit at bit 0. */
        d[used] = bit;
        *d_len = used + 1;
    } else {
        d[used - 1] |= bit << (pos % 8);
    }
    *index = pos + 1;
}
|
||||
|
||||
|
||||
// 根据数据添加bit
|
||||
static int hm_encode_byte(huffman_def *h,uint8_t d)
|
||||
{
|
||||
huff_tree *t=0;
|
||||
// 这里默认一定能找到对应的值
|
||||
for(int i=0;i<h->index_table_index;i++)
|
||||
{
|
||||
t=h->index_table[i];
|
||||
if(t->data==d)
|
||||
break;
|
||||
}
|
||||
if(t->data!=d){
|
||||
DBG_WARN("can not encode.\n");
|
||||
exit(-1);
|
||||
}
|
||||
while(t->parant){
|
||||
hm_add_bit(h->out,&h->out_len,t->pos,&h->arr_bit_index);
|
||||
t=t->parant;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 生成索引
|
||||
static int hm_creat_index_table(huffman_def *h,uint8_t *data,int *data_len)
|
||||
{
|
||||
int temp;
|
||||
int diff;
|
||||
int temp_num;
|
||||
data[*data_len] = h->index_table_index; (*data_len)++;
|
||||
// hm_index_table_print(h);
|
||||
for(int i=0;i<h->index_table_index;i++)
|
||||
{
|
||||
data[*data_len] = h->index_table[i]->data; (*data_len)++;
|
||||
temp=h->index_table[i]->count;
|
||||
temp_num = temp / 255 + 1;
|
||||
for (int i = 0; i < temp_num; i++) {
|
||||
if (i < temp_num - 1) {
|
||||
data[*data_len] = 255; (*data_len)++;
|
||||
}
|
||||
else {
|
||||
data[*data_len] = temp % 255; (*data_len)++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// 填充0个数
|
||||
temp=8-(h->arr_bit_index%8);
|
||||
//DBG_LOG("fill with 0 by:%d\n", temp);
|
||||
data[*data_len] = temp; (*data_len)++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// huffman编码
|
||||
/*
|
||||
压缩后数据格式
|
||||
data[0]:索引表长度
|
||||
data[1~n]:索引表,每个索引由值(1byte)和频次(1byte,小于255)(2byte,大于等于255,频次由两个字节相加)
|
||||
data[n+1]:数据中填充0个数
|
||||
data[n+2~m]:压缩后的数据
|
||||
|
||||
*/
|
||||
int hm_encode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
|
||||
{
|
||||
int input_len = in_len;
|
||||
int output_len=0;
|
||||
int output_index = 0;
|
||||
huffman_def *h=calloc(1,sizeof(huffman_def));
|
||||
hm_calc_count(h,in, input_len);
|
||||
hm_creat_tree(h);
|
||||
DBG_LOG("huffman tree point num:%d\n",h->tree_point_num);
|
||||
output_len = hm_calc_encode_len(h);
|
||||
(*out) = calloc(output_len + 1, sizeof(uint8_t));
|
||||
hm_creat_index_table(h, *out, &output_index);
|
||||
DBG_LOG("output_len=%d\n", output_index);
|
||||
h->out = &(*out)[output_index];
|
||||
for(int i=0;i<input_len;i++)
|
||||
{
|
||||
hm_encode_byte(h,in[i]);
|
||||
}
|
||||
DBG_LOG("bitcount:%d\n", h->arr_bit_index);
|
||||
(*out)[output_index-1] = h->out_len*8- h->arr_bit_index;
|
||||
DBG_LOG("fill with 0 by:%d\n", (*out)[output_index - 1]);
|
||||
(*out_len) = output_len;
|
||||
hm_del_tree(h,h->tree);
|
||||
DBG_LOG("after del tree point num:%d\n",h->tree_point_num);
|
||||
|
||||
DBG_LOG("lenth_in:%d,length_encode:%d\n",input_len, output_len);
|
||||
free(h);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// 读取编码表,返回数据开始的位置
|
||||
static int hm_unpack_count(huffman_def *h,const uint8_t *d,int d_len)
|
||||
{
|
||||
int num = d[0];
|
||||
int index=1;
|
||||
uint8_t temp;
|
||||
for(int i=0;i<num;i++)
|
||||
{
|
||||
h->index_table[i]=hm_creat_tree_point(h);
|
||||
h->index_table[i]->data=d[index];index++;
|
||||
do{
|
||||
temp= d[index];index++;
|
||||
h->index_table[i]->count+=temp;
|
||||
}while(temp==0xff);
|
||||
h->index_table_index++;
|
||||
}
|
||||
temp= d[index];index++;
|
||||
h->in_bit_count=(d_len -index)*8-temp;
|
||||
h->in=&d[index];
|
||||
// hm_index_table_print(h);
|
||||
printf("bitcount:%d,\n",h->in_bit_count);
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Read the bit at position `index` of a byte buffer (bit 0 is the least
 * significant bit of d[0]). Returns 0 or 1.
 */
static inline int hm_get_bit(const uint8_t *d, int index)
{
    const int byte_no = index / 8;
    const int shift = index % 8;

    return (d[byte_no] >> shift) & 1;
}
|
||||
|
||||
|
||||
// 对比树节点,匹配返回bit数,不匹配返回0
|
||||
static inline int hm_cmp_bits(huffman_def *h,huff_tree *t)
|
||||
{
|
||||
int count=0;
|
||||
// DBG_LOG("tree pos:",t->pos);
|
||||
while(t){
|
||||
// DBG_LOG("%d",t->pos);
|
||||
if(hm_get_bit(h->in,h->arr_bit_index+count)!=t->pos){
|
||||
// DBG_LOG(" |failed\n");
|
||||
return 0;
|
||||
}
|
||||
else{
|
||||
count++;
|
||||
t=t->parant;
|
||||
}
|
||||
}
|
||||
h->arr_bit_index+=count;
|
||||
// DBG_LOG(" |ok,\n");
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Decode one byte from the bit stream.
 *
 * The stream is consumed backwards: h->arr_bit_index points one past the
 * next bit to read and is decremented for every bit used. Starting at the
 * root, each bit selects the left child when it equals that child's `pos`,
 * otherwise the right child, until a node without two children is reached.
 *
 * Returns the byte value stored in that node.
 */
static uint8_t hm_decode_byte(huffman_def *h)
{
    huff_tree *node = h->tree;

    while (node->left && node->right) {
        const int bit = hm_get_bit(h->in, h->arr_bit_index - 1);

        node = (bit == node->left->pos) ? node->left : node->right;
        h->arr_bit_index--;
    }
    return node->data;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Length of the decoded data in bytes: the sum of the occurrence counts of
 * all index_table entries.
 */
static int hm_calc_decode_len(huffman_def *h)
{
    int total = 0;
    int i = 0;

    while (i < h->index_table_index) {
        total += h->index_table[i]->count;
        i++;
    }
    DBG_LOG("data len for decode:%d\n", total);
    return total;
}
|
||||
|
||||
|
||||
|
||||
// huffman解码
|
||||
/*
|
||||
*/
|
||||
int hm_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
|
||||
{
|
||||
int decode_len,decode_index;
|
||||
uint8_t *decode_data=0;
|
||||
uint8_t c;
|
||||
huffman_def *h=calloc(1,sizeof(huffman_def));
|
||||
if (h == 0) {
|
||||
return -1;
|
||||
}
|
||||
hm_unpack_count(h,in,in_len);
|
||||
hm_creat_tree(h);
|
||||
// hm_data_code_print(h);
|
||||
// hm_tree_print(h->tree);
|
||||
DBG_LOG("huffman tree point num:%d\n",h->tree_point_num);
|
||||
decode_len=hm_calc_decode_len(h);
|
||||
decode_index=decode_len;
|
||||
decode_data=calloc(decode_len+1,sizeof(uint8_t));
|
||||
h->arr_bit_index=h->in_bit_count;
|
||||
while(decode_index >0){
|
||||
c=hm_decode_byte(h);
|
||||
decode_data[decode_index-1]=c;
|
||||
decode_index--;
|
||||
}
|
||||
hm_del_tree(h,h->tree);
|
||||
DBG_LOG("after del tree point num:%d\n",h->tree_point_num);
|
||||
free(h);
|
||||
(*out) = decode_data;
|
||||
(*out_len) = decode_len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
19
huffman/huffman_.h
Normal file
19
huffman/huffman_.h
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
#ifndef huffman_h__
#define huffman_h__

#include "stdint.h"

/* Huffman coding: public interface. */

/*
 * Huffman-encode `in_len` bytes starting at `in`.
 * On success *out points to a heap buffer holding the encoded data (release
 * it with free()) and *out_len holds its length in bytes.
 * Returns 0 on success; any other value means failure.
 */
int hm_encode(const uint8_t *in, const int in_len, uint8_t **out, int *out_len);

/*
 * Decode `in_len` bytes of data produced by hm_encode().
 * On success *out points to a heap buffer holding the decoded bytes (release
 * it with free()) and *out_len holds its length in bytes.
 * Returns 0 on success; any other value means failure.
 */
int hm_decode(const uint8_t *in, const int in_len, uint8_t **out, int *out_len);

#endif
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user