添加lzw压缩算法

This commit is contained in:
ranchuan
2023-12-02 11:52:15 +08:00
parent 9e46a19283
commit a556d0a141
31 changed files with 1593 additions and 2 deletions

15
zl77/Makefile Normal file
View File

@@ -0,0 +1,15 @@
# Build script for the zl77 test program.
# NOTE(review): recipe lines must start with a TAB in a real Makefile; the
# leading whitespace appears to have been lost in this view - confirm.
# Compiler used by the rules below.
CC = gcc
# SRCS = $(wildcard *.c)
# Sources: the zl77 codec plus the huffman implementation from the sibling directory.
SRCS = zl77.c ../huffman/huffman_.c
# $(subst from,to,text) demo; expands to "to your heart". Not referenced by any rule here.
STR = $(subst from,to,from your heart)
# Default target: compile and link every source into an executable named "hello".
all:
$(CC) $(SRCS) -o hello
# Remove files matching *.exe.
# NOTE(review): this only removes the build output where the toolchain appends
# ".exe" to "hello" (presumably Windows/MinGW) - confirm.
clean:
rm -rf *.exe

362
zl77/zl77.c Normal file
View File

@@ -0,0 +1,362 @@
#include "zl77.h"
#include <limits.h>
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "../huffman/huffman_.h"
// Implementation of the zl77 algorithm
// Debug output macros; both expand directly to printf.
#define DBG_WARN printf
#define DBG_LOG printf
// Step size of the data buffer: the number of bytes held by each buff_item node
#define LZ77_BUFF_STEP_SIZE 10
// One node of the doubly linked list that backs a buff_def.
// Each node stores LZ77_BUFF_STEP_SIZE bytes of payload.
typedef struct _buff_item{
uint8_t data[LZ77_BUFF_STEP_SIZE];// payload bytes stored in this node
struct _buff_item *next;// following node, 0 at the tail
struct _buff_item *prev;// preceding node, 0 at the head
}buff_item;
// Growable byte/bit buffer built from a chain of buff_item nodes.
typedef struct _buff_def{
buff_item *current;// cursor: the node touched by the most recent access
buff_item *head;// first node of the chain, 0 while the buffer is empty
int used;// number of bytes stored so far
int all;// total capacity in bytes; grows by LZ77_BUFF_STEP_SIZE per node
int current_index;// byte index at which the `current` node starts
int bit_used;// number of bits written through zl77_buff_append_bit
}buff_def;
// Working state of one encode run.
typedef struct _zl77_def
{
int dict_len;// dictionary length (how far back zl77_cmp searches)
int tran_len;// conversion-area length (set in zl77_creat; not read in the code shown here)
int index;// window position
buff_def buff_chars;// literal character stream
buff_def buff_pos;// reference (pos,len) stream
buff_def buff_bits;// token-type flag stream (1 = character, 0 = reference)
const uint8_t *in;// input data being compressed
int in_len;// number of bytes available at `in`
uint8_t cmp_pos;// distance (pos) of the match that was found
uint8_t cmp_len;// length of the match that was found
uint8_t cmp_skip;// distance the window moves after this token
}zl77_def;
// Forward declarations of the buffer helpers, so that they can be called
// before their definitions further down in this file.
uint8_t zl77_buff_get_byte(buff_def *buff,int index);
void zl77_buff_set_byte(buff_def *buff,int index,uint8_t d);
void zl77_buff_append_bit(buff_def *buff,int bit);
void zl77_buff_append_byte(buff_def *buff, const uint8_t d);
int zl77_buff_get_bit(buff_def *buff, int index);
// Allocate a zero-initialised codec state and apply the default window sizes.
// Returns the new state (release it with free()), or NULL when allocation fails.
zl77_def *zl77_creat(void)
{
    zl77_def *z = calloc(1, sizeof *z);
    if (z == NULL) {
        return NULL;
    }
    // Must stay <= 15: zl77_encode packs the match distance into 4 bits.
    z->dict_len = 5;
    z->tran_len = 3;
    // The original fell off the end without returning, handing callers an
    // indeterminate pointer.
    return z;
}
// Release every node of the buffer and reset the descriptor to the empty state.
// The buff_def itself is not freed (it is embedded in zl77_def). Resetting the
// fields means a second call, or re-use of the buffer, never touches freed nodes.
void zl77_del_buff(buff_def *buff)
{
    if (buff == NULL) {
        return;
    }
    buff_item *node = buff->head;
    while (node) {
        buff_item *following = node->next; // read the link before the node goes away
        free(node);
        node = following;
    }
    buff->head = NULL;
    buff->current = NULL;
    buff->used = 0;
    buff->all = 0;
    buff->current_index = 0;
    buff->bit_used = 0;
}
// Append one byte to the end of the buffer, linking a new zeroed node onto the
// chain when the current capacity is exhausted.
void zl77_buff_append_byte(buff_def *buff, const uint8_t d)
{
    if (buff->used >= buff->all) {
        // Find the last node of the chain (stays 0 for an empty buffer).
        buff_item *tail = 0;
        for (buff_item *it = buff->head; it; it = it->next) {
            tail = it;
        }
        buff_item *node = calloc(1, sizeof(buff_item));
        if (tail) {
            tail->next = node;
            node->prev = tail;
        } else {
            buff->head = node;
        }
        buff->all += LZ77_BUFF_STEP_SIZE;
        // The new node is where the next byte goes, so park the cursor on it.
        buff->current = node;
        buff->current_index = buff->used;
    }
    // An earlier read may have left the cursor on an older node; walk it
    // forward until it reaches the node containing the write position.
    const int target_node = buff->used / LZ77_BUFF_STEP_SIZE;
    while (target_node > (buff->current_index / LZ77_BUFF_STEP_SIZE)) {
        buff->current = buff->current->next;
        buff->current_index += LZ77_BUFF_STEP_SIZE;
    }
    const int slot = buff->used % LZ77_BUFF_STEP_SIZE;
    buff->current->data[slot] = d;
    buff->used += 1;
}
// Append one bit to the buffer. Bits are packed least-significant-bit first
// into consecutive bytes.
// bit: zero appends a 0, any non-zero value appends a 1.
void zl77_buff_append_bit(buff_def *buff,int bit)
{
    const int byte_index = buff->bit_used / 8;
    if (byte_index >= buff->used) {
        // First bit of a new byte: reserve it (zero-filled).
        zl77_buff_append_byte(buff, 0);
    }
    uint8_t d = zl77_buff_get_byte(buff, byte_index);
    if (bit) {
        // Normalise to a single bit so values other than 0/1 cannot spill
        // into neighbouring bit positions.
        d |= (uint8_t)(1u << (buff->bit_used % 8));
    }
    // Write back to the byte that was read, not to "the last byte".
    zl77_buff_set_byte(buff, byte_index, d);
    buff->bit_used++;
}
// Move the buffer's cursor (current / current_index) onto the node that
// contains byte `index`, following next/prev links one node at a time.
static void zl77_buff_adjust_current(buff_def *buff,int index){
    const int wanted = index / LZ77_BUFF_STEP_SIZE;
    // Cursor is before the wanted node: step forward.
    for (; wanted > (buff->current_index / LZ77_BUFF_STEP_SIZE);
         buff->current_index += LZ77_BUFF_STEP_SIZE) {
        buff->current = buff->current->next;
    }
    // Cursor is past the wanted node: step backward.
    for (; wanted < (buff->current_index / LZ77_BUFF_STEP_SIZE);
         buff->current_index -= LZ77_BUFF_STEP_SIZE) {
        buff->current = buff->current->prev;
    }
}
// Read the byte at `index`. A negative index counts back from the end
// (-1 is the last stored byte). Returns 0 when the position is out of range.
uint8_t zl77_buff_get_byte(buff_def *buff,int index){
    const int at = (index < 0) ? (buff->used + index) : index;
    if (at < 0 || at >= buff->used) {
        return 0;
    }
    zl77_buff_adjust_current(buff, at);
    return buff->current->data[at % LZ77_BUFF_STEP_SIZE];
}
// Overwrite the byte at `index` with `d`. A negative index counts back from
// the end (-1 is the last stored byte). Out-of-range positions are ignored.
void zl77_buff_set_byte(buff_def *buff,int index,uint8_t d){
    const int at = (index < 0) ? (buff->used + index) : index;
    if (at < 0 || at >= buff->used) {
        return;
    }
    zl77_buff_adjust_current(buff, at);
    buff->current->data[at % LZ77_BUFF_STEP_SIZE] = d;
}
// Read bit number `index` (least-significant bit of each byte first).
// Returns 1 when the bit is set, otherwise 0.
int zl77_buff_get_bit(buff_def *buff, int index){
    const uint8_t holder = zl77_buff_get_byte(buff, index / 8);
    const int mask = 1 << (index % 8);
    if (holder & mask) {
        return 1;
    }
    return 0;
}
// Dump every stored byte of the buffer as two-digit hex through DBG_LOG.
void zl77_buff_print(buff_def *buff)
{
    int i = 0;
    DBG_LOG("buff:[");
    while (i < buff->used) {
        DBG_LOG("%02x ",zl77_buff_get_byte(buff, i));
        i++;
    }
    DBG_LOG("]\n");
}
// Fetch input byte `index`; positions outside [0, in_len) read as 0.
static uint8_t zl77_get_char(zl77_def *z,int index)
{
    const int inside = (index >= 0) && (index < z->in_len);
    return inside ? z->in[index] : 0;
}
// Search the dictionary (the dict_len bytes preceding `index`) for the longest
// run that matches the input starting at `index`.
//
// The caller must zero z->cmp_len and z->cmp_pos before the call; the best
// match is accumulated into them. z->cmp_skip receives the number of input
// bytes the resulting token consumes.
//
// Returns 0 when a match was found (emit a reference), 1 when none was found
// (emit the raw byte).
//
// Candidates are confined to real input bytes. zl77_get_char reads positions
// outside the input as 0, so without these bounds a zero byte could "match"
// data before the start of the input or past its end, producing references the
// decoder cannot resolve.
static int zl77_cmp(zl77_def *z,int index){
    int found = 0;
    for (int dist = z->dict_len; dist > 0; dist--) {
        if (dist > index) {
            continue; // candidate would start before the first input byte
        }
        uint8_t len = 0;
        // A match is at most `dist` bytes (source and target never overlap)
        // and never runs past the end of the input.
        for (int j = 0; j < dist && index + j < z->in_len; j++) {
            if (zl77_get_char(z, index - dist + j) != zl77_get_char(z, index + j)) {
                break;
            }
            len++;
            if (len > z->cmp_len) {
                z->cmp_len = len;
                z->cmp_pos = (uint8_t)dist;
            }
        }
        if (len > 0) {
            found = 1;
        }
    }
    if (!found) {
        z->cmp_skip = 1;
        return 1;
    }
    z->cmp_skip = z->cmp_len;
    return 0;
}
// Store `value` as four little-endian bytes at data[*index] and advance
// *index by 4.
static inline void zl77_append_u32(uint8_t *data,int *index,uint32_t value){
    for (int shift = 0; shift < 32; shift += 8) {
        data[*index] = (uint8_t)((value >> shift) & 0xff);
        *index += 1;
    }
}
// Read a little-endian 32-bit value from data[index .. index+3].
static inline uint32_t zl77_get_u32(const uint8_t *data,int index){
    uint32_t ret = 0;
    for (int i = 0; i < 4; i++) {
        // Widen before shifting: a uint8_t promotes to signed int, and
        // shifting a byte >= 0x80 left by 24 would overflow it (undefined).
        ret |= (uint32_t)data[index + i] << (8 * i);
    }
    return ret;
}
// Compress in_len bytes from `in` into a newly allocated buffer.
//
// Output layout (integers are little-endian u32):
//   bytes  0..3   size of the literal stream
//   bytes  4..7   size of the reference stream
//   bytes  8..11  size of the flag-bit stream, in bytes
//   bytes 12..15  uncompressed size
//   then the literal bytes, the reference bytes ((pos << 4) | len, one byte
//   each) and the flag bits (1 = literal, 0 = reference, LSB first).
//
// in, in_len : data to compress (in may be NULL only when in_len is 0)
// out        : on success receives the calloc'ed result
// out_len    : on success receives the size of *out in bytes
// Returns 0 on success. Returns -1 on invalid arguments, allocation failure or
// an inconsistent match; *out and *out_len are left untouched in that case.
int zl77_encode(const uint8_t *in,const int in_len,uint8_t **out,int *out_len)
{
    int rc = -1;
    int index = 0;
    int total = 0;
    uint32_t size_chars = 0;
    uint32_t size_pos = 0;
    uint32_t size_bits = 0;
    uint8_t *packed = NULL;

    if (out == NULL || out_len == NULL || in_len < 0 || (in == NULL && in_len > 0)) {
        return -1;
    }
    zl77_def *z = zl77_creat();
    if (z == NULL) {
        return -1;
    }
    z->in = in;
    z->in_len = in_len;

    for (int i = 0; i < z->in_len; i += z->cmp_skip) {
        // zl77_cmp accumulates the best match into these, so clear them first.
        z->cmp_pos = 0;
        z->cmp_len = 0;
        const int is_literal = zl77_cmp(z, i);
        if (is_literal) {
            zl77_buff_append_byte(&z->buff_chars, zl77_get_char(z, i));
        } else {
            if (z->cmp_pos == 0 || z->cmp_len == 0) {
                // An empty reference would never advance the window. Report
                // it to the caller instead of terminating the process.
                goto done;
            }
            zl77_buff_append_byte(&z->buff_pos, ((z->cmp_pos & 0xf) << 4) | (z->cmp_len & 0xf));
        }
        zl77_buff_append_bit(&z->buff_bits, is_literal);
    }

    size_chars = (uint32_t)z->buff_chars.used;
    size_pos = (uint32_t)z->buff_pos.used;
    size_bits = (uint32_t)z->buff_bits.used;
    total = (int)(16 + size_chars + size_pos + size_bits);
    packed = calloc((size_t)total, sizeof *packed);
    if (packed == NULL) {
        goto done;
    }

    // Header first, then the three streams back to back.
    zl77_append_u32(packed, &index, size_chars);
    zl77_append_u32(packed, &index, size_pos);
    zl77_append_u32(packed, &index, size_bits);
    zl77_append_u32(packed, &index, (uint32_t)z->in_len);
    for (uint32_t k = 0; k < size_chars; k++) {
        packed[index++] = zl77_buff_get_byte(&z->buff_chars, (int)k);
    }
    for (uint32_t k = 0; k < size_pos; k++) {
        packed[index++] = zl77_buff_get_byte(&z->buff_pos, (int)k);
    }
    for (uint32_t k = 0; k < size_bits; k++) {
        packed[index++] = zl77_buff_get_byte(&z->buff_bits, (int)k);
    }

    *out = packed;
    *out_len = total;
    DBG_LOG("in_len=%d,out_len=%d\n",in_len,*out_len);
    rc = 0;

done:
    zl77_del_buff(&z->buff_chars);
    zl77_del_buff(&z->buff_pos);
    zl77_del_buff(&z->buff_bits);
    free(z);
    return rc;
}
// Read bit number `index` from a packed bit array (least-significant bit of
// each byte first). Returns 1 when the bit is set, otherwise 0.
static inline int zl77_get_bit(const uint8_t *data,int index){
    const int mask = 1 << (index % 8);
    if (data[index / 8] & mask) {
        return 1;
    }
    return 0;
}
// Decompress a stream laid out as: four little-endian u32 sizes (literal
// stream, reference stream, flag-bit stream, uncompressed size) followed by
// those three streams.
//
// in, in_len : compressed data and its size in bytes
// out        : receives a calloc'ed buffer of the uncompressed size plus one
//              trailing zero byte; set to NULL on failure
// out_len    : receives the uncompressed size; set to 0 on failure
// Returns 0 on success, -1 on invalid arguments, a truncated or inconsistent
// stream, or allocation failure.
int zl77_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
{
    enum { ZL77_HEADER_SIZE = 16 };

    if (out == NULL || out_len == NULL) {
        return -1;
    }
    *out = NULL;
    *out_len = 0;
    if (in == NULL || in_len < ZL77_HEADER_SIZE) {
        return -1;
    }

    const uint32_t size_chars = zl77_get_u32(in, 0);
    const uint32_t size_pos = zl77_get_u32(in, 4);
    const uint32_t size_bits = zl77_get_u32(in, 8);
    const uint32_t size_unpack = zl77_get_u32(in, 12);

    // The three streams must fit inside the supplied input; a 64-bit sum
    // cannot wrap. The result size must be representable in *out_len.
    const uint64_t payload = (uint64_t)size_chars + size_pos + size_bits;
    if (payload > (uint64_t)(in_len - ZL77_HEADER_SIZE)) {
        return -1;
    }
    if (size_unpack > (uint32_t)INT_MAX) {
        return -1;
    }

    const uint8_t *chars = in + ZL77_HEADER_SIZE;
    const uint8_t *pos = chars + size_chars;
    const uint8_t *bits = pos + size_pos;
    const uint64_t bit_count = (uint64_t)size_bits * 8;

    uint8_t *buf = calloc((size_t)size_unpack + 1, sizeof *buf);
    if (buf == NULL) {
        return -1;
    }

    uint32_t index_chars = 0;
    uint32_t index_pos = 0;
    int index_bits = 0; // never exceeds i, because every token emits >= 1 byte
    uint32_t i = 0;
    while (i < size_unpack) {
        if ((uint64_t)index_bits >= bit_count) {
            goto corrupt; // ran out of token flags
        }
        const int is_literal = zl77_get_bit(bits, index_bits);
        index_bits++;
        if (is_literal) {
            if (index_chars >= size_chars) {
                goto corrupt;
            }
            buf[i++] = chars[index_chars++];
        } else {
            if (index_pos >= size_pos) {
                goto corrupt;
            }
            const uint8_t token = pos[index_pos++];
            const uint32_t dist = token >> 4;
            uint32_t len = token & 0xf;
            if (dist == 0 || len == 0) {
                goto corrupt; // such a token can never make progress
            }
            if (len > size_unpack - i) {
                len = size_unpack - i; // never write past the declared size
            }
            // Copy byte by byte so the source may overlap the destination.
            // Source positions before the start of the output read as 0,
            // matching what zl77_get_char returns for out-of-range input.
            for (uint32_t k = 0; k < len; k++) {
                buf[i] = (i >= dist) ? buf[i - dist] : 0;
                i++;
            }
        }
    }

    *out = buf;
    *out_len = (int)size_unpack;
    return 0;

corrupt:
    free(buf);
    return -1;
}
// Command-line driver: passes argv[1] through hm_encode, then passes that
// result through hm_encode again.
// Returns 0 after both calls, 1 when no argument was supplied.
int main(int argc,char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <text>\n", (argc > 0 && argv[0]) ? argv[0] : "hello");
        return 1;
    }
    uint8_t *encode_data=0;
    int encode_len=0;
    uint8_t *decode_data=0;
    int decode_len=0;
    hm_encode(argv[1],strlen(argv[1]),&encode_data,&encode_len);
    // NOTE(review): the second stage also calls hm_encode although its outputs
    // are named decode_*; presumably a decode call was intended - confirm
    // against huffman_.h.
    hm_encode(encode_data,encode_len,&decode_data,&decode_len);
    // NOTE(review): encode_data / decode_data are not released here; whether
    // free() is the right call depends on how hm_encode allocates - confirm.
    return 0;
}

25
zl77/zl77.h Normal file
View File

@@ -0,0 +1,25 @@
// Public interface of the zl77 compressor.
#ifndef zl77_h__
#define zl77_h__
#include "stdint.h"
// Compress in_len bytes at `in`. The implementation in zl77.c allocates *out
// with calloc, stores its size in *out_len and returns 0.
// NOTE(review): nothing in zl77.c frees *out, so the caller presumably owns it - confirm.
int zl77_encode(const uint8_t *in,const int in_len,uint8_t **out,int *out_len);
// Decompress a stream produced by zl77_encode into a calloc'ed *out.
// NOTE(review): confirm that the implementation assigns *out_len before relying on it.
int zl77_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len);
#endif