Files
c_progarm/zl77/zl77.c
2023-12-02 11:52:15 +08:00

363 lines
9.1 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "zl77.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "../huffman/huffman_.h"
// zl77 算法的实现
#define DBG_WARN printf
#define DBG_LOG printf
// 定义数据缓冲区步长
#define LZ77_BUFF_STEP_SIZE 10
typedef struct _buff_item{
uint8_t data[LZ77_BUFF_STEP_SIZE];
struct _buff_item *next;
struct _buff_item *prev;
}buff_item;
typedef struct _buff_def{
buff_item *current;
buff_item *head;
int used;
int all;
int current_index;
int bit_used;
}buff_def;
typedef struct _zl77_def
{
int dict_len;// 字典长度
int tran_len;// 转换区长度
int index;// 窗口位置
buff_def buff_chars;//字符编码区
buff_def buff_pos;//标号编码区
buff_def buff_bits;//编码类型标识区(1,字符;0,标号)
const uint8_t *in;
int in_len;
uint8_t cmp_pos;// 匹配到的pos距离
uint8_t cmp_len;// 匹配到的长度
uint8_t cmp_skip;// 窗口移动的距离
}zl77_def;
uint8_t zl77_buff_get_byte(buff_def *buff,int index);
void zl77_buff_set_byte(buff_def *buff,int index,uint8_t d);
void zl77_buff_append_bit(buff_def *buff,int bit);
void zl77_buff_append_byte(buff_def *buff, const uint8_t d);
int zl77_buff_get_bit(buff_def *buff, int index);
zl77_def *zl77_creat(void)
{
zl77_def *z=calloc(1,sizeof(zl77_def));
z->dict_len=5;
z->tran_len=3;
}
// 删除缓存
void zl77_del_buff(buff_def *buff)
{
buff_item *t=buff->head;
buff_item *o;
while(t){
o=t;
t=t->next;
free(o);
}
}
// 添加一个字节
void zl77_buff_append_byte(buff_def *buff, const uint8_t d)
{
if(buff->used>=buff->all){
buff_item *t=buff->head;
buff_item *t_old=0;
while (t)
{
t_old=t;
t=t->next;
}
t=calloc(1,sizeof(buff_item));
if(t_old){
t_old->next=t;
t->prev=t_old;
}else{
buff->head=t;
}
buff->all+=LZ77_BUFF_STEP_SIZE;
buff->current=t;
buff->current_index=buff->used;
}
while((buff->used/LZ77_BUFF_STEP_SIZE)>(buff->current_index/LZ77_BUFF_STEP_SIZE)){
buff->current=buff->current->next;
buff->current_index+=LZ77_BUFF_STEP_SIZE;
}
buff->current->data[buff->used%LZ77_BUFF_STEP_SIZE]=d;
buff->used++;
}
// 添加一个位
void zl77_buff_append_bit(buff_def *buff,int bit)
{
if(buff->bit_used/8>=buff->used){
zl77_buff_append_byte(buff,0);
}
uint8_t d=zl77_buff_get_byte(buff,buff->bit_used/8);
d|=bit<<(buff->bit_used%8);
zl77_buff_set_byte(buff,-1,d);
buff->bit_used++;
}
// 调整最近使用的缓冲区
static void zl77_buff_adjust_current(buff_def *buff,int index){
while((index/LZ77_BUFF_STEP_SIZE)>(buff->current_index/LZ77_BUFF_STEP_SIZE)){
buff->current=buff->current->next;
buff->current_index+=LZ77_BUFF_STEP_SIZE;
}
while((index/LZ77_BUFF_STEP_SIZE)<(buff->current_index/LZ77_BUFF_STEP_SIZE)){
buff->current=buff->current->prev;
buff->current_index-=LZ77_BUFF_STEP_SIZE;
}
}
// 获取指定字节
uint8_t zl77_buff_get_byte(buff_def *buff,int index){
if(index<0) index=buff->used+index;
if(index>=buff->used||index<0) return 0;
zl77_buff_adjust_current(buff,index);
return buff->current->data[index%LZ77_BUFF_STEP_SIZE];
}
// 设置指定字节
void zl77_buff_set_byte(buff_def *buff,int index,uint8_t d){
if(index<0) index=buff->used+index;
if(index>=buff->used||index<0) return ;
zl77_buff_adjust_current(buff,index);
buff->current->data[index%LZ77_BUFF_STEP_SIZE]=d;
}
// 获取指定位
int zl77_buff_get_bit(buff_def *buff, int index){
uint8_t d=zl77_buff_get_byte(buff,index/8);
return (d&(1<<(index%8)))?1:0;
}
void zl77_buff_print(buff_def *buff)
{
DBG_LOG("buff:[");
for(int i=0;i<buff->used;i++){
DBG_LOG("%02x ",zl77_buff_get_byte(buff,i));
}
DBG_LOG("]\n");
}
static uint8_t zl77_get_char(zl77_def *z,int index)
{
// DBG_LOG("get_char:[%d]\n",index);
if(index<0||index>=z->in_len) return 0;
return z->in[index];
}
// 比对找到了返回0没找到返回1
// 0记录标号1记录原始数据
static int zl77_cmp(zl77_def *z,int index){
uint8_t pos=0;
uint8_t len=0;
// DBG_LOG("index=%d\n",index);
for(int i=z->dict_len;i>0;i--){
if(zl77_get_char(z,index-i)==zl77_get_char(z,index)){
pos=i;
len=0;
for(int j=0;j<i;j++){
if(zl77_get_char(z,index-i+j)==zl77_get_char(z,index+j))
{
// DBG_LOG("%c|%c \n",zl77_get_char(z,index-i+j),zl77_get_char(z,index+j));
len++;
if(len>z->cmp_len){
z->cmp_len=len;
z->cmp_pos=pos;
}
}else{
len=0;
break;
}
}
}
}
if((pos|len)==0){
z->cmp_skip=1;
return 1;
}
else{
// for(int i=0;i<z->cmp_len;i++){
// DBG_LOG("%02x|%02x ",zl77_get_char(z,index-z->cmp_pos+i),zl77_get_char(z,index+i));
// }
z->cmp_skip=z->cmp_len;
return 0;
}
}
static inline void zl77_append_u32(uint8_t *data,int *index,uint32_t value){
data[(*index)++]=value&0xff;
data[(*index)++]=(value>>8)&0xff;
data[(*index)++]=(value>>16)&0xff;
data[(*index)++]=(value>>24)&0xff;
}
static inline uint32_t zl77_get_u32(const uint8_t *data,int index){
uint32_t ret=0;
for(int i=0;i<4;i++){
ret|=data[index+i]<<(8*i);
}
return ret;
}
int zl77_encode(const uint8_t *in,const int in_len,uint8_t **out,int *out_len)
{
int ret;
zl77_def *z=zl77_creat();
z->in=in;
z->in_len=in_len;
for(int i=0;i<z->in_len;){
z->cmp_pos=0;
z->cmp_len=0;
ret=zl77_cmp(z,i);
if(ret){
zl77_buff_append_byte(&z->buff_chars,zl77_get_char(z,i));
// DBG_LOG("char(%c);",zl77_get_char(z,i));
}else{
zl77_buff_append_byte(&z->buff_pos,((z->cmp_pos&0xf)<<4)|(z->cmp_len&0xf));
// DBG_LOG("pos(%d,%d);",z->cmp_pos,z->cmp_len);
if((z->cmp_pos|z->cmp_len)==0){
exit(1);
}
}
zl77_buff_append_bit(&z->buff_bits,ret);
i+=z->cmp_skip;
}
// DBG_LOG("\n");
// zl77_buff_print(&z->buff_chars);
// zl77_buff_print(&z->buff_pos);
// zl77_buff_print(&z->buff_bits);
uint32_t size_chars=z->buff_chars.used;
uint32_t size_pos=z->buff_pos.used;
uint32_t size_bits=z->buff_bits.used;
uint32_t size_unpack=z->in_len;
int index=0;
(*out_len)=16+size_chars+size_pos+size_bits;
(*out)=calloc(*out_len,sizeof(uint8_t));
zl77_append_u32(*out,&index,size_chars);
zl77_append_u32(*out,&index,size_pos);
zl77_append_u32(*out,&index,size_bits);
zl77_append_u32(*out,&index,size_unpack);
for(int i=0;i<size_chars;i++){
(*out)[index++]=zl77_buff_get_byte(&z->buff_chars,i);
}
for(int i=0;i<size_pos;i++){
(*out)[index++]=zl77_buff_get_byte(&z->buff_pos,i);
}
for(int i=0;i<size_bits;i++){
(*out)[index++]=zl77_buff_get_byte(&z->buff_bits,i);
}
zl77_del_buff(&z->buff_chars);
zl77_del_buff(&z->buff_pos);
zl77_del_buff(&z->buff_bits);
free(z);
DBG_LOG("in_len=%d,out_len=%d\n",in_len,*out_len);
return 0;
}
static inline int zl77_get_bit(const uint8_t *data,int index){
uint8_t c=data[index/8];
return c&(1<<(index%8))?1:0;
}
int zl77_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
{
int ret;
int index_chars=0;
int index_pos=0;
int index_bits=0;
uint8_t cmp_pos,cmp_len,ch;
zl77_def *z=zl77_creat();
uint32_t size_chars=zl77_get_u32(in,0);
uint32_t size_pos=zl77_get_u32(in,4);
uint32_t size_bits=zl77_get_u32(in,8);
uint32_t size_unpack=zl77_get_u32(in,12);
const uint8_t *chars=in+16;
const uint8_t *pos=in+16+size_chars;
const uint8_t *bits=in+16+size_chars+size_pos;
(*out)=calloc(size_unpack+1,sizeof(uint8_t));
for(int i=0;i<size_unpack;){
ret=zl77_get_bit(bits,index_bits);
index_bits++;
// DBG_LOG("index:%d,bit=%d\n",index_bits,ret);
if(ret){
ch=chars[index_chars++];
(*out)[i++]=ch;
// DBG_LOG("char(%c)",ch);
}else{
cmp_pos=pos[index_pos]>>4;
cmp_len=pos[index_pos]&0xf;index_pos++;
// DBG_LOG("pos(%d,%d)",cmp_pos,cmp_len);
memcpy(&(*out)[i],&(*out)[i-cmp_pos],cmp_len);
i+=cmp_len;
}
}
// DBG_LOG("\n");
free(z);
return 0;
}
void main(int argc,const char *argv[])
{
uint8_t *encode_data=0;
int encode_len=0;
uint8_t *decode_data=0;
int decode_len=0;
hm_encode(argv[1],strlen(argv[1]),&encode_data,&encode_len);
// for(int i=0;i<encode_len;i++)
// {
// DBG_LOG("%02x,",encode_data[i]);
// }
// DBG_LOG("\n");
hm_encode(encode_data,encode_len,&decode_data,&decode_len);
// zl77_decode(encode_data,encode_len,&decode_data,&decode_len);
// printf("decode:%s\n",decode_data);
}