363 lines
9.1 KiB
C
363 lines
9.1 KiB
C
|
||
|
||
#include "zl77.h"
|
||
#include "stdio.h"
|
||
#include "stdlib.h"
|
||
#include "string.h"
|
||
#include "../huffman/huffman_.h"
|
||
|
||
// zl77 算法的实现
|
||
|
||
#define DBG_WARN printf
|
||
#define DBG_LOG printf
|
||
|
||
|
||
|
||
// 定义数据缓冲区步长
|
||
#define LZ77_BUFF_STEP_SIZE 10
|
||
|
||
|
||
typedef struct _buff_item{
|
||
uint8_t data[LZ77_BUFF_STEP_SIZE];
|
||
struct _buff_item *next;
|
||
struct _buff_item *prev;
|
||
}buff_item;
|
||
|
||
|
||
typedef struct _buff_def{
|
||
buff_item *current;
|
||
buff_item *head;
|
||
int used;
|
||
int all;
|
||
int current_index;
|
||
int bit_used;
|
||
}buff_def;
|
||
|
||
|
||
|
||
typedef struct _zl77_def
|
||
{
|
||
int dict_len;// 字典长度
|
||
int tran_len;// 转换区长度
|
||
int index;// 窗口位置
|
||
buff_def buff_chars;//字符编码区
|
||
buff_def buff_pos;//标号编码区
|
||
buff_def buff_bits;//编码类型标识区(1,字符;0,标号)
|
||
const uint8_t *in;
|
||
int in_len;
|
||
uint8_t cmp_pos;// 匹配到的pos距离
|
||
uint8_t cmp_len;// 匹配到的长度
|
||
uint8_t cmp_skip;// 窗口移动的距离
|
||
}zl77_def;
|
||
|
||
|
||
|
||
uint8_t zl77_buff_get_byte(buff_def *buff,int index);
|
||
void zl77_buff_set_byte(buff_def *buff,int index,uint8_t d);
|
||
void zl77_buff_append_bit(buff_def *buff,int bit);
|
||
void zl77_buff_append_byte(buff_def *buff, const uint8_t d);
|
||
int zl77_buff_get_bit(buff_def *buff, int index);
|
||
|
||
|
||
|
||
zl77_def *zl77_creat(void)
|
||
{
|
||
zl77_def *z=calloc(1,sizeof(zl77_def));
|
||
z->dict_len=5;
|
||
z->tran_len=3;
|
||
}
|
||
|
||
|
||
|
||
// 删除缓存
|
||
void zl77_del_buff(buff_def *buff)
|
||
{
|
||
buff_item *t=buff->head;
|
||
buff_item *o;
|
||
while(t){
|
||
o=t;
|
||
t=t->next;
|
||
free(o);
|
||
}
|
||
}
|
||
|
||
// 添加一个字节
|
||
void zl77_buff_append_byte(buff_def *buff, const uint8_t d)
|
||
{
|
||
if(buff->used>=buff->all){
|
||
buff_item *t=buff->head;
|
||
buff_item *t_old=0;
|
||
while (t)
|
||
{
|
||
t_old=t;
|
||
t=t->next;
|
||
}
|
||
t=calloc(1,sizeof(buff_item));
|
||
if(t_old){
|
||
t_old->next=t;
|
||
t->prev=t_old;
|
||
}else{
|
||
buff->head=t;
|
||
}
|
||
buff->all+=LZ77_BUFF_STEP_SIZE;
|
||
buff->current=t;
|
||
buff->current_index=buff->used;
|
||
}
|
||
while((buff->used/LZ77_BUFF_STEP_SIZE)>(buff->current_index/LZ77_BUFF_STEP_SIZE)){
|
||
buff->current=buff->current->next;
|
||
buff->current_index+=LZ77_BUFF_STEP_SIZE;
|
||
}
|
||
buff->current->data[buff->used%LZ77_BUFF_STEP_SIZE]=d;
|
||
buff->used++;
|
||
}
|
||
|
||
// 添加一个位
|
||
void zl77_buff_append_bit(buff_def *buff,int bit)
|
||
{
|
||
if(buff->bit_used/8>=buff->used){
|
||
zl77_buff_append_byte(buff,0);
|
||
}
|
||
uint8_t d=zl77_buff_get_byte(buff,buff->bit_used/8);
|
||
d|=bit<<(buff->bit_used%8);
|
||
zl77_buff_set_byte(buff,-1,d);
|
||
buff->bit_used++;
|
||
}
|
||
|
||
// 调整最近使用的缓冲区
|
||
static void zl77_buff_adjust_current(buff_def *buff,int index){
|
||
while((index/LZ77_BUFF_STEP_SIZE)>(buff->current_index/LZ77_BUFF_STEP_SIZE)){
|
||
buff->current=buff->current->next;
|
||
buff->current_index+=LZ77_BUFF_STEP_SIZE;
|
||
}
|
||
while((index/LZ77_BUFF_STEP_SIZE)<(buff->current_index/LZ77_BUFF_STEP_SIZE)){
|
||
buff->current=buff->current->prev;
|
||
buff->current_index-=LZ77_BUFF_STEP_SIZE;
|
||
}
|
||
}
|
||
|
||
|
||
// 获取指定字节
|
||
uint8_t zl77_buff_get_byte(buff_def *buff,int index){
|
||
if(index<0) index=buff->used+index;
|
||
if(index>=buff->used||index<0) return 0;
|
||
zl77_buff_adjust_current(buff,index);
|
||
return buff->current->data[index%LZ77_BUFF_STEP_SIZE];
|
||
}
|
||
|
||
// 设置指定字节
|
||
void zl77_buff_set_byte(buff_def *buff,int index,uint8_t d){
|
||
if(index<0) index=buff->used+index;
|
||
if(index>=buff->used||index<0) return ;
|
||
zl77_buff_adjust_current(buff,index);
|
||
buff->current->data[index%LZ77_BUFF_STEP_SIZE]=d;
|
||
}
|
||
|
||
|
||
// 获取指定位
|
||
int zl77_buff_get_bit(buff_def *buff, int index){
|
||
uint8_t d=zl77_buff_get_byte(buff,index/8);
|
||
return (d&(1<<(index%8)))?1:0;
|
||
}
|
||
|
||
|
||
|
||
|
||
void zl77_buff_print(buff_def *buff)
|
||
{
|
||
DBG_LOG("buff:[");
|
||
for(int i=0;i<buff->used;i++){
|
||
DBG_LOG("%02x ",zl77_buff_get_byte(buff,i));
|
||
}
|
||
DBG_LOG("]\n");
|
||
}
|
||
|
||
|
||
|
||
static uint8_t zl77_get_char(zl77_def *z,int index)
|
||
{
|
||
// DBG_LOG("get_char:[%d]\n",index);
|
||
if(index<0||index>=z->in_len) return 0;
|
||
return z->in[index];
|
||
}
|
||
|
||
// 比对,找到了返回0没找到返回1
|
||
// 0记录标号,1记录原始数据
|
||
static int zl77_cmp(zl77_def *z,int index){
|
||
uint8_t pos=0;
|
||
uint8_t len=0;
|
||
// DBG_LOG("index=%d\n",index);
|
||
for(int i=z->dict_len;i>0;i--){
|
||
if(zl77_get_char(z,index-i)==zl77_get_char(z,index)){
|
||
pos=i;
|
||
len=0;
|
||
for(int j=0;j<i;j++){
|
||
if(zl77_get_char(z,index-i+j)==zl77_get_char(z,index+j))
|
||
{
|
||
// DBG_LOG("%c|%c \n",zl77_get_char(z,index-i+j),zl77_get_char(z,index+j));
|
||
len++;
|
||
if(len>z->cmp_len){
|
||
z->cmp_len=len;
|
||
z->cmp_pos=pos;
|
||
}
|
||
}else{
|
||
len=0;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
if((pos|len)==0){
|
||
z->cmp_skip=1;
|
||
return 1;
|
||
}
|
||
else{
|
||
// for(int i=0;i<z->cmp_len;i++){
|
||
// DBG_LOG("%02x|%02x ",zl77_get_char(z,index-z->cmp_pos+i),zl77_get_char(z,index+i));
|
||
// }
|
||
z->cmp_skip=z->cmp_len;
|
||
return 0;
|
||
}
|
||
}
|
||
|
||
|
||
|
||
static inline void zl77_append_u32(uint8_t *data,int *index,uint32_t value){
|
||
data[(*index)++]=value&0xff;
|
||
data[(*index)++]=(value>>8)&0xff;
|
||
data[(*index)++]=(value>>16)&0xff;
|
||
data[(*index)++]=(value>>24)&0xff;
|
||
}
|
||
|
||
static inline uint32_t zl77_get_u32(const uint8_t *data,int index){
|
||
uint32_t ret=0;
|
||
for(int i=0;i<4;i++){
|
||
ret|=data[index+i]<<(8*i);
|
||
}
|
||
return ret;
|
||
}
|
||
|
||
|
||
int zl77_encode(const uint8_t *in,const int in_len,uint8_t **out,int *out_len)
|
||
{
|
||
int ret;
|
||
zl77_def *z=zl77_creat();
|
||
z->in=in;
|
||
z->in_len=in_len;
|
||
for(int i=0;i<z->in_len;){
|
||
z->cmp_pos=0;
|
||
z->cmp_len=0;
|
||
ret=zl77_cmp(z,i);
|
||
if(ret){
|
||
zl77_buff_append_byte(&z->buff_chars,zl77_get_char(z,i));
|
||
// DBG_LOG("char(%c);",zl77_get_char(z,i));
|
||
}else{
|
||
zl77_buff_append_byte(&z->buff_pos,((z->cmp_pos&0xf)<<4)|(z->cmp_len&0xf));
|
||
// DBG_LOG("pos(%d,%d);",z->cmp_pos,z->cmp_len);
|
||
if((z->cmp_pos|z->cmp_len)==0){
|
||
exit(1);
|
||
}
|
||
}
|
||
zl77_buff_append_bit(&z->buff_bits,ret);
|
||
i+=z->cmp_skip;
|
||
}
|
||
// DBG_LOG("\n");
|
||
// zl77_buff_print(&z->buff_chars);
|
||
// zl77_buff_print(&z->buff_pos);
|
||
// zl77_buff_print(&z->buff_bits);
|
||
uint32_t size_chars=z->buff_chars.used;
|
||
uint32_t size_pos=z->buff_pos.used;
|
||
uint32_t size_bits=z->buff_bits.used;
|
||
uint32_t size_unpack=z->in_len;
|
||
int index=0;
|
||
(*out_len)=16+size_chars+size_pos+size_bits;
|
||
(*out)=calloc(*out_len,sizeof(uint8_t));
|
||
zl77_append_u32(*out,&index,size_chars);
|
||
zl77_append_u32(*out,&index,size_pos);
|
||
zl77_append_u32(*out,&index,size_bits);
|
||
zl77_append_u32(*out,&index,size_unpack);
|
||
for(int i=0;i<size_chars;i++){
|
||
(*out)[index++]=zl77_buff_get_byte(&z->buff_chars,i);
|
||
}
|
||
for(int i=0;i<size_pos;i++){
|
||
(*out)[index++]=zl77_buff_get_byte(&z->buff_pos,i);
|
||
}
|
||
for(int i=0;i<size_bits;i++){
|
||
(*out)[index++]=zl77_buff_get_byte(&z->buff_bits,i);
|
||
}
|
||
zl77_del_buff(&z->buff_chars);
|
||
zl77_del_buff(&z->buff_pos);
|
||
zl77_del_buff(&z->buff_bits);
|
||
free(z);
|
||
|
||
DBG_LOG("in_len=%d,out_len=%d\n",in_len,*out_len);
|
||
return 0;
|
||
}
|
||
|
||
|
||
static inline int zl77_get_bit(const uint8_t *data,int index){
|
||
uint8_t c=data[index/8];
|
||
return c&(1<<(index%8))?1:0;
|
||
}
|
||
|
||
int zl77_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
|
||
{
|
||
int ret;
|
||
int index_chars=0;
|
||
int index_pos=0;
|
||
int index_bits=0;
|
||
uint8_t cmp_pos,cmp_len,ch;
|
||
zl77_def *z=zl77_creat();
|
||
uint32_t size_chars=zl77_get_u32(in,0);
|
||
uint32_t size_pos=zl77_get_u32(in,4);
|
||
uint32_t size_bits=zl77_get_u32(in,8);
|
||
uint32_t size_unpack=zl77_get_u32(in,12);
|
||
const uint8_t *chars=in+16;
|
||
const uint8_t *pos=in+16+size_chars;
|
||
const uint8_t *bits=in+16+size_chars+size_pos;
|
||
(*out)=calloc(size_unpack+1,sizeof(uint8_t));
|
||
for(int i=0;i<size_unpack;){
|
||
ret=zl77_get_bit(bits,index_bits);
|
||
index_bits++;
|
||
// DBG_LOG("index:%d,bit=%d\n",index_bits,ret);
|
||
if(ret){
|
||
ch=chars[index_chars++];
|
||
(*out)[i++]=ch;
|
||
// DBG_LOG("char(%c)",ch);
|
||
}else{
|
||
cmp_pos=pos[index_pos]>>4;
|
||
cmp_len=pos[index_pos]&0xf;index_pos++;
|
||
// DBG_LOG("pos(%d,%d)",cmp_pos,cmp_len);
|
||
memcpy(&(*out)[i],&(*out)[i-cmp_pos],cmp_len);
|
||
i+=cmp_len;
|
||
}
|
||
}
|
||
// DBG_LOG("\n");
|
||
free(z);
|
||
return 0;
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
void main(int argc,const char *argv[])
|
||
{
|
||
uint8_t *encode_data=0;
|
||
int encode_len=0;
|
||
uint8_t *decode_data=0;
|
||
int decode_len=0;
|
||
hm_encode(argv[1],strlen(argv[1]),&encode_data,&encode_len);
|
||
// for(int i=0;i<encode_len;i++)
|
||
// {
|
||
// DBG_LOG("%02x,",encode_data[i]);
|
||
// }
|
||
// DBG_LOG("\n");
|
||
hm_encode(encode_data,encode_len,&decode_data,&decode_len);
|
||
// zl77_decode(encode_data,encode_len,&decode_data,&decode_len);
|
||
// printf("decode:%s\n",decode_data);
|
||
}
|
||
|
||
|
||
|
||
|