Add LZW compression algorithm

ranchuan
2023-12-02 11:52:15 +08:00
parent 9e46a19283
commit a556d0a141
31 changed files with 1593 additions and 2 deletions

2
.gitignore vendored

@@ -2,4 +2,4 @@
.vs/
*.exe
*.pkt
zlib-1.3/

.vscode/settings.json vendored

@@ -1,6 +1,7 @@
{
"files.associations": {
"coder_lib.h": "c",
"huffman_.h": "c",
"random": "c"
}
}

BIN
hello.exe

Binary file not shown.

23
lzw-ab/.vscode/c_cpp_properties.json vendored Normal file

@@ -0,0 +1,23 @@
{
"configurations": [
{
"name": "Win32",
"includePath": [
"${workspaceFolder}/**",
"C:/MinGW/include/"
],
"defines": [
"_DEBUG",
"UNICODE",
"_UNICODE",
"__WIN32__",
"DLL_EXPORT"
],
"cStandard": "c17",
"cppStandard": "gnu++17",
"intelliSenseMode": "windows-gcc-x64",
"compilerPath": "C:/MinGW/bin/gcc.exe"
}
],
"version": 4
}

6
lzw-ab/.vscode/settings.json vendored Normal file

@@ -0,0 +1,6 @@
{
"files.associations": {
"coder_lib.h": "c",
"huffman_.h": "c"
}
}

15
lzw-ab/Makefile Normal file

@@ -0,0 +1,15 @@
CC = gcc
SRCS = $(wildcard *.c)
STR = $(subst from,to,from your heart)
all:
$(CC) $(SRCS) -o hello
clean:
rm -rf *.exe

83
lzw-ab/README Normal file

@@ -0,0 +1,83 @@
////////////////////////////////////////////////////////////////////////////
// **** LZW-AB **** //
// Adjusted Binary LZW Compressor/Decompressor //
// Copyright (c) 2016-2020 David Bryant //
// All Rights Reserved //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
This is an implementation of the Lempel-Ziv-Welch general-purpose data
compression algorithm. It is targeted at embedded applications that require
high speed compression or decompression facilities where lots of RAM for
large dictionaries might not be available. I have used this in several
projects for storing compressed firmware images, and once I even coded the
decompressor in Z-80 assembly language for speed! Depending on the maximum
symbol size selected, the implementation can require from 2368 to 335616
bytes of RAM for decoding (and about half again more for encoding).
This is a streaming compressor in that the data is not divided into blocks
and no context information like dictionaries or Huffman tables is sent
ahead of the compressed data (except for one byte to signal the maximum
bit depth). This limits the maximum possible compression ratio compared to
algorithms that significantly preprocess the data, but with the help of
some enhancements to the LZW algorithm (described below) it is able to
compress better than the UNIX "compress" utility (which is also LZW) and
is in fact closer to and sometimes beats the compression level of "gzip".
The symbols are stored in "adjusted binary" which provides somewhat better
compression (with virtually no speed penalty) compared to the fixed word
sizes normally used. Once the dictionary is full, the encoder returns to
the beginning and recycles string codes that have not been used yet for
longer strings. In this way the dictionary constantly "churns" based on the
incoming stream, thereby improving and adapting to optimal compression.
The compression performance is constantly monitored and a dictionary flush
is forced on stretches of negative compression which limits worst-case
performance to about 8% inflation.
LZW-AB consists of three standard C files: the library, a command-line
filter demo using pipes, and a command-line test harness. Each program
builds with a single command on most platforms. It has been designed with
maximum portability in mind and should work correctly on big-endian as well
as little-endian machines.
Linux:
% gcc -O3 lzwfilter.c lzwlib.c -o lzwfilter
% gcc -O3 lzwtester.c lzwlib.c -o lzwtester
Darwin/Mac:
% clang -O3 lzwfilter.c lzwlib.c -o lzwfilter
% clang -O3 lzwtester.c lzwlib.c -o lzwtester
MS Visual Studio:
cl -O2 lzwfilter.c lzwlib.c
cl -O2 lzwtester.c lzwlib.c
There are Windows binaries (built on MinGW) for the filter and the tester on the
GitHub release page (v3). The "help" display for the filter looks like this:
Usage: lzwfilter [-options] [< infile] [> outfile]
Operation: compression is default, use -d to decompress
Options: -d = decompress
-h = display this "help" message
-1 = maximum symbol size = 9 bits
-2 = maximum symbol size = 10 bits
-3 = maximum symbol size = 11 bits
-4 = maximum symbol size = 12 bits
-5 = maximum symbol size = 13 bits
-6 = maximum symbol size = 14 bits
-7 = maximum symbol size = 15 bits
-8 = maximum symbol size = 16 bits (default)
-v = verbose (display ratio and checksum)
Here's the "help" display for the tester:
Usage: lzwtester [options] file [...]
Options: -1 ... -8 = test using only specified max symbol size (9 - 16)
-0 = cycle through all maximum symbol sizes (default)
-e = exhaustive test (by successive truncation)
-f = fuzz test (randomly corrupt compressed data)
-q = quiet mode (only reports errors and summary)
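
The "adjusted binary" symbols mentioned above are phased-in codes: whenever the current
maximum code value is not one less than a power of two, the lower codes need one bit
fewer than a fixed-width field would. A minimal standalone sketch of the idea (not part
of this commit; it simply mirrors the CODE_BITS/WRITE_CODE logic in lzwlib.c and, like
that code on GCC, relies on __builtin_clz):

    #include <stdio.h>

    /* number of bits the adjusted-binary form of "code" needs, given "maxcode" */
    static int adjusted_bits (unsigned int code, unsigned int maxcode)
    {
        unsigned int code_bits = 31 - __builtin_clz (maxcode);  /* bits excluding the implied MSB */
        unsigned int extras = (2 << code_bits) - maxcode - 1;   /* how many codes get the short form */
        return code < extras ? (int) code_bits : (int) code_bits + 1;
    }

    int main (void)
    {
        /* the first symbol of a stream has maxcode = 257 (see lzwlib.c) */
        printf ("code 100 of 257 -> %d bits\n", adjusted_bits (100, 257));  /* 8 bits */
        printf ("code 255 of 257 -> %d bits\n", adjusted_bits (255, 257));  /* 9 bits */
        return 0;
    }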

25
lzw-ab/license.txt Normal file

@@ -0,0 +1,25 @@
Copyright (c) David Bryant
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Conifer Software nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

191
lzw-ab/lzwfilter.c Normal file

@@ -0,0 +1,191 @@
////////////////////////////////////////////////////////////////////////////
// **** LZW-AB **** //
// Adjusted Binary LZW Compressor/Decompressor //
// Copyright (c) 2016-2020 David Bryant //
// All Rights Reserved //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif
#include "lzwlib.h"
/* This module provides a command-line filter for testing the lzw library.
* It can also optionally calculate and display the compression ratio and
* a simple checksum for informational purposes. Other command-line
* arguments select decoding mode or the maximum symbol size (9 to 16 bits)
* for encoding.
*/
static const char *usage =
" Usage: lzwfilter [-options] [< infile] [> outfile]\n\n"
" Operation: compression is default, use -d to decompress\n\n"
" Options: -d = decompress\n"
" -h = display this \"help\" message\n"
" -1 = maximum symbol size = 9 bits\n"
" -2 = maximum symbol size = 10 bits\n"
" -3 = maximum symbol size = 11 bits\n"
" -4 = maximum symbol size = 12 bits\n"
" -5 = maximum symbol size = 13 bits\n"
" -6 = maximum symbol size = 14 bits\n"
" -7 = maximum symbol size = 15 bits\n"
" -8 = maximum symbol size = 16 bits (default)\n"
" -v = verbose (display ratio and checksum)\n\n"
" Web: Visit www.github.com/dbry/lzw-ab for latest version and info\n\n";
typedef struct {
unsigned char buffer [65536];
int checksum, head, tail;
size_t byte_count;
} streamer;
static int read_buff (void *ctx)
{
streamer *stream = ctx;
int value;
if (stream->head == stream->tail)
stream->tail = (stream->head = 0) + fread (stream->buffer, 1, sizeof (stream->buffer), stdin);
if (stream->head < stream->tail) {
value = stream->buffer [stream->head++];
stream->checksum = stream->checksum * 3 + (unsigned char) value;
stream->byte_count++;
}
else
value = EOF;
return value;
}
static void write_buff (int value, void *ctx)
{
streamer *stream = ctx;
if (value == EOF) {
fwrite (stream->buffer, 1, stream->head, stdout);
return;
}
stream->buffer [stream->head++] = value;
if (stream->head == sizeof (stream->buffer)) {
fwrite (stream->buffer, 1, stream->head, stdout);
stream->head = 0;
}
stream->checksum = stream->checksum * 3 + (unsigned char) value;
stream->byte_count++;
}
int main (int argc, char **argv)
{
int decompress = 0, maxbits = 16, verbose = 0, error = 0;
streamer reader, writer;
memset (&reader, 0, sizeof (reader));
memset (&writer, 0, sizeof (writer));
reader.checksum = writer.checksum = -1;
while (--argc) {
if ((**++argv == '-') && (*argv)[1])
while (*++*argv)
switch (**argv) {
case '1':
maxbits = 9;
break;
case '2':
maxbits = 10;
break;
case '3':
maxbits = 11;
break;
case '4':
maxbits = 12;
break;
case '5':
maxbits = 13;
break;
case '6':
maxbits = 14;
break;
case '7':
maxbits = 15;
break;
case '8':
maxbits = 16;
break;
case 'D': case 'd':
decompress = 1;
break;
case 'H': case 'h':
fprintf (stderr, "%s", usage);
return 0;
break;
case 'V': case 'v':
verbose = 1;
break;
default:
fprintf (stderr, "illegal option: %c !\n", **argv);
error = 1;
break;
}
else {
fprintf (stderr, "unknown argument: %s\n", *argv);
error = 1;
}
}
if (error) {
fprintf (stderr, "%s", usage);
return 0;
}
#ifdef _WIN32
setmode (fileno (stdin), O_BINARY);
setmode (fileno (stdout), O_BINARY);
#endif
if (decompress) {
if (lzw_decompress (write_buff, &writer, read_buff, &reader)) {
fprintf (stderr, "lzw_decompress() returned non-zero!\n");
return 1;
}
write_buff (EOF, &writer);
if (verbose && writer.byte_count)
fprintf (stderr, "output checksum = %x, ratio = %.2f%%\n", writer.checksum, reader.byte_count * 100.0 / writer.byte_count);
}
else {
if (lzw_compress (write_buff, &writer, read_buff, &reader, maxbits)) {
fprintf (stderr, "lzw_compress() returned non-zero!\n");
return 1;
}
write_buff (EOF, &writer);
if (verbose && reader.byte_count)
fprintf (stderr, "source checksum = %x, ratio = %.2f%%\n", reader.checksum, writer.byte_count * 100.0 / reader.byte_count);
}
return 0;
}

513
lzw-ab/lzwlib.c Normal file

@@ -0,0 +1,513 @@
////////////////////////////////////////////////////////////////////////////
// **** LZW-AB **** //
// Adjusted Binary LZW Compressor/Decompressor //
// Copyright (c) 2016-2020 David Bryant //
// All Rights Reserved //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lzwlib.h"
/* This library implements the LZW general-purpose data compression algorithm.
* The algorithm was originally described as a hardware implementation by
* Terry Welch here:
*
* Welch, T.A. “A Technique for High-Performance Data Compression.”
* IEEE Computer 17,6 (June 1984), pp. 8-19.
*
* Since then there have been innumerable refinements and variations on the
* basic technique, and this implementation is no different. The target of
* the present implementation is embedded systems, and so emphasis was placed
* on simplicity, fast execution, and minimal RAM usage.
*
* This is a streaming compressor in that the data is not divided into blocks
* and no context information like dictionaries or Huffman tables is sent
* ahead of the compressed data (except for one byte to signal the maximum
* bit depth). This limits the maximum possible compression ratio compared to
* algorithms that significantly preprocess the data, but with the help of
* some enhancements to the LZW algorithm (described below) it is able to
* compress better than the UNIX "compress" utility (which is also LZW) and
* is in fact closer to and sometimes beats the compression level of "gzip".
*
* The symbols are stored in "adjusted binary" which provides somewhat better
* compression, with virtually no speed penalty, compared to the fixed word
* sizes normally used. These are sometimes called "phased-in" binary codes
* and their use in LZW is described here:
*
* R. N. Horspool, "Improving LZW (data compression algorithm)", Data
* Compression Conference, pp. 332-341, 1991.
*
* Earlier versions of this compressor would reset as soon as the dictionary
* became full to ensure good performance on heterogeneous data (such as tar
* files or executable images). While trivial to implement, this is not
* particularly efficient with homogeneous data (or in general) because we
* spend a lot of time sending short symbols where the compression is poor.
*
* This newer version utilizes a technique such that once the dictionary is
* full, we restart at the beginning and recycle only those codes that were
* seen only once. We know this because they are not referenced by longer
* strings, and are easy to replace in the dictionary for the same reason.
* Since they have only been seen once it's also more likely that we will
* be replacing them with a more common string, and this is especially
* true if the data characteristics are changing.
*
* Replacing string codes in this manner has the interesting side effect that
* some older shorter strings that the removed strings were based on will
* possibly become unreferenced themselves and be recycled on the next pass.
* In this way, the entire dictionary constantly "churns" based on the
* incoming stream, thereby improving and adapting to optimal compression.
*
* Even with this technique there is still a possibility that a sudden change
* in the data characteristics will appear, resulting in significant negative
* compression (up to 100% for 16-bit codes). To detect this case we generate
* an exponentially decaying average of the current compression ratio and reset
* when this hits about 1.06, which limits worst case inflation to about 8%.
*
* The maximum symbol size is configurable on the encode side (from 9 bits to
* 16 bits) and determines the RAM footprint required by both sides and, to a
* large extent, the compression performance. This information is communicated
* to the decoder in the first stream byte so that it can allocate accordingly.
* The RAM requirements are as follows:
*
* maximum encoder RAM decoder RAM
* symbol size requirement requirement
* -----------------------------------------
* 9-bit 4096 bytes 2368 bytes
* 10-bit 8192 bytes 4992 bytes
* 11-bit 16384 bytes 10240 bytes
* 12-bit 32768 bytes 20736 bytes
* 13-bit 65536 bytes 41728 bytes
* 14-bit 131072 bytes 83712 bytes
* 15-bit 262144 bytes 167680 bytes
* 16-bit 524288 bytes 335616 bytes
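*
* (These figures follow from the allocations in the code below, assuming the
* usual 2-byte struct alignment: the encoder needs total_codes * 8 bytes of
* dictionary, and the decoder needs total_codes * 4 bytes of dictionary plus
* total_codes - 256 bytes of reverse buffer plus total_codes / 8 bytes of
* reference bits, where total_codes = 1 << maxbits. For the 9-bit case that
* is 512 * 8 = 4096 and 512 * 4 + 256 + 64 = 2368.)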
*
* This implementation uses malloc(), but obviously an embedded version could
* use static arrays instead if desired (assuming that the maxbits was
* controlled outside).
*/
#define NULL_CODE 65535 // indicates a NULL prefix (must be unsigned short)
#define CLEAR_CODE 256 // code to flush dictionary and restart decoder
#define FIRST_STRING 257 // code of first dictionary string
/* This macro determines the number of bits required to represent the given value,
* not counting the implied MSB. For GNU C it will use the provided built-in,
* otherwise a comparison tree is employed. Note that in the non-GNU case, only
* values up to 65535 (15 bits) are supported.
*/
#ifdef __GNUC__
#define CODE_BITS(n) (31 - __builtin_clz(n))
#else
#define CODE_BITS(n) ((n) < 4096 ? \
((n) < 1024 ? 8 + ((n) >= 512) : 10 + ((n) >= 2048)) : \
((n) < 16384 ? 12 + ((n) >= 8192) : 14 + ((n) >= 32768)))
#endif
/* This macro writes the adjusted-binary symbol "code" given the maximum
* symbol "maxcode". A macro is used here just to avoid the duplication in
* the lzw_compress() function. The idea is that if "maxcode" is not one
* less than a power of two (which it rarely will be) then this code can
* often send fewer bits than would be required with a fixed-size code.
*
* For example, the first code we send will have a "maxcode" of 257, so
* every "code" would normally consume 9 bits. But with adjusted binary we
* can actually represent any code from 0 to 253 with just 8 bits -- only
* the 4 codes from 254 to 257 take 9 bits.
*/
#define WRITE_CODE(code,maxcode) do { \
unsigned int code_bits = CODE_BITS (maxcode); \
unsigned int extras = (2 << code_bits) - (maxcode) - 1; \
if ((code) < extras) { \
shifter |= ((code) << bits); \
bits += code_bits; \
} \
else { \
shifter |= ((((code) + extras) >> 1) << bits); \
bits += code_bits; \
shifter |= ((((code) + extras) & 1) << bits++); \
} \
do { (*dst)(shifter,dstctx); shifter >>= 8; \
output_bytes += 256; \
} while ((bits -= 8) >= 8); \
} while (0)
/* LZW compression function. Bytes (8-bit) are read and written through callbacks and the
* "maxbits" parameter specifies the maximum symbol size (9-16), which in turn determines
* the RAM requirement and, to a large extent, the level of compression achievable. A return
* value of EOF from the "src" callback terminates the compression process. A non-zero return
* value indicates one of the two possible errors -- bad "maxbits" param or failed malloc().
* There are contexts (void pointers) that are passed to the callbacks to easily facilitate
* multiple instances of the compression operation (but simple applications can ignore these).
*/
typedef struct {
unsigned short first_reference, next_reference, back_reference;
unsigned char terminator;
} encoder_entry_t;
int lzw_compress (void (*dst)(int,void*), void *dstctx, int (*src)(void*), void *srcctx, int maxbits)
{
unsigned int maxcode = FIRST_STRING, next_string = FIRST_STRING, prefix = NULL_CODE, total_codes;
unsigned int dictionary_full = 0, available_entries, max_available_entries, max_available_code;
unsigned int input_bytes = 65536, output_bytes = 65536;
unsigned int shifter = 0, bits = 0;
encoder_entry_t *dictionary;
int c;
if (maxbits < 9 || maxbits > 16) // check for valid "maxbits" setting
return 1;
// based on the "maxbits" parameter, compute total codes and allocate dictionary storage
total_codes = 1 << maxbits;
dictionary = malloc (total_codes * sizeof (encoder_entry_t));
max_available_entries = total_codes - FIRST_STRING - 1;
max_available_code = total_codes - 2;
if (!dictionary)
return 1; // failed malloc()
// clear the dictionary
available_entries = max_available_entries;
memset (dictionary, 0, 256 * sizeof (encoder_entry_t));
(*dst)(maxbits - 9, dstctx); // first byte in output stream indicates the maximum symbol bits
// This is the main loop where we read input bytes and compress them. We always keep track of the
// "prefix", which represents a pending byte (if < 256) or string entry (if >= FIRST_STRING) that
// has not been sent to the decoder yet. The output symbols are kept in the "shifter" and "bits"
// variables and are sent to the output every time 8 bits are available (done in the macro).
while ((c = (*src)(srcctx)) != EOF) {
unsigned int cti; // coding table index
input_bytes += 256;
if (prefix == NULL_CODE) { // this only happens the very first byte when we don't yet have a prefix
prefix = c;
continue;
}
memset (dictionary + next_string, 0, sizeof (encoder_entry_t));
if ((cti = dictionary [prefix].first_reference)) { // if any longer strings are built on the current prefix...
while (1)
if (dictionary [cti].terminator == c) { // we found a matching string, so we just update the prefix
prefix = cti; // to that string and continue without sending anything
break;
}
else if (!dictionary [cti].next_reference) { // this string did not match the new character and
dictionary [cti].next_reference = next_string; // there aren't any more, so we'll add a new string,
// point to it with "next_reference", and also make the
dictionary [next_string].back_reference = cti; // "back_reference" which is used for recycling entries
cti = 0;
break;
}
else
cti = dictionary [cti].next_reference; // there are more possible matches to check, so loop back
}
else { // no longer strings are based on the current prefix, so now
dictionary [prefix].first_reference = next_string; // the current prefix plus the new byte will be the next string
dictionary [next_string].back_reference = prefix; // also make the back_reference used for recycling
if (prefix >= FIRST_STRING) available_entries--; // the codes 0-255 are never available for recycling
}
// If "cti" is zero, we could not simply extend our "prefix" to a longer string because we did not find a
// dictionary match, so we send the symbol representing the current "prefix" and add the new string to the
// dictionary. Since the current byte "c" was not included in the prefix, that now becomes our new prefix.
if (!cti) {
WRITE_CODE (prefix, maxcode); // send symbol for current prefix (0 to maxcode-1)
dictionary [next_string].terminator = c; // newly created string has current byte as the terminator
prefix = c; // current byte also becomes new prefix for next string
// If the dictionary is not full yet, we bump the maxcode and next_string and check to see if the
// dictionary is now full. If it is we set the dictionary_full flag and leave maxcode set to two
// less than total_codes because every string entry is now available for matching, but the actual
// maximum code is reserved for EOF.
if (!dictionary_full) {
dictionary_full = (++next_string > max_available_code);
maxcode++;
}
// If the dictionary is full we look for an entry to recycle starting at next_string (the one we
// just created or recycled) plus one (with a check for wrap). We know there is one because at
// a minimum the string we just added qualifies. This also takes care of removing the entry to be recycled
// (which is possible/easy because no longer strings have been based on it).
if (dictionary_full) {
for (next_string++; next_string <= max_available_code || (next_string = FIRST_STRING); next_string++)
if (!dictionary [next_string].first_reference)
break;
cti = dictionary [next_string].back_reference; // dictionary [cti] references the entry we're
// trying to recycle (either as a first or a next)
if (dictionary [cti].first_reference == next_string) {
dictionary [cti].first_reference = dictionary [next_string].next_reference;
// if we just cleared a first reference, and that string is not 0-255,
// then that's a newly available entry
if (!dictionary [cti].first_reference && cti >= FIRST_STRING)
available_entries++;
}
else if (dictionary [cti].next_reference == next_string) // fixup a "next_reference"
dictionary [cti].next_reference = dictionary [next_string].next_reference;
// If the entry we're recycling had a next reference, then update the back reference
// so it's completely out of the chain. Of course we know it didn't have a first
// reference because then we wouldn't be recycling it.
if (dictionary [next_string].next_reference)
dictionary [dictionary [next_string].next_reference].back_reference = cti;
// This check is technically not needed because there will always be an available entry
// (the last string we added at a minimum) but we don't want to get in a situation where
// we only have a few entries that we're cycling through. I pulled the limits (16 entries
// or 1% of total) out of a hat.
if (available_entries < 16 || available_entries * 100 < max_available_entries) {
// clear the dictionary and reset the byte counters -- basically everything starts over
// except that we keep the last pending "prefix" (which, of course, was never sent)
WRITE_CODE (CLEAR_CODE, maxcode);
memset (dictionary, 0, 256 * sizeof (encoder_entry_t));
available_entries = max_available_entries;
next_string = maxcode = FIRST_STRING;
input_bytes = output_bytes = 65536;
dictionary_full = 0;
}
}
// This is similar to the above check, except that it's used whether the dictionary is full or not.
// It uses an exponentially decaying average of the current compression ratio, so it can trigger
// very early if the incoming data is incompressible, or at any later time that the
// dictionary no longer compresses the incoming stream.
if (output_bytes > input_bytes + (input_bytes >> 4)) {
WRITE_CODE (CLEAR_CODE, maxcode);
memset (dictionary, 0, 256 * sizeof (encoder_entry_t));
available_entries = max_available_entries;
next_string = maxcode = FIRST_STRING;
input_bytes = output_bytes = 65536;
dictionary_full = 0;
}
else {
output_bytes -= output_bytes >> 8;
input_bytes -= input_bytes >> 8;
}
}
}
// we're done with input, so if we've received anything we still need to send that pesky pending prefix...
if (prefix != NULL_CODE) {
WRITE_CODE (prefix, maxcode);
if (!dictionary_full)
maxcode++;
}
WRITE_CODE (maxcode, maxcode); // the maximum possible code is always reserved for our END_CODE
if (bits) // finally, flush any pending bits from the shifter
(*dst)(shifter, dstctx);
free (dictionary);
return 0;
}
/* LZW decompression function. Bytes (8-bit) are read and written through callbacks. The
* "maxbits" parameter is read as the first byte in the stream and controls how much memory
* is allocated for decoding. A return value of EOF from the "src" callback terminates the
* decompression process (although this should not normally occur). A non-zero return value
* indicates an error, which in this case can be a bad "maxbits" read from the stream, a
* failed malloc(), or if an EOF is read from the input stream before the decompression
* terminates naturally with END_CODE. There are contexts (void pointers) that are passed
* to the callbacks to easily facilitate multiple instances of the decompression operation
* (but simple applications can ignore these).
*/
typedef struct {
unsigned char terminator, extra_references;
unsigned short prefix;
} decoder_entry_t;
int lzw_decompress (void (*dst)(int,void*), void *dstctx, int (*src)(void*), void *srcctx)
{
unsigned int maxcode = FIRST_STRING, next_string = FIRST_STRING - 1, prefix = CLEAR_CODE;
unsigned int dictionary_full = 0, max_available_code, total_codes;
unsigned int shifter = 0, bits = 0, read_byte, i;
unsigned char *reverse_buffer, *referenced;
decoder_entry_t *dictionary;
if ((read_byte = ((*src)(srcctx))) == EOF || (read_byte & 0xf8)) //sanitize first byte
return 1;
// based on the "maxbits" parameter, compute total codes and allocate dictionary storage
total_codes = 512 << (read_byte & 0x7);
max_available_code = total_codes - 2;
dictionary = malloc (total_codes * sizeof (decoder_entry_t));
reverse_buffer = malloc (total_codes - 256);
referenced = malloc (total_codes / 8); // bitfield indicating code is referenced at least once
// Note that to implement the dictionary entry recycling we have to keep track of how many
// longer strings are based on each string in the dictionary. This can be between 0 (no
// references) to 256 (every possible next byte), but unfortunately that's one more value
// than what can be stored in a byte. The solution is to have a single bit for each entry
// indicating any references (i.e., the code cannot be recycled) and an additional byte
// in the dictionary entry struct counting the "extra" references (beyond one).
if (!reverse_buffer || !dictionary || !referenced) // check for malloc() failure
return 1;
for (i = 0; i < 256; ++i) { // these never change
dictionary [i].prefix = NULL_CODE;
dictionary [i].terminator = i;
}
// This is the main loop where we read input symbols. The values range from 0 to the code value
// of the "next" string in the dictionary (although the actual "next" code cannot be used yet,
// and so we reserve that code for the END_CODE). Note that receiving an EOF from the input
// stream is actually an error because we should have gotten the END_CODE first.
while (1) {
unsigned int code_bits = CODE_BITS (maxcode), code;
unsigned int extras = (2 << code_bits) - maxcode - 1;
do {
if ((read_byte = ((*src)(srcctx))) == EOF) {
free (dictionary); free (reverse_buffer); free (referenced);
return 1;
}
shifter |= read_byte << bits;
} while ((bits += 8) < code_bits);
// first we assume the code will fit in the minimum number of required bits
code = shifter & ((1 << code_bits) - 1);
shifter >>= code_bits;
bits -= code_bits;
// but if code >= extras, then we need to read another bit to calculate the real code
// (this is the "adjusted binary" part)
if (code >= extras) {
if (!bits) {
if ((read_byte = ((*src)(srcctx))) == EOF) {
free (dictionary); free (reverse_buffer); free (referenced);
return 1;
}
shifter = read_byte;
bits = 8;
}
code = (code << 1) - extras + (shifter & 1);
shifter >>= 1;
bits--;
}
if (code == maxcode) // sending the maximum code is reserved for the end of the file
break;
else if (code == CLEAR_CODE) { // otherwise check for a CLEAR_CODE to start over early
next_string = FIRST_STRING - 1;
maxcode = FIRST_STRING;
dictionary_full = 0;
}
else if (prefix == CLEAR_CODE) { // this only happens at the first symbol which is always sent
(*dst)(code, dstctx); // literally and becomes our initial prefix
next_string++;
maxcode++;
}
// Otherwise we have a valid prefix so we step through the string from end to beginning storing the
// bytes in the "reverse_buffer", and then we send them out in the proper order. One corner-case
// we have to handle here is that the string might be the same one that is actually being defined
// now (code == next_string).
else {
unsigned int cti = (code == next_string) ? prefix : code;
unsigned char *rbp = reverse_buffer, c;
do {
*rbp++ = dictionary [cti].terminator;
if (rbp == reverse_buffer + total_codes - 256) {
free (dictionary); free (reverse_buffer); free (referenced);
return 1;
}
} while ((cti = dictionary [cti].prefix) != NULL_CODE);
c = *--rbp; // the first byte in this string is the terminator for the last string, which is
// the one that we'll create a new dictionary entry for this time
do // send string in corrected order (except for the terminator which we don't know yet)
(*dst)(*rbp, dstctx);
while (rbp-- != reverse_buffer);
if (code == next_string) {
(*dst)(c,dstctx);
}
// This should always execute (the conditional is to catch corruptions) and is where we add a new string to
// the dictionary, either at the end or elsewhere when we are "recycling" entries that were never referenced
if (next_string >= FIRST_STRING && next_string < total_codes) {
if (referenced [prefix >> 3] & (1 << (prefix & 7))) // increment reference count on prefix
dictionary [prefix].extra_references++;
else
referenced [prefix >> 3] |= 1 << (prefix & 7);
dictionary [next_string].prefix = prefix; // now update the next dictionary entry with the new string
dictionary [next_string].terminator = c; // (but we're always one behind, so it's not the string just sent)
dictionary [next_string].extra_references = 0; // newly created string has not been referenced
referenced [next_string >> 3] &= ~(1 << (next_string & 7));
}
// If the dictionary is not full yet, we bump the maxcode and next_string and check to see if the
// dictionary is now full. If it is we set the dictionary_full flag and set next_string back to the
// beginning of the dictionary strings to start recycling them. Note that then maxcode will remain
// two less than total_codes because every string entry is available for matching, and the actual
// maximum code is reserved for EOF.
if (!dictionary_full) {
maxcode++;
if (++next_string > max_available_code) {
dictionary_full = 1;
maxcode--;
}
}
// If the dictionary is full we look for an entry to recycle starting at next_string (the one we
// created or recycled) plus one. We know there is one because at a minimum the string we just added
// has not been referenced. This also takes care of removing the entry to be recycled (which is
// possible/easy because no longer strings have been based on it).
if (dictionary_full) {
for (next_string++; next_string <= max_available_code || (next_string = FIRST_STRING); next_string++)
if (!(referenced [next_string >> 3] & (1 << (next_string & 7))))
break;
if (dictionary [dictionary [next_string].prefix].extra_references)
dictionary [dictionary [next_string].prefix].extra_references--;
else
referenced [dictionary [next_string].prefix >> 3] &= ~(1 << (dictionary [next_string].prefix & 7));
}
}
prefix = code; // the code we just received becomes the prefix for the next dictionary string entry
// (which we'll create once we find out the terminator)
}
free (dictionary); free (reverse_buffer); free (referenced);
return 0;
}

15
lzw-ab/lzwlib.h Normal file

@@ -0,0 +1,15 @@
////////////////////////////////////////////////////////////////////////////
// **** LZW-AB **** //
// Adjusted Binary LZW Compressor/Decompressor //
// Copyright (c) 2016-2020 David Bryant //
// All Rights Reserved //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
#ifndef LZWLIB_H_
#define LZWLIB_H_
int lzw_compress (void (*dst)(int,void*), void *dstctx, int (*src)(void*), void *srcctx, int maxbits);
int lzw_decompress (void (*dst)(int,void*), void *dstctx, int (*src)(void*), void *srcctx);
#endif /* LZWLIB_H_ */
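
For orientation, a minimal in-memory round trip through this interface might look like
the sketch below. The membuf type and the mem_read/mem_write callbacks are illustrative
only and not part of the library; lzwfilter.c and lzwtester.c are the real clients in
this commit.

    #include <stdio.h>
    #include <string.h>
    #include "lzwlib.h"

    typedef struct { unsigned char buf [4096]; unsigned int len, pos; } membuf;

    static int mem_read (void *ctx)              /* src callback: next byte, or EOF at the end */
    {
        membuf *m = ctx;
        return m->pos < m->len ? m->buf [m->pos++] : EOF;
    }

    static void mem_write (int value, void *ctx) /* dst callback: store one output byte */
    {
        membuf *m = ctx;
        if (m->len < sizeof (m->buf))
            m->buf [m->len++] = (unsigned char) value;
    }

    int main (void)
    {
        const char *msg = "hello hello hello hello";
        membuf in = { .pos = 0 }, packed = { .pos = 0 }, out = { .pos = 0 };

        memcpy (in.buf, msg, in.len = (unsigned int) strlen (msg));

        if (lzw_compress (mem_write, &packed, mem_read, &in, 16))
            return 1;                            /* bad maxbits or failed malloc() */

        if (lzw_decompress (mem_write, &out, mem_read, &packed))
            return 1;                            /* truncated/corrupt stream or failed malloc() */

        printf ("%u byte(s) in, %u compressed, %u restored\n", in.len, packed.len, out.len);
        return 0;
    }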

317
lzw-ab/lzwtester.c Normal file

@@ -0,0 +1,317 @@
////////////////////////////////////////////////////////////////////////////
// **** LZW-AB **** //
// Adjusted Binary LZW Compressor/Decompressor //
// Copyright (c) 2016-2020 David Bryant //
// All Rights Reserved //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include "lzwlib.h"
/* This module provides a command-line test harness for the lzw library.
* Given a list of files, it will read each one and byte-for-byte verify
* the data after a round-trip through a compression / decompression cycle
* at each of the 8 available maximum symbol size settings.
*
* It can also optionally perform fuzz testing by randomly corrupting the
* compressed bitstream. Obviously this will introduce integrity failures,
* but it should not cause a crash. It also has an "exhaustive" mode that
* creates hundreds of simulated images from each input file by successive
* truncation from both ends.
*/
static const char *usage =
" Usage: lzwtester [options] file [...]\n\n"
" Options: -1 ... -8 = test using only specified max symbol size (9 - 16)\n"
" -0 = cycle through all maximum symbol sizes (default)\n"
" -e = exhaustive test (by successive truncation)\n"
" -f = fuzz test (randomly corrupt compressed data)\n"
" -q = quiet mode (only reports errors and summary)\n\n"
" Web: Visit www.github.com/dbry/lzw-ab for latest version and info\n\n";
typedef struct {
unsigned int size, index, wrapped, byte_errors, first_error, fuzz_testing;
unsigned char *buffer;
} streamer;
static int read_buff (void *ctx)
{
streamer *stream = ctx;
if (stream->index == stream->size)
return EOF;
return stream->buffer [stream->index++];
}
static void write_buff (int value, void *ctx)
{
streamer *stream = ctx;
// for fuzz testing, randomly corrupt 1 byte in every 65536 (on average)
if (stream->fuzz_testing) {
static unsigned long long kernel = 0x3141592653589793;
kernel = ((kernel << 4) - kernel) ^ 1;
kernel = ((kernel << 4) - kernel) ^ 1;
kernel = ((kernel << 4) - kernel) ^ 1;
if (!(kernel >> 48))
value ^= (int)(kernel >> 40);
}
if (stream->index == stream->size) {
stream->index = 0;
stream->wrapped++;
}
stream->buffer [stream->index++] = value;
}
static void check_buff (int value, void *ctx)
{
streamer *stream = ctx;
if (stream->index == stream->size) {
stream->wrapped++;
return;
}
if (stream->buffer [stream->index] != value)
if (!stream->byte_errors++)
stream->first_error = stream->index;
stream->index++;
}
#ifdef _WIN32
long long DoGetFileSize (FILE *hFile)
{
LARGE_INTEGER Size;
HANDLE fHandle;
if (hFile == NULL)
return 0;
fHandle = (HANDLE)_get_osfhandle(_fileno(hFile));
if (fHandle == INVALID_HANDLE_VALUE)
return 0;
Size.u.LowPart = GetFileSize(fHandle, &Size.u.HighPart);
if (Size.u.LowPart == INVALID_FILE_SIZE && GetLastError() != NO_ERROR)
return 0;
return (long long)Size.QuadPart;
}
#else
long long DoGetFileSize (FILE *hFile)
{
struct stat statbuf;
if (!hFile || fstat (fileno (hFile), &statbuf) || !S_ISREG(statbuf.st_mode))
return 0;
return (long long) statbuf.st_size;
}
#endif
int main (int argc, char **argv)
{
int index, checked = 0, tests = 0, skipped = 0, errors = 0;
int set_maxbits = 0, quiet_mode = 0, exhaustive_mode = 0;
long long total_input_bytes = 0, total_output_bytes = 0;
streamer reader, writer, checker;
memset (&reader, 0, sizeof (reader));
memset (&writer, 0, sizeof (writer));
memset (&checker, 0, sizeof (checker));
if (argc < 2) {
printf ("%s", usage);
return 0;
}
for (index = 1; index < argc; ++index) {
const char *filename = argv [index];
int test_size, bytes_read, maxbits;
unsigned char *file_buffer;
long long file_size;
FILE *infile;
if (!strcmp (filename, "-q")) {
quiet_mode = 1;
continue;
}
if (!strcmp (filename, "-e")) {
exhaustive_mode = 1;
continue;
}
if (!strcmp (filename, "-f")) {
writer.fuzz_testing = 1;
continue;
}
if (strlen (filename) == 2 && filename [0] == '-' && filename [1] >= '0' && filename [1] <= '8') {
if (filename [1] > '0')
set_maxbits = filename [1] - '0' + 8;
else
set_maxbits = 0;
continue;
}
infile = fopen (filename, "rb");
if (!infile) {
printf ("\ncan't open file %s!\n", filename);
skipped++;
continue;
}
file_size = DoGetFileSize (infile);
if (!file_size) {
printf ("\ncan't get file size of %s (may be zero)!\n", filename);
skipped++;
continue;
}
if (file_size > 1024LL * 1024LL * 1024LL) {
printf ("\nfile %s is too big!\n", filename);
skipped++;
continue;
}
file_buffer = malloc (file_size);
writer.size = (unsigned int)(file_size * 2 + 10);
writer.buffer = malloc (writer.size);
if (!file_buffer || !writer.buffer) {
printf ("\nfile %s is too big!\n", filename);
if (writer.buffer) free (writer.buffer);
if (file_buffer) free (file_buffer);
skipped++;
continue;
}
bytes_read = fread (file_buffer, 1, (int) file_size, infile);
fclose (infile);
if (bytes_read != (int) file_size) {
printf ("\nfile %s could not be read!\n", filename);
free (writer.buffer);
free (file_buffer);
skipped++;
continue;
}
if (!quiet_mode)
printf ("\n");
test_size = file_size;
checked++;
do {
for (maxbits = set_maxbits ? set_maxbits : 9; maxbits <= (set_maxbits ? set_maxbits : 16); ++maxbits) {
int res, got_error = 0;
reader.buffer = file_buffer + (file_size - test_size) / 2;
reader.size = test_size;
reader.index = writer.index = writer.wrapped = 0;
if (lzw_compress (write_buff, &writer, read_buff, &reader, maxbits)) {
printf ("\nlzw_compress() returned error on file %s, maxbits = %d\n", filename, maxbits);
errors++;
continue;
}
if (writer.wrapped) {
printf ("\nover 100%% inflation on file %s, maxbits = %d!\n", filename, maxbits);
errors++;
continue;
}
checker.buffer = reader.buffer;
checker.size = reader.size;
checker.wrapped = checker.byte_errors = checker.index = 0;
reader.buffer = writer.buffer;
reader.size = writer.index;
reader.index = 0;
res = lzw_decompress (check_buff, &checker, read_buff, &reader);
reader.buffer = checker.buffer;
reader.size = checker.size;
got_error = res || checker.index != checker.size || checker.wrapped || checker.byte_errors;
if (!quiet_mode || got_error)
printf ("file %s, maxbits = %2d: %u bytes --> %u bytes, %.2f%%\n", filename, maxbits,
reader.size, writer.index, writer.index * 100.0 / reader.size);
if (got_error) {
if (res)
printf ("decompressor returned an error\n");
if (!checker.index)
printf ("decompression didn't generate any data\n");
else if (checker.index != checker.size)
printf ("decompression terminated %u bytes early\n", checker.size - checker.index);
else if (checker.wrapped)
printf ("decompression generated %u extra bytes\n", checker.wrapped);
if (checker.byte_errors)
printf ("there were %u byte data errors starting at index %u\n",
checker.byte_errors, checker.first_error);
else if (checker.index != checker.size || checker.wrapped)
printf ("(but the data generated was all correct)\n");
printf ("\n");
errors++;
}
else {
total_input_bytes += reader.size;
total_output_bytes += writer.index;
}
tests++;
if (exhaustive_mode)
test_size -= (test_size + 98) / 100;
}
} while (exhaustive_mode && test_size > 1 && test_size > file_size / 100);
free (writer.buffer);
free (file_buffer);
}
if (errors)
printf ("\n***** %d errors detected in %d tests using %d files (%d skipped) *****\n\n", errors, tests, checked, skipped);
else {
printf ("\nsuccessfully ran %d tests using %d files (%d skipped) with no errors detected\n", tests, checked, skipped);
printf ("cumulative results: %llu bytes --> %llu bytes, %.2f%%\n\n", total_input_bytes, total_output_bytes,
total_output_bytes * 100.0 / total_input_bytes);
}
return errors;
}

BIN
other/lzw-ab-master.zip Normal file

Binary file not shown.

15
zl77/Makefile Normal file

@@ -0,0 +1,15 @@
CC = gcc
# SRCS = $(wildcard *.c)
SRCS = zl77.c ../huffman/huffman_.c
STR = $(subst from,to,from your heart)
all:
$(CC) $(SRCS) -o hello
clean:
rm -rf *.exe

362
zl77/zl77.c Normal file

@@ -0,0 +1,362 @@
#include "zl77.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "../huffman/huffman_.h"
// Implementation of the LZ77 algorithm
#define DBG_WARN printf
#define DBG_LOG printf
// step size (chunk size) of the data buffers
#define LZ77_BUFF_STEP_SIZE 10
typedef struct _buff_item{
uint8_t data[LZ77_BUFF_STEP_SIZE];
struct _buff_item *next;
struct _buff_item *prev;
}buff_item;
typedef struct _buff_def{
buff_item *current;// most recently accessed chunk (cache)
buff_item *head;// first chunk in the list
int used;// number of bytes stored
int all;// total allocated capacity in bytes
int current_index;// a byte index that falls inside the "current" chunk
int bit_used;// number of bits stored (used by the flag-bit buffer)
}buff_def;
typedef struct _zl77_def
{
int dict_len;// dictionary (search window) length
int tran_len;// look-ahead buffer length
int index;// window position
buff_def buff_chars;// literal-character buffer
buff_def buff_pos;// (position,length) token buffer
buff_def buff_bits;// flag bits marking each item (1 = literal character, 0 = token)
const uint8_t *in;
int in_len;
uint8_t cmp_pos;// distance of the match that was found
uint8_t cmp_len;// length of the match that was found
uint8_t cmp_skip;// distance to advance the window
}zl77_def;
uint8_t zl77_buff_get_byte(buff_def *buff,int index);
void zl77_buff_set_byte(buff_def *buff,int index,uint8_t d);
void zl77_buff_append_bit(buff_def *buff,int bit);
void zl77_buff_append_byte(buff_def *buff, const uint8_t d);
int zl77_buff_get_bit(buff_def *buff, int index);
zl77_def *zl77_creat(void)
{
zl77_def *z=calloc(1,sizeof(zl77_def));
z->dict_len=5;
z->tran_len=3;
return z;
}
// free the buffer chunks
void zl77_del_buff(buff_def *buff)
{
buff_item *t=buff->head;
buff_item *o;
while(t){
o=t;
t=t->next;
free(o);
}
}
// append one byte
void zl77_buff_append_byte(buff_def *buff, const uint8_t d)
{
if(buff->used>=buff->all){
buff_item *t=buff->head;
buff_item *t_old=0;
while (t)
{
t_old=t;
t=t->next;
}
t=calloc(1,sizeof(buff_item));
if(t_old){
t_old->next=t;
t->prev=t_old;
}else{
buff->head=t;
}
buff->all+=LZ77_BUFF_STEP_SIZE;
buff->current=t;
buff->current_index=buff->used;
}
while((buff->used/LZ77_BUFF_STEP_SIZE)>(buff->current_index/LZ77_BUFF_STEP_SIZE)){
buff->current=buff->current->next;
buff->current_index+=LZ77_BUFF_STEP_SIZE;
}
buff->current->data[buff->used%LZ77_BUFF_STEP_SIZE]=d;
buff->used++;
}
// append one bit
void zl77_buff_append_bit(buff_def *buff,int bit)
{
if(buff->bit_used/8>=buff->used){
zl77_buff_append_byte(buff,0);
}
uint8_t d=zl77_buff_get_byte(buff,buff->bit_used/8);
d|=bit<<(buff->bit_used%8);
zl77_buff_set_byte(buff,-1,d);
buff->bit_used++;
}
// move the cached "current" chunk so that it contains the given index
static void zl77_buff_adjust_current(buff_def *buff,int index){
while((index/LZ77_BUFF_STEP_SIZE)>(buff->current_index/LZ77_BUFF_STEP_SIZE)){
buff->current=buff->current->next;
buff->current_index+=LZ77_BUFF_STEP_SIZE;
}
while((index/LZ77_BUFF_STEP_SIZE)<(buff->current_index/LZ77_BUFF_STEP_SIZE)){
buff->current=buff->current->prev;
buff->current_index-=LZ77_BUFF_STEP_SIZE;
}
}
// get the byte at the given index (a negative index counts from the end)
uint8_t zl77_buff_get_byte(buff_def *buff,int index){
if(index<0) index=buff->used+index;
if(index>=buff->used||index<0) return 0;
zl77_buff_adjust_current(buff,index);
return buff->current->data[index%LZ77_BUFF_STEP_SIZE];
}
// set the byte at the given index (a negative index counts from the end)
void zl77_buff_set_byte(buff_def *buff,int index,uint8_t d){
if(index<0) index=buff->used+index;
if(index>=buff->used||index<0) return ;
zl77_buff_adjust_current(buff,index);
buff->current->data[index%LZ77_BUFF_STEP_SIZE]=d;
}
// get the bit at the given index
int zl77_buff_get_bit(buff_def *buff, int index){
uint8_t d=zl77_buff_get_byte(buff,index/8);
return (d&(1<<(index%8)))?1:0;
}
void zl77_buff_print(buff_def *buff)
{
DBG_LOG("buff:[");
for(int i=0;i<buff->used;i++){
DBG_LOG("%02x ",zl77_buff_get_byte(buff,i));
}
DBG_LOG("]\n");
}
static uint8_t zl77_get_char(zl77_def *z,int index)
{
// DBG_LOG("get_char:[%d]\n",index);
if(index<0||index>=z->in_len) return 0;
return z->in[index];
}
// Compare at the given index: returns 0 if a match was found, 1 if not.
// 0 means a (pos,len) token is recorded, 1 means the raw byte is recorded.
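// Example (for illustration): with in = "abcabc" and dict_len = 5, indices 0..2 find no
// earlier match and are emitted as literal bytes; at index 3 the scan finds 'a' at
// distance 3 and extends the match to length 3, so a single (pos=3,len=3) token covers
// the second "abc" and cmp_skip advances the window by 3.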
static int zl77_cmp(zl77_def *z,int index){
uint8_t pos=0;
uint8_t len=0;
// DBG_LOG("index=%d\n",index);
for(int i=z->dict_len;i>0;i--){
if(zl77_get_char(z,index-i)==zl77_get_char(z,index)){
pos=i;
len=0;
for(int j=0;j<i;j++){
if(zl77_get_char(z,index-i+j)==zl77_get_char(z,index+j))
{
// DBG_LOG("%c|%c \n",zl77_get_char(z,index-i+j),zl77_get_char(z,index+j));
len++;
if(len>z->cmp_len){
z->cmp_len=len;
z->cmp_pos=pos;
}
}else{
len=0;
break;
}
}
}
}
if((pos|len)==0){
z->cmp_skip=1;
return 1;
}
else{
// for(int i=0;i<z->cmp_len;i++){
// DBG_LOG("%02x|%02x ",zl77_get_char(z,index-z->cmp_pos+i),zl77_get_char(z,index+i));
// }
z->cmp_skip=z->cmp_len;
return 0;
}
}
static inline void zl77_append_u32(uint8_t *data,int *index,uint32_t value){
data[(*index)++]=value&0xff;
data[(*index)++]=(value>>8)&0xff;
data[(*index)++]=(value>>16)&0xff;
data[(*index)++]=(value>>24)&0xff;
}
static inline uint32_t zl77_get_u32(const uint8_t *data,int index){
uint32_t ret=0;
for(int i=0;i<4;i++){
ret|=data[index+i]<<(8*i);
}
return ret;
}
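// Layout of the encoded stream produced below (and read back by zl77_decode):
// a 16-byte header of four little-endian u32 values, followed by three sections.
//   bytes  0..3   size_chars  - length of the literal-character section
//   bytes  4..7   size_pos    - length of the (pos,len) token section (one byte per token)
//   bytes  8..11  size_bits   - length of the flag section (one bit per item: 1 = literal, 0 = token)
//   bytes 12..15  size_unpack - size of the original, uncompressed data
//   bytes 16..    literal characters, then tokens, then flag bits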
int zl77_encode(const uint8_t *in,const int in_len,uint8_t **out,int *out_len)
{
int ret;
zl77_def *z=zl77_creat();
z->in=in;
z->in_len=in_len;
for(int i=0;i<z->in_len;){
z->cmp_pos=0;
z->cmp_len=0;
ret=zl77_cmp(z,i);
if(ret){
zl77_buff_append_byte(&z->buff_chars,zl77_get_char(z,i));
// DBG_LOG("char(%c);",zl77_get_char(z,i));
}else{
zl77_buff_append_byte(&z->buff_pos,((z->cmp_pos&0xf)<<4)|(z->cmp_len&0xf));
// DBG_LOG("pos(%d,%d);",z->cmp_pos,z->cmp_len);
if((z->cmp_pos|z->cmp_len)==0){
exit(1);
}
}
zl77_buff_append_bit(&z->buff_bits,ret);
i+=z->cmp_skip;
}
// DBG_LOG("\n");
// zl77_buff_print(&z->buff_chars);
// zl77_buff_print(&z->buff_pos);
// zl77_buff_print(&z->buff_bits);
uint32_t size_chars=z->buff_chars.used;
uint32_t size_pos=z->buff_pos.used;
uint32_t size_bits=z->buff_bits.used;
uint32_t size_unpack=z->in_len;
int index=0;
(*out_len)=16+size_chars+size_pos+size_bits;
(*out)=calloc(*out_len,sizeof(uint8_t));
zl77_append_u32(*out,&index,size_chars);
zl77_append_u32(*out,&index,size_pos);
zl77_append_u32(*out,&index,size_bits);
zl77_append_u32(*out,&index,size_unpack);
for(int i=0;i<size_chars;i++){
(*out)[index++]=zl77_buff_get_byte(&z->buff_chars,i);
}
for(int i=0;i<size_pos;i++){
(*out)[index++]=zl77_buff_get_byte(&z->buff_pos,i);
}
for(int i=0;i<size_bits;i++){
(*out)[index++]=zl77_buff_get_byte(&z->buff_bits,i);
}
zl77_del_buff(&z->buff_chars);
zl77_del_buff(&z->buff_pos);
zl77_del_buff(&z->buff_bits);
free(z);
DBG_LOG("in_len=%d,out_len=%d\n",in_len,*out_len);
return 0;
}
static inline int zl77_get_bit(const uint8_t *data,int index){
uint8_t c=data[index/8];
return c&(1<<(index%8))?1:0;
}
int zl77_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len)
{
int ret;
int index_chars=0;
int index_pos=0;
int index_bits=0;
uint8_t cmp_pos,cmp_len,ch;
zl77_def *z=zl77_creat();
uint32_t size_chars=zl77_get_u32(in,0);
uint32_t size_pos=zl77_get_u32(in,4);
uint32_t size_bits=zl77_get_u32(in,8);
uint32_t size_unpack=zl77_get_u32(in,12);
const uint8_t *chars=in+16;
const uint8_t *pos=in+16+size_chars;
const uint8_t *bits=in+16+size_chars+size_pos;
(*out)=calloc(size_unpack+1,sizeof(uint8_t));
for(int i=0;i<size_unpack;){
ret=zl77_get_bit(bits,index_bits);
index_bits++;
// DBG_LOG("index:%d,bit=%d\n",index_bits,ret);
if(ret){
ch=chars[index_chars++];
(*out)[i++]=ch;
// DBG_LOG("char(%c)",ch);
}else{
cmp_pos=pos[index_pos]>>4;
cmp_len=pos[index_pos]&0xf;index_pos++;
// DBG_LOG("pos(%d,%d)",cmp_pos,cmp_len);
memcpy(&(*out)[i],&(*out)[i-cmp_pos],cmp_len);
i+=cmp_len;
}
}
// DBG_LOG("\n");
(*out_len)=size_unpack;
free(z);
return 0;
}
int main(int argc,const char *argv[])
{
if(argc<2) return 1;
uint8_t *encode_data=0;
int encode_len=0;
uint8_t *decode_data=0;
int decode_len=0;
hm_encode(argv[1],strlen(argv[1]),&encode_data,&encode_len);
// for(int i=0;i<encode_len;i++)
// {
// DBG_LOG("%02x,",encode_data[i]);
// }
// DBG_LOG("\n");
hm_encode(encode_data,encode_len,&decode_data,&decode_len);
// zl77_decode(encode_data,encode_len,&decode_data,&decode_len);
// printf("decode:%s\n",decode_data);
return 0;
}

25
zl77/zl77.h Normal file

@@ -0,0 +1,25 @@
#ifndef zl77_h__
#define zl77_h__
#include "stdint.h"
int zl77_encode(const uint8_t *in,const int in_len,uint8_t **out,int *out_len);
int zl77_decode(const uint8_t* in, const int in_len, uint8_t** out, int* out_len);
#endif
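
For orientation, a minimal round trip through this header might look like the sketch
below (illustrative only: both functions allocate their output buffer internally, so
the caller frees it, and zl77.c currently defines its own test main, which would have
to be compiled out to link this):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include "zl77.h"

    int main (void)
    {
        const char *msg = "abcabcabcabc";
        uint8_t *packed = 0, *restored = 0;
        int packed_len = 0, restored_len = 0;

        zl77_encode ((const uint8_t *) msg, (int) strlen (msg), &packed, &packed_len);
        zl77_decode (packed, packed_len, &restored, &restored_len);

        /* the decoder's output is NUL-terminated (calloc of size_unpack + 1) */
        printf ("%d byte(s) in, %d packed, restored: %s\n",
                (int) strlen (msg), packed_len, (const char *) restored);

        free (packed);
        free (restored);
        return 0;
    }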