spicetools/util/lz77.cpp

261 lines
7.8 KiB
C++
Raw Permalink Normal View History

2024-08-28 15:10:34 +00:00
#include "lz77.h"
#include <cstdlib>
namespace util::lz77 {
/*
* Configuration Values
*/
static const size_t LZ_WINDOW_SIZE = 0x1000;
static const size_t LZ_WINDOW_MASK = LZ_WINDOW_SIZE - 1;
static const size_t LZ_THRESHOLD = 3;
static const size_t LZ_THRESHOLD_INPLACE = 0xA;
static const size_t LZ_LOOK_RANGE = 0x200;
static const size_t LZ_MAX_LEN = 0xF + LZ_THRESHOLD;
static const size_t LZ_MAX_BUFFER = 0x10 + 1;
/*
* Dummy Compression
* Results in even bigger output size but is fast af.
*/
uint8_t *compress_stub(uint8_t *input, size_t input_length, size_t *compressed_length) {
uint8_t *output = (uint8_t *) malloc(input_length + input_length / 8 + 9);
uint8_t *cur_byte = &output[0];
// copy data blocks
for (size_t n = 0; n < input_length / 8; n++) {
// fake flag
*cur_byte++ = 0xFF;
// uncompressed data
for (size_t i = 0; i < 8; i++) {
*cur_byte++ = input[n * 8 + i];
}
}
// remaining bytes
int extra_bytes = input_length % 8;
if (extra_bytes == 0) {
*cur_byte++ = 0x00;
} else {
*cur_byte++ = 0xFF >> (8 - extra_bytes);
for (size_t i = input_length - extra_bytes; i < input_length; i++) {
*cur_byte++ = input[i];
}
for (size_t i = 0; i < 4; i++) {
*cur_byte++ = 0x00;
}
}
// calculate size
*compressed_length = (size_t) (cur_byte - &output[0]);
return output;
}
/*
* Compression Helpers
*/
static size_t match_current(uint8_t *window, size_t pos, size_t length_max,
uint8_t *data, uint8_t data_length, size_t data_pos) {
// compare data
size_t length = 0;
while ((data_pos + length < data_length) &&
(length < length_max) &&
(window[(pos + length) & LZ_WINDOW_MASK] == data[data_pos + length] &&
length < LZ_MAX_LEN)) {
length++;
}
// return detected length
return length;
}
static bool match_window(uint8_t *window, size_t pos, uint8_t *data, size_t data_length, size_t data_pos,
size_t *match_pos, size_t *match_length) {
// search window for best match
size_t pos_max = 0, length_max = 0;
for (size_t i = LZ_THRESHOLD; i < LZ_LOOK_RANGE; i++) {
// check for match
size_t length = match_current(window, (pos - i) & LZ_WINDOW_MASK, i, data, data_length, data_pos);
// check if threshold is reached
if (length >= LZ_THRESHOLD_INPLACE) {
*match_pos = i;
*match_length = length;
return true;
}
// update max values
if (length >= LZ_THRESHOLD) {
pos_max = i;
length_max = length;
}
}
// check if threshold is reached
if (length_max >= LZ_THRESHOLD) {
*match_pos = pos_max;
*match_length = length_max;
return true;
} else {
return false;
}
}
/*
* This one actually compresses the data.
*/
std::vector<uint8_t> compress(uint8_t *input, size_t input_length) {
// output buffer
std::vector<uint8_t> output;
output.reserve(input_length);
// window buffer
uint8_t *window = new uint8_t[LZ_WINDOW_SIZE];
size_t window_pos = 0;
// working buffer
uint8_t *buffer = new uint8_t[LZ_MAX_BUFFER];
size_t buffer_pos = 0;
// state
uint8_t flag;
size_t pad = 3;
// iterate input bytes
size_t input_pos = 0;
while (input_pos < input_length) {
// reset state
flag = 0;
buffer_pos = 0;
// iterate flag bytes
for (size_t bit_pos = 0; bit_pos < 8; bit_pos++) {
// don't match if data is bigger than input
if (input_pos >= input_length) {
pad = 0;
flag = flag >> (8 - bit_pos);
buffer[buffer_pos++] = 0;
buffer[buffer_pos++] = 0;
}
// match window
uint8_t bit_value;
size_t match_pos, match_length;
if (match_window(window, window_pos, input, input_length, input_pos, &match_pos, &match_length)) {
// apply match
buffer[buffer_pos++] = match_pos >> 4;
buffer[buffer_pos++] = ((match_pos & 0xF) << 4) | ((match_length - LZ_THRESHOLD) & 0xF);
bit_value = 0;
for (size_t n = 0; n < match_length; n++) {
window[window_pos++ & LZ_WINDOW_MASK] = input[input_pos++];
}
} else {
// no match found
buffer[buffer_pos++] = input[input_pos];
window[window_pos++] = input[input_pos++];
bit_value = 1;
}
// add bit to flags
flag = (flag >> 1) & 0x7F;
flag |= bit_value == 0 ? 0 : 0x80;
// update window
window_pos &= LZ_WINDOW_MASK;
}
// write to output
output.push_back(flag);
for (size_t i = 0; i < buffer_pos; i++) {
output.push_back(buffer[i]);
}
}
// padding
for (size_t i = 0; i < pad; i++) {
output.push_back(0x00);
}
// clean up and return result
delete[] window;
delete[] buffer;
return output;
}
std::vector<uint8_t> decompress(uint8_t *input, size_t input_length) {
// output buffer
std::vector<uint8_t> output;
output.reserve(input_length);
// create window
uint8_t *window = new uint8_t[LZ_WINDOW_SIZE];
size_t window_pos = 0;
// iterate input data
size_t input_pos = 0;
while (input_pos < input_length) {
// read flag
uint8_t flag = input[input_pos++];
// iterate flag bits
for (size_t bit_pos = 0; bit_pos < 8; bit_pos++) {
// check flag bit
if ((flag >> bit_pos) & 1) {
// copy data from input
output.push_back(input[input_pos]);
window[window_pos++] = input[input_pos++];
window_pos &= LZ_WINDOW_MASK;
} else if (input_pos + 1 < input_length) {
// check word
size_t word = (input[input_pos] << 8) | input[input_pos + 1];
if (word == 0) {
// detected end
delete[] window;
return output;
} else {
// skip word
input_pos += 2;
// get window
size_t position = (window_pos - (word >> 4)) & LZ_WINDOW_MASK;
size_t length = (word & 0x0F) + LZ_THRESHOLD;
// copy data from window
for (size_t i = 0; i < length; i++) {
uint8_t data = window[position++ & LZ_WINDOW_MASK];
output.push_back(data);
window[window_pos++] = data;
window_pos &= LZ_WINDOW_MASK;
}
}
}
}
}
// clean up and return result
delete[] window;
return output;
}
}