123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549 |
- /*
- Copyright (c) 2011, 2012, Simon Howard
- Permission to use, copy, modify, and/or distribute this software
- for any purpose with or without fee is hereby granted, provided
- that the above copyright notice and this permission notice appear
- in all copies.
- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
- AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- //
- // Decoder for PMarc -pm2- compression format. PMarc is a variant
- // of LHA commonly used on the MSX computer architecture.
- //
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <inttypes.h>
- #include "lha_decoder.h"
- #include "bit_stream_reader.c"
- #include "pma_common.c"
- // Include tree decoder.
- typedef uint8_t TreeElement;
- #include "tree_decode.c"
- // Size of the ring buffer (in bytes) used to store past history
- // for copies.
- #define RING_BUFFER_SIZE 8192
- // Maximum number of bytes that might be placed in the output buffer
- // from a single call to lha_pm2_decoder_read (largest copy size).
- #define OUTPUT_BUFFER_SIZE 256
- // Number of tree elements in the code tree.
- #define CODE_TREE_ELEMENTS 65
- // Number of tree elements in the offset tree.
- #define OFFSET_TREE_ELEMENTS 17
- typedef enum {
- PM2_REBUILD_UNBUILT, // At start of stream
- PM2_REBUILD_BUILD1, // After 1KiB
- PM2_REBUILD_BUILD2, // After 2KiB
- PM2_REBUILD_BUILD3, // After 4KiB
- PM2_REBUILD_CONTINUING, // 8KiB onwards...
- } PM2RebuildState;
- typedef struct {
- BitStreamReader bit_stream_reader;
- // State of decode tree.
- PM2RebuildState tree_state;
- // Number of bytes until we initiate a tree rebuild.
- size_t tree_rebuild_remaining;
- // History ring buffer, for copies:
- uint8_t ringbuf[RING_BUFFER_SIZE];
- unsigned int ringbuf_pos;
- // History linked list, for adaptively encoding byte values.
- HistoryLinkedList history_list;
- // Array representing the huffman tree used for representing
- // code values. A given node of the tree has children
- // code_tree[n] and code_tree[n + 1]. code_tree[0] is the
- // root node.
- TreeElement code_tree[CODE_TREE_ELEMENTS];
- // If zero, we don't need an offset tree:
- int need_offset_tree;
- // Array representing huffman tree used to look up offsets.
- // Same format as code_tree[].
- TreeElement offset_tree[OFFSET_TREE_ELEMENTS];
- } LHAPM2Decoder;
- // Decode table for history value. Characters that appeared recently in
- // the history are more likely than ones that appeared a long time ago,
- // so the history value is huffman coded so that small values require
- // fewer bits. The history value is then used to search within the
- // history linked list to get the actual character.
- static const VariableLengthTable history_decode[] = {
- { 0, 3 }, // 0 + (1 << 3) = 8
- { 8, 3 }, // 8 + (1 << 3) = 16
- { 16, 4 }, // 16 + (1 << 4) = 32
- { 32, 5 }, // 32 + (1 << 5) = 64
- { 64, 5 }, // 64 + (1 << 5) = 96
- { 96, 5 }, // 96 + (1 << 5) = 128
- { 128, 6 }, // 128 + (1 << 6) = 192
- { 192, 6 }, // 192 + (1 << 6) = 256
- };
- // Decode table for copies. As with history_decode[], small copies
- // are more common, and require fewer bits.
- static const VariableLengthTable copy_decode[] = {
- { 17, 3 }, // 17 + (1 << 3) = 25
- { 25, 3 }, // 25 + (1 << 3) = 33
- { 33, 5 }, // 33 + (1 << 5) = 65
- { 65, 6 }, // 65 + (1 << 6) = 129
- { 129, 7 }, // 129 + (1 << 7) = 256
- { 256, 0 }, // 256 (unique value)
- };
- // Initialize PMA decoder.
- static int lha_pm2_decoder_init(void *data, LHADecoderCallback callback,
- void *callback_data)
- {
- LHAPM2Decoder *decoder = data;
- bit_stream_reader_init(&decoder->bit_stream_reader,
- callback, callback_data);
- // Tree has not been built yet. It needs to be built on
- // the first call to read().
- decoder->tree_state = PM2_REBUILD_UNBUILT;
- decoder->tree_rebuild_remaining = 0;
- // Initialize ring buffer contents.
- memset(&decoder->ringbuf, ' ', RING_BUFFER_SIZE);
- decoder->ringbuf_pos = 0;
- // Init history lookup list.
- init_history_list(&decoder->history_list);
- // Initialize the lookup trees to a known state.
- init_tree(decoder->code_tree, CODE_TREE_ELEMENTS);
- init_tree(decoder->offset_tree, OFFSET_TREE_ELEMENTS);
- return 1;
- }
- // Read the list of code lengths to use for the code tree and construct
- // the code_tree structure.
- static int read_code_tree(LHAPM2Decoder *decoder)
- {
- uint8_t code_lengths[31];
- int num_codes, min_code_length, length_bits, val;
- unsigned int i;
- // Read the number of codes in the tree.
- num_codes = read_bits(&decoder->bit_stream_reader, 5);
- // Read min_code_length, which is used as an offset.
- min_code_length = read_bits(&decoder->bit_stream_reader, 3);
- if (min_code_length < 0 || num_codes < 0) {
- return 0;
- }
- // Store flag variable indicating whether we want to read
- // the offset tree as well.
- decoder->need_offset_tree
- = num_codes >= 10
- && !(num_codes == 29 && min_code_length == 0);
- // Minimum length of zero means a tree containing a single code.
- if (min_code_length == 0) {
- set_tree_single(decoder->code_tree, num_codes - 1);
- return 1;
- }
- // How many bits are used to represent each table entry?
- length_bits = read_bits(&decoder->bit_stream_reader, 3);
- if (length_bits < 0) {
- return 0;
- }
- // Read table of code lengths:
- for (i = 0; i < (unsigned int) num_codes; ++i) {
- // Read a table entry. A value of zero represents an
- // unused code. Otherwise the value represents
- // an offset from the minimum length (previously read).
- val = read_bits(&decoder->bit_stream_reader,
- (unsigned int) length_bits);
- if (val < 0) {
- return 0;
- } else if (val == 0) {
- code_lengths[i] = 0;
- } else {
- code_lengths[i] = (uint8_t) (min_code_length + val - 1);
- }
- }
- // Build the tree.
- build_tree(decoder->code_tree, sizeof(decoder->code_tree),
- code_lengths, (unsigned int) num_codes);
- return 1;
- }
- // Read the code lengths for the offset tree and construct the offset
- // tree lookup table.
- static int read_offset_tree(LHAPM2Decoder *decoder,
- unsigned int num_offsets)
- {
- uint8_t offset_lengths[8];
- unsigned int off;
- unsigned int single_offset, num_codes;
- int len;
- if (!decoder->need_offset_tree) {
- return 1;
- }
- // Read 'num_offsets' 3-bit length values. For each offset
- // value 'off', offset_lengths[off] is the length of the
- // code that will represent 'off', or 0 if it will not
- // appear within the tree.
- num_codes = 0;
- single_offset = 0;
- for (off = 0; off < num_offsets; ++off) {
- len = read_bits(&decoder->bit_stream_reader, 3);
- if (len < 0) {
- return 0;
- }
- offset_lengths[off] = (uint8_t) len;
- // Track how many actual codes were in the tree.
- if (len != 0) {
- single_offset = off;
- ++num_codes;
- }
- }
- // If there was a single code, this is a single node tree.
- if (num_codes == 1) {
- set_tree_single(decoder->offset_tree, single_offset);
- return 1;
- }
- // Build the tree.
- build_tree(decoder->offset_tree, sizeof(decoder->offset_tree),
- offset_lengths, num_offsets);
- return 1;
- }
- // Rebuild the decode trees used to compress data. This is called when
- // decoder->tree_rebuild_remaining reaches zero.
- static void rebuild_tree(LHAPM2Decoder *decoder)
- {
- switch (decoder->tree_state) {
- // Initial tree build, from start of stream:
- case PM2_REBUILD_UNBUILT:
- read_code_tree(decoder);
- read_offset_tree(decoder, 5);
- decoder->tree_state = PM2_REBUILD_BUILD1;
- decoder->tree_rebuild_remaining = 1024;
- break;
- // Tree rebuild after 1KiB of data has been read:
- case PM2_REBUILD_BUILD1:
- read_offset_tree(decoder, 6);
- decoder->tree_state = PM2_REBUILD_BUILD2;
- decoder->tree_rebuild_remaining = 1024;
- break;
- // Tree rebuild after 2KiB of data has been read:
- case PM2_REBUILD_BUILD2:
- read_offset_tree(decoder, 7);
- decoder->tree_state = PM2_REBUILD_BUILD3;
- decoder->tree_rebuild_remaining = 2048;
- break;
- // Tree rebuild after 4KiB of data has been read:
- case PM2_REBUILD_BUILD3:
- if (read_bit(&decoder->bit_stream_reader) == 1) {
- read_code_tree(decoder);
- }
- read_offset_tree(decoder, 8);
- decoder->tree_state = PM2_REBUILD_CONTINUING;
- decoder->tree_rebuild_remaining = 4096;
- break;
- // Tree rebuild after 8KiB of data has been read,
- // and every 4KiB after that:
- case PM2_REBUILD_CONTINUING:
- if (read_bit(&decoder->bit_stream_reader) == 1) {
- read_code_tree(decoder);
- read_offset_tree(decoder, 8);
- }
- decoder->tree_rebuild_remaining = 4096;
- break;
- }
- }
- static void output_byte(LHAPM2Decoder *decoder, uint8_t *buf,
- size_t *buf_len, uint8_t b)
- {
- // Add to history ring buffer.
- decoder->ringbuf[decoder->ringbuf_pos] = b;
- decoder->ringbuf_pos = (decoder->ringbuf_pos + 1) % RING_BUFFER_SIZE;
- // Add to output buffer.
- buf[*buf_len] = b;
- ++*buf_len;
- // Update history chain.
- update_history_list(&decoder->history_list, b);
- // Count down until it is time to perform a rebuild of the
- // lookup trees.
- --decoder->tree_rebuild_remaining;
- if (decoder->tree_rebuild_remaining == 0) {
- rebuild_tree(decoder);
- }
- }
- // Read a single byte from the input stream and add it to the output
- // buffer.
- static void read_single_byte(LHAPM2Decoder *decoder, unsigned int code,
- uint8_t *buf, size_t *buf_len)
- {
- int offset;
- uint8_t b;
- offset = decode_variable_length(&decoder->bit_stream_reader,
- history_decode, code);
- if (offset < 0) {
- return;
- }
- b = find_in_history_list(&decoder->history_list, (uint8_t) offset);
- output_byte(decoder, buf, buf_len, b);
- }
- // Calculate how many bytes from history to copy:
- static int history_get_count(LHAPM2Decoder *decoder, unsigned int code)
- {
- // How many bytes to copy? A small value represents the
- // literal number of bytes to copy; larger values are a header
- // for a variable length value to be decoded.
- if (code < 15) {
- return (int) code + 2;
- } else {
- return decode_variable_length(&decoder->bit_stream_reader,
- copy_decode, code - 15);
- }
- }
- // Calculate the offset within history at which to start copying:
- static int history_get_offset(LHAPM2Decoder *decoder, unsigned int code)
- {
- unsigned int bits;
- int result, val;
- result = 0;
- // Calculate number of bits to read.
- // Code of zero indicates a simple 6-bit value giving the offset.
- if (code == 0) {
- bits = 6;
- }
- // Mid-range encoded offset value.
- // Read a code using the offset tree, indicating the length
- // of the offset value to follow. The code indicates the
- // number of bits (values 0-7 = 6-13 bits).
- else if (code < 20) {
- val = read_from_tree(&decoder->bit_stream_reader,
- decoder->offset_tree);
- if (val < 0) {
- return -1;
- } else if (val == 0) {
- bits = 6;
- } else {
- bits = (unsigned int) val + 5;
- result = 1 << bits;
- }
- }
- // Large copy values start from offset zero.
- else {
- return 0;
- }
- // Read a number of bits representing the offset value. The
- // number of length of this value is variable, and is calculated
- // above.
- val = read_bits(&decoder->bit_stream_reader, bits);
- if (val < 0) {
- return -1;
- }
- result += val;
- return result;
- }
- static void copy_from_history(LHAPM2Decoder *decoder, unsigned int code,
- uint8_t *buf, size_t *buf_len)
- {
- int to_copy, offset;
- unsigned int i, pos, start;
- // Read number of bytes to copy and offset within history to copy
- // from.
- to_copy = history_get_count(decoder, code);
- offset = history_get_offset(decoder, code);
- if (to_copy < 0 || offset < 0) {
- return;
- }
- // Sanity check to prevent the potential for buffer overflow.
- if (to_copy > OUTPUT_BUFFER_SIZE) {
- return;
- }
- // Perform copy.
- start = decoder->ringbuf_pos + RING_BUFFER_SIZE - 1
- - (unsigned int) offset;
- for (i = 0; i < (unsigned int) to_copy; ++i) {
- pos = (start + i) % RING_BUFFER_SIZE;
- output_byte(decoder, buf, buf_len, decoder->ringbuf[pos]);
- }
- }
- // Decode data and store it into buf[], returning the number of
- // bytes decoded.
- static size_t lha_pm2_decoder_read(void *data, uint8_t *buf)
- {
- LHAPM2Decoder *decoder = data;
- size_t result;
- int code;
- // On first pass through, build initial lookup trees.
- if (decoder->tree_state == PM2_REBUILD_UNBUILT) {
- // First bit in stream is discarded?
- read_bit(&decoder->bit_stream_reader);
- rebuild_tree(decoder);
- }
- result = 0;
- code = read_from_tree(&decoder->bit_stream_reader, decoder->code_tree);
- if (code < 0) {
- return 0;
- }
- if (code < 8) {
- read_single_byte(decoder, (unsigned int) code, buf, &result);
- } else {
- copy_from_history(decoder, (unsigned int) code - 8,
- buf, &result);
- }
- return result;
- }
- LHADecoderType lha_pm2_decoder = {
- lha_pm2_decoder_init,
- NULL,
- lha_pm2_decoder_read,
- sizeof(LHAPM2Decoder),
- OUTPUT_BUFFER_SIZE,
- RING_BUFFER_SIZE
- };
|