lh_new_decoder.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. /*
  2. Copyright (c) 2011, 2012, Simon Howard
  3. Permission to use, copy, modify, and/or distribute this software
  4. for any purpose with or without fee is hereby granted, provided
  5. that the above copyright notice and this permission notice appear
  6. in all copies.
  7. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
  8. WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
  9. WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
  10. AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
  11. CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  12. LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
  13. NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  14. CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. // Decoder for "new-style" LHA algorithms, used with LHA v2 and onwards
  17. // (-lh4-, -lh5-, -lh6-, -lh7-).
  18. //
  19. // This file is designed to be a template. It is #included by other
  20. // files to generate an optimized decoder.
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <inttypes.h>
  25. #include "lha_decoder.h"
  26. #include "bit_stream_reader.c"
  27. // Include tree decoder.
  28. typedef uint16_t TreeElement;
  29. #include "tree_decode.c"
  30. // Threshold for copying. The first copy code starts from here.
  31. #define COPY_THRESHOLD 3 /* bytes */
  32. // Ring buffer containing history has a size that is a power of two.
  33. // The number of bits is specified.
  34. #define RING_BUFFER_SIZE (1 << HISTORY_BITS)
  35. // Required size of the output buffer. At most, a single call to read()
  36. // might result in a copy of the entire ring buffer.
  37. #define OUTPUT_BUFFER_SIZE RING_BUFFER_SIZE
  38. // Number of different command codes. 0-255 range are literal byte
  39. // values, while higher values indicate copy from history.
  40. #define NUM_CODES 510
  41. // Number of possible codes in the "temporary table" used to encode the
  42. // codes table.
  43. #define MAX_TEMP_CODES 20
  44. typedef struct {
  45. // Input bit stream.
  46. BitStreamReader bit_stream_reader;
  47. // Ring buffer of past data. Used for position-based copies.
  48. uint8_t ringbuf[RING_BUFFER_SIZE];
  49. unsigned int ringbuf_pos;
  50. // Number of commands remaining before we start a new block.
  51. unsigned int block_remaining;
  52. // Table used for the code tree.
  53. TreeElement code_tree[NUM_CODES * 2];
  54. // Table used to encode the offset tree, used to read offsets
  55. // into the history buffer. This same table is also used to
  56. // encode the temp-table, which is bigger; hence the size.
  57. TreeElement offset_tree[MAX_TEMP_CODES * 2];
  58. } LHANewDecoder;
  59. // Initialize the history ring buffer.
  60. static void init_ring_buffer(LHANewDecoder *decoder)
  61. {
  62. memset(decoder->ringbuf, ' ', RING_BUFFER_SIZE);
  63. decoder->ringbuf_pos = 0;
  64. }
  65. static int lha_lh_new_init(void *data, LHADecoderCallback callback,
  66. void *callback_data)
  67. {
  68. LHANewDecoder *decoder = data;
  69. // Initialize input stream reader.
  70. bit_stream_reader_init(&decoder->bit_stream_reader,
  71. callback, callback_data);
  72. // Initialize data structures.
  73. init_ring_buffer(decoder);
  74. // First read starts the first block.
  75. decoder->block_remaining = 0;
  76. // Initialize tree tables to a known state.
  77. init_tree(decoder->code_tree, NUM_CODES * 2);
  78. init_tree(decoder->offset_tree, MAX_TEMP_CODES * 2);
  79. return 1;
  80. }
  81. // Read a length value - this is normally a value in the 0-7 range, but
  82. // sometimes can be longer.
  83. static int read_length_value(LHANewDecoder *decoder)
  84. {
  85. int i, len;
  86. len = read_bits(&decoder->bit_stream_reader, 3);
  87. if (len < 0) {
  88. return -1;
  89. }
  90. if (len == 7) {
  91. // Read more bits to extend the length until we reach a '0'.
  92. for (;;) {
  93. i = read_bit(&decoder->bit_stream_reader);
  94. if (i < 0) {
  95. return -1;
  96. } else if (i == 0) {
  97. break;
  98. }
  99. ++len;
  100. }
  101. }
  102. return len;
  103. }
  104. // Read the values from the input stream that define the temporary table
  105. // used for encoding the code table.
  106. static int read_temp_table(LHANewDecoder *decoder)
  107. {
  108. int i, j, n, len, code;
  109. uint8_t code_lengths[MAX_TEMP_CODES];
  110. // How many codes?
  111. n = read_bits(&decoder->bit_stream_reader, 5);
  112. if (n < 0) {
  113. return 0;
  114. }
  115. // n=0 is a special case, meaning only a single code that
  116. // is of zero length.
  117. if (n == 0) {
  118. code = read_bits(&decoder->bit_stream_reader, 5);
  119. if (code < 0) {
  120. return 0;
  121. }
  122. set_tree_single(decoder->offset_tree, code);
  123. return 1;
  124. }
  125. // Enforce a hard limit on the number of codes.
  126. if (n > MAX_TEMP_CODES) {
  127. n = MAX_TEMP_CODES;
  128. }
  129. // Read the length of each code.
  130. for (i = 0; i < n; ++i) {
  131. len = read_length_value(decoder);
  132. if (len < 0) {
  133. return 0;
  134. }
  135. code_lengths[i] = len;
  136. // After the first three lengths, there is a 2-bit
  137. // field to allow skipping over up to a further three
  138. // lengths. Not sure of the reason for this ...
  139. if (i == 2) {
  140. len = read_bits(&decoder->bit_stream_reader, 2);
  141. if (len < 0) {
  142. return 0;
  143. }
  144. for (j = 0; j < len; ++j) {
  145. ++i;
  146. code_lengths[i] = 0;
  147. }
  148. }
  149. }
  150. build_tree(decoder->offset_tree, MAX_TEMP_CODES * 2, code_lengths, n);
  151. return 1;
  152. }
  153. // Code table codes can indicate that a sequence of codes should be
  154. // skipped over. The number to skip is Huffman-encoded. Given a skip
  155. // range (0-2), this reads the number of codes to skip over.
  156. static int read_skip_count(LHANewDecoder *decoder, int skiprange)
  157. {
  158. int result;
  159. // skiprange=0 => 1 code.
  160. if (skiprange == 0) {
  161. result = 1;
  162. }
  163. // skiprange=1 => 3-18 codes.
  164. else if (skiprange == 1) {
  165. result = read_bits(&decoder->bit_stream_reader, 4);
  166. if (result < 0) {
  167. return -1;
  168. }
  169. result += 3;
  170. }
  171. // skiprange=2 => 20+ codes.
  172. else {
  173. result = read_bits(&decoder->bit_stream_reader, 9);
  174. if (result < 0) {
  175. return -1;
  176. }
  177. result += 20;
  178. }
  179. return result;
  180. }
  181. static int read_code_table(LHANewDecoder *decoder)
  182. {
  183. int i, j, n, skip_count, code;
  184. uint8_t code_lengths[NUM_CODES];
  185. // How many codes?
  186. n = read_bits(&decoder->bit_stream_reader, 9);
  187. if (n < 0) {
  188. return 0;
  189. }
  190. // n=0 implies a single code of zero length; all inputs
  191. // decode to the same code.
  192. if (n == 0) {
  193. code = read_bits(&decoder->bit_stream_reader, 9);
  194. if (code < 0) {
  195. return 0;
  196. }
  197. set_tree_single(decoder->code_tree, code);
  198. return 1;
  199. }
  200. if (n > NUM_CODES) {
  201. n = NUM_CODES;
  202. }
  203. // Read the length of each code.
  204. // The lengths are encoded using the temp-table previously read;
  205. // offset_tree is reused temporarily to hold it.
  206. i = 0;
  207. while (i < n) {
  208. code = read_from_tree(&decoder->bit_stream_reader,
  209. decoder->offset_tree);
  210. if (code < 0) {
  211. return 0;
  212. }
  213. // The code that was read can have different meanings.
  214. // If in the range 0-2, it indicates that a number of
  215. // codes are unused and should be skipped over.
  216. // Values greater than two represent a frequency count.
  217. if (code <= 2) {
  218. skip_count = read_skip_count(decoder, code);
  219. if (skip_count < 0) {
  220. return 0;
  221. }
  222. for (j = 0; j < skip_count && i < n; ++j) {
  223. code_lengths[i] = 0;
  224. ++i;
  225. }
  226. } else {
  227. code_lengths[i] = code - 2;
  228. ++i;
  229. }
  230. }
  231. build_tree(decoder->code_tree, NUM_CODES * 2, code_lengths, n);
  232. return 1;
  233. }
  234. static int read_offset_table(LHANewDecoder *decoder)
  235. {
  236. int i, n, len, code;
  237. uint8_t code_lengths[HISTORY_BITS];
  238. // How many codes?
  239. n = read_bits(&decoder->bit_stream_reader, OFFSET_BITS);
  240. if (n < 0) {
  241. return 0;
  242. }
  243. // n=0 is a special case, meaning only a single code that
  244. // is of zero length.
  245. if (n == 0) {
  246. code = read_bits(&decoder->bit_stream_reader, OFFSET_BITS);
  247. if (code < 0) {
  248. return 0;
  249. }
  250. set_tree_single(decoder->offset_tree, code);
  251. return 1;
  252. }
  253. // Enforce a hard limit on the number of codes.
  254. if (n > HISTORY_BITS) {
  255. n = HISTORY_BITS;
  256. }
  257. // Read the length of each code.
  258. for (i = 0; i < n; ++i) {
  259. len = read_length_value(decoder);
  260. if (len < 0) {
  261. return 0;
  262. }
  263. code_lengths[i] = len;
  264. }
  265. build_tree(decoder->offset_tree, MAX_TEMP_CODES * 2, code_lengths, n);
  266. return 1;
  267. }
  268. // Start reading a new block from the input stream.
  269. static int start_new_block(LHANewDecoder *decoder)
  270. {
  271. int len;
  272. // Read length of new block (in commands).
  273. len = read_bits(&decoder->bit_stream_reader, 16);
  274. if (len < 0) {
  275. return 0;
  276. }
  277. decoder->block_remaining = (size_t) len;
  278. // Read the temporary decode table, used to encode the codes table.
  279. // The position table data structure is reused for this.
  280. if (!read_temp_table(decoder)) {
  281. return 0;
  282. }
  283. // Read the code table; this is encoded *using* the temp table.
  284. if (!read_code_table(decoder)) {
  285. return 0;
  286. }
  287. // Read the offset table.
  288. if (!read_offset_table(decoder)) {
  289. return 0;
  290. }
  291. return 1;
  292. }
  293. // Read the next code from the input stream. Returns the code, or -1 if
  294. // an error occurred.
  295. static int read_code(LHANewDecoder *decoder)
  296. {
  297. return read_from_tree(&decoder->bit_stream_reader, decoder->code_tree);
  298. }
  299. // Read an offset distance from the input stream.
  300. // Returns the code, or -1 if an error occurred.
  301. static int read_offset_code(LHANewDecoder *decoder)
  302. {
  303. int bits, result;
  304. bits = read_from_tree(&decoder->bit_stream_reader,
  305. decoder->offset_tree);
  306. if (bits < 0) {
  307. return -1;
  308. }
  309. // The code read indicates the length of the offset in bits.
  310. //
  311. // The returned value looks like this:
  312. // bits = 0 -> 0
  313. // bits = 1 -> 1
  314. // bits = 2 -> 1x
  315. // bits = 3 -> 1xx
  316. // bits = 4 -> 1xxx
  317. // etc.
  318. if (bits == 0) {
  319. return 0;
  320. } else if (bits == 1) {
  321. return 1;
  322. } else {
  323. result = read_bits(&decoder->bit_stream_reader, bits - 1);
  324. if (result < 0) {
  325. return -1;
  326. }
  327. return result + (1 << (bits - 1));
  328. }
  329. }
  330. // Add a byte value to the output stream.
  331. static void output_byte(LHANewDecoder *decoder, uint8_t *buf,
  332. size_t *buf_len, uint8_t b)
  333. {
  334. buf[*buf_len] = b;
  335. ++*buf_len;
  336. decoder->ringbuf[decoder->ringbuf_pos] = b;
  337. decoder->ringbuf_pos = (decoder->ringbuf_pos + 1) % RING_BUFFER_SIZE;
  338. }
  339. // Copy a block from the history buffer.
  340. static void copy_from_history(LHANewDecoder *decoder, uint8_t *buf,
  341. size_t *buf_len, size_t count)
  342. {
  343. int offset;
  344. unsigned int i, start;
  345. offset = read_offset_code(decoder);
  346. if (offset < 0) {
  347. return;
  348. }
  349. start = decoder->ringbuf_pos + RING_BUFFER_SIZE
  350. - (unsigned int) offset - 1;
  351. for (i = 0; i < count; ++i) {
  352. output_byte(decoder, buf, buf_len,
  353. decoder->ringbuf[(start + i) % RING_BUFFER_SIZE]);
  354. }
  355. }
  356. static size_t lha_lh_new_read(void *data, uint8_t *buf)
  357. {
  358. LHANewDecoder *decoder = data;
  359. size_t result;
  360. int code;
  361. // Start of new block?
  362. while (decoder->block_remaining == 0) {
  363. if (!start_new_block(decoder)) {
  364. return 0;
  365. }
  366. }
  367. --decoder->block_remaining;
  368. // Read next command from input stream.
  369. result = 0;
  370. code = read_code(decoder);
  371. if (code < 0) {
  372. return 0;
  373. }
  374. // The code may be either a literal byte value or a copy command.
  375. if (code < 256) {
  376. output_byte(decoder, buf, &result, (uint8_t) code);
  377. } else {
  378. copy_from_history(decoder, buf, &result,
  379. code - 256 + COPY_THRESHOLD);
  380. }
  381. return result;
  382. }
  383. LHADecoderType DECODER_NAME = {
  384. lha_lh_new_init,
  385. NULL,
  386. lha_lh_new_read,
  387. sizeof(LHANewDecoder),
  388. OUTPUT_BUFFER_SIZE,
  389. RING_BUFFER_SIZE / 2
  390. };
  391. // This is a hack for -lh4-:
  392. #ifdef DECODER2_NAME
  393. LHADecoderType DECODER2_NAME = {
  394. lha_lh_new_init,
  395. NULL,
  396. lha_lh_new_read,
  397. sizeof(LHANewDecoder),
  398. OUTPUT_BUFFER_SIZE,
  399. RING_BUFFER_SIZE / 4
  400. };
  401. #endif