// lh1_decoder.c
  /*
  Copyright (c) 2011, 2012, Simon Howard
  Permission to use, copy, modify, and/or distribute this software
  for any purpose with or without fee is hereby granted, provided
  that the above copyright notice and this permission notice appear
  in all copies.
  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
  WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
  WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
  AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
  CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
  NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
  16. #include <stdio.h>
  17. #include <stdlib.h>
  18. #include <string.h>
  19. #include <inttypes.h>
  20. #include "lha_decoder.h"
  21. #include "bit_stream_reader.c"
  // Size of the ring buffer used to hold history:
  #define RING_BUFFER_SIZE 4096 /* bytes */

  // When the root frequency reaches this limit, the code tree is
  // reordered. The replacement list is parenthesized so that the macro
  // expands to a single operand wherever it is used (for example
  // `x / TREE_REORDER_LIMIT` or `x % TREE_REORDER_LIMIT`).
  #define TREE_REORDER_LIMIT (32 * 1024) /* 32 kB */

  // Number of codes ('byte' codes + 'copy' codes):
  #define NUM_CODES 314

  // Number of nodes in the code tree.
  #define NUM_TREE_NODES (NUM_CODES * 2 - 1)

  // Number of possible offsets:
  #define NUM_OFFSETS 64

  // Minimum length of the offset top bits:
  #define MIN_OFFSET_LENGTH 3 /* bits */

  // Threshold for copying. The first copy code starts from here.
  #define COPY_THRESHOLD 3 /* bytes */

  // Required size of the output buffer. At most, a single call to read()
  // might result in a copy of the entire ring buffer.
  #define OUTPUT_BUFFER_SIZE RING_BUFFER_SIZE
  /* One entry of the adaptive code tree. */
  typedef struct {
      /* Set when this entry is a leaf of the tree. */
      unsigned int leaf : 1;

      /*
       * For a leaf: the code that the leaf stands for.
       * For a branch: nodes[child_index] and nodes[child_index - 1]
       * are the two children.
       */
      unsigned int child_index : 15;

      /* Index of this entry's parent within the nodes[] table. */
      uint16_t parent;

      /* Hit counter: how many times this entry has been visited. */
      uint16_t freq;

      /* Identifier of the frequency group this entry is a member of. */
      uint16_t group;
  } Node;
  54. typedef struct {
  55. // Input bit stream.
  56. BitStreamReader bit_stream_reader;
  57. // Ring buffer of past data. Used for position-based copies.
  58. uint8_t ringbuf[RING_BUFFER_SIZE];
  59. unsigned int ringbuf_pos;
  60. // Array of tree nodes. nodes[0] is the root node. The array
  61. // is maintained in order by frequency.
  62. Node nodes[NUM_TREE_NODES];
  63. // Indices of leaf nodes of the tree (map from code to leaf
  64. // node index)
  65. uint16_t leaf_nodes[NUM_CODES];
  66. // Groups list. Every node belongs to a group. All nodes within
  67. // a group have the same frequency. There can be at most
  68. // NUM_TREE_NODES groups (one for each node). num_groups is used
  69. // to allocate and deallocate groups as needed.
  70. uint16_t groups[NUM_TREE_NODES];
  71. unsigned int num_groups;
  72. // Index of the "leader" of a group within the nodes[] array.
  73. // The leader is the left-most node within a span of nodes with
  74. // the same frequency.
  75. uint16_t group_leader[NUM_TREE_NODES];
  76. // Offset lookup table. Maps from a byte value (sequence of next
  77. // 8 bits from input stream) to an offset value.
  78. uint8_t offset_lookup[256];
  79. // Length of offsets, in bits.
  80. uint8_t offset_lengths[NUM_OFFSETS];
  81. } LHALH1Decoder;
  /*
   * Distribution of offset code lengths: entry k is the number of
   * offset codes that are (3 + k) bits long, i.e. 3 bits for the
   * first entry through to 8 bits for the last.
   */
  static const unsigned int offset_fdist[] = {
      /* 3 bits */ 1,
      /* 4 bits */ 3,
      /* 5 bits */ 8,
      /* 6 bits */ 12,
      /* 7 bits */ 24,
      /* 8 bits */ 16,
  };
  91. // Allocate a group from the free groups array.
  92. static uint16_t alloc_group(LHALH1Decoder *decoder)
  93. {
  94. uint16_t result;
  95. result = decoder->groups[decoder->num_groups];
  96. ++decoder->num_groups;
  97. return result;
  98. }
  99. // Free a group that is no longer in use.
  100. static void free_group(LHALH1Decoder *decoder, uint16_t group)
  101. {
  102. --decoder->num_groups;
  103. decoder->groups[decoder->num_groups] = group;
  104. }
  105. // Initialize groups array.
  106. static void init_groups(LHALH1Decoder *decoder)
  107. {
  108. unsigned int i;
  109. for (i = 0; i < NUM_TREE_NODES; ++i) {
  110. decoder->groups[i] = (uint16_t) i;
  111. }
  112. decoder->num_groups = 0;
  113. }
  114. // Initialize the tree with its basic initial configuration.
  115. static void init_tree(LHALH1Decoder *decoder)
  116. {
  117. unsigned int i, child;
  118. int node_index;
  119. uint16_t leaf_group;
  120. Node *node;
  121. // Leaf nodes are placed at the end of the table. Start by
  122. // initializing these, and working backwards.
  123. node_index = NUM_TREE_NODES - 1;
  124. leaf_group = alloc_group(decoder);
  125. for (i = 0; i < NUM_CODES; ++i) {
  126. node = &decoder->nodes[node_index];
  127. node->leaf = 1;
  128. node->child_index = (unsigned short) i;
  129. node->freq = 1;
  130. node->group = leaf_group;
  131. decoder->group_leader[leaf_group] = (uint16_t) node_index;
  132. decoder->leaf_nodes[i] = (uint16_t) node_index;
  133. --node_index;
  134. }
  135. // Now build up the intermediate nodes, up to the root. Each
  136. // node gets two nodes as children.
  137. child = NUM_TREE_NODES - 1;
  138. while (node_index >= 0) {
  139. node = &decoder->nodes[node_index];
  140. node->leaf = 0;
  141. // Set child pointer and update the parent pointers of the
  142. // children.
  143. node->child_index = child;
  144. decoder->nodes[child].parent = (uint16_t) node_index;
  145. decoder->nodes[child - 1].parent = (uint16_t) node_index;
  146. // The node's frequency is equal to the sum of the frequencies
  147. // of its children.
  148. node->freq = (uint16_t) (decoder->nodes[child].freq
  149. + decoder->nodes[child - 1].freq);
  150. // Is the frequency the same as the last node we processed?
  151. // if so, we are in the same group. If not, we must
  152. // allocate a new group. Either way, this node is now the
  153. // leader of its group.
  154. if (node->freq == decoder->nodes[node_index + 1].freq) {
  155. node->group = decoder->nodes[node_index + 1].group;
  156. } else {
  157. node->group = alloc_group(decoder);
  158. }
  159. decoder->group_leader[node->group] = (uint16_t) node_index;
  160. // Process next node.
  161. --node_index;
  162. child -= 2;
  163. }
  164. }
  165. // Fill in a range of values in the offset_lookup table, which have
  166. // the bits from 'code' as the high bits, and the low bits can be
  167. // any values in the range from 'mask'. Set these values to point
  168. // to 'offset'.
  169. static void fill_offset_range(LHALH1Decoder *decoder, uint8_t code,
  170. unsigned int mask, unsigned int offset)
  171. {
  172. unsigned int i;
  173. // Set offset lookup table to map from all possible input values
  174. // that fit within the mask to the target offset.
  175. for (i = 0; (i & ~mask) == 0; ++i) {
  176. decoder->offset_lookup[code | i] = (uint8_t) offset;
  177. }
  178. }
  179. // Calculate the values for the offset_lookup and offset_lengths
  180. // tables.
  181. static void init_offset_table(LHALH1Decoder *decoder)
  182. {
  183. unsigned int i, j, len;
  184. uint8_t code, iterbit, offset;
  185. code = 0;
  186. offset = 0;
  187. // Iterate through each entry in the frequency distribution table,
  188. // filling in codes in the lookup table as we go.
  189. for (i = 0; i < sizeof(offset_fdist) / sizeof(*offset_fdist); ++i) {
  190. // offset_fdist[0] is the number of codes of length
  191. // MIN_OFFSET_LENGTH bits, increasing as we go. As the
  192. // code increases in length, we must iterate progressively
  193. // lower bits in the code (moving right - extending the
  194. // code to be 1 bit longer).
  195. len = i + MIN_OFFSET_LENGTH;
  196. iterbit = (uint8_t) (1 << (8 - len));
  197. for (j = 0; j < offset_fdist[i]; ++j) {
  198. // Store lookup values for this offset in the
  199. // lookup table, and save the code length.
  200. // (iterbit - 1) turns into a mask for the lower
  201. // bits that are not part of the code.
  202. fill_offset_range(decoder, code,
  203. (uint8_t) (iterbit - 1), offset);
  204. decoder->offset_lengths[offset] = (uint8_t) len;
  205. // Iterate to next code.
  206. code = (uint8_t) (code + iterbit);
  207. ++offset;
  208. }
  209. }
  210. }
  211. // Initialize the history ring buffer.
  212. static void init_ring_buffer(LHALH1Decoder *decoder)
  213. {
  214. memset(decoder->ringbuf, ' ', RING_BUFFER_SIZE);
  215. decoder->ringbuf_pos = 0;
  216. }
  217. static int lha_lh1_init(void *data, LHADecoderCallback callback,
  218. void *callback_data)
  219. {
  220. LHALH1Decoder *decoder = data;
  221. // Initialize input stream reader.
  222. bit_stream_reader_init(&decoder->bit_stream_reader,
  223. callback, callback_data);
  224. // Initialize data structures.
  225. init_groups(decoder);
  226. init_tree(decoder);
  227. init_offset_table(decoder);
  228. init_ring_buffer(decoder);
  229. return 1;
  230. }
  231. // Make the given node the leader of its group: swap it with the current
  232. // leader so that it is in the left-most position. Returns the new index
  233. // of the node.
  234. static uint16_t make_group_leader(LHALH1Decoder *decoder,
  235. uint16_t node_index)
  236. {
  237. Node *node, *leader;
  238. uint16_t group;
  239. uint16_t leader_index;
  240. unsigned int tmp;
  241. group = decoder->nodes[node_index].group;
  242. leader_index = decoder->group_leader[group];
  243. // Already the leader? If so, there is nothing to do.
  244. if (leader_index == node_index) {
  245. return node_index;
  246. }
  247. node = &decoder->nodes[node_index];
  248. leader = &decoder->nodes[leader_index];
  249. // Swap leaf and child indices in the two nodes:
  250. tmp = leader->leaf;
  251. leader->leaf = node->leaf;
  252. node->leaf = tmp;
  253. tmp = leader->child_index;
  254. leader->child_index = node->child_index;
  255. node->child_index = tmp;
  256. if (node->leaf) {
  257. decoder->leaf_nodes[node->child_index] = node_index;
  258. } else {
  259. decoder->nodes[node->child_index].parent = node_index;
  260. decoder->nodes[node->child_index - 1].parent = node_index;
  261. }
  262. if (leader->leaf) {
  263. decoder->leaf_nodes[leader->child_index] = leader_index;
  264. } else {
  265. decoder->nodes[leader->child_index].parent = leader_index;
  266. decoder->nodes[leader->child_index - 1].parent = leader_index;
  267. }
  268. return leader_index;
  269. }
  270. // Increase the frequency count for a node, rearranging groups as
  271. // appropriate.
  272. static void increment_node_freq(LHALH1Decoder *decoder, uint16_t node_index)
  273. {
  274. Node *node, *other;
  275. node = &decoder->nodes[node_index];
  276. other = &decoder->nodes[node_index - 1];
  277. ++node->freq;
  278. // If the node is part of a group containing other nodes, it
  279. // must leave the group.
  280. if (node_index < NUM_TREE_NODES - 1
  281. && node->group == decoder->nodes[node_index + 1].group) {
  282. // Next node in the group now becomes the leader.
  283. ++decoder->group_leader[node->group];
  284. // The node must now either join the group to its
  285. // left, or start a new group.
  286. if (node->freq == other->freq) {
  287. node->group = other->group;
  288. } else {
  289. node->group = alloc_group(decoder);
  290. decoder->group_leader[node->group] = node_index;
  291. }
  292. } else {
  293. // The node is in a group of its own (single-node
  294. // group). It might need to join the group of the
  295. // node on its left if it has the same frequency.
  296. if (node->freq == other->freq) {
  297. free_group(decoder, node->group);
  298. node->group = other->group;
  299. }
  300. }
  301. }
  302. // Reconstruct the code huffman tree to be more evenly distributed.
  303. // Invoked periodically as data is processed.
  304. static void reconstruct_tree(LHALH1Decoder *decoder)
  305. {
  306. Node *leaf;
  307. unsigned int child;
  308. unsigned int freq;
  309. unsigned int group;
  310. int i;
  311. // Gather all leaf nodes at the start of the table.
  312. leaf = decoder->nodes;
  313. for (i = 0; i < NUM_TREE_NODES; ++i) {
  314. if (decoder->nodes[i].leaf) {
  315. leaf->leaf = 1;
  316. leaf->child_index = decoder->nodes[i].child_index;
  317. // Frequency of the nodes in the new tree is halved,
  318. // this acts as a running average each time the
  319. // tree is reconstructed.
  320. leaf->freq = (uint16_t) (decoder->nodes[i].freq + 1) / 2;
  321. ++leaf;
  322. }
  323. }
  324. // The leaf nodes are now all at the start of the table. Now
  325. // reconstruct the tree, starting from the end of the table and
  326. // working backwards, inserting branch nodes between the leaf
  327. // nodes. Each branch node inherits the sum of the frequencies
  328. // of its children, and must be placed to maintain the ordering
  329. // within the table by decreasing frequency.
  330. leaf = &decoder->nodes[NUM_CODES - 1];
  331. child = NUM_TREE_NODES - 1;
  332. i = NUM_TREE_NODES - 1;
  333. while (i >= 0) {
  334. // Before we can add a new branch node, we need at least
  335. // two nodes to use as children. If we don't have this
  336. // then we need to copy some from the leaves.
  337. while ((int) child - i < 2) {
  338. decoder->nodes[i] = *leaf;
  339. decoder->leaf_nodes[leaf->child_index] = (uint16_t) i;
  340. --i;
  341. --leaf;
  342. }
  343. // Now that we have at least two nodes to take as children
  344. // of the new branch node, we can calculate the branch
  345. // node's frequency.
  346. freq = (unsigned int) (decoder->nodes[child].freq
  347. + decoder->nodes[child - 1].freq);
  348. // Now copy more leaf nodes until the correct place to
  349. // insert the new branch node presents itself.
  350. while (leaf >= decoder->nodes && freq >= leaf->freq) {
  351. decoder->nodes[i] = *leaf;
  352. decoder->leaf_nodes[leaf->child_index] = (uint16_t) i;
  353. --i;
  354. --leaf;
  355. }
  356. // The new branch node can now be inserted.
  357. decoder->nodes[i].leaf = 0;
  358. decoder->nodes[i].freq = (uint16_t) freq;
  359. decoder->nodes[i].child_index = (uint16_t) child;
  360. decoder->nodes[child].parent = (uint16_t) i;
  361. decoder->nodes[child - 1].parent = (uint16_t) i;
  362. --i;
  363. // Process the next pair of children.
  364. child -= 2;
  365. }
  366. // Reconstruct the group data. Start by resetting group data.
  367. init_groups(decoder);
  368. // Assign a group to the first node.
  369. group = alloc_group(decoder);
  370. decoder->nodes[0].group = (uint16_t) group;
  371. decoder->group_leader[group] = 0;
  372. // Assign a group number to each node, nodes having the same
  373. // group if the have the same frequency, and allocating new
  374. // groups when a new frequency is found.
  375. for (i = 1; i < NUM_TREE_NODES; ++i) {
  376. if (decoder->nodes[i].freq == decoder->nodes[i - 1].freq) {
  377. decoder->nodes[i].group = decoder->nodes[i - 1].group;
  378. } else {
  379. group = alloc_group(decoder);
  380. decoder->nodes[i].group = (uint16_t) group;
  381. // First node with a particular frequency is leader.
  382. decoder->group_leader[group] = (uint16_t) i;
  383. }
  384. }
  385. }
  386. // Increment the counter for the specific code, reordering the tree as
  387. // necessary.
  388. static void increment_for_code(LHALH1Decoder *decoder, uint16_t code)
  389. {
  390. uint16_t node_index;
  391. // When the limit is reached, we must reorder the code tree
  392. // to better match the code frequencies:
  393. if (decoder->nodes[0].freq >= TREE_REORDER_LIMIT) {
  394. reconstruct_tree(decoder);
  395. }
  396. ++decoder->nodes[0].freq;
  397. // Dynamically adjust the tree. Start from the leaf node of
  398. // the tree and walk back up, rearranging nodes to the root.
  399. node_index = decoder->leaf_nodes[code];
  400. while (node_index != 0) {
  401. // Shift the node to the left side of its group,
  402. // and bump the frequency count.
  403. node_index = make_group_leader(decoder, node_index);
  404. increment_node_freq(decoder, node_index);
  405. // Iterate up to the parent node.
  406. node_index = decoder->nodes[node_index].parent;
  407. }
  408. }
  409. // Read a code from the input stream.
  410. static int read_code(LHALH1Decoder *decoder, uint16_t *result)
  411. {
  412. unsigned int node_index;
  413. int bit;
  414. // Start from the root node, and traverse down until a leaf is
  415. // reached.
  416. node_index = 0;
  417. //printf("<root ");
  418. while (!decoder->nodes[node_index].leaf) {
  419. bit = read_bit(&decoder->bit_stream_reader);
  420. if (bit < 0) {
  421. return 0;
  422. }
  423. //printf("<%i>", bit);
  424. // Choose one of the two children depending on the
  425. // bit that was read.
  426. node_index = decoder->nodes[node_index].child_index
  427. - (unsigned int) bit;
  428. }
  429. *result = decoder->nodes[node_index].child_index;
  430. //printf(" -> %i!>\n", *result);
  431. increment_for_code(decoder, *result);
  432. return 1;
  433. }
  434. // Read an offset code from the input stream.
  435. static int read_offset(LHALH1Decoder *decoder, unsigned int *result)
  436. {
  437. unsigned int offset;
  438. int future, offset2;
  439. // The offset can be up to 8 bits long, but is likely not
  440. // that long. Use the lookup table to find the offset
  441. // and its length.
  442. future = peek_bits(&decoder->bit_stream_reader, 8);
  443. if (future < 0) {
  444. return 0;
  445. }
  446. offset = decoder->offset_lookup[future];
  447. // Skip past the offset bits and also read the following
  448. // lower-order bits.
  449. read_bits(&decoder->bit_stream_reader,
  450. decoder->offset_lengths[offset]);
  451. offset2 = read_bits(&decoder->bit_stream_reader, 6);
  452. if (offset2 < 0) {
  453. return 0;
  454. }
  455. *result = (offset << 6) | (unsigned int) offset2;
  456. return 1;
  457. }
  458. static void output_byte(LHALH1Decoder *decoder, uint8_t *buf,
  459. size_t *buf_len, uint8_t b)
  460. {
  461. buf[*buf_len] = b;
  462. ++*buf_len;
  463. decoder->ringbuf[decoder->ringbuf_pos] = b;
  464. decoder->ringbuf_pos = (decoder->ringbuf_pos + 1) % RING_BUFFER_SIZE;
  465. }
  466. static size_t lha_lh1_read(void *data, uint8_t *buf)
  467. {
  468. LHALH1Decoder *decoder = data;
  469. size_t result;
  470. uint16_t code;
  471. result = 0;
  472. // Read the next code from the input stream.
  473. if (!read_code(decoder, &code)) {
  474. return 0;
  475. }
  476. // The code either indicates a single byte to be output, or
  477. // it indicates that a block should be copied from the ring
  478. // buffer as it is a repeat of a sequence earlier in the
  479. // stream.
  480. if (code < 0x100) {
  481. output_byte(decoder, buf, &result, (uint8_t) code);
  482. } else {
  483. unsigned int count, start, i, pos, offset;
  484. // Read the offset into the history at which to start
  485. // copying.
  486. if (!read_offset(decoder, &offset)) {
  487. return 0;
  488. }
  489. count = code - 0x100U + COPY_THRESHOLD;
  490. start = decoder->ringbuf_pos - offset + RING_BUFFER_SIZE - 1;
  491. // Copy from history into output buffer:
  492. for (i = 0; i < count; ++i) {
  493. pos = (start + i) % RING_BUFFER_SIZE;
  494. output_byte(decoder, buf, &result,
  495. decoder->ringbuf[pos]);
  496. }
  497. }
  498. return result;
  499. }
  500. LHADecoderType lha_lh1_decoder = {
  501. lha_lh1_init,
  502. NULL,
  503. lha_lh1_read,
  504. sizeof(LHALH1Decoder),
  505. OUTPUT_BUFFER_SIZE,
  506. RING_BUFFER_SIZE
  507. };