LZCBDecompressor.cpp 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. /* Copyright (C) Teemu Suutari */
  2. #include <array>
  3. #include "LZCBDecompressor.hpp"
  4. #include "RangeDecoder.hpp"
  5. #include "InputStream.hpp"
  6. #include "OutputStream.hpp"
  7. #include "common/Common.hpp"
  8. namespace ancient::internal
  9. {
  10. template<size_t T>
  11. class FrequencyTree
  12. {
  13. public:
  14. FrequencyTree()
  15. {
  16. for (uint32_t i=0;i<_size;i++)
  17. _tree[i]=0;
  18. }
  19. ~FrequencyTree()
  20. {
  21. // nothing needed
  22. }
  23. uint16_t decode(uint16_t value,uint16_t &low,uint16_t &freq) const
  24. {
  25. if (value>=_tree[_size-1])
  26. throw Decompressor::DecompressionError();
  27. uint16_t symbol=0;
  28. low=0;
  29. for (uint32_t i=_levels-2;;i--)
  30. {
  31. uint16_t tmp=_tree[_levelOffsets[i]+symbol];
  32. if (uint32_t(symbol+1)<_levelSizes[i] && value>=tmp)
  33. {
  34. symbol++;
  35. low+=tmp;
  36. value-=tmp;
  37. }
  38. if (!i) break;
  39. symbol<<=1;
  40. }
  41. freq=_tree[symbol];
  42. return symbol;
  43. }
  44. bool exists(uint16_t symbol) const
  45. {
  46. return _tree[symbol];
  47. }
  48. void increment(uint16_t symbol)
  49. {
  50. for (uint16_t i=0;i<_levels;i++)
  51. {
  52. _tree[_levelOffsets[i]+symbol]++;
  53. symbol>>=1;
  54. }
  55. }
  56. void halve()
  57. {
  58. // non-standard way
  59. for (uint32_t i=0;i<T;i++)
  60. _tree[i]>>=1;
  61. for (uint32_t i=T;i<_size;i++)
  62. _tree[i]=0;
  63. for (uint32_t i=0,length=T;i<_levels-1;i++,length=(length+1)>>1)
  64. {
  65. for (uint32_t j=0;j<length;j++)
  66. _tree[_levelOffsets[i+1]+(j>>1)]+=_tree[_levelOffsets[i]+j];
  67. }
  68. }
  69. uint32_t getTotal() const
  70. {
  71. return _tree[_size-1];
  72. }
  73. private:
  74. static constexpr uint32_t levelSize(uint32_t level)
  75. {
  76. uint32_t ret=T;
  77. for (uint32_t i=0;i<level;i++)
  78. {
  79. ret=(ret+1)>>1;
  80. }
  81. return ret;
  82. }
  83. static constexpr uint32_t levels()
  84. {
  85. uint32_t ret=0;
  86. while (levelSize(ret)!=1) ret++;
  87. return ret+1;
  88. }
  89. static constexpr uint32_t size()
  90. {
  91. uint32_t ret=0;
  92. for (uint32_t i=0;i<levels();i++)
  93. ret+=levelSize(i);
  94. return ret;
  95. }
  96. static constexpr uint32_t levelOffset(uint32_t level)
  97. {
  98. uint32_t ret=0;
  99. for (uint32_t i=0;i<level;i++)
  100. ret+=levelSize(i);
  101. return ret;
  102. }
  103. template<uint32_t... I>
  104. static constexpr auto makeLevelOffsetSequence(std::integer_sequence<uint32_t,I...>)
  105. {
  106. return std::integer_sequence<uint32_t,levelOffset(I)...>{};
  107. }
  108. template<uint32_t... I>
  109. static constexpr auto makeLevelSizeSequence(std::integer_sequence<uint32_t,I...>)
  110. {
  111. return std::integer_sequence<uint32_t,levelSize(I)...>{};
  112. }
  113. template<uint32_t... I>
  114. static constexpr std::array<uint32_t,sizeof...(I)> makeArray(std::integer_sequence<uint32_t,I...>)
  115. {
  116. return std::array<uint32_t,sizeof...(I)>{{I...}};
  117. }
  118. static constexpr uint32_t _size=size();
  119. static constexpr uint32_t _levels=levels();
  120. static constexpr std::array<uint32_t,_levels> _levelOffsets=makeArray(makeLevelOffsetSequence(std::make_integer_sequence<uint32_t,levels()>{}));
  121. static constexpr std::array<uint32_t,_levels> _levelSizes=makeArray(makeLevelSizeSequence(std::make_integer_sequence<uint32_t,levels()>{}));
  122. uint16_t _tree[size()];
  123. };
  124. template<size_t T>
  125. class FrequencyDecoder
  126. {
  127. public:
  128. FrequencyDecoder(RangeDecoder &decoder) :
  129. _decoder(decoder)
  130. {
  131. // nothing needed
  132. }
  133. ~FrequencyDecoder()
  134. {
  135. // nothing needed
  136. }
  137. template<typename F>
  138. uint16_t decode(F readFunc)
  139. {
  140. uint16_t freq=0,symbol,value=_decoder.decode(_threshold+_tree.getTotal());
  141. if (value>=_threshold)
  142. {
  143. uint16_t low;
  144. symbol=_tree.decode(value-_threshold,low,freq);
  145. _decoder.scale(_threshold+low,_threshold+low+freq,_threshold+_tree.getTotal());
  146. if (freq==1 && _threshold>1)
  147. _threshold--;
  148. } else {
  149. _decoder.scale(0,_threshold,_threshold+_tree.getTotal());
  150. symbol=readFunc();
  151. // A bug in the encoder
  152. if (!symbol && _tree.exists(symbol)) symbol=T;
  153. _threshold++;
  154. }
  155. _tree.increment(symbol);
  156. if (_threshold+_tree.getTotal()>=0x3ffdU)
  157. {
  158. _tree.halve();
  159. _threshold=(_threshold>>1)+1;
  160. }
  161. return symbol;
  162. }
  163. private:
  164. RangeDecoder &_decoder;
  165. FrequencyTree<T+1> _tree;
  166. uint16_t _threshold=1;
  167. };
  168. bool LZCBDecompressor::detectHeaderXPK(uint32_t hdr) noexcept
  169. {
  170. return hdr==FourCC("LZCB");
  171. }
  172. std::shared_ptr<XPKDecompressor> LZCBDecompressor::create(uint32_t hdr,uint32_t recursionLevel,const Buffer &packedData,std::shared_ptr<XPKDecompressor::State> &state,bool verify)
  173. {
  174. return std::make_shared<LZCBDecompressor>(hdr,recursionLevel,packedData,state,verify);
  175. }
  176. LZCBDecompressor::LZCBDecompressor(uint32_t hdr,uint32_t recursionLevel,const Buffer &packedData,std::shared_ptr<XPKDecompressor::State> &state,bool verify) :
  177. XPKDecompressor(recursionLevel),
  178. _packedData(packedData)
  179. {
  180. if (packedData.size()<2) throw Decompressor::InvalidFormatError();
  181. }
  182. LZCBDecompressor::~LZCBDecompressor()
  183. {
  184. // nothing needed
  185. }
  186. const std::string &LZCBDecompressor::getSubName() const noexcept
  187. {
  188. static std::string name="XPK-LZCB: LZ-compressor";
  189. return name;
  190. }
  191. void LZCBDecompressor::decompressImpl(Buffer &rawData,const Buffer &previousData,bool verify)
  192. {
  193. class BitReader : public RangeDecoder::BitReader
  194. {
  195. public:
  196. BitReader(ForwardInputStream &stream) :
  197. _reader(stream)
  198. {
  199. // nothing needed
  200. }
  201. virtual ~BitReader()
  202. {
  203. // nothing needed
  204. }
  205. virtual uint32_t readBit() override final
  206. {
  207. return _reader.readBitsBE32(1);
  208. }
  209. uint32_t readBits(uint32_t bitCount)
  210. {
  211. return _reader.readBitsBE32(bitCount);
  212. }
  213. private:
  214. MSBBitReader<ForwardInputStream> _reader;
  215. };
  216. ForwardInputStream inputStream(_packedData,0,_packedData.size(),true);
  217. ForwardOutputStream outputStream(rawData,0,rawData.size());
  218. BitReader bitReader(inputStream);
  219. RangeDecoder rangeDecoder(bitReader,bitReader.readBits(16));
  220. // Ugly duplicates
  221. auto readByte=[&]()->uint16_t
  222. {
  223. uint16_t ret=rangeDecoder.decode(0x100U);
  224. rangeDecoder.scale(ret,ret+1,0x100U);
  225. return ret;
  226. };
  227. auto readCount=[&]()->uint16_t
  228. {
  229. uint16_t ret=rangeDecoder.decode(0x101U);
  230. rangeDecoder.scale(ret,ret+1,0x101U);
  231. return ret;
  232. };
  233. FrequencyDecoder<256> baseLiteralDecoder(rangeDecoder);
  234. FrequencyDecoder<257> repeatCountDecoder(rangeDecoder);
  235. FrequencyDecoder<257> literalCountDecoder(rangeDecoder);
  236. FrequencyDecoder<256> distanceDecoder(rangeDecoder);
  237. std::unique_ptr<FrequencyDecoder<256>> literalDecoders[256];
  238. uint8_t ch=uint8_t(baseLiteralDecoder.decode(readByte));
  239. outputStream.writeByte(ch);
  240. bool lastIsLiteral=true;
  241. while (!outputStream.eof())
  242. {
  243. uint32_t count=repeatCountDecoder.decode(readCount);
  244. if (count)
  245. {
  246. if (count==0x100U)
  247. {
  248. uint32_t tmp;
  249. do
  250. {
  251. tmp=readByte();
  252. count+=tmp;
  253. } while (tmp==0xffU);
  254. }
  255. count+=lastIsLiteral?5:4;
  256. uint32_t distance=distanceDecoder.decode(readByte)<<8;
  257. distance|=readByte();
  258. ch=outputStream.copy(distance,count);
  259. lastIsLiteral=false;
  260. } else {
  261. uint16_t literalCount;
  262. do
  263. {
  264. literalCount=literalCountDecoder.decode(readCount);
  265. if (!literalCount) throw Decompressor::DecompressionError();
  266. for (uint32_t i=0;i<literalCount;i++)
  267. {
  268. auto &literalDecoder=literalDecoders[ch];
  269. if (!literalDecoder) literalDecoder=std::make_unique<FrequencyDecoder<256>>(rangeDecoder);
  270. ch=uint8_t(literalDecoder->decode([&]()
  271. {
  272. return baseLiteralDecoder.decode(readByte);
  273. }));
  274. outputStream.writeByte(ch);
  275. }
  276. } while (literalCount==0x100U);
  277. lastIsLiteral=true;
  278. }
  279. }
  280. }
  281. }