1
0

unpack.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. #ifndef _RAR_UNPACK_
  2. #define _RAR_UNPACK_
  // ---------------------------------------------------------------------
  // Compile-time limits of the unpacker.
  // ---------------------------------------------------------------------

  // Upper bound on the number of compressed bits handled in quick
  // decoding mode.
  #define MAX_QUICK_DECODE_BITS   10

  // Filter capacity for an entire data block. It has to be at least twice
  // MAX_PACK_FILTERS, because filters from two data blocks are stored.
  #define MAX_UNPACK_FILTERS      8192

  // Same capacity for the RAR3 unpacker. It has to be at least twice
  // v3_MAX_PACK_FILTERS, because filters from two data blocks are stored.
  #define MAX3_UNPACK_FILTERS     8192

  // Cap on the channel count of the RAR3 delta filter, so that a corrupt
  // archive carrying an invalid channel count cannot make processing
  // unreasonably slow. Must be equal to or larger than
  // v3_MAX_FILTER_CHANNELS. RAR5 needs no such cap: it stores the channel
  // count in only 5 bits.
  #define MAX3_UNPACK_CHANNELS    1024

  // Largest size of a single filter block; restricted to bound memory
  // allocation. Must be equal to or larger than MAX_ANALYZE_SIZE.
  #define MAX_FILTER_BLOCK_SIZE   0x400000

  // Data is written in blocks of 4 MB or less. Must not exceed
  // PACK_MAX_READ, which keeps the number of filters buffered in the
  // unpacker reasonable.
  #define UNPACK_MAX_WRITE        0x400000
  23. // Decode compressed bit fields to alphabet numbers.
  24. struct DecodeTable:PackDef
  25. {
  26. // Real size of DecodeNum table.
  27. uint MaxNum;
  28. // Left aligned start and upper limit codes defining code space
  29. // ranges for bit lengths. DecodeLen[BitLength-1] defines the start of
  30. // range for bit length and DecodeLen[BitLength] defines next code
  31. // after the end of range or in other words the upper limit code
  32. // for specified bit length.
  33. uint DecodeLen[16];
  34. // Every item of this array contains the sum of all preceding items.
  35. // So it contains the start position in code list for every bit length.
  36. uint DecodePos[16];
  37. // Number of compressed bits processed in quick mode.
  38. // Must not exceed MAX_QUICK_DECODE_BITS.
  39. uint QuickBits;
  40. // Translates compressed bits (up to QuickBits length)
  41. // to bit length in quick mode.
  42. byte QuickLen[1<<MAX_QUICK_DECODE_BITS];
  43. // Translates compressed bits (up to QuickBits length)
  44. // to position in alphabet in quick mode.
  45. // 'ushort' saves some memory and even provides a little speed gain
  46. // comparting to 'uint' here.
  47. ushort QuickNum[1<<MAX_QUICK_DECODE_BITS];
  48. // Translate the position in code list to position in alphabet.
  49. // We do not allocate it dynamically to avoid performance overhead
  50. // introduced by pointer, so we use the largest possible table size
  51. // as array dimension. Real size of this array is defined in MaxNum.
  52. // We use this array if compressed bit field is too lengthy
  53. // for QuickLen based translation.
  54. // 'ushort' saves some memory and even provides a little speed gain
  55. // comparting to 'uint' here.
  56. ushort DecodeNum[LARGEST_TABLE_SIZE];
  57. };
  // Header fields of one compressed data block. Instances are passed by
  // reference to Unpack::ReadBlockHeader and Unpack::ReadTables.
  // NOTE(review): the units of the size/position fields are not visible
  // in this header; confirm against the reader implementation.
  struct UnpackBlockHeader
  {
    // Sizes and positions.
    int  BlockSize;
    int  BlockBitSize;
    int  BlockStart;
    int  HeaderSize;

    // Flags.
    bool LastBlockInFile;
    bool TablePresent;
  };
  67. struct UnpackBlockTables
  68. {
  69. DecodeTable LD; // Decode literals.
  70. DecodeTable DD; // Decode distances.
  71. DecodeTable LDD; // Decode lower bits of distances.
  72. DecodeTable RD; // Decode repeating distances.
  73. DecodeTable BD; // Decode bit lengths in Huffman table.
  74. };
  75. #ifdef RAR_SMP
  // Kind tag stored in UnpackDecodedItem::Type.
  enum UNP_DEC_TYPE
  {
    UNPDT_LITERAL,
    UNPDT_MATCH,
    UNPDT_FULLREP,
    UNPDT_REP,
    UNPDT_FILTER
  };
  79. struct UnpackDecodedItem
  80. {
  81. UNP_DEC_TYPE Type;
  82. ushort Length;
  83. union
  84. {
  85. uint Distance;
  86. byte Literal[4];
  87. };
  88. };
  89. struct UnpackThreadData
  90. {
  91. Unpack *UnpackPtr;
  92. BitInput Inp;
  93. bool HeaderRead;
  94. UnpackBlockHeader BlockHeader;
  95. bool TableRead;
  96. UnpackBlockTables BlockTables;
  97. int DataSize; // Data left in buffer. Can be less than block size.
  98. bool DamagedData;
  99. bool LargeBlock;
  100. bool NoDataLeft; // 'true' if file is read completely.
  101. bool Incomplete; // Not entire block was processed, need to read more data.
  102. UnpackDecodedItem *Decoded;
  103. uint DecodedSize;
  104. uint DecodedAllocated;
  105. uint ThreadNumber; // For debugging.
  106. UnpackThreadData()
  107. :Inp(false)
  108. {
  109. Decoded=NULL;
  110. }
  111. ~UnpackThreadData()
  112. {
  113. if (Decoded!=NULL)
  114. free(Decoded);
  115. }
  116. };
  117. #endif
  118. struct UnpackFilter
  119. {
  120. byte Type;
  121. uint BlockStart;
  122. uint BlockLength;
  123. byte Channels;
  124. // uint Width;
  125. // byte PosR;
  126. bool NextWindow;
  127. };
  128. struct UnpackFilter30
  129. {
  130. unsigned int BlockStart;
  131. unsigned int BlockLength;
  132. bool NextWindow;
  133. // Position of parent filter in Filters array used as prototype for filter
  134. // in PrgStack array. Not defined for filters in Filters array.
  135. unsigned int ParentFilter;
  136. VM_PreparedProgram Prg;
  137. };
  // Per-channel audio state. Needed for RAR 2.0 archives only.
  struct AudioVariables
  {
    int K1;
    int K2;
    int K3;
    int K4;
    int K5;

    int D1;
    int D2;
    int D3;
    int D4;

    int LastDelta;
    unsigned int Dif[11];
    unsigned int ByteCount;
    int LastChar;
  };
  147. // We can use the fragmented dictionary in case heap does not have the single
  148. // large enough memory block. It is slower than normal dictionary.
  149. class FragmentedWindow
  150. {
  151. private:
  152. enum {MAX_MEM_BLOCKS=32};
  153. void Reset();
  154. byte *Mem[MAX_MEM_BLOCKS];
  155. size_t MemSize[MAX_MEM_BLOCKS];
  156. public:
  157. FragmentedWindow();
  158. ~FragmentedWindow();
  159. void Init(size_t WinSize);
  160. byte& operator [](size_t Item);
  161. void CopyString(uint Length,uint Distance,size_t &UnpPtr,size_t MaxWinMask);
  162. void CopyData(byte *Dest,size_t WinPos,size_t Size);
  163. size_t GetBlockSize(size_t StartPos,size_t RequiredSize);
  164. };
  165. class Unpack:PackDef
  166. {
  167. private:
  168. void Unpack5(bool Solid);
  169. void Unpack5MT(bool Solid);
  170. bool UnpReadBuf();
  171. void UnpWriteBuf();
  172. byte* ApplyFilter(byte *Data,uint DataSize,UnpackFilter *Flt);
  173. void UnpWriteArea(size_t StartPtr,size_t EndPtr);
  174. void UnpWriteData(byte *Data,size_t Size);
  175. _forceinline uint SlotToLength(BitInput &Inp,uint Slot);
  176. void UnpInitData50(bool Solid);
  177. bool ReadBlockHeader(BitInput &Inp,UnpackBlockHeader &Header);
  178. bool ReadTables(BitInput &Inp,UnpackBlockHeader &Header,UnpackBlockTables &Tables);
  179. void MakeDecodeTables(byte *LengthTable,DecodeTable *Dec,uint Size);
  180. _forceinline uint DecodeNumber(BitInput &Inp,DecodeTable *Dec);
  181. void CopyString();
  182. inline void InsertOldDist(unsigned int Distance);
  183. void UnpInitData(bool Solid);
  184. _forceinline void CopyString(uint Length,uint Distance);
  185. uint ReadFilterData(BitInput &Inp);
  186. bool ReadFilter(BitInput &Inp,UnpackFilter &Filter);
  187. bool AddFilter(UnpackFilter &Filter);
  188. bool AddFilter();
  189. void InitFilters();
  190. ComprDataIO *UnpIO;
  191. BitInput Inp;
  192. #ifdef RAR_SMP
  193. void InitMT();
  194. bool UnpackLargeBlock(UnpackThreadData &D);
  195. bool ProcessDecoded(UnpackThreadData &D);
  196. ThreadPool *UnpThreadPool;
  197. UnpackThreadData *UnpThreadData;
  198. uint MaxUserThreads;
  199. byte *ReadBufMT;
  200. #endif
  201. Array<byte> FilterSrcMemory;
  202. Array<byte> FilterDstMemory;
  203. // Filters code, one entry per filter.
  204. Array<UnpackFilter> Filters;
  205. uint OldDist[4],OldDistPtr;
  206. uint LastLength;
  207. // LastDist is necessary only for RAR2 and older with circular OldDist
  208. // array. In RAR3 last distance is always stored in OldDist[0].
  209. uint LastDist;
  210. size_t UnpPtr,WrPtr;
  211. // Top border of read packed data.
  212. int ReadTop;
  213. // Border to call UnpReadBuf. We use it instead of (ReadTop-C)
  214. // for optimization reasons. Ensures that we have C bytes in buffer
  215. // unless we are at the end of file.
  216. int ReadBorder;
  217. UnpackBlockHeader BlockHeader;
  218. UnpackBlockTables BlockTables;
  219. size_t WriteBorder;
  220. byte *Window;
  221. FragmentedWindow FragWindow;
  222. bool Fragmented;
  223. int64 DestUnpSize;
  224. bool Suspended;
  225. bool UnpAllBuf;
  226. bool UnpSomeRead;
  227. int64 WrittenFileSize;
  228. bool FileExtracted;
  229. /***************************** Unpack v 1.5 *********************************/
  230. void Unpack15(bool Solid);
  231. void ShortLZ();
  232. void LongLZ();
  233. void HuffDecode();
  234. void GetFlagsBuf();
  235. void UnpInitData15(int Solid);
  236. void InitHuff();
  237. void CorrHuff(ushort *CharSet,byte *NumToPlace);
  238. void CopyString15(uint Distance,uint Length);
  239. uint DecodeNum(uint Num,uint StartPos,uint *DecTab,uint *PosTab);
  240. ushort ChSet[256],ChSetA[256],ChSetB[256],ChSetC[256];
  241. byte NToPl[256],NToPlB[256],NToPlC[256];
  242. uint FlagBuf,AvrPlc,AvrPlcB,AvrLn1,AvrLn2,AvrLn3;
  243. int Buf60,NumHuf,StMode,LCount,FlagsCnt;
  244. uint Nhfb,Nlzb,MaxDist3;
  245. /***************************** Unpack v 1.5 *********************************/
  246. /***************************** Unpack v 2.0 *********************************/
  247. void Unpack20(bool Solid);
  248. DecodeTable MD[4]; // Decode multimedia data, up to 4 channels.
  249. unsigned char UnpOldTable20[MC20*4];
  250. bool UnpAudioBlock;
  251. uint UnpChannels,UnpCurChannel;
  252. int UnpChannelDelta;
  253. void CopyString20(uint Length,uint Distance);
  254. bool ReadTables20();
  255. void UnpWriteBuf20();
  256. void UnpInitData20(int Solid);
  257. void ReadLastTables();
  258. byte DecodeAudio(int Delta);
  259. struct AudioVariables AudV[4];
  260. /***************************** Unpack v 2.0 *********************************/
  261. /***************************** Unpack v 3.0 *********************************/
  262. enum BLOCK_TYPES {BLOCK_LZ,BLOCK_PPM};
  263. void UnpInitData30(bool Solid);
  264. void Unpack29(bool Solid);
  265. void InitFilters30(bool Solid);
  266. bool ReadEndOfBlock();
  267. bool ReadVMCode();
  268. bool ReadVMCodePPM();
  269. bool AddVMCode(uint FirstByte,byte *Code,uint CodeSize);
  270. int SafePPMDecodeChar();
  271. bool ReadTables30();
  272. bool UnpReadBuf30();
  273. void UnpWriteBuf30();
  274. void ExecuteCode(VM_PreparedProgram *Prg);
  275. int PrevLowDist,LowDistRepCount;
  276. ModelPPM PPM;
  277. int PPMEscChar;
  278. byte UnpOldTable[HUFF_TABLE_SIZE30];
  279. int UnpBlockType;
  280. // If we already read decoding tables for Unpack v2,v3,v5.
  281. // We should not use a single variable for all algorithm versions,
  282. // because we can have a corrupt archive with one algorithm file
  283. // followed by another algorithm file with "solid" flag and we do not
  284. // want to reuse tables from one algorithm in another.
  285. bool TablesRead2,TablesRead3,TablesRead5;
  286. // Virtual machine to execute filters code.
  287. RarVM VM;
  288. // Buffer to read VM filters code. We moved it here from AddVMCode
  289. // function to reduce time spent in BitInput constructor.
  290. BitInput VMCodeInp;
  291. // Filters code, one entry per filter.
  292. Array<UnpackFilter30 *> Filters30;
  293. // Filters stack, several entrances of same filter are possible.
  294. Array<UnpackFilter30 *> PrgStack;
  295. // Lengths of preceding data blocks, one length of one last block
  296. // for every filter. Used to reduce the size required to write
  297. // the data block length if lengths are repeating.
  298. Array<int> OldFilterLengths;
  299. int LastFilter;
  300. /***************************** Unpack v 3.0 *********************************/
  301. public:
  302. Unpack(ComprDataIO *DataIO);
  303. ~Unpack();
  304. void Init(size_t WinSize,bool Solid);
  305. void DoUnpack(uint Method,bool Solid);
  306. bool IsFileExtracted() {return(FileExtracted);}
  307. void SetDestSize(int64 DestSize) {DestUnpSize=DestSize;FileExtracted=false;}
  308. void SetSuspended(bool Suspended) {Unpack::Suspended=Suspended;}
  309. #ifdef RAR_SMP
  310. void SetThreads(uint Threads);
  311. void UnpackDecode(UnpackThreadData &D);
  312. #endif
  313. size_t MaxWinSize;
  314. size_t MaxWinMask;
  315. uint GetChar()
  316. {
  317. if (Inp.InAddr>BitInput::MAX_SIZE-30)
  318. {
  319. UnpReadBuf();
  320. if (Inp.InAddr>=BitInput::MAX_SIZE) // If nothing was read.
  321. return 0;
  322. }
  323. return Inp.InBuf[Inp.InAddr++];
  324. }
  325. };
  326. #endif