lha_file_header.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081
  1. /*
  2. Copyright (c) 2011, 2012, Simon Howard
  3. Permission to use, copy, modify, and/or distribute this software
  4. for any purpose with or without fee is hereby granted, provided
  5. that the above copyright notice and this permission notice appear
  6. in all copies.
  7. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
  8. WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
  9. WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
  10. AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
  11. CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
  12. LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
  13. NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  14. CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <stdlib.h>
  17. #include <string.h>
  18. #include <ctype.h>
  19. #include <time.h>
  20. #include "lha_endian.h"
  21. #include "lha_file_header.h"
  22. #include "ext_header.h"
  23. #include "crc16.h"
  // Number of bytes read up front for every header. This is enough to
  // reach the header level byte at offset 20 and the byte after it.
  #define COMMON_HEADER_LEN 22 /* bytes */

  // Minimum length of a level 0 header (with zero-length filename).
  #define LEVEL_0_MIN_HEADER_LEN 22 /* bytes */

  // Minimum length of a level 1 base header (with zero-length filename).
  #define LEVEL_1_MIN_HEADER_LEN 25 /* bytes */

  // Length of a level 2 base header.
  #define LEVEL_2_HEADER_LEN 26 /* bytes */

  // Length of a level 3 base header.
  #define LEVEL_3_HEADER_LEN 32 /* bytes */

  // Maximum length of a level 3 header (including extended headers).
  #define LEVEL_3_MAX_HEADER_LEN (1024 * 1024) /* 1 MB */

  // Length of a level 0 Unix extended area.
  #define LEVEL_0_UNIX_EXTENDED_LEN 12 /* bytes */

  // Length of a level 0 OS-9 extended area.
  #define LEVEL_0_OS9_EXTENDED_LEN 22 /* bytes */

  // Accessors for the raw header bytes. 'hdr_ptr' is a pointer to the
  // header pointer (LHAFileHeader **), because the header may be moved by
  // realloc() while it is being read. Both macro arguments are fully
  // parenthesized so that expression arguments expand as intended.
  #define RAW_DATA(hdr_ptr, off) ((*(hdr_ptr))->raw_data[(off)])
  #define RAW_DATA_LEN(hdr_ptr) ((*(hdr_ptr))->raw_data_len)
  41. char *lha_file_header_full_path(LHAFileHeader *header)
  42. {
  43. char *path;
  44. char *filename;
  45. char *result;
  46. if (header->path != NULL) {
  47. path = header->path;
  48. } else {
  49. path = "";
  50. }
  51. if (header->filename != NULL) {
  52. filename = header->filename;
  53. } else {
  54. filename = "";
  55. }
  56. result = malloc(strlen(path) + strlen(filename) + 1);
  57. if (result == NULL) {
  58. return NULL;
  59. }
  60. sprintf(result, "%s%s", path, filename);
  61. return result;
  62. }
  63. /**
  64. * Given a file header with the filename set, split it into separate
  65. * path and filename components, if necessary.
  66. *
  67. * @param header Point to the file header structure.
  68. * @return Non-zero for success, or zero for failure.
  69. */
  70. static int split_header_filename(LHAFileHeader *header)
  71. {
  72. char *sep;
  73. char *new_filename;
  74. // Is there a directory separator in the path? If so, we need to
  75. // split into directory name and filename.
  76. sep = strrchr(header->filename, '/');
  77. if (sep != NULL) {
  78. new_filename = strdup(sep + 1);
  79. if (new_filename == NULL) {
  80. return 0;
  81. }
  82. *(sep + 1) = '\0';
  83. header->path = header->filename;
  84. header->filename = new_filename;
  85. }
  86. return 1;
  87. }
  // Verify the simple additive checksum used by level 0/1 headers.
  //
  // Sums the 'header_len' bytes starting at 'header' and compares the low
  // eight bits of the sum with 'csum'. Returns non-zero when they match,
  // zero otherwise. The buffer is only read, never modified.
  static int check_l0_checksum(const uint8_t *header, size_t header_len,
                               size_t csum)
  {
      unsigned int sum = 0;
      size_t i;

      // The index has the same type as the length, so the comparison
      // can never wrap regardless of how large 'header_len' is.
      for (i = 0; i < header_len; ++i) {
          sum += header[i];
      }

      return (sum & 0xff) == csum;
  }
  99. // Perform full-header CRC check, based on CRC from "common" extended header.
  100. static int check_common_crc(LHAFileHeader *header)
  101. {
  102. uint16_t crc;
  103. crc = 0;
  104. lha_crc16_buf(&crc, header->raw_data, header->raw_data_len);
  105. return crc == header->common_crc;
  106. }
  // Decode a 32-bit MS-DOS timestamp read from 'buf'.
  //
  // The fields are unpacked as follows (bit 0 is the least significant):
  //   bits  0-4   seconds / 2
  //   bits  5-10  minutes
  //   bits 11-15  hours
  //   bits 16-20  day of month
  //   bits 21-24  month (1-12)
  //   bits 25-31  years since 1980
  //
  // Returns the result of mktime() cast to unsigned int, or 0 if the raw
  // value is zero. The raw value is kept in an unsigned type: shifting a
  // signed int left when it is negative (which happens as soon as bit 31
  // is set) is undefined behavior.
  static unsigned int decode_ftime(uint8_t *buf)
  {
      uint32_t raw;
      struct tm datetime;

      raw = lha_decode_uint32(buf);

      if (raw == 0) {
          return 0;
      }

      // Deconstruct the contents of the MS-DOS time value and populate
      // the 'datetime' structure. Note that 'mktime' generates a
      // timestamp for the local time zone: this is unfortunate, but
      // probably the best that can be done, due to the limited data
      // stored in MS-DOS time values.
      // The memset() leaves tm_wday and tm_yday at zero.
      memset(&datetime, 0, sizeof(struct tm));

      datetime.tm_sec = (int) ((raw << 1) & 0x3e);
      datetime.tm_min = (int) ((raw >> 5) & 0x3f);
      datetime.tm_hour = (int) ((raw >> 11) & 0x1f);
      datetime.tm_mday = (int) ((raw >> 16) & 0x1f);
      datetime.tm_mon = (int) ((raw >> 21) & 0xf) - 1;
      datetime.tm_year = 80 + (int) ((raw >> 25) & 0x7f);
      datetime.tm_isdst = -1;

      return (unsigned int) mktime(&datetime);
  }
  133. // MS-DOS archives (and archives from similar systems) may have paths and
  134. // filenames that are in all-caps. Detect these and convert them to
  135. // lower-case.
  136. static void fix_msdos_allcaps(LHAFileHeader *header)
  137. {
  138. unsigned int i;
  139. int is_allcaps;
  140. // Check both path and filename to see if there are any lower-case
  141. // characters.
  142. is_allcaps = 1;
  143. if (header->path != NULL) {
  144. for (i = 0; header->path[i] != '\0'; ++i) {
  145. if (islower((unsigned) header->path[i])) {
  146. is_allcaps = 0;
  147. break;
  148. }
  149. }
  150. }
  151. if (is_allcaps && header->filename != NULL) {
  152. for (i = 0; header->filename[i] != '\0'; ++i) {
  153. if (islower((unsigned) header->filename[i])) {
  154. is_allcaps = 0;
  155. break;
  156. }
  157. }
  158. }
  159. // If both are all-caps, convert them all to lower-case.
  160. if (is_allcaps) {
  161. if (header->path != NULL) {
  162. for (i = 0; header->path[i] != '\0'; ++i) {
  163. header->path[i]
  164. = tolower((unsigned) header->path[i]);
  165. }
  166. }
  167. if (header->filename != NULL) {
  168. for (i = 0; header->filename[i] != '\0'; ++i) {
  169. header->filename[i]
  170. = tolower((unsigned) header->filename[i]);
  171. }
  172. }
  173. }
  174. }
  175. // Process the OS-9 permissions field and translate into the equivalent
  176. // Unix permissions.
  177. static void os9_to_unix_permissions(LHAFileHeader *header)
  178. {
  179. unsigned int or, ow, oe, pr, pw, pe, d;
  180. // Translate into equivalent Unix permissions. OS-9 just has
  181. // owner and public, so double up public for the owner field.
  182. or = (header->os9_perms & 0x01) != 0;
  183. ow = (header->os9_perms & 0x02) != 0;
  184. oe = (header->os9_perms & 0x04) != 0;
  185. pr = (header->os9_perms & 0x08) != 0;
  186. pw = (header->os9_perms & 0x10) != 0;
  187. pe = (header->os9_perms & 0x20) != 0;
  188. d = (header->os9_perms & 0x80) != 0;
  189. header->extra_flags |= LHA_FILE_UNIX_PERMS;
  190. header->unix_perms = (d << 14)
  191. | (or << 8) | (ow << 7) | (oe << 6) // owner
  192. | (pr << 5) | (pw << 4) | (pe << 3) // group
  193. | (pr << 2) | (pw << 1) | (pe << 0); // everyone
  194. }
  195. // Parse a Unix symbolic link. These are stored in the format:
  196. // filename = symlink|target
  197. static int parse_symlink(LHAFileHeader *header)
  198. {
  199. char *fullpath;
  200. char *p;
  201. // Although the format is always the same, some files have
  202. // symlink headers where the path is split between the path
  203. // and filename headers. For example:
  204. // path = etc|../../
  205. // filename = etc
  206. fullpath = lha_file_header_full_path(header);
  207. if (fullpath == NULL) {
  208. return 0;
  209. }
  210. p = strchr(fullpath, '|');
  211. if (p == NULL) {
  212. free(fullpath);
  213. return 0;
  214. }
  215. header->symlink_target = strdup(p + 1);
  216. if (header->symlink_target == NULL) {
  217. free(fullpath);
  218. return 0;
  219. }
  220. // Cut the string in half at the separator. Keep the left side
  221. // as the value for filename.
  222. *p = '\0';
  223. free(header->path);
  224. free(header->filename);
  225. header->path = NULL;
  226. header->filename = fullpath;
  227. // Having joined path and filename together during processing,
  228. // we now have the opposite problem: header->filename might
  229. // contain a full path rather than just a filename. Split back
  230. // into two again.
  231. return split_header_filename(header);
  232. }
  233. // Decode the path field in the header.
  234. static int process_level0_path(LHAFileHeader *header, uint8_t *data,
  235. size_t data_len)
  236. {
  237. unsigned int i;
  238. // Zero-length filename probably means that this is a directory
  239. // entry. Leave the filename field as NULL - this makes us
  240. // consistent with level 2/3 headers.
  241. if (data_len == 0) {
  242. return 1;
  243. }
  244. header->filename = malloc(data_len + 1);
  245. if (header->filename == NULL) {
  246. return 0;
  247. }
  248. memcpy(header->filename, data, data_len);
  249. header->filename[data_len] = '\0';
  250. // Convert MS-DOS path separators to Unix path separators.
  251. for (i = 0; i < data_len; ++i) {
  252. if (header->filename[i] == '\\') {
  253. header->filename[i] = '/';
  254. }
  255. }
  256. return split_header_filename(header);
  257. }
  258. // Read some more data from the input stream, extending the raw_data
  259. // array (and the size of the header).
  260. static uint8_t *extend_raw_data(LHAFileHeader **header,
  261. LHAInputStream *stream,
  262. size_t nbytes)
  263. {
  264. LHAFileHeader *new_header;
  265. size_t new_raw_len;
  266. uint8_t *result;
  267. if (nbytes > LEVEL_3_MAX_HEADER_LEN) {
  268. return NULL;
  269. }
  270. // Reallocate the header and raw_data area to be larger.
  271. new_raw_len = RAW_DATA_LEN(header) + nbytes;
  272. new_header = realloc(*header, sizeof(LHAFileHeader) + new_raw_len);
  273. if (new_header == NULL) {
  274. return NULL;
  275. }
  276. // Update the header pointer to point to the new area.
  277. *header = new_header;
  278. new_header->raw_data = (uint8_t *) (new_header + 1);
  279. result = new_header->raw_data + new_header->raw_data_len;
  280. // Read data from stream into new area.
  281. if (!lha_input_stream_read(stream, result, nbytes)) {
  282. return NULL;
  283. }
  284. new_header->raw_data_len = new_raw_len;
  285. return result;
  286. }
  287. // Starting at the specified offset in the raw_data array, walk
  288. // through the list of extended headers and parse them.
  289. static int decode_extended_headers(LHAFileHeader **header,
  290. unsigned int offset)
  291. {
  292. unsigned int field_size;
  293. uint8_t *ext_header;
  294. size_t ext_header_len;
  295. size_t available_length;
  296. // Level 3 headers use 32-bit length fields; all others use
  297. // 16-bit fields.
  298. if ((*header)->header_level == 3) {
  299. field_size = 4;
  300. } else {
  301. field_size = 2;
  302. }
  303. available_length = RAW_DATA_LEN(header) - offset - field_size;
  304. while (offset <= RAW_DATA_LEN(header) - field_size) {
  305. ext_header = &RAW_DATA(header, offset + field_size);
  306. if (field_size == 4) {
  307. ext_header_len
  308. = lha_decode_uint32(&RAW_DATA(header, offset));
  309. } else {
  310. ext_header_len
  311. = lha_decode_uint16(&RAW_DATA(header, offset));
  312. }
  313. // Header length zero indicates end of chain. Otherwise, sanity
  314. // check the header length is valid.
  315. if (ext_header_len == 0) {
  316. break;
  317. } else if (ext_header_len < field_size + 1
  318. || ext_header_len > available_length) {
  319. return 0;
  320. }
  321. // Process header:
  322. lha_ext_header_decode(*header, ext_header[0], ext_header + 1,
  323. ext_header_len - field_size - 1);
  324. // Advance to next header.
  325. offset += ext_header_len;
  326. available_length -= ext_header_len;
  327. }
  328. return 1;
  329. }
  330. static int read_next_ext_header(LHAFileHeader **header,
  331. LHAInputStream *stream,
  332. uint8_t **ext_header,
  333. size_t *ext_header_len)
  334. {
  335. // Last two bytes of the header raw data contain the size
  336. // of the next header.
  337. *ext_header_len
  338. = lha_decode_uint16(&RAW_DATA(header, RAW_DATA_LEN(header) - 2));
  339. // No more headers?
  340. if (*ext_header_len == 0) {
  341. *ext_header = NULL;
  342. return 1;
  343. }
  344. *ext_header = extend_raw_data(header, stream, *ext_header_len);
  345. return *ext_header != NULL;
  346. }
  347. // Read extended headers for a level 1 header, extending the
  348. // raw_data block to include them.
  349. static int read_l1_extended_headers(LHAFileHeader **header,
  350. LHAInputStream *stream)
  351. {
  352. uint8_t *ext_header;
  353. size_t ext_header_len;
  354. for (;;) {
  355. // Try to read the next header.
  356. if (!read_next_ext_header(header, stream,
  357. &ext_header, &ext_header_len)) {
  358. return 0;
  359. }
  360. // Last header?
  361. if (ext_header_len == 0) {
  362. break;
  363. }
  364. // For backwards compatibility with level 0 headers,
  365. // the compressed length field is actually "compressed
  366. // length + length of all extended headers":
  367. if ((*header)->compressed_length < ext_header_len) {
  368. return 0;
  369. }
  370. (*header)->compressed_length -= ext_header_len;
  371. // Must be at least 3 bytes - 1 byte header type
  372. // + 2 bytes for next header length
  373. if (ext_header_len < 3) {
  374. return 0;
  375. }
  376. }
  377. return 1;
  378. }
  379. // Process a level 0 Unix extended area.
  380. static void process_level0_unix_area(LHAFileHeader *header,
  381. uint8_t *data, size_t data_len)
  382. {
  383. // A typical Unix extended area:
  384. //
  385. // 00000000 55 00 00 3b 3d 4b 80 81 e8 03 e8 03
  386. // Sanity check.
  387. if (data_len < LEVEL_0_UNIX_EXTENDED_LEN || data[1] != 0x00) {
  388. return;
  389. }
  390. // OS-9/68k generates an extended area that is broadly compatible
  391. // with the Unix one.
  392. // Fill in the header fields from the data from the extended area.
  393. // There's one minor point to note here: OS-9/68k LHA includes the
  394. // timestamp twice - I have no idea why. In order to support both
  395. // variants, read the end fields from the end of the extended area.
  396. header->os_type = data[0];
  397. header->timestamp = lha_decode_uint32(data + 2);
  398. header->unix_perms = lha_decode_uint16(data + data_len - 6);
  399. header->unix_uid = lha_decode_uint16(data + data_len - 4);
  400. header->unix_gid = lha_decode_uint16(data + data_len - 2);
  401. header->extra_flags |= LHA_FILE_UNIX_PERMS | LHA_FILE_UNIX_UID_GID;
  402. }
  403. // Process a level 0 OS-9 extended area.
  404. static void process_level0_os9_area(LHAFileHeader *header,
  405. uint8_t *data, size_t data_len)
  406. {
  407. // A typical OS-9 extended area:
  408. //
  409. // 00000000 39 13 00 00 c3 16 00 0f 00 cc 18 07 09 03 01 16
  410. // 00000010 00 13 00 00 00 00
  411. // Sanity checks:
  412. if (data_len < LEVEL_0_OS9_EXTENDED_LEN
  413. || data[9] != 0xcc || data[1] != data[17] || data[2] != data[18]) {
  414. return;
  415. }
  416. // The contents resemble the contents of the OS-9 extended header.
  417. // We just want the permissions field.
  418. header->os_type = LHA_OS_TYPE_OS9;
  419. header->os9_perms = lha_decode_uint16(data + 1);
  420. header->extra_flags |= LHA_FILE_OS9_PERMS;
  421. }
  422. // Handling for level 0 extended areas.
  423. static void process_level0_extended_area(LHAFileHeader *header,
  424. uint8_t *data, size_t data_len)
  425. {
  426. // PMarc archives can include comments that are stored in the
  427. // extended area. It is possible that this could conflict with
  428. // the logic below, so specifically exclude them.
  429. if (!strncmp(header->compress_method, "-pm", 3)) {
  430. return;
  431. }
  432. // Different tools include different extended areas. Try to
  433. // identify which tool generated this one, based on the first
  434. // byte.
  435. switch (data[0]) {
  436. case LHA_OS_TYPE_UNIX:
  437. case LHA_OS_TYPE_OS9_68K:
  438. process_level0_unix_area(header, data, data_len);
  439. break;
  440. case LHA_OS_TYPE_OS9:
  441. process_level0_os9_area(header, data, data_len);
  442. break;
  443. default:
  444. break;
  445. }
  446. }
  447. // Decode a level 0 or 1 header.
  448. static int decode_level0_header(LHAFileHeader **header, LHAInputStream *stream)
  449. {
  450. uint8_t header_len;
  451. uint8_t header_csum;
  452. size_t path_len;
  453. size_t min_len;
  454. header_len = RAW_DATA(header, 0);
  455. header_csum = RAW_DATA(header, 1);
  456. // Sanity check header length. This is the minimum header length
  457. // for a header that has a zero-length path.
  458. switch ((*header)->header_level) {
  459. case 0:
  460. min_len = LEVEL_0_MIN_HEADER_LEN;
  461. break;
  462. case 1:
  463. min_len = LEVEL_1_MIN_HEADER_LEN;
  464. break;
  465. default:
  466. return 0;
  467. }
  468. if (header_len < min_len) {
  469. return 0;
  470. }
  471. // We only have a partial header so far. Read the full header.
  472. if (!extend_raw_data(header, stream,
  473. header_len + 2 - RAW_DATA_LEN(header))) {
  474. return 0;
  475. }
  476. // Checksum the header.
  477. if (!check_l0_checksum(&RAW_DATA(header, 2),
  478. RAW_DATA_LEN(header) - 2,
  479. header_csum)) {
  480. return 0;
  481. }
  482. // Compression method:
  483. memcpy((*header)->compress_method, &RAW_DATA(header, 2), 5);
  484. (*header)->compress_method[5] = '\0';
  485. // File lengths:
  486. (*header)->compressed_length = lha_decode_uint32(&RAW_DATA(header, 7));
  487. (*header)->length = lha_decode_uint32(&RAW_DATA(header, 11));
  488. // Timestamp:
  489. (*header)->timestamp = decode_ftime(&RAW_DATA(header, 15));
  490. // Read path. Check path length field - is the header long enough
  491. // to hold this full path?
  492. path_len = RAW_DATA(header, 21);
  493. if (min_len + path_len > header_len) {
  494. return 0;
  495. }
  496. // OS type?
  497. if ((*header)->header_level == 0) {
  498. (*header)->os_type = LHA_OS_TYPE_UNKNOWN;
  499. } else {
  500. (*header)->os_type = RAW_DATA(header, 24 + path_len);
  501. }
  502. // Read filename field:
  503. if (!process_level0_path(*header, &RAW_DATA(header, 22), path_len)) {
  504. return 0;
  505. }
  506. // CRC field.
  507. (*header)->crc = lha_decode_uint16(&RAW_DATA(header, 22 + path_len));
  508. // Level 0 headers can contain extended data through different schemes
  509. // to the extended header system used in level 1+.
  510. if ((*header)->header_level == 0
  511. && header_len > LEVEL_0_MIN_HEADER_LEN + path_len) {
  512. process_level0_extended_area(*header,
  513. &RAW_DATA(header, LEVEL_0_MIN_HEADER_LEN + 2 + path_len),
  514. header_len - LEVEL_0_MIN_HEADER_LEN - path_len);
  515. }
  516. return 1;
  517. }
  518. static int decode_level1_header(LHAFileHeader **header, LHAInputStream *stream)
  519. {
  520. unsigned int ext_header_start;
  521. if (!decode_level0_header(header, stream)) {
  522. return 0;
  523. }
  524. // Level 1 headers can have extended headers, so parse them.
  525. ext_header_start = RAW_DATA_LEN(header) - 2;
  526. if (!read_l1_extended_headers(header, stream)
  527. || !decode_extended_headers(header, ext_header_start)) {
  528. return 0;
  529. }
  530. return 1;
  531. }
  532. static int decode_level2_header(LHAFileHeader **header, LHAInputStream *stream)
  533. {
  534. unsigned int header_len;
  535. header_len = lha_decode_uint16(&RAW_DATA(header, 0));
  536. if (header_len < LEVEL_2_HEADER_LEN) {
  537. return 0;
  538. }
  539. // Read the full header.
  540. if (!extend_raw_data(header, stream,
  541. header_len - RAW_DATA_LEN(header))) {
  542. return 0;
  543. }
  544. // Compression method:
  545. memcpy((*header)->compress_method, &RAW_DATA(header, 2), 5);
  546. (*header)->compress_method[5] = '\0';
  547. // File lengths:
  548. (*header)->compressed_length = lha_decode_uint32(&RAW_DATA(header, 7));
  549. (*header)->length = lha_decode_uint32(&RAW_DATA(header, 11));
  550. // Timestamp. Unlike level 0/1, this is a Unix-style timestamp.
  551. (*header)->timestamp = lha_decode_uint32(&RAW_DATA(header, 15));
  552. // CRC.
  553. (*header)->crc = lha_decode_uint16(&RAW_DATA(header, 21));
  554. // OS type:
  555. (*header)->os_type = RAW_DATA(header, 23);
  556. // LHA for OS-9/68k generates broken level 2 archives: the header
  557. // length field is the length of the remainder of the header, not
  558. // the complete header length. As a result it's two bytes too
  559. // short. We can use the OS type field to detect these archives
  560. // and compensate.
  561. if ((*header)->os_type == LHA_OS_TYPE_OS9_68K) {
  562. if (!extend_raw_data(header, stream, 2)) {
  563. return 0;
  564. }
  565. }
  566. if (!decode_extended_headers(header, 24)) {
  567. return 0;
  568. }
  569. return 1;
  570. }
  571. static int decode_level3_header(LHAFileHeader **header, LHAInputStream *stream)
  572. {
  573. unsigned int header_len;
  574. // The first field at the start of a level 3 header is supposed to
  575. // indicate word size, with the idea being that the header format
  576. // can be extended beyond 32-bit words in the future. In practise,
  577. // nothing supports anything other than 32-bit (4 bytes), and neither
  578. // do we.
  579. if (lha_decode_uint16(&RAW_DATA(header, 0)) != 4) {
  580. return 0;
  581. }
  582. // Read the full header.
  583. if (!extend_raw_data(header, stream,
  584. LEVEL_3_HEADER_LEN - RAW_DATA_LEN(header))) {
  585. return 0;
  586. }
  587. // Read the header length field (including extended headers), and
  588. // extend to this full length. Because this is a 32-bit value,
  589. // we must place a sensible limit on the amount of data that will
  590. // be read, to avoid possibly allocating gigabytes of memory.
  591. header_len = lha_decode_uint32(&RAW_DATA(header, 24));
  592. if (header_len > LEVEL_3_MAX_HEADER_LEN
  593. || header_len < RAW_DATA_LEN(header)) {
  594. return 0;
  595. }
  596. if (!extend_raw_data(header, stream,
  597. header_len - RAW_DATA_LEN(header))) {
  598. return 0;
  599. }
  600. // Compression method:
  601. memcpy((*header)->compress_method, &RAW_DATA(header, 2), 5);
  602. (*header)->compress_method[5] = '\0';
  603. // File lengths:
  604. (*header)->compressed_length = lha_decode_uint32(&RAW_DATA(header, 7));
  605. (*header)->length = lha_decode_uint32(&RAW_DATA(header, 11));
  606. // Unix-style timestamp.
  607. (*header)->timestamp = lha_decode_uint32(&RAW_DATA(header, 15));
  608. // CRC.
  609. (*header)->crc = lha_decode_uint16(&RAW_DATA(header, 21));
  610. // OS type:
  611. (*header)->os_type = RAW_DATA(header, 23);
  612. if (!decode_extended_headers(header, 28)) {
  613. return 0;
  614. }
  615. return 1;
  616. }
  // "Collapse" a path in place by removing every empty, "." and ".."
  // component that is terminated by a '/'. This is to protect against
  // malicious archives that might include ".." in a path to break out of
  // the extract directory.
  //
  // A ".." component removes the component before it, but never walks
  // above the start of the path. A leading '/' is preserved. Only
  // components followed by a '/' are examined, so trailing text without
  // a final '/' is copied through unchanged.
  static void collapse_path(char *filename)
  {
      char *base;     // first character that may be rewritten
      char *seg;      // start of the component currently being written
      char *out;      // write position
      char *in;       // read position
      size_t seg_len;

      // If the path starts with a /, it is an absolute path; skip over
      // that first character and don't remove it.
      if (*filename == '/') {
          ++filename;
      }

      base = filename;
      seg = base;
      out = base;

      // Copy characters from 'in' to 'out'. It is always the case that
      // out <= in, so the result is never longer than the original.
      for (in = base; *in != '\0'; ++in) {
          *out++ = *in;

          // Components are only inspected once their terminating
          // separator has been written.
          if (*in != '/') {
              continue;
          }

          seg_len = (size_t) (out - seg) - 1;

          if (seg_len == 0 || (seg_len == 1 && seg[0] == '.')) {
              // Empty component (//) or current directory (.):
              // discard what was just written.
              out = seg;
          } else if (seg_len == 2 && seg[0] == '.' && seg[1] == '.') {
              // Parent directory (..).
              if (seg == base) {
                  // Already at the start; just drop the "../".
                  out = base;
              } else {
                  // Step back over the previous separator, then
                  // rewind to the start of the previous component.
                  out = seg - 1;

                  while (out > base && out[-1] != '/') {
                      --out;
                  }

                  seg = out;
              }
          } else {
              // Ordinary component: the next one starts here.
              seg = out;
          }
      }

      *out = '\0';
  }
  670. LHAFileHeader *lha_file_header_read(LHAInputStream *stream)
  671. {
  672. LHAFileHeader *header;
  673. int success;
  674. // We cannot decode the file header until we identify the
  675. // header level (as different header level formats are
  676. // decoded in different ways. The header level field is
  677. // located at byte offset 20 within the header, so we
  678. // must read the first 21 bytes to read it (actually this
  679. // reads one byte more, so that we get the filename length
  680. // byte for level 1 headers as well).
  681. // Allocate result structure.
  682. header = calloc(1, sizeof(LHAFileHeader) + COMMON_HEADER_LEN);
  683. if (header == NULL) {
  684. return NULL;
  685. }
  686. memset(header, 0, sizeof(LHAFileHeader));
  687. header->_refcount = 1;
  688. // Read first chunk of header.
  689. header->raw_data = (uint8_t *) (header + 1);
  690. header->raw_data_len = COMMON_HEADER_LEN;
  691. if (!lha_input_stream_read(stream, header->raw_data,
  692. header->raw_data_len)) {
  693. goto fail;
  694. }
  695. // Identify header level, and decode header depending on
  696. // the value encountered.
  697. header->header_level = header->raw_data[20];
  698. switch (header->header_level) {
  699. case 0:
  700. success = decode_level0_header(&header, stream);
  701. break;
  702. case 1:
  703. success = decode_level1_header(&header, stream);
  704. break;
  705. case 2:
  706. success = decode_level2_header(&header, stream);
  707. break;
  708. case 3:
  709. success = decode_level3_header(&header, stream);
  710. break;
  711. default:
  712. success = 0;
  713. break;
  714. }
  715. if (!success) {
  716. goto fail;
  717. }
  718. // Sanity check that we got some headers, at least.
  719. // Directory entries must have a path, and files must have a
  720. // filename. Symlinks are stored using the same compression method
  721. // field string (-lhd-) as directories.
  722. if (strcmp(header->compress_method, LHA_COMPRESS_TYPE_DIR) != 0) {
  723. if (header->filename == NULL) {
  724. goto fail;
  725. }
  726. } else if (!strcmp(header->compress_method, LHA_COMPRESS_TYPE_DIR)
  727. && LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)
  728. && (header->path != NULL || header->filename != NULL)
  729. && (header->unix_perms & 0170000) == 0120000) {
  730. if (!parse_symlink(header)) {
  731. goto fail;
  732. }
  733. } else {
  734. if (header->path == NULL) {
  735. goto fail;
  736. }
  737. }
  738. // Is the path an all-caps filename? If so, it is a DOS path that
  739. // should be translated to lower case.
  740. if (header->os_type == LHA_OS_TYPE_UNKNOWN
  741. || header->os_type == LHA_OS_TYPE_MSDOS
  742. || header->os_type == LHA_OS_TYPE_ATARI
  743. || header->os_type == LHA_OS_TYPE_OS2) {
  744. fix_msdos_allcaps(header);
  745. }
  746. // Collapse special directory paths to ensure the path is clean.
  747. if (header->path != NULL) {
  748. collapse_path(header->path);
  749. }
  750. // Is this header generated by OS-9/68k LHA? If so, any Unix
  751. // permissions are actually OS-9 permissions.
  752. if (header->os_type == LHA_OS_TYPE_OS9_68K
  753. && LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)) {
  754. header->os9_perms = header->unix_perms;
  755. header->extra_flags |= LHA_FILE_OS9_PERMS;
  756. }
  757. // If OS-9 permissions were read, translate into Unix permissions.
  758. if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_OS9_PERMS)) {
  759. os9_to_unix_permissions(header);
  760. }
  761. // Was the "common" extended header read, which contains a CRC of
  762. // the full header? If so, perform a CRC check now.
  763. if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_COMMON_CRC)
  764. && !check_common_crc(header)) {
  765. goto fail;
  766. }
  767. return header;
  768. fail:
  769. lha_file_header_free(header);
  770. return NULL;
  771. }
  772. void lha_file_header_free(LHAFileHeader *header)
  773. {
  774. // Sanity check:
  775. if (header->_refcount == 0) {
  776. return;
  777. }
  778. // Count down references and only free when all have been removed.
  779. --header->_refcount;
  780. if (header->_refcount > 0) {
  781. return;
  782. }
  783. free(header->filename);
  784. free(header->path);
  785. free(header->symlink_target);
  786. free(header->unix_username);
  787. free(header->unix_group);
  788. free(header);
  789. }
  790. void lha_file_header_add_ref(LHAFileHeader *header)
  791. {
  792. ++header->_refcount;
  793. }