123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451 |
- /*
- Copyright (c) 2012, Simon Howard
- Permission to use, copy, modify, and/or distribute this software
- for any purpose with or without fee is hereby granted, provided
- that the above copyright notice and this permission notice appear
- in all copies.
- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
- AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- // Code for handling MacBinary headers.
- //
- // Classic Mac OS attaches more metadata to files than other operating
- // systems. For example, each file has a file type that is used to
- // determine the application to open it with. Files can also have both
- // a data fork and a resource fork. Because of this design, when
- // transferring a file between computers (eg. over a network), all of
- // the data associated with the file must be bundled up together to
- // preserve the file.
- //
- // MacLHA uses the MacBinary container format to do this. Within the
- // compressed data, the file contents are preceded by a 128 byte
- // header that contains the metadata. The data from the data fork can
- // also be followed by the data from the resource fork.
- //
- // Because this is incompatible with .lzh archives from other operating
- // systems, MacLHA has two menu items to create new archives - one
- // creates a "Mac" archive, while the other creates a "non-Mac"
- // (standard) archive that contains just the file contents. This quote
- // from the documentation (MacLHAE.doc) describes what is stored when
- // the latter option is used:
- //
- // > If a file has only either Data Fork or Resource Fork, it's stored
- // > into archives. In case a file has both Data Fork and Resource Fork,
- // > only the Data Fork is stored.
- //
- // --
- //
- // Mac OS X has essentially abandoned this practise of using filesystem
- // metadata and other systems do not use it, either. It is therefore
- // sensible and desirable to strip off the MacBinary header (if present)
- // and extract just the normal file contents. It makes sense to use the
- // same strategy quoted above.
- //
- // The possible presence of a MacBinary header can be inferred using the
- // OS type field from the LHA header - a value of 'm' indicates that it
- // was generated by MacLHA. However, there are some issues with this:
- //
- // 1. This type is set regardless of whether a MacBinary header is
- // attached or not. There is no other field to indicate the
- // difference, and MacBinary headers do not have a magic number, so
- // the presence of one must be determined heuristically.
- // Realistically, though, this can be done without too much
- // difficulty, by strictly checking all the fields in the MacBinary
- // header. If an invalid header is seen, it can be rejected and
- // assumed to be a normal file.
- //
- // 2. MacBinary is a standard container format for transferring files
- // between Macs and not used just by MacLHA. Therefore, it is
- // plausible that a .lzh archive might "deliberately" contain a
- // MacBinary file, in which case it would be a mistake to strip
- // off the header.
- //
- // This is an unlikely but still a plausible scenario. It can be
- // mitigated by comparing the MacBinary header values against the
- // values from the .lzh header. A header added by MacLHA will have
- // a filename that matches the .lzh header's filename (MacBinary
- // files usually have a .bin extension appended, so the filenames
- // would not match). Also, the modification timestamp should match
- // the timestamp from the .lzh header.
- //
- // 3. Comparing the MacBinary header modification timestamp with the
- // .lzh header modification timestamp is complicated by the fact
- // that the former is stored as a Macintosh 1904-based timestamp
- // in the local timezone, while the latter is stored as a Unix
- // timestamp in UTC time. Although converting timestamp formats
- // is trivial, the two do not compare exactly due to the timezone
- // offset.
- //
- // --
- //
- // Summary of MacBinary header fields and policy for each
- // (Z = check zero, C = check value, I = ignore):
- //
- // 0x00 - Z - "Old version number", must be zero for compatibility
- // 0x01 - C - Filename length, must match .lzh header filename.
- // 0x02-0x40 - C - Filename, must match .lzh header filename.
- // Z - Remainder following filename contents must be zero
- // 0x41-0x44 - I - File type
- // 0x45-0x48 - I - File creator
- // 0x49 - I - Finder flags
- // 0x4a - Z - "Must be zero for compatibility"
- // 0x4b-0x4c - I - Icon vertical position
- // 0x4d-0x4e - I - Icon horizontal position
- // 0x4f-0x50 - I - Window ID
- // 0x51 - I - "Protected" flag
- // 0x52 - Z - "Must be zero for compatibility"
- // 0x53-0x56 - C - Data fork length }- added together with the 128 byte header and
- // 0x57-0x5a - C - Resource fork length }- rounded up to 128, equal uncompressed length
- // 0x5b-0x5e - I - File creation date
- // 0x5f-0x62 - C - File modification date - should match .lzh header
- // 0x63-0x64 - Z - Finder "Get Info" comment length - unused by MacLHA
- // 0x65-0x7f - Z - MacBinary II data - unused by MacLHA
- #include <stdlib.h>
- #include <string.h>
- #include "lha_decoder.h"
- #include "lha_endian.h"
- #include "lha_file_header.h"
// Upper bound on the number of bytes that a single call to the
// MacBinary decoder's read function will produce.
#define OUTPUT_BUFFER_SIZE            4096         /* bytes */

// Number of seconds separating the classic Mac OS epoch (1904) from
// the Unix epoch (1970). Subtract it from a Mac timestamp to get a
// Unix timestamp.
#define MAC_TIME_OFFSET               2082844800   /* seconds */

// Total length of a MacBinary header.
#define MBHDR_SIZE                    128          /* bytes */

// Byte offsets of the MacBinary header fields that are examined, and
// the lengths of those fields that are more than a few bytes long.
#define MBHDR_OFF_VERSION             0x00
#define MBHDR_OFF_FILENAME_LEN        0x01
#define MBHDR_OFF_FILENAME            0x02
#define MBHDR_LEN_FILENAME            63
#define MBHDR_OFF_ZERO_COMPAT1        0x4a
#define MBHDR_OFF_ZERO_COMPAT2        0x52
#define MBHDR_OFF_DATA_FORK_LEN       0x53
#define MBHDR_OFF_RES_FORK_LEN        0x57
#define MBHDR_OFF_FILE_MOD_DATE       0x5f
#define MBHDR_OFF_COMMENT_LEN         0x63
#define MBHDR_OFF_MACBINARY2_DATA     0x65

// The MacBinary II area runs from its offset to the end of the header.
#define MBHDR_LEN_MACBINARY2_DATA     (MBHDR_SIZE - MBHDR_OFF_MACBINARY2_DATA)
// Check that the given block of data contains only zero bytes.
//
// data:     start of the block to inspect (not modified).
// data_len: number of bytes to inspect; an empty block counts as zero.
//
// Returns 1 if every byte is zero, 0 as soon as a non-zero byte is found.
static int block_is_zero(const uint8_t *data, size_t data_len)
{
	// The index has the same type as the length so that it can never
	// wrap around before reaching data_len.
	size_t i;

	for (i = 0; i < data_len; ++i) {
		if (data[i] != 0) {
			return 0;
		}
	}

	return 1;
}
- // Check that the specified modification time matches the modification
- // time from the file header.
- static int check_modification_time(unsigned int mod_time,
- LHAFileHeader *header)
- {
- unsigned int time_diff;
- // In an ideal world, mod_time should match header->timestamp
- // exactly. However, there's an additional complication
- // because mod_time is local time, not UTC time, so there is
- // a timezone difference.
- if (header->timestamp > mod_time) {
- time_diff = header->timestamp - mod_time;
- } else {
- time_diff = mod_time - header->timestamp;
- }
- // The maximum UTC timezone difference is UTC+14, used in
- // New Zealand and some other islands in the Pacific.
- if (time_diff > 14 * 60 * 60) {
- return 0;
- }
- // If the world was simpler, all time zones would be exact
- // hour offsets, but in fact, some regions use half or
- // quarter hour offsets. So the difference should be a
- // multiple of 15 minutes. Actually, the control panel in
- // Mac OS allows any minute offset to be configured, but if
- // people are crazy enough to do that, they deserve the
- // brokenness they get as a result. It's preferable to use
- // a 15 minute check rather than a 1 minute check, because
- // this allows MacLHA-added MacBinary headers to be
- // distinguished from archived MacBinary files more reliably.
- //return (time_diff % (15 * 60)) == 0;
- // It turns out the assumption above doesn't hold, and MacLHA
- // does generate archives where the timestamps don't always
- // exactly match. Oh well.
- return 1;
- }
- // Given the specified data buffer, check whether it has a MacBinary
- // header with contents that match the specified .lzh header.
- static int is_macbinary_header(uint8_t *data, LHAFileHeader *header)
- {
- unsigned int filename_len;
- unsigned int data_fork_len, res_fork_len, expected_len;
- unsigned int mod_time;
- // Check fields in the header that should be zero.
- if (data[MBHDR_OFF_VERSION] != 0
- || data[MBHDR_OFF_ZERO_COMPAT1] != 0
- || data[MBHDR_OFF_ZERO_COMPAT2] != 0
- || !block_is_zero(&data[MBHDR_OFF_COMMENT_LEN], 2)
- || !block_is_zero(&data[MBHDR_OFF_MACBINARY2_DATA],
- MBHDR_LEN_MACBINARY2_DATA)) {
- return 0;
- }
- // Check that the filename matches the filename from the
- // lzh header.
- filename_len = data[MBHDR_OFF_FILENAME_LEN];
- if (filename_len > MBHDR_LEN_FILENAME
- || filename_len != strlen(header->filename)
- || memcmp(&data[MBHDR_OFF_FILENAME],
- header->filename, filename_len) != 0) {
- return 0;
- }
- // Data following the filename must be zero as well.
- if (!block_is_zero(data + MBHDR_OFF_FILENAME + filename_len,
- MBHDR_LEN_FILENAME - filename_len)) {
- return 0;
- }
- // Decode data fork / resource fork lengths. Their combined
- // lengths, plus the MacBinary header, should match the
- // compressed data length (rounded up to the nearest 128).
- data_fork_len = lha_decode_be_uint32(&data[MBHDR_OFF_DATA_FORK_LEN]);
- res_fork_len = lha_decode_be_uint32(&data[MBHDR_OFF_RES_FORK_LEN]);
- expected_len = (data_fork_len + res_fork_len + MBHDR_SIZE);
- if (header->length != ((expected_len + 0x7f) & ~0x7f)) {
- return 0;
- }
- // Check modification time.
- mod_time = lha_decode_be_uint32(&data[MBHDR_OFF_FILE_MOD_DATE]);
- if (mod_time < MAC_TIME_OFFSET
- || !check_modification_time(mod_time - MAC_TIME_OFFSET, header)) {
- return 0;
- }
- return 1;
- }
- //
- // MacBinary "decoder". This reuses the LHADecoder framework to provide
- // a "pass-through" decoder that detects and strips the MacBinary header.
- //
- typedef struct {
- // When the decoder is initialized, the first 128 bytes of
- // data are read into this buffer and analysed. If it is
- // not a MacBinary header, the data must be kept so that it
- // can be returned in the first call to .read().
- // mb_header_bytes contains the number of bytes still to read.
- uint8_t mb_header[MBHDR_SIZE];
- size_t mb_header_bytes;
- // The "inner" decoder used to read the compressed data.
- LHADecoder *decoder;
- // Number of bytes still to read before decode should be
- // terminated.
- size_t stream_remaining;
- } MacBinaryDecoder;
- // Structure used when initializing a MacBinaryDecoder.
- typedef struct {
- LHADecoder *decoder;
- LHAFileHeader *header;
- } MacBinaryDecoderClosure;
- static int read_macbinary_header(MacBinaryDecoder *decoder,
- LHAFileHeader *header)
- {
- unsigned int data_fork_len, res_fork_len;
- size_t n, bytes;
- bytes = 0;
- while (bytes < MBHDR_SIZE) {
- n = lha_decoder_read(decoder->decoder,
- decoder->mb_header + bytes,
- MBHDR_SIZE - bytes);
- // Unexpected EOF?
- if (n == 0) {
- return 0;
- }
- bytes += n;
- }
- // Check if the data that was read corresponds to a MacBinary
- // header that matches the .lzh header. If not, just decode it
- // as a normal stream.
- if (!is_macbinary_header(decoder->mb_header, header)) {
- decoder->mb_header_bytes = bytes;
- return 1;
- }
- // We have a MacBinary header, so skip over it. Decide how
- // long the data stream is (see policy in comment at start
- // of file).
- decoder->mb_header_bytes = 0;
- data_fork_len = lha_decode_be_uint32(
- &decoder->mb_header[MBHDR_OFF_DATA_FORK_LEN]);
- res_fork_len = lha_decode_be_uint32(
- &decoder->mb_header[MBHDR_OFF_RES_FORK_LEN]);
- if (data_fork_len > 0) {
- decoder->stream_remaining = data_fork_len;
- } else {
- decoder->stream_remaining = res_fork_len;
- }
- return 1;
- }
- static int macbinary_decoder_init(void *_decoder,
- LHADecoderCallback callback,
- void *_closure)
- {
- MacBinaryDecoder *decoder = _decoder;
- MacBinaryDecoderClosure *closure = _closure;
- decoder->decoder = closure->decoder;
- decoder->mb_header_bytes = 0;
- decoder->stream_remaining = closure->header->length;
- if (closure->header->length >= MBHDR_SIZE
- && !read_macbinary_header(decoder, closure->header)) {
- return 0;
- }
- return 1;
- }
- static void decode_to_end(LHADecoder *decoder)
- {
- uint8_t buf[128];
- size_t n;
- do {
- n = lha_decoder_read(decoder, buf, sizeof(buf));
- } while (n > 0);
- }
- static size_t macbinary_decoder_read(void *_decoder, uint8_t *buf)
- {
- MacBinaryDecoder *decoder = _decoder;
- size_t result;
- size_t to_read;
- size_t n;
- result = 0;
- // If there is data from the mb_header buffer waiting to be
- // read, add it first.
- if (decoder->mb_header_bytes > 0) {
- memcpy(buf, decoder->mb_header, decoder->mb_header_bytes);
- result = decoder->mb_header_bytes;
- decoder->mb_header_bytes = 0;
- }
- // Read further data, if there is some in the stream still to read.
- to_read = OUTPUT_BUFFER_SIZE - result;
- if (to_read > decoder->stream_remaining) {
- to_read = decoder->stream_remaining;
- }
- n = lha_decoder_read(decoder->decoder, buf + result, to_read);
- decoder->stream_remaining -= n;
- result += n;
- // Once the end of the stream is reached, there may still be
- // data from the inner decoder to decompress. When this happens,
- // run the decoder until the end.
- if (decoder->stream_remaining == 0) {
- decode_to_end(decoder->decoder);
- }
- return result;
- }
- static LHADecoderType macbinary_decoder_type = {
- macbinary_decoder_init,
- NULL,
- macbinary_decoder_read,
- sizeof(MacBinaryDecoder),
- OUTPUT_BUFFER_SIZE,
- 0,
- };
- LHADecoder *lha_macbinary_passthrough(LHADecoder *decoder,
- LHAFileHeader *header)
- {
- MacBinaryDecoderClosure closure;
- LHADecoder *result;
- closure.decoder = decoder;
- closure.header = header;
- result = lha_decoder_new(&macbinary_decoder_type, NULL,
- &closure, header->length);
- return result;
- }
|