/* Copyright (c) 2011, 2012, Simon Howard Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include "lha_arch.h" #include "lha_decoder.h" #include "lha_basic_reader.h" #include "public/lha_reader.h" #include "macbinary.h" typedef enum { // Initial state at start of stream: CURR_FILE_START, // Current file is a "normal" file (or directory) read from // the input stream. CURR_FILE_NORMAL, // Current file is a directory that has been popped from the // directory stack. CURR_FILE_FAKE_DIR, // Current file is a deferred symbolic link that has been left // to the end of the input stream to be created. CURR_FILE_DEFERRED_SYMLINK, // End of input stream has been reached. CURR_FILE_EOF, } CurrFileType; struct _LHAReader { LHABasicReader *reader; // The current file that we are processing (last file returned // by lha_reader_next_file). LHAFileHeader *curr_file; CurrFileType curr_file_type; // Pointer to decoder being used to decompress the current file, // or NULL if we have not yet started decompression. LHADecoder *decoder; // Pointer to "inner" decoder. Most of the time, // decoder == inner_decoder, but when decoding an archive // generated by MacLHA, inner_decoder points to the actual // decompressor. LHADecoder *inner_decoder; // Policy used to extract directories. LHAReaderDirPolicy dir_policy; // Directories that have been created by lha_reader_extract but // have not yet had their metadata set. This is a linked list // using the _next field in LHAFileHeader. // In the case of LHA_READER_DIR_END_OF_DIR this is a stack; // in the case of LHA_READER_DIR_END_OF_FILE it is a list. LHAFileHeader *dir_stack; // Symbolic links containing absolute paths or '..' are not // created immediately - instead, "placeholder" files are created // in their place, and the symbolic links created at the end // of extraction. LHAFileHeader *deferred_symlinks; }; /** * Free the current decoder structure. * * If the reader has an allocated decoder being used to decompress the * current file, the decoder is freed and the decoder pointer reset * to NULL. * * @param reader Pointer to the LHA reader structure. */ static void close_decoder(LHAReader *reader) { if (reader->decoder != NULL) { if (reader->inner_decoder == reader->decoder) { reader->inner_decoder = NULL; } lha_decoder_free(reader->decoder); reader->decoder = NULL; } if (reader->inner_decoder != NULL) { lha_decoder_free(reader->inner_decoder); reader->inner_decoder = NULL; } } /** * Create the decoder structure to decompress the data from the * current file. * * @param reader Pointer to the LHA reader structure. * @param callback Callback function to invoke to track progress. * @param callback_data Extra pointer to pass to the callback function. * @return Non-zero for success, zero for failure. */ static int open_decoder(LHAReader *reader, LHADecoderProgressCallback callback, void *callback_data) { // Can only read from a normal file. if (reader->curr_file_type != CURR_FILE_NORMAL) { return 0; } reader->inner_decoder = lha_basic_reader_decode(reader->reader); if (reader->inner_decoder == NULL) { return 0; } // Set progress callback for decoder. if (callback != NULL) { lha_decoder_monitor(reader->inner_decoder, callback, callback_data); } // Some archives generated by MacLHA have a MacBinary header // attached to the start, which contains MacOS-specific // metadata about the compressed file. These are identified // and stripped off, using a "passthrough" decoder. if (reader->curr_file->os_type == LHA_OS_TYPE_MACOS) { reader->decoder = lha_macbinary_passthrough( reader->inner_decoder, reader->curr_file); if (reader->decoder == NULL) { return 0; } } else { reader->decoder = reader->inner_decoder; } return 1; } LHAReader *lha_reader_new(LHAInputStream *stream) { LHABasicReader *basic_reader; LHAReader *reader; reader = calloc(1, sizeof(LHAReader)); if (reader == NULL) { return NULL; } basic_reader = lha_basic_reader_new(stream); if (basic_reader == NULL) { free(reader); return NULL; } reader->reader = basic_reader; reader->curr_file = NULL; reader->curr_file_type = CURR_FILE_START; reader->decoder = NULL; reader->inner_decoder = NULL; reader->dir_stack = NULL; reader->dir_policy = LHA_READER_DIR_END_OF_DIR; reader->deferred_symlinks = NULL; return reader; } void lha_reader_free(LHAReader *reader) { LHAFileHeader *header; // Shut down the current decoder, if there is one. close_decoder(reader); // Free any file headers in the stack. while (reader->dir_stack != NULL) { header = reader->dir_stack; reader->dir_stack = header->_next; lha_file_header_free(header); } lha_basic_reader_free(reader->reader); free(reader); } void lha_reader_set_dir_policy(LHAReader *reader, LHAReaderDirPolicy policy) { reader->dir_policy = policy; } /** * Check if the directory at the top of the stack should be popped. * * Extracting a directory is a two stage process; after the directory * is created, it is pushed onto the directory stack. Later the * directory must be popped off the stack and its metadata applied. * * @param reader Pointer to the LHA reader structure. * @return Non-zero if there is a directory at the top of * the stack that should be popped. */ static int end_of_top_dir(LHAReader *reader) { LHAFileHeader *input; // No directories to pop? if (reader->dir_stack == NULL) { return 0; } // Once the end of the input stream is reached, all that is // left to do is pop off the remaining directories. input = lha_basic_reader_curr_file(reader->reader); if (input == NULL) { return 1; } switch (reader->dir_policy) { // Shouldn't happen? case LHA_READER_DIR_PLAIN: default: return 1; // Don't process directories until we reach the end of // the input stream. case LHA_READER_DIR_END_OF_FILE: return 0; // Once we reach a file from the input that is not within // the directory at the top of the stack, we have reached // the end of that directory, so we can pop it off. case LHA_READER_DIR_END_OF_DIR: return input->path == NULL || strncmp(input->path, reader->dir_stack->path, strlen(reader->dir_stack->path)) != 0; } } // Read the next file from the input stream. LHAFileHeader *lha_reader_next_file(LHAReader *reader) { // Free the current decoder if there is one. close_decoder(reader); // No point continuing once the end of the input stream has // been reached. if (reader->curr_file_type == CURR_FILE_EOF) { return NULL; } // Advance to the next file from the input stream? // Don't advance until we've done the fake directories first. if (reader->curr_file_type == CURR_FILE_START || reader->curr_file_type == CURR_FILE_NORMAL) { lha_basic_reader_next_file(reader->reader); } // If the last file we returned was a 'fake' directory, we must // now unreference it. if (reader->curr_file_type == CURR_FILE_FAKE_DIR) { lha_file_header_free(reader->curr_file); } // Pop off all appropriate directories from the stack first. if (end_of_top_dir(reader)) { reader->curr_file = reader->dir_stack; reader->dir_stack = reader->dir_stack->_next; reader->curr_file_type = CURR_FILE_FAKE_DIR; } else { reader->curr_file = lha_basic_reader_curr_file(reader->reader); reader->curr_file_type = CURR_FILE_NORMAL; } // Once we reach the end of the file, there may be deferred // symbolic links still to extract, so process those before // giving up and declaring end of file. if (reader->curr_file == NULL) { if (reader->deferred_symlinks != NULL) { reader->curr_file = reader->deferred_symlinks; reader->curr_file_type = CURR_FILE_DEFERRED_SYMLINK; reader->deferred_symlinks = reader->deferred_symlinks->_next; reader->curr_file->_next = NULL; } else { reader->curr_file_type = CURR_FILE_EOF; } } return reader->curr_file; } size_t lha_reader_read(LHAReader *reader, void *buf, size_t buf_len) { // The first time that we try to read the current file, we // must create the decoder to decompress it. if (reader->decoder == NULL) { if (!open_decoder(reader, NULL, NULL)) { return 0; } } // Read from decoder and return the result. return lha_decoder_read(reader->decoder, buf, buf_len); } /** * Decompress the current file. * * Assumes that @param open_decoder has already been called to * start the decode process. * * @param reader Pointer to the LHA reader structure. * @param output FILE handle to write decompressed data, or NULL * if the decompressed data should be discarded. * @return Non-zero if the file decompressed successfully. */ static int do_decode(LHAReader *reader, FILE *output) { uint8_t buf[64]; unsigned int bytes; // Decompress the current file. do { bytes = lha_reader_read(reader, buf, sizeof(buf)); if (output != NULL) { if (fwrite(buf, 1, bytes, output) < bytes) { return 0; } } } while (bytes > 0); // Decoder stores output position and performs running CRC. // At the end of the stream these should match the header values. return lha_decoder_get_length(reader->inner_decoder) == reader->curr_file->length && lha_decoder_get_crc(reader->inner_decoder) == reader->curr_file->crc; } int lha_reader_check(LHAReader *reader, LHADecoderProgressCallback callback, void *callback_data) { if (reader->curr_file_type != CURR_FILE_NORMAL) { return 0; } // CRC checking of directories is not necessary. if (!strcmp(reader->curr_file->compress_method, LHA_COMPRESS_TYPE_DIR)) { return 1; } // Decode file. return open_decoder(reader, callback, callback_data) && do_decode(reader, NULL); } /** * Open an output stream into which to decompress the current file. * * @param reader Pointer to the LHA reader structure. * @param filename Name of the file to open. * @return FILE handle of the opened file, or NULL in * case of failure. */ static FILE *open_output_file(LHAReader *reader, char *filename) { int unix_uid = -1, unix_gid = -1, unix_perms = -1; if (LHA_FILE_HAVE_EXTRA(reader->curr_file, LHA_FILE_UNIX_UID_GID)) { unix_uid = reader->curr_file->unix_uid; unix_gid = reader->curr_file->unix_gid; } if (LHA_FILE_HAVE_EXTRA(reader->curr_file, LHA_FILE_UNIX_PERMS)) { unix_perms = reader->curr_file->unix_perms; } return lha_arch_fopen(filename, unix_uid, unix_gid, unix_perms); } /** * Set file timestamps for the specified file. * * If possible, the more accurate Windows timestamp values are used; * otherwise normal Unix timestamps are used. * * @param path Path to the file or directory to set. * @param header Pointer to file header structure containing the * timestamps to set. * @return Non-zero if the timestamps were set successfully, * or zero for failure. */ static int set_timestamps_from_header(char *path, LHAFileHeader *header) { #if LHA_ARCH == LHA_ARCH_WINDOWS if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_WINDOWS_TIMESTAMPS)) { return lha_arch_set_windows_timestamps( path, header->win_creation_time, header->win_modification_time, header->win_access_time ); } else // .... #endif if (header->timestamp != 0) { return lha_arch_utime(path, header->timestamp); } else { return 1; } } /** * Set directory metadata. * * This is the second stage of directory extraction. Metadata (timestamps * and permissions) should be set on a dictory after the contents of * the directory has been extracted. * * @param header Pointer to file header structure containing the * metadata to set. * @param path Path to the directory on which to set the metadata. * @return Non-zero for success, or zero for failure. */ static int set_directory_metadata(LHAFileHeader *header, char *path) { // Set timestamp: set_timestamps_from_header(path, header); // Set owner and group: if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_UID_GID)) { if (!lha_arch_chown(path, header->unix_uid, header->unix_gid)) { // On most Unix systems, only root can change // ownership. But if we can't change ownership, // it isn't a fatal error. Ignore the failure // and continue. // TODO: Implement some kind of alternate handling // here? /* return 0; */ } } // Set permissions on directory: if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)) { if (!lha_arch_chmod(path, header->unix_perms)) { return 0; } } return 1; } /** * "Extract" (create) a directory. * * The current file is assumed to be a directory. This is the first * stage in extracting a directory; after the directory is created, * it is added to the directory stack so that the metadata apply stage * runs later. (If the LHA_READER_DIR_PLAIN policy is used, metadata * is just applied now). * * @param reader Pointer to the LHA reader structure. * @param path Path to the directory, or NULL to use the path from * the file header. * @return Non-zero for success, or zero for failure. */ static int extract_directory(LHAReader *reader, char *path) { LHAFileHeader *header; unsigned int mode; header = reader->curr_file; // If path is not specified, use the path from the file header. if (path == NULL) { path = header->path; } // Create directory. If there are permissions to be set, create // the directory with minimal permissions limited to the running // user. Otherwise use the default umask. if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)) { mode = 0700; } else { mode = 0777; } if (!lha_arch_mkdir(path, mode)) { // If the attempt to create the directory failed, it may // be because the directory already exists. Return success // if this is the case; it isn't really an error. return lha_arch_exists(path) == LHA_FILE_DIRECTORY; } // The directory has been created, but the metadata has not yet // been applied. It depends on the directory policy how this // is handled. If we are using LHA_READER_DIR_PLAIN, set // metadata now. Otherwise, save the directory for later. if (reader->dir_policy == LHA_READER_DIR_PLAIN) { set_directory_metadata(header, path); } else { lha_file_header_add_ref(header); header->_next = reader->dir_stack; reader->dir_stack = header; } return 1; } /** * Extract the current file. * * @param reader Pointer to the LHA reader structure. * @param filename Filename into which to extract the file, or NULL * to use the filename from the file header. * @param callback Callback function to invoke to track progress. * @param callback_data Extra pointer to pass to the callback function. * @return Non-zero if the file was successfully extracted, * or zero for failure. */ static int extract_file(LHAReader *reader, char *filename, LHADecoderProgressCallback callback, void *callback_data) { FILE *fstream; char *tmp_filename = NULL; int result; // Construct filename? if (filename == NULL) { tmp_filename = lha_file_header_full_path(reader->curr_file); if (tmp_filename == NULL) { return 0; } filename = tmp_filename; } // Create decoder. If the file cannot be created, there is no // need to even create an output file. If successful, open the // output file and decode. result = 0; if (open_decoder(reader, callback, callback_data)) { fstream = open_output_file(reader, filename); if (fstream != NULL) { result = do_decode(reader, fstream); fclose(fstream); } } // Set timestamp on file: if (result) { set_timestamps_from_header(filename, reader->curr_file); } free(tmp_filename); return result; } /** * Determine whether a header contains a "dangerous" symbolic link. * * Symbolic links that begin with '/' or contain '..' as a path are * Potentially dangerous and could potentially be used to overwrite * arbitrary files on the filesystem. They therefore need to be * treated specially. * * @param header Pointer to a header structure defining a symbolic * link. * @return Non-zero if the symbolic link is potentially * dangerous. */ static int is_dangerous_symlink(LHAFileHeader *header) { char *path_start; char *p; if (header->symlink_target == NULL) { return 0; } // Absolute path symlinks could be used to point to arbitrary // filesystem locations. if (header->symlink_target[0] == '/') { return 1; } // Check for paths containing '..'. path_start = header->symlink_target; for (p = header->symlink_target; *p != '\0'; ++p) { if (*p == '/') { if ((p - path_start) == 2 && path_start[0] == '.' && path_start[1] == '.') { return 1; } path_start = p + 1; } } // The path might also end with '..' (no terminating /) if ((p - path_start) == 2 && path_start[0] == '.' && path_start[1] == '.') { return 1; } return 0; } /** * Get the length of a path defined by a file header. * * @param header The file header structure. * @return Length of the header in bytes. */ static size_t file_header_path_len(LHAFileHeader *header) { size_t result; result = 0; if (header->path != NULL) { result += strlen(header->path); } if (header->filename != NULL) { result += strlen(header->filename); } return result; } /** * Create a "placeholder" symbolic link. * * When a "dangerous" symbolic link is extracted, instead of creating it * immediately, create a "placeholder" empty file to go in its place, and * place it into the deferred_symlinks list to be created later. * * @param reader Pointer to the LHA reader structure. * @param filename Filename into which to extract the symlink. * @return Non-zero if the symlink was extracted successfully, * or zero for failure. */ static int extract_placeholder_symlink(LHAReader *reader, char *filename) { LHAFileHeader **rover; FILE *f; f = lha_arch_fopen(filename, -1, -1, 0600); if (f == NULL) { return 0; } fclose(f); // Insert this header into the list of deferred symbolic links. // The list must be maintained in order of decreasing path length, // so that one symbolic link cannot depend on another. For example: // // etc -> /etc // etc/passwd -> /malicious_path/passwd rover = &reader->deferred_symlinks; while (*rover != NULL && file_header_path_len(*rover) > file_header_path_len(reader->curr_file)) { rover = &(*rover)->_next; } reader->curr_file->_next = *rover; *rover = reader->curr_file; // Save reference to the header so it won't be freed. lha_file_header_add_ref(reader->curr_file); return 1; } /** * Extract a Unix symbolic link. * * @param reader Pointer to the LHA reader structure. * @param filename Filename into which to extract the symlink, or NULL * to use the filename from the file header. * @return Non-zero if the symlink was extracted successfully, * or zero for failure. */ static int extract_symlink(LHAReader *reader, char *filename) { char *tmp_filename = NULL; int result; // Construct filename? if (filename == NULL) { tmp_filename = lha_file_header_full_path(reader->curr_file); if (tmp_filename == NULL) { return 0; } filename = tmp_filename; } if (reader->curr_file_type == CURR_FILE_NORMAL && is_dangerous_symlink(reader->curr_file)) { return extract_placeholder_symlink(reader, filename); } result = lha_arch_symlink(filename, reader->curr_file->symlink_target); // TODO: Set symlink timestamp. free(tmp_filename); return result; } /** * Extract a "normal" file. * * This just extracts the file header most recently read by the * BasicReader. * * @param reader Pointer to the LHA reader structure. * @param filename Filename into which to extract the file, or NULL * to use the filename from the file header. * @param callback Callback function to invoke to track progress. * @param callback_data Extra pointer to pass to the callback function. * @return Non-zero if the file was successfully extracted, * or zero for failure. */ static int extract_normal(LHAReader *reader, char *filename, LHADecoderProgressCallback callback, void *callback_data) { if (strcmp(reader->curr_file->compress_method, LHA_COMPRESS_TYPE_DIR) != 0) { return extract_file(reader, filename, callback, callback_data); } else if (reader->curr_file->symlink_target != NULL) { return extract_symlink(reader, filename); } else { return extract_directory(reader, filename); } } int lha_reader_extract(LHAReader *reader, char *filename, LHADecoderProgressCallback callback, void *callback_data) { switch (reader->curr_file_type) { case CURR_FILE_NORMAL: return extract_normal(reader, filename, callback, callback_data); case CURR_FILE_FAKE_DIR: if (filename == NULL) { filename = reader->curr_file->path; } set_directory_metadata(reader->curr_file, filename); return 1; case CURR_FILE_DEFERRED_SYMLINK: return extract_symlink(reader, filename); case CURR_FILE_START: case CURR_FILE_EOF: break; } return 0; } int lha_reader_current_is_fake(LHAReader *reader) { return reader->curr_file_type == CURR_FILE_FAKE_DIR || reader->curr_file_type == CURR_FILE_DEFERRED_SYMLINK; }