|
- /*
- Copyright (c) 2011, 2012, Simon Howard
- Permission to use, copy, modify, and/or distribute this software
- for any purpose with or without fee is hereby granted, provided
- that the above copyright notice and this permission notice appear
- in all copies.
- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
- WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
- AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
- CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
- LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
- NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "lha_arch.h"
- #include "lha_decoder.h"
- #include "lha_basic_reader.h"
- #include "public/lha_reader.h"
- #include "macbinary.h"
// Describes what reader->curr_file currently refers to.
typedef enum {

	// Nothing has been read from the stream yet.

	CURR_FILE_START,

	// An ordinary entry (file or directory) taken directly from
	// the input stream.

	CURR_FILE_NORMAL,

	// A directory that was previously pushed onto the directory
	// stack and has now been popped back off.

	CURR_FILE_FAKE_DIR,

	// A symbolic link whose creation was postponed until the end
	// of the input stream.

	CURR_FILE_DEFERRED_SYMLINK,

	// The input stream has been exhausted.

	CURR_FILE_EOF,
} CurrFileType;
- struct _LHAReader {
- LHABasicReader *reader;
- // The current file that we are processing (last file returned
- // by lha_reader_next_file).
- LHAFileHeader *curr_file;
- CurrFileType curr_file_type;
- // Pointer to decoder being used to decompress the current file,
- // or NULL if we have not yet started decompression.
- LHADecoder *decoder;
- // Pointer to "inner" decoder. Most of the time,
- // decoder == inner_decoder, but when decoding an archive
- // generated by MacLHA, inner_decoder points to the actual
- // decompressor.
- LHADecoder *inner_decoder;
- // Policy used to extract directories.
- LHAReaderDirPolicy dir_policy;
- // Directories that have been created by lha_reader_extract but
- // have not yet had their metadata set. This is a linked list
- // using the _next field in LHAFileHeader.
- // In the case of LHA_READER_DIR_END_OF_DIR this is a stack;
- // in the case of LHA_READER_DIR_END_OF_FILE it is a list.
- LHAFileHeader *dir_stack;
- // Symbolic links containing absolute paths or '..' are not
- // created immediately - instead, "placeholder" files are created
- // in their place, and the symbolic links created at the end
- // of extraction.
- LHAFileHeader *deferred_symlinks;
- };
- /**
- * Free the current decoder structure.
- *
- * If the reader has an allocated decoder being used to decompress the
- * current file, the decoder is freed and the decoder pointer reset
- * to NULL.
- *
- * @param reader Pointer to the LHA reader structure.
- */
- static void close_decoder(LHAReader *reader)
- {
- if (reader->decoder != NULL) {
- if (reader->inner_decoder == reader->decoder) {
- reader->inner_decoder = NULL;
- }
- lha_decoder_free(reader->decoder);
- reader->decoder = NULL;
- }
- if (reader->inner_decoder != NULL) {
- lha_decoder_free(reader->inner_decoder);
- reader->inner_decoder = NULL;
- }
- }
- /**
- * Create the decoder structure to decompress the data from the
- * current file.
- *
- * @param reader Pointer to the LHA reader structure.
- * @param callback Callback function to invoke to track progress.
- * @param callback_data Extra pointer to pass to the callback function.
- * @return Non-zero for success, zero for failure.
- */
- static int open_decoder(LHAReader *reader,
- LHADecoderProgressCallback callback,
- void *callback_data)
- {
- // Can only read from a normal file.
- if (reader->curr_file_type != CURR_FILE_NORMAL) {
- return 0;
- }
- reader->inner_decoder = lha_basic_reader_decode(reader->reader);
- if (reader->inner_decoder == NULL) {
- return 0;
- }
- // Set progress callback for decoder.
- if (callback != NULL) {
- lha_decoder_monitor(reader->inner_decoder,
- callback, callback_data);
- }
- // Some archives generated by MacLHA have a MacBinary header
- // attached to the start, which contains MacOS-specific
- // metadata about the compressed file. These are identified
- // and stripped off, using a "passthrough" decoder.
- if (reader->curr_file->os_type == LHA_OS_TYPE_MACOS) {
- reader->decoder = lha_macbinary_passthrough(
- reader->inner_decoder, reader->curr_file);
- if (reader->decoder == NULL) {
- return 0;
- }
- } else {
- reader->decoder = reader->inner_decoder;
- }
- return 1;
- }
- LHAReader *lha_reader_new(LHAInputStream *stream)
- {
- LHABasicReader *basic_reader;
- LHAReader *reader;
- reader = calloc(1, sizeof(LHAReader));
- if (reader == NULL) {
- return NULL;
- }
- basic_reader = lha_basic_reader_new(stream);
- if (basic_reader == NULL) {
- free(reader);
- return NULL;
- }
- reader->reader = basic_reader;
- reader->curr_file = NULL;
- reader->curr_file_type = CURR_FILE_START;
- reader->decoder = NULL;
- reader->inner_decoder = NULL;
- reader->dir_stack = NULL;
- reader->dir_policy = LHA_READER_DIR_END_OF_DIR;
- reader->deferred_symlinks = NULL;
- return reader;
- }
- void lha_reader_free(LHAReader *reader)
- {
- LHAFileHeader *header;
- // Shut down the current decoder, if there is one.
- close_decoder(reader);
- // Free any file headers in the stack.
- while (reader->dir_stack != NULL) {
- header = reader->dir_stack;
- reader->dir_stack = header->_next;
- lha_file_header_free(header);
- }
- lha_basic_reader_free(reader->reader);
- free(reader);
- }
- void lha_reader_set_dir_policy(LHAReader *reader,
- LHAReaderDirPolicy policy)
- {
- reader->dir_policy = policy;
- }
- /**
- * Check if the directory at the top of the stack should be popped.
- *
- * Extracting a directory is a two stage process; after the directory
- * is created, it is pushed onto the directory stack. Later the
- * directory must be popped off the stack and its metadata applied.
- *
- * @param reader Pointer to the LHA reader structure.
- * @return Non-zero if there is a directory at the top of
- * the stack that should be popped.
- */
- static int end_of_top_dir(LHAReader *reader)
- {
- LHAFileHeader *input;
- // No directories to pop?
- if (reader->dir_stack == NULL) {
- return 0;
- }
- // Once the end of the input stream is reached, all that is
- // left to do is pop off the remaining directories.
- input = lha_basic_reader_curr_file(reader->reader);
- if (input == NULL) {
- return 1;
- }
- switch (reader->dir_policy) {
- // Shouldn't happen?
- case LHA_READER_DIR_PLAIN:
- default:
- return 1;
- // Don't process directories until we reach the end of
- // the input stream.
- case LHA_READER_DIR_END_OF_FILE:
- return 0;
- // Once we reach a file from the input that is not within
- // the directory at the top of the stack, we have reached
- // the end of that directory, so we can pop it off.
- case LHA_READER_DIR_END_OF_DIR:
- return input->path == NULL
- || strncmp(input->path,
- reader->dir_stack->path,
- strlen(reader->dir_stack->path)) != 0;
- }
- }
- // Read the next file from the input stream.
- LHAFileHeader *lha_reader_next_file(LHAReader *reader)
- {
- // Free the current decoder if there is one.
- close_decoder(reader);
- // No point continuing once the end of the input stream has
- // been reached.
- if (reader->curr_file_type == CURR_FILE_EOF) {
- return NULL;
- }
- // Advance to the next file from the input stream?
- // Don't advance until we've done the fake directories first.
- if (reader->curr_file_type == CURR_FILE_START
- || reader->curr_file_type == CURR_FILE_NORMAL) {
- lha_basic_reader_next_file(reader->reader);
- }
- // If the last file we returned was a 'fake' directory, we must
- // now unreference it.
- if (reader->curr_file_type == CURR_FILE_FAKE_DIR) {
- lha_file_header_free(reader->curr_file);
- }
- // Pop off all appropriate directories from the stack first.
- if (end_of_top_dir(reader)) {
- reader->curr_file = reader->dir_stack;
- reader->dir_stack = reader->dir_stack->_next;
- reader->curr_file_type = CURR_FILE_FAKE_DIR;
- } else {
- reader->curr_file = lha_basic_reader_curr_file(reader->reader);
- reader->curr_file_type = CURR_FILE_NORMAL;
- }
- // Once we reach the end of the file, there may be deferred
- // symbolic links still to extract, so process those before
- // giving up and declaring end of file.
- if (reader->curr_file == NULL) {
- if (reader->deferred_symlinks != NULL) {
- reader->curr_file = reader->deferred_symlinks;
- reader->curr_file_type = CURR_FILE_DEFERRED_SYMLINK;
- reader->deferred_symlinks =
- reader->deferred_symlinks->_next;
- reader->curr_file->_next = NULL;
- } else {
- reader->curr_file_type = CURR_FILE_EOF;
- }
- }
- return reader->curr_file;
- }
- size_t lha_reader_read(LHAReader *reader, void *buf, size_t buf_len)
- {
- // The first time that we try to read the current file, we
- // must create the decoder to decompress it.
- if (reader->decoder == NULL) {
- if (!open_decoder(reader, NULL, NULL)) {
- return 0;
- }
- }
- // Read from decoder and return the result.
- return lha_decoder_read(reader->decoder, buf, buf_len);
- }
- /**
- * Decompress the current file.
- *
- * Assumes that @param open_decoder has already been called to
- * start the decode process.
- *
- * @param reader Pointer to the LHA reader structure.
- * @param output FILE handle to write decompressed data, or NULL
- * if the decompressed data should be discarded.
- * @return Non-zero if the file decompressed successfully.
- */
- static int do_decode(LHAReader *reader, FILE *output)
- {
- uint8_t buf[64];
- unsigned int bytes;
- // Decompress the current file.
- do {
- bytes = lha_reader_read(reader, buf, sizeof(buf));
- if (output != NULL) {
- if (fwrite(buf, 1, bytes, output) < bytes) {
- return 0;
- }
- }
- } while (bytes > 0);
- // Decoder stores output position and performs running CRC.
- // At the end of the stream these should match the header values.
- return lha_decoder_get_length(reader->inner_decoder)
- == reader->curr_file->length
- && lha_decoder_get_crc(reader->inner_decoder)
- == reader->curr_file->crc;
- }
- int lha_reader_check(LHAReader *reader,
- LHADecoderProgressCallback callback,
- void *callback_data)
- {
- if (reader->curr_file_type != CURR_FILE_NORMAL) {
- return 0;
- }
- // CRC checking of directories is not necessary.
- if (!strcmp(reader->curr_file->compress_method,
- LHA_COMPRESS_TYPE_DIR)) {
- return 1;
- }
- // Decode file.
- return open_decoder(reader, callback, callback_data)
- && do_decode(reader, NULL);
- }
- /**
- * Open an output stream into which to decompress the current file.
- *
- * @param reader Pointer to the LHA reader structure.
- * @param filename Name of the file to open.
- * @return FILE handle of the opened file, or NULL in
- * case of failure.
- */
- static FILE *open_output_file(LHAReader *reader, char *filename)
- {
- int unix_uid = -1, unix_gid = -1, unix_perms = -1;
- if (LHA_FILE_HAVE_EXTRA(reader->curr_file, LHA_FILE_UNIX_UID_GID)) {
- unix_uid = reader->curr_file->unix_uid;
- unix_gid = reader->curr_file->unix_gid;
- }
- if (LHA_FILE_HAVE_EXTRA(reader->curr_file, LHA_FILE_UNIX_PERMS)) {
- unix_perms = reader->curr_file->unix_perms;
- }
- return lha_arch_fopen(filename, unix_uid, unix_gid, unix_perms);
- }
- /**
- * Set file timestamps for the specified file.
- *
- * If possible, the more accurate Windows timestamp values are used;
- * otherwise normal Unix timestamps are used.
- *
- * @param path Path to the file or directory to set.
- * @param header Pointer to file header structure containing the
- * timestamps to set.
- * @return Non-zero if the timestamps were set successfully,
- * or zero for failure.
- */
- static int set_timestamps_from_header(char *path, LHAFileHeader *header)
- {
- #if LHA_ARCH == LHA_ARCH_WINDOWS
- if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_WINDOWS_TIMESTAMPS)) {
- return lha_arch_set_windows_timestamps(
- path,
- header->win_creation_time,
- header->win_modification_time,
- header->win_access_time
- );
- } else // ....
- #endif
- if (header->timestamp != 0) {
- return lha_arch_utime(path, header->timestamp);
- } else {
- return 1;
- }
- }
- /**
- * Set directory metadata.
- *
- * This is the second stage of directory extraction. Metadata (timestamps
- * and permissions) should be set on a dictory after the contents of
- * the directory has been extracted.
- *
- * @param header Pointer to file header structure containing the
- * metadata to set.
- * @param path Path to the directory on which to set the metadata.
- * @return Non-zero for success, or zero for failure.
- */
- static int set_directory_metadata(LHAFileHeader *header, char *path)
- {
- // Set timestamp:
- set_timestamps_from_header(path, header);
- // Set owner and group:
- if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_UID_GID)) {
- if (!lha_arch_chown(path, header->unix_uid,
- header->unix_gid)) {
- // On most Unix systems, only root can change
- // ownership. But if we can't change ownership,
- // it isn't a fatal error. Ignore the failure
- // and continue.
- // TODO: Implement some kind of alternate handling
- // here?
- /* return 0; */
- }
- }
- // Set permissions on directory:
- if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)) {
- if (!lha_arch_chmod(path, header->unix_perms)) {
- return 0;
- }
- }
- return 1;
- }
- /**
- * "Extract" (create) a directory.
- *
- * The current file is assumed to be a directory. This is the first
- * stage in extracting a directory; after the directory is created,
- * it is added to the directory stack so that the metadata apply stage
- * runs later. (If the LHA_READER_DIR_PLAIN policy is used, metadata
- * is just applied now).
- *
- * @param reader Pointer to the LHA reader structure.
- * @param path Path to the directory, or NULL to use the path from
- * the file header.
- * @return Non-zero for success, or zero for failure.
- */
- static int extract_directory(LHAReader *reader, char *path)
- {
- LHAFileHeader *header;
- unsigned int mode;
- header = reader->curr_file;
- // If path is not specified, use the path from the file header.
- if (path == NULL) {
- path = header->path;
- }
- // Create directory. If there are permissions to be set, create
- // the directory with minimal permissions limited to the running
- // user. Otherwise use the default umask.
- if (LHA_FILE_HAVE_EXTRA(header, LHA_FILE_UNIX_PERMS)) {
- mode = 0700;
- } else {
- mode = 0777;
- }
- if (!lha_arch_mkdir(path, mode)) {
- // If the attempt to create the directory failed, it may
- // be because the directory already exists. Return success
- // if this is the case; it isn't really an error.
- return lha_arch_exists(path) == LHA_FILE_DIRECTORY;
- }
- // The directory has been created, but the metadata has not yet
- // been applied. It depends on the directory policy how this
- // is handled. If we are using LHA_READER_DIR_PLAIN, set
- // metadata now. Otherwise, save the directory for later.
- if (reader->dir_policy == LHA_READER_DIR_PLAIN) {
- set_directory_metadata(header, path);
- } else {
- lha_file_header_add_ref(header);
- header->_next = reader->dir_stack;
- reader->dir_stack = header;
- }
- return 1;
- }
- /**
- * Extract the current file.
- *
- * @param reader Pointer to the LHA reader structure.
- * @param filename Filename into which to extract the file, or NULL
- * to use the filename from the file header.
- * @param callback Callback function to invoke to track progress.
- * @param callback_data Extra pointer to pass to the callback function.
- * @return Non-zero if the file was successfully extracted,
- * or zero for failure.
- */
- static int extract_file(LHAReader *reader, char *filename,
- LHADecoderProgressCallback callback,
- void *callback_data)
- {
- FILE *fstream;
- char *tmp_filename = NULL;
- int result;
- // Construct filename?
- if (filename == NULL) {
- tmp_filename = lha_file_header_full_path(reader->curr_file);
- if (tmp_filename == NULL) {
- return 0;
- }
- filename = tmp_filename;
- }
- // Create decoder. If the file cannot be created, there is no
- // need to even create an output file. If successful, open the
- // output file and decode.
- result = 0;
- if (open_decoder(reader, callback, callback_data)) {
- fstream = open_output_file(reader, filename);
- if (fstream != NULL) {
- result = do_decode(reader, fstream);
- fclose(fstream);
- }
- }
- // Set timestamp on file:
- if (result) {
- set_timestamps_from_header(filename, reader->curr_file);
- }
- free(tmp_filename);
- return result;
- }
- /**
- * Determine whether a header contains a "dangerous" symbolic link.
- *
- * Symbolic links that begin with '/' or contain '..' as a path are
- * Potentially dangerous and could potentially be used to overwrite
- * arbitrary files on the filesystem. They therefore need to be
- * treated specially.
- *
- * @param header Pointer to a header structure defining a symbolic
- * link.
- * @return Non-zero if the symbolic link is potentially
- * dangerous.
- */
- static int is_dangerous_symlink(LHAFileHeader *header)
- {
- char *path_start;
- char *p;
- if (header->symlink_target == NULL) {
- return 0;
- }
- // Absolute path symlinks could be used to point to arbitrary
- // filesystem locations.
- if (header->symlink_target[0] == '/') {
- return 1;
- }
- // Check for paths containing '..'.
- path_start = header->symlink_target;
- for (p = header->symlink_target; *p != '\0'; ++p) {
- if (*p == '/') {
- if ((p - path_start) == 2
- && path_start[0] == '.' && path_start[1] == '.') {
- return 1;
- }
- path_start = p + 1;
- }
- }
- // The path might also end with '..' (no terminating /)
- if ((p - path_start) == 2
- && path_start[0] == '.' && path_start[1] == '.') {
- return 1;
- }
- return 0;
- }
- /**
- * Get the length of a path defined by a file header.
- *
- * @param header The file header structure.
- * @return Length of the header in bytes.
- */
- static size_t file_header_path_len(LHAFileHeader *header)
- {
- size_t result;
- result = 0;
- if (header->path != NULL) {
- result += strlen(header->path);
- }
- if (header->filename != NULL) {
- result += strlen(header->filename);
- }
- return result;
- }
- /**
- * Create a "placeholder" symbolic link.
- *
- * When a "dangerous" symbolic link is extracted, instead of creating it
- * immediately, create a "placeholder" empty file to go in its place, and
- * place it into the deferred_symlinks list to be created later.
- *
- * @param reader Pointer to the LHA reader structure.
- * @param filename Filename into which to extract the symlink.
- * @return Non-zero if the symlink was extracted successfully,
- * or zero for failure.
- */
- static int extract_placeholder_symlink(LHAReader *reader, char *filename)
- {
- LHAFileHeader **rover;
- FILE *f;
- f = lha_arch_fopen(filename, -1, -1, 0600);
- if (f == NULL) {
- return 0;
- }
- fclose(f);
- // Insert this header into the list of deferred symbolic links.
- // The list must be maintained in order of decreasing path length,
- // so that one symbolic link cannot depend on another. For example:
- //
- // etc -> /etc
- // etc/passwd -> /malicious_path/passwd
- rover = &reader->deferred_symlinks;
- while (*rover != NULL
- && file_header_path_len(*rover)
- > file_header_path_len(reader->curr_file)) {
- rover = &(*rover)->_next;
- }
- reader->curr_file->_next = *rover;
- *rover = reader->curr_file;
- // Save reference to the header so it won't be freed.
- lha_file_header_add_ref(reader->curr_file);
- return 1;
- }
- /**
- * Extract a Unix symbolic link.
- *
- * @param reader Pointer to the LHA reader structure.
- * @param filename Filename into which to extract the symlink, or NULL
- * to use the filename from the file header.
- * @return Non-zero if the symlink was extracted successfully,
- * or zero for failure.
- */
- static int extract_symlink(LHAReader *reader, char *filename)
- {
- char *tmp_filename = NULL;
- int result;
- // Construct filename?
- if (filename == NULL) {
- tmp_filename = lha_file_header_full_path(reader->curr_file);
- if (tmp_filename == NULL) {
- return 0;
- }
- filename = tmp_filename;
- }
- if (reader->curr_file_type == CURR_FILE_NORMAL
- && is_dangerous_symlink(reader->curr_file)) {
- return extract_placeholder_symlink(reader, filename);
- }
- result = lha_arch_symlink(filename, reader->curr_file->symlink_target);
- // TODO: Set symlink timestamp.
- free(tmp_filename);
- return result;
- }
- /**
- * Extract a "normal" file.
- *
- * This just extracts the file header most recently read by the
- * BasicReader.
- *
- * @param reader Pointer to the LHA reader structure.
- * @param filename Filename into which to extract the file, or NULL
- * to use the filename from the file header.
- * @param callback Callback function to invoke to track progress.
- * @param callback_data Extra pointer to pass to the callback function.
- * @return Non-zero if the file was successfully extracted,
- * or zero for failure.
- */
- static int extract_normal(LHAReader *reader,
- char *filename,
- LHADecoderProgressCallback callback,
- void *callback_data)
- {
- if (strcmp(reader->curr_file->compress_method,
- LHA_COMPRESS_TYPE_DIR) != 0) {
- return extract_file(reader, filename, callback, callback_data);
- } else if (reader->curr_file->symlink_target != NULL) {
- return extract_symlink(reader, filename);
- } else {
- return extract_directory(reader, filename);
- }
- }
- int lha_reader_extract(LHAReader *reader,
- char *filename,
- LHADecoderProgressCallback callback,
- void *callback_data)
- {
- switch (reader->curr_file_type) {
- case CURR_FILE_NORMAL:
- return extract_normal(reader, filename, callback,
- callback_data);
- case CURR_FILE_FAKE_DIR:
- if (filename == NULL) {
- filename = reader->curr_file->path;
- }
- set_directory_metadata(reader->curr_file, filename);
- return 1;
- case CURR_FILE_DEFERRED_SYMLINK:
- return extract_symlink(reader, filename);
- case CURR_FILE_START:
- case CURR_FILE_EOF:
- break;
- }
- return 0;
- }
- int lha_reader_current_is_fake(LHAReader *reader)
- {
- return reader->curr_file_type == CURR_FILE_FAKE_DIR
- || reader->curr_file_type == CURR_FILE_DEFERRED_SYMLINK;
- }
|