/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * compress routines:
 *	zmagic() - returns 0 if not recognized, uncompresses and prints
 *		   information if recognized
 *	uncompress(method, old, n, newch) - uncompress old into new,
 *					    using method, return sizeof new
 */
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
#endif

#include "magic.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SPAWN_H
#include <spawn.h>
#endif
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <stdarg.h>
#include <signal.h>
#ifndef HAVE_SIG_T
typedef void (*sig_t)(int);
#endif /* HAVE_SIG_T */
#ifdef HAVE_SYS_IOCTL_H
#include <sys/ioctl.h>
#endif
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
#if defined(HAVE_SYS_TIME_H)
#include <sys/time.h>
#endif

#if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
#define BUILTIN_DECOMPRESS
#include <zlib.h>
#endif

#if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
#define BUILTIN_BZLIB
#include <bzlib.h>
#endif

#if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
#define BUILTIN_XZLIB
#include <lzma.h>
#endif

#if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
#define BUILTIN_ZSTDLIB
#include <zstd.h>
#include <zstd_errors.h>
#endif

#if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
#define BUILTIN_LZLIB
#include <lzlib.h>
#endif

/*
 * Debug tracing: with DEBUG defined, DPRINTF() writes to /dev/tty (opened
 * lazily, aborting if that fails); otherwise it expands to nothing.
 */
#ifdef DEBUG
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
#define DPRINTF(...)
#endif

#ifdef ZLIBSUPPORT
/*
 * The following python code is not really used because ZLIBSUPPORT is only
 * defined if we have a built-in zlib, and the built-in zlib handles that.
 * That is not true for android where we have zlib.h and not -lz.
 */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };

/*
 * Recognize a zlib stream header from its first two bytes.
 * Returns 1 when the low nibble of byte 0 is 8, bit 7 of byte 0 is clear
 * and the two bytes, read as a big-endian 16-bit number, are a multiple
 * of 31; returns 0 otherwise.
 *
 * The 16-bit value is always built as buf[0] * 256 + buf[1], so the result
 * does not depend on the byte order of the host.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int x;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	x = (CAST(unsigned int, buf[0]) << 8) | buf[1];
	if (x % 31)
		return 0;
	return 1;
}
#endif

/*
 * Recognize an lzma header: byte 0 must be 0x5d, bytes 1 and 2 must be
 * zero, and byte 12 must be either 0 or 0xff.  The caller guarantees at
 * least 13 readable bytes (see the -13 length in compr[]).
 * Returns 1 on match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	if (buf[0] != 0x5d || buf[1] || buf[2])
		return 0;
	if (buf[12] && buf[12] != 0xff)
		return 0;
	return 1;
}

#define gzip_flags "-cd"
#define lzip_flags gzip_flags

/* argv vectors of the external decompression programs */
static const char *gzip_args[] = {
	"gzip", gzip_flags, NULL
};
static const char *uncompress_args[] = {
	"uncompress", "-c", NULL
};
static const char *bzip2_args[] = {
	"bzip2", "-cd", NULL
};
static const char *lzip_args[] = {
	"lzip", lzip_flags, NULL
};
static const char *xz_args[] = {
	"xz", "-cd", NULL
};
static const char *lrzip_args[] = {
	"lrzip", "-qdf", "-", NULL
};
static const char *lz4_args[] = {
	"lz4", "-cd", NULL
};
static const char *zstd_args[] = {
	"zstd", "-cd", NULL
};

#define	do_zlib		NULL
#define	do_bzlib	NULL

/*
 * Table of recognized compression formats.
 * A non-negative maglen means u.magic holds maglen signature bytes that
 * are compared with memcmp(); a negative maglen means u.func is a matcher
 * function that needs at least -maglen bytes of input.
 * The METH_* constants are indices into this table.
 */
file_private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
    /* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};

/* Result codes of the decompression helpers */
#define OKDATA 	0	/* buffer holds decompressed data */
#define NODATA	1	/* nothing usable was produced */
#define ERRDATA	2	/* buffer holds an error message */

file_private ssize_t swrite(int, const void *, size_t);
#if HAVE_FORK
file_private size_t ncompr = __arraycount(compr);
file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
    unsigned char **, size_t *);
#ifdef BUILTIN_DECOMPRESS
file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_BZLIB
file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_XZLIB
file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_ZSTDLIB
file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_LZLIB
file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif

static int makeerror(unsigned char **, size_t *, const char *, ...)
    __attribute__((__format__(__printf__, 3, 4)));
file_private const char *methodname(size_t);

/*
 * Print a decompression error message for method i.
 * Without any MAGIC_MIME flag the text is "ERROR:[<method>: <buf>]".
 * Otherwise every non-alphanumeric byte of buf is replaced in place by
 * '-' and "application/x-decompression-error-<method>-<buf>" is printed.
 * Returns the result of file_printf().
 */
file_private int
format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
{
	unsigned char *p;
	int mime = ms->flags & MAGIC_MIME;

	if (!mime)
		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);

	for (p = buf; *p; p++)
		if (!isalnum(*p))
			*p = '-';

	return file_printf(ms, "application/x-decompression-error-%s-%s",
	    methodname(i), buf);
}

/*
 * Try every entry of compr[] against the buffer b; for each match,
 * decompress it and run file_buffer() on the result, then (unless
 * MAGIC_COMPRESS_TRANSP is set or only part of MAGIC_MIME is requested)
 * append the description of the compressed data itself.
 *
 * Returns 0 if MAGIC_COMPRESS is not set or nothing was printed,
 * 1 if something was printed, -1 on error.
 *
 * MAGIC_COMPRESS is cleared while the decompressed data is examined and
 * set again before returning.  SIGPIPE is ignored for the duration and
 * the previous disposition is restored on exit.
 */
file_protected int
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
{
	unsigned char *newbuf = NULL;
	size_t i, nsz;
	char *rbuf;
	file_pushbuf_t *pb;
	int urv, prv, rv = 0;
	int mime = ms->flags & MAGIC_MIME;
	int fd = b->fd;
	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
	size_t nbytes = b->flen;
	int sa_saved = 0;
	struct sigaction sig_act;

	if ((ms->flags & MAGIC_COMPRESS) == 0)
		return 0;

	for (i = 0; i < ncompr; i++) {
		int zm;
		/* not enough input for this signature */
		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
			continue;
		if (compr[i].maglen < 0) {
			zm = (*compr[i].u.func)(buf);
		} else {
			zm = memcmp(buf, compr[i].u.magic,
			    CAST(size_t, compr[i].maglen)) == 0;
		}

		if (!zm)
			continue;

		/* Prevent SIGPIPE death if child dies unexpectedly */
		if (!sa_saved) {
			/* We can use sig_act for both new and old, but */
			struct sigaction new_act;
			memset(&new_act, 0, sizeof(new_act));
			new_act.sa_handler = SIG_IGN;
			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
		}

		nsz = nbytes;
		free(newbuf);
		urv = uncompressbuf(fd, ms->bytes_max, i,
		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
		    (char *)newbuf, nsz);
		switch (urv) {
		case OKDATA:
		case ERRDATA:
			ms->flags &= ~MAGIC_COMPRESS;
			if (urv == ERRDATA)
				prv = format_decompression_error(ms, i, newbuf);
			else
				prv = file_buffer(ms, -1, NULL, name, newbuf,
				    nsz);
			if (prv == -1)
				goto error;
			rv = 1;
			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
				goto out;
			if (mime != MAGIC_MIME && mime != 0)
				goto out;
			if ((file_printf(ms,
			    mime ? " compressed-encoding=" : " (")) == -1)
				goto error;
			if ((pb = file_push_buffer(ms)) == NULL)
				goto error;
			/*
			 * XXX: If file_buffer fails here, we overwrite
			 * the compressed text. FIXME.
			 */
			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
			{
				if (file_pop_buffer(ms, pb) != NULL)
					abort();
				goto error;
			}
			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
				if (file_printf(ms, "%s", rbuf) == -1) {
					free(rbuf);
					goto error;
				}
				free(rbuf);
			}
			if (!mime && file_printf(ms, ")") == -1)
				goto error;
			/*FALLTHROUGH*/
		case NODATA:
			break;
		default:
			abort();
			/*NOTREACHED*/
		error:
			rv = -1;
			break;
		}
	}
out:
	DPRINTF("rv = %d\n", rv);

	if (sa_saved && sig_act.sa_handler != SIG_IGN)
		(void)sigaction(SIGPIPE, &sig_act, NULL);

	free(newbuf);
	ms->flags |= MAGIC_COMPRESS;
	DPRINTF("Zmagic returns %d\n", rv);
	return rv;
}
#endif

/*
 * `safe' write for sockets and pipes.
 * Keeps calling write() until all n bytes are written, retrying on EINTR.
 * Returns n on success, -1 on any other write error.
 */
file_private ssize_t
swrite(int fd, const void *buf, size_t n)
{
	ssize_t rv;
	size_t rn = n;

	do
		switch (rv = write(fd, buf, n)) {
		case -1:
			if (errno == EINTR)
				continue;
			return -1;
		default:
			n -= rv;
			buf = CAST(const char *, buf) + rv;
			break;
		}
	while (n > 0);
	return rn;
}

/*
 * `safe' read for sockets and pipes.
 * Reads up to n bytes from fd into buf, retrying on EINTR, and returns the
 * number of bytes read (short on end of file) or -1 on error.
 *
 * Where FIONREAD is available and fd is not standard input, the request is
 * first clamped to the amount the descriptor reports as readable; when
 * canbepipe is set and nothing is reported, select() is used with a 100ms
 * timeout before asking again.
 */
file_protected ssize_t
sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
{
	ssize_t rv;
#if defined(FIONREAD) && !defined(__MINGW32__)
	int t = 0;
#endif
	size_t rn = n;

	if (fd == STDIN_FILENO)
		goto nocheck;

#if defined(FIONREAD) && !defined(__MINGW32__)
	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
#ifdef FD_ZERO
		ssize_t cnt;
		for (cnt = 0;; cnt++) {
			fd_set check;
			struct timeval tout = {0, 100 * 1000};
			int selrv;

			FD_ZERO(&check);
			FD_SET(fd, &check);

			/*
			 * Avoid soft deadlock: do not read if there
			 * is nothing to read from sockets and pipes.
			 */
			selrv = select(fd + 1, &check, NULL, NULL, &tout);
			if (selrv == -1) {
				if (errno == EINTR || errno == EAGAIN)
					continue;
				/*
				 * Any other select() failure will not go
				 * away by retrying; stop polling and let
				 * read() below report the problem instead
				 * of spinning here forever.
				 */
				break;
			} else if (selrv == 0 && cnt >= 5) {
				return 0;
			} else
				break;
		}
#endif
		(void)ioctl(fd, FIONREAD, &t);
	}

	if (t > 0 && CAST(size_t, t) < n) {
		n = t;
		rn = n;
	}
#endif

nocheck:
	do
		switch ((rv = read(fd, buf, n))) {
		case -1:
			if (errno == EINTR)
				continue;
			return -1;
		case 0:
			return rn - n;
		default:
			n -= rv;
			buf = CAST(char *, CCAST(void *, buf)) + rv;
			break;
		}
	while (n > 0);
	return rn;
}

/*
 * Copy the nbytes already read (startbuf) plus everything still readable
 * from fd into an unlinked temporary file, then dup2() that file onto fd
 * and rewind it.
 * Returns fd on success; on failure records an error in ms and returns -1.
 * The temporary descriptor is closed on every path.
 */
file_protected int
file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
    size_t nbytes)
{
	char buf[4096];
	ssize_t r;
	int tfd;

#ifdef WIN32
	const char *t;
	buf[0] = '\0';
	if ((t = getenv("TEMP")) != NULL)
		(void)strlcpy(buf, t, sizeof(buf));
	else if ((t = getenv("TMP")) != NULL)
		(void)strlcpy(buf, t, sizeof(buf));
	else if ((t = getenv("TMPDIR")) != NULL)
		(void)strlcpy(buf, t, sizeof(buf));
	if (buf[0] != '\0')
		(void)strlcat(buf, "/", sizeof(buf));
	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
#else
	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
#endif
#ifndef HAVE_MKSTEMP
	{
		char *ptr = mktemp(buf);
		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
		r = errno;
		(void)unlink(ptr);
		errno = r;
	}
#else
	{
		int te;
		mode_t ou = umask(0);
		tfd = mkstemp(buf);
		(void)umask(ou);
		te = errno;
		(void)unlink(buf);
		errno = te;
	}
#endif
	if (tfd == -1) {
		file_error(ms, errno,
		    "cannot create temporary file for pipe copy");
		return -1;
	}

	/* r ends up 0 on success, -1 on read error, >0 on write error */
	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
		r = 1;
	else {
		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
			if (swrite(tfd, buf, CAST(size_t, r)) != r)
				break;
	}

	switch (r) {
	case -1:
		file_error(ms, errno, "error copying from pipe to temp file");
		(void)close(tfd);	/* do not leak the temp descriptor */
		return -1;
	case 0:
		break;
	default:
		file_error(ms, errno, "error while writing to temp file");
		(void)close(tfd);	/* do not leak the temp descriptor */
		return -1;
	}

	/*
	 * We duplicate the file descriptor, because fclose on a
	 * tmpfile will delete the file, but any open descriptors
	 * can still access the phantom inode.
	 */
	if ((fd = dup2(tfd, fd)) == -1) {
		file_error(ms, errno, "could not dup descriptor for temp file");
		(void)close(tfd);	/* do not leak the temp descriptor */
		return -1;
	}
	(void)close(tfd);
	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
		file_badseek(ms);
		return -1;
	}
	return fd;
}
#if HAVE_FORK
#ifdef BUILTIN_DECOMPRESS

/* bits of the flag byte (offset 3) of a gzip header */
#define FHCRC		(1 << 1)
#define FEXTRA		(1 << 2)
#define FNAME		(1 << 3)
#define FCOMMENT	(1 << 4)

/*
 * Skip the gzip header of old (10 fixed bytes plus the optional extra
 * field, name, comment and header CRC selected by the flag byte) and hand
 * the remainder to uncompresszlib() as a raw deflate stream.
 * Returns the uncompresszlib() result, or a "File too short" ERRDATA if
 * the header does not fit in the *n input bytes.
 */
file_private int
uncompressgzipped(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
{
	unsigned char flg;
	size_t data_start = 10;

	if (*n < 4) {
		goto err;
	}

	flg = old[3];

	if (flg & FEXTRA) {
		if (data_start + 1 >= *n)
			goto err;
		/* 2 length bytes (little-endian) plus the field itself */
		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
	}
	if (flg & FNAME) {
		/* NUL-terminated original file name */
		while(data_start < *n && old[data_start])
			data_start++;
		data_start++;
	}
	if (flg & FCOMMENT) {
		/* NUL-terminated comment */
		while(data_start < *n && old[data_start])
			data_start++;
		data_start++;
	}
	if (flg & FHCRC)
		data_start += 2;

	if (data_start >= *n)
		goto err;

	*n -= data_start;
	old += data_start;
	return uncompresszlib(old, newch, bytes_max, n, 0);
err:
	return makeerror(newch, n, "File too short");
}

/*
 * Inflate *n bytes of old into the preallocated buffer *newch, producing
 * at most bytes_max bytes.  A non-zero zlib selects a zlib-wrapped stream
 * (inflateInit), zero selects a raw deflate stream (inflateInit2 with -15).
 * On success stores the output length in *n, NUL-terminates the output and
 * returns OKDATA; on failure returns the makeerror() result.
 */
file_private int
uncompresszlib(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int zlib)
{
	int rc;
	z_stream z;

	DPRINTF("builtin zlib decompression\n");
	z.next_in = CCAST(Bytef *, old);
	z.avail_in = CAST(uint32_t, *n);
	z.next_out = *newch;
	z.avail_out = CAST(unsigned int, bytes_max);
	z.zalloc = Z_NULL;
	z.zfree = Z_NULL;
	z.opaque = Z_NULL;
	/* the error path reads z.msg; make sure it is never indeterminate */
	z.msg = Z_NULL;

	/* LINTED bug in header macro */
	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
	if (rc != Z_OK)
		goto err;

	rc = inflate(&z, Z_SYNC_FLUSH);
	if (rc != Z_OK && rc != Z_STREAM_END) {
		inflateEnd(&z);
		goto err;
	}

	*n = CAST(size_t, z.total_out);
	rc = inflateEnd(&z);
	if (rc != Z_OK)
		goto err;

	/* let's keep the nul-terminate tradition */
	(*newch)[*n] = '\0';

	return OKDATA;
err:
	return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
}
#endif

#ifdef BUILTIN_BZLIB
/*
 * Decompress *n bytes of bzip2 data from old into the preallocated buffer
 * *newch (at most bytes_max bytes).  On success stores the output length
 * in *n, NUL-terminates the output and returns OKDATA; on failure returns
 * the makeerror() result.
 */
file_private int
uncompressbzlib(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
{
	int rc;
	bz_stream bz;

	DPRINTF("builtin bzlib decompression\n");
	memset(&bz, 0, sizeof(bz));
	rc = BZ2_bzDecompressInit(&bz, 0, 0);
	if (rc != BZ_OK)
		goto err;

	bz.next_in = CCAST(char *, RCAST(const char *, old));
	bz.avail_in = CAST(uint32_t, *n);
	bz.next_out = RCAST(char *, *newch);
	bz.avail_out = CAST(unsigned int, bytes_max);

	rc = BZ2_bzDecompress(&bz);
	if (rc != BZ_OK && rc != BZ_STREAM_END) {
		BZ2_bzDecompressEnd(&bz);
		goto err;
	}

	/* Assume byte_max is within 32bit */
	/* assert(bz.total_out_hi32 == 0); */
	*n = CAST(size_t, bz.total_out_lo32);
	rc = BZ2_bzDecompressEnd(&bz);
	if (rc != BZ_OK)
		goto err;

	/* let's keep the nul-terminate tradition */
	(*newch)[*n] = '\0';

	return OKDATA;
err:
	return makeerror(newch, n, "bunzip error %d", rc);
}
#endif

#ifdef BUILTIN_XZLIB
/*
 * Decompress *n bytes of xz/lzma data from old into the preallocated
 * buffer *newch (at most bytes_max bytes) using lzma_auto_decoder().
 * On success stores the output length in *n, NUL-terminates the output
 * and returns OKDATA; on failure returns the makeerror() result.
 */
file_private int
uncompressxzlib(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
{
	int rc;
	lzma_stream xz;

	DPRINTF("builtin xzlib decompression\n");
	memset(&xz, 0, sizeof(xz));
	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
	if (rc != LZMA_OK)
		goto err;

	xz.next_in = CCAST(const uint8_t *, old);
	xz.avail_in = CAST(uint32_t, *n);
	xz.next_out = RCAST(uint8_t *, *newch);
	xz.avail_out = CAST(unsigned int, bytes_max);

	rc = lzma_code(&xz, LZMA_RUN);
	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
		lzma_end(&xz);
		goto err;
	}

	*n = CAST(size_t, xz.total_out);

	lzma_end(&xz);

	/* let's keep the nul-terminate tradition */
	(*newch)[*n] = '\0';

	return OKDATA;
err:
	return makeerror(newch, n, "unxz error %d", rc);
}
#endif

#ifdef BUILTIN_ZSTDLIB
/*
 * Decompress *n bytes of zstd data from old into the preallocated buffer
 * *newch (at most bytes_max bytes) with a single ZSTD_decompressStream()
 * call.  On success stores the output length in *n, NUL-terminates the
 * output and returns OKDATA; on failure returns the makeerror() result.
 */
file_private int
uncompresszstd(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
{
	size_t rc;
	ZSTD_DStream *zstd;
	ZSTD_inBuffer in;
	ZSTD_outBuffer out;

	DPRINTF("builtin zstd decompression\n");
	if ((zstd = ZSTD_createDStream()) == NULL) {
		return makeerror(newch, n, "No ZSTD decompression stream, %s",
		    strerror(errno));
	}

	rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
	if (ZSTD_isError(rc))
		goto err;

	in.src = CCAST(const void *, old);
	in.size = *n;
	in.pos = 0;
	out.dst = RCAST(void *, *newch);
	out.size = bytes_max;
	out.pos = 0;

	rc = ZSTD_decompressStream(zstd, &out, &in);
	if (ZSTD_isError(rc))
		goto err;

	*n = out.pos;

	ZSTD_freeDStream(zstd);

	/* let's keep the nul-terminate tradition */
	(*newch)[*n] = '\0';

	return OKDATA;
err:
	ZSTD_freeDStream(zstd);
	return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
}
#endif

#ifdef BUILTIN_LZLIB
/*
 * Decompress *n bytes of lzip data from old into the preallocated buffer
 * *newch (at most bytes_max bytes), alternating LZ_decompress_write() and
 * LZ_decompress_read() until the output is full or all input has been
 * written and a read yields nothing.  On success stores the output length
 * in *n, NUL-terminates the output and returns OKDATA; on failure returns
 * the makeerror() result.
 */
file_private int
uncompresslzlib(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
{
	enum LZ_Errno err;
	size_t old_remaining = *n;
	size_t new_remaining = bytes_max;
	size_t total_read = 0;
	unsigned char *bufp;
	struct LZ_Decoder *dec;

	bufp = *newch;

	DPRINTF("builtin lzlib decompression\n");
	dec = LZ_decompress_open();
	if (!dec) {
		return makeerror(newch, n, "unable to allocate LZ_Decoder");
	}
	if (LZ_decompress_errno(dec) != LZ_ok)
		goto err;

	for (;;) {
		/*
		 * LZ_decompress_read() stops at member boundaries, so we may
		 * have more than one successful read after writing all data
		 * we have.
		 */
		if (old_remaining > 0) {
			int wr = LZ_decompress_write(dec, old, old_remaining);
			if (wr < 0)
				goto err;
			old_remaining -= wr;
			old += wr;
		}

		int rd = LZ_decompress_read(dec, bufp, new_remaining);
		if (rd > 0) {
			new_remaining -= rd;
			bufp += rd;
			total_read += rd;
		}

		if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
			goto err;
		if (new_remaining == 0)
			break;
		if (old_remaining == 0 && rd == 0)
			break;
	}

	LZ_decompress_close(dec);
	*n = total_read;

	/* let's keep the nul-terminate tradition */
	*bufp = '\0';

	return OKDATA;
err:
	err = LZ_decompress_errno(dec);
	LZ_decompress_close(dec);
	return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
}
#endif

/*
 * Replace *buf by a freshly allocated, printf-style formatted message.
 * The previous *buf is freed.  On success *len is the message length and
 * ERRDATA is returned; if formatting fails *buf is NULL, *len is 0 and
 * NODATA is returned.
 */
static int
makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
{
	char *msg;
	va_list ap;
	int rv;

	DPRINTF("Makeerror %s\n", fmt);
	free(*buf);
	va_start(ap, fmt);
	rv = vasprintf(&msg, fmt, ap);
	va_end(ap);
	if (rv < 0) {
		DPRINTF("Makeerror failed");
		*buf = NULL;
		*len = 0;
		return NODATA;
	}
	*buf = RCAST(unsigned char *, msg);
	*len = strlen(msg);
	return ERRDATA;
}

/* Close fd[i] unless it is already -1, and mark it closed (-1). */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] == -1)
		return;
	(void) close(fd[i]);
	fd[i] = -1;
}

/* Close both ends of the pipe fd[0..1]. */
static void
closep(int *fd)
{
	size_t i;
	for (i = 0; i < 2; i++)
		closefd(fd, i);
}

/*
 * Make descriptor fd appear as descriptor i and close the original.
 * With posix_spawn support v is the posix_spawn_file_actions_t to which
 * the dup2/close actions are appended; otherwise the dup2()/close() are
 * done immediately (exiting the process if dup2 fails) and v is unused.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		exit(EXIT_FAILURE);
	}
	/* the conditional only references v so it is not unused */
	close(v ? fd : fd);
#endif
}

/*
 * Close fd: queued as a file action on v with posix_spawn support,
 * immediately otherwise.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	/* the conditional only references v so it is not unused */
	close(v ? fd : fd);
#endif
}

/*
 * Arrange the decompressor's standard descriptors: stdin comes from fd
 * (rewound to offset 0) when fd != -1, otherwise from the read end of
 * fdp[STDIN_FILENO]; stdout and stderr go to the write ends of the
 * corresponding pipes.  The unused pipe ends are closed when they are
 * above descriptor 2.
 */
static void
handledesc(void *v, int fd, int fdp[3][2])
{
	if (fd != -1) {
		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
		movedesc(v, STDIN_FILENO, fd);
	} else {
		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
		if (fdp[STDIN_FILENO][1] > 2)
		    closedesc(v, fdp[STDIN_FILENO][1]);
	}

	file_clear_closexec(STDIN_FILENO);

/* FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly */
	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
	if (fdp[STDOUT_FILENO][0] > 2)
		closedesc(v, fdp[STDOUT_FILENO][0]);

	file_clear_closexec(STDOUT_FILENO);

	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
	if (fdp[STDERR_FILENO][0] > 2)
		closedesc(v, fdp[STDERR_FILENO][0]);

	file_clear_closexec(STDERR_FILENO);
}

/*
 * Fork a helper process that writes the n bytes at old to fd and exits
 * (EXIT_SUCCESS if everything was written, EXIT_FAILURE otherwise).
 * Returns the helper's pid in the parent, or -1 if fork() failed.
 */
static pid_t
writechild(int fd, const void *old, size_t n)
{
	pid_t pid;

	/*
	 * fork again, to avoid blocking because both
	 * pipes filled
	 */
	pid = fork();
	if (pid == -1) {
		DPRINTF("Fork failed (%s)\n", strerror(errno));
		return -1;
	}
	if (pid == 0) {
		/* child */
		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
			DPRINTF("Write failed (%s)\n", strerror(errno));
			exit(EXIT_FAILURE);
		}
		exit(EXIT_SUCCESS);
	}
	/* parent */
	return pid;
}

/*
 * Reduce the n bytes of decompressor stderr output in ubuf to a short
 * message: terminate at n, cut at the first newline and the first ';',
 * and, if a ':' remains, keep only the text after the last ':' (leading
 * white space skipped), moved to the start of ubuf.  A lower-case first
 * character is capitalized.
 * Returns the new length when text after a ':' was kept, otherwise n
 * unchanged.  ubuf must have room for n + 1 bytes.
 */
static ssize_t
filter_error(unsigned char *ubuf, ssize_t n)
{
	char *p;
	char *buf;

	ubuf[n] = '\0';
	buf = RCAST(char *, ubuf);
	while (isspace(CAST(unsigned char, *buf)))
		buf++;
	DPRINTF("Filter error[[[%s]]]\n", buf);
	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
		*p = '\0';
	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
		*p = '\0';
	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
		++p;
		while (isspace(CAST(unsigned char, *p)))
			p++;
		n = strlen(p);
		memmove(ubuf, p, CAST(size_t, n + 1));
	}
	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
	if (islower(*ubuf))
		*ubuf = toupper(*ubuf);
	return n;
}

/*
 * Name used in error messages for compr[] index method: the name of the
 * built-in library when one is compiled in for that method, otherwise the
 * name of the external program (argv[0]).
 */
file_private const char *
methodname(size_t method)
{
	switch (method) {
#ifdef BUILTIN_DECOMPRESS
	case METH_FROZEN:
	case METH_ZLIB:
		return "zlib";
#endif
#ifdef BUILTIN_BZLIB
	case METH_BZIP:
		return "bzlib";
#endif
#ifdef BUILTIN_XZLIB
	case METH_XZ:
	case METH_LZMA:
		return "xzlib";
#endif
#ifdef BUILTIN_ZSTDLIB
	case METH_ZSTD:
		return "zstd";
#endif
#ifdef BUILTIN_LZLIB
	case METH_LZIP:
		return "lzlib";
#endif
	default:
		return compr[method].argv[0];
	}
}

/*
 * Return the built-in decompression function for compr[] index method,
 * or NULL when none is compiled in and an external program is needed.
 */
file_private int (*
getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
    size_t *, int)
{
	switch (method) {
#ifdef BUILTIN_DECOMPRESS
	case METH_FROZEN:
		return uncompressgzipped;
	case METH_ZLIB:
		return uncompresszlib;
#endif
#ifdef BUILTIN_BZLIB
	case METH_BZIP:
		return uncompressbzlib;
#endif
#ifdef BUILTIN_XZLIB
	case METH_XZ:
	case METH_LZMA:
		return uncompressxzlib;
#endif
#ifdef BUILTIN_ZSTDLIB
	case METH_ZSTD:
		return uncompresszstd;
#endif
#ifdef BUILTIN_LZLIB
	case METH_LZIP:
		return uncompresslzlib;
#endif
	default:
		return NULL;
	}
}

/*
 * Decompress the *n bytes at old with compr[] entry method into a newly
 * allocated buffer of bytes_max + 1 bytes stored in *newch.
 *
 * A compiled-in decompressor is used when there is one.  Otherwise the
 * external program compr[method].argv is run with its stdin taken from fd
 * (or, when fd is -1, fed from old by a helper process) and its stdout and
 * stderr read back through pipes; this is refused when nofork is non-zero.
 *
 * Returns OKDATA with the data in *newch and its length in *n, ERRDATA
 * with a NUL-terminated message in *newch, or NODATA.  The caller owns
 * and frees *newch.
 */
file_private int
uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
    const unsigned char *old, unsigned char **newch, size_t* n)
{
	int fdp[3][2];
	int status, rv, w;
	int serrno;
	pid_t pid;
	pid_t writepid = -1;
	size_t i;
	ssize_t r, re;
	char *const *args;
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t fa;
#endif
	int (*decompress)(const unsigned char *, unsigned char **,
	    size_t, size_t *, int) = getdecompressor(method);

	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
	if (*newch == NULL)
		return makeerror(newch, n, "No buffer, %s", strerror(errno));

	/* Built-in decompressors never fork, so nofork does not apply. */
	if (decompress)
		return (*decompress)(old, newch, bytes_max, n, 1);

	/* Everything below spawns an external program. */
	if (nofork) {
		return makeerror(newch, n,
		    "Fork is required to uncompress, but disabled");
	}

	(void)fflush(stdout);
	(void)fflush(stderr);

	for (i = 0; i < __arraycount(fdp); i++)
		fdp[i][0] = fdp[i][1] = -1;

	/*
	 * There are multithreaded users who run magic_file()
	 * from dozens of threads. If two parallel magic_file() calls
	 * analyze two large compressed files, both will spawn
	 * an uncompressing child here, which writes out uncompressed data.
	 * We read some portion, then close the pipe, then waitpid() the child.
	 * If uncompressed data is larger, child should get EPIPE and exit.
	 * However, with *parallel* calls OTHER child may unintentionally
	 * inherit pipe fds, thus keeping pipe open and making writes in
	 * our child block instead of failing with EPIPE!
	 * (For the bug to occur, two threads must mutually inherit their pipes,
	 * and both must have large outputs. Thus it happens not that often).
	 * To avoid this, be sure to create pipes with O_CLOEXEC.
	 */
	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
		serrno = errno;	/* close() below must not disturb it */
		closep(fdp[STDIN_FILENO]);
		closep(fdp[STDOUT_FILENO]);
		return makeerror(newch, n, "Cannot create pipe, %s",
		    strerror(serrno));
	}

	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_init(&fa);

	handledesc(&fa, fd, fdp);

	DPRINTF("Executing %s\n", compr[method].argv[0]);
	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
	    args, NULL);

	posix_spawn_file_actions_destroy(&fa);

	/*
	 * posix_spawnp() returns 0 on success and an error number (not -1,
	 * and not via errno) on failure.
	 */
	if (status != 0) {
		for (i = 0; i < __arraycount(fdp); i++)
			closep(fdp[i]);
		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
		    compr[method].argv[0], strerror(status));
	}
#else
	/* For processes with large mapped virtual sizes, vfork
	 * may be _much_ faster (10-100 times) than fork.
	 */
	pid = vfork();
	if (pid == -1) {
		serrno = errno;
		for (i = 0; i < __arraycount(fdp); i++)
			closep(fdp[i]);
		return makeerror(newch, n, "Cannot vfork, %s",
		    strerror(serrno));
	}
	if (pid == 0) {
		/* child */
		/* Note: we are after vfork, do not modify memory
		 * in a way which confuses parent. In particular,
		 * do not modify fdp[i][j].
		 */
		handledesc(NULL, fd, fdp);
		DPRINTF("Executing %s\n", compr[method].argv[0]);

		(void)execvp(compr[method].argv[0], args);
		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
		    compr[method].argv[0], strerror(errno));
		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
	}
#endif
	/* parent */
	/* Close write sides of child stdout/err pipes */
	for (i = 1; i < __arraycount(fdp); i++)
		closefd(fdp[i], 1);
	/* Write the buffer data to child stdin, if we don't have fd */
	if (fd == -1) {
		closefd(fdp[STDIN_FILENO], 0);
		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
		if (writepid == (pid_t)-1) {
			rv = makeerror(newch, n, "Write to child failed, %s",
			    strerror(errno));
			DPRINTF("Write to child failed\n");
			goto err;
		}
		closefd(fdp[STDIN_FILENO], 1);
	}

	rv = OKDATA;
	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
	serrno = errno;
	DPRINTF("read got %zd\n", r);
	if (r < 0) {
		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
		    strerror(serrno));
		/*
		 * Report a real message: returning ERRDATA with the raw
		 * buffer would hand the caller uninitialized, unterminated
		 * memory to print.
		 */
		rv = makeerror(newch, n, "Read stdout failed, %s",
		    strerror(serrno));
		goto err;
	}
	if (CAST(size_t, r) == bytes_max) {
		/*
		 * close fd so that the child exits with sigpipe and ignore
		 * errors, otherwise we risk the child blocking and never
		 * exiting.
		 */
		DPRINTF("Closing stdout for bytes_max\n");
		closefd(fdp[STDOUT_FILENO], 0);
		goto ok;
	}
	if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
		/* terminate at the stderr length, not the stdout length */
		(*newch)[re] = '\0';
		DPRINTF("Got stuff from stderr %s\n", *newch);
		rv = ERRDATA;
		r = filter_error(*newch, re);
		goto ok;
	}
	if  (re == 0)
		goto ok;
	rv = makeerror(newch, n, "Read stderr failed, %s",
	    strerror(errno));
	goto err;
ok:
	*n = r;
	/* NUL terminate, as every buffer is handled here. */
	(*newch)[*n] = '\0';
err:
	closefd(fdp[STDIN_FILENO], 1);
	closefd(fdp[STDOUT_FILENO], 0);
	closefd(fdp[STDERR_FILENO], 0);

	w = waitpid(pid, &status, 0);
wait_err:
	if (w == -1) {
		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
		DPRINTF("Child wait return %#x\n", status);
	} else if (!WIFEXITED(status)) {
		DPRINTF("Child not exited (%#x)\n", status);
	} else if (WEXITSTATUS(status) != 0) {
		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
	}
	if (writepid > 0) {
		/* _After_ we know decompressor has exited, our input writer
		 * definitely will exit now (at worst, writing fails in it,
		 * since output fd is closed now on the reading size).
		 */
		w = waitpid(writepid, &status, 0);
		writepid = -1;
		goto wait_err;
	}

	closefd(fdp[STDIN_FILENO], 0); /* why? it is already closed here! */
	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);

	return rv;
}
#endif