12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227 |
- /*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- /*
- * compress routines:
- * zmagic() - returns 0 if not recognized, uncompresses and prints
- * information if recognized
- * uncompress(method, old, n, newch) - uncompress old into new,
- * using method, return sizeof new
- */
- #include "file.h"
- #ifndef lint
- FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
- #endif
- #include "magic.h"
- #include <stdlib.h>
- #ifdef HAVE_UNISTD_H
- #include <unistd.h>
- #endif
- #ifdef HAVE_SPAWN_H
- #include <spawn.h>
- #endif
- #include <string.h>
- #include <errno.h>
- #include <ctype.h>
- #include <stdarg.h>
- #include <signal.h>
- #ifndef HAVE_SIG_T
- typedef void (*sig_t)(int);
- #endif /* HAVE_SIG_T */
- #ifdef HAVE_SYS_IOCTL_H
- #include <sys/ioctl.h>
- #endif
- #ifdef HAVE_SYS_WAIT_H
- #include <sys/wait.h>
- #endif
- #if defined(HAVE_SYS_TIME_H)
- #include <sys/time.h>
- #endif
- #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
- #define BUILTIN_DECOMPRESS
- #include <zlib.h>
- #endif
- #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
- #define BUILTIN_BZLIB
- #include <bzlib.h>
- #endif
- #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
- #define BUILTIN_XZLIB
- #include <lzma.h>
- #endif
- #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
- #define BUILTIN_ZSTDLIB
- #include <zstd.h>
- #include <zstd_errors.h>
- #endif
- #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
- #define BUILTIN_LZLIB
- #include <lzlib.h>
- #endif
- #ifdef DEBUG
- int tty = -1;
- #define DPRINTF(...) do { \
- if (tty == -1) \
- tty = open("/dev/tty", O_RDWR); \
- if (tty == -1) \
- abort(); \
- dprintf(tty, __VA_ARGS__); \
- } while (/*CONSTCOND*/0)
- #else
- #define DPRINTF(...)
- #endif
- #ifdef ZLIBSUPPORT
- /*
- * The following python code is not really used because ZLIBSUPPORT is only
- * defined if we have a built-in zlib, and the built-in zlib handles that.
- * That is not true for android where we have zlib.h and not -lz.
- */
- static const char zlibcode[] =
- "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
- static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
/*
 * zlibcmp --
 *	Recognise a zlib (RFC 1950) stream from its two header bytes.
 *
 *	buf must have at least two readable bytes.
 *	Returns 1 if the header is plausible, 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	/* low nibble must be 8, and the top bit of the first byte clear */
	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/*
	 * RFC 1950: the header, read as the 16-bit number
	 * CMF * 256 + FLG, is a multiple of 31.  This is arithmetic on
	 * byte values, so it must not depend on the byte order of the
	 * host; always combine the bytes most-significant first.
	 */
	check = buf[0] * 256u + buf[1];
	if (check % 31 != 0)
		return 0;
	return 1;
}
- #endif
/*
 * lzmacmp --
 *	Heuristic match used by the "lzma" entry of compr[].
 *
 *	Accepts a buffer whose first byte is 0x5d, whose second and
 *	third bytes are zero, and whose byte 12 is either 0x00 or 0xff.
 *	buf[12] is only inspected when the first three bytes match, so
 *	the caller must supply at least 13 bytes.
 *	Returns 1 on match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	return (buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0) &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
/*
 * Argument vectors for the external decompression programs referenced
 * from compr[].  Each vector starts with the program name and is
 * NULL-terminated.
 */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", "-qdf", "-", NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
- #define do_zlib NULL
- #define do_bzlib NULL
- file_private const struct {
- union {
- const char *magic;
- int (*func)(const unsigned char *);
- } u;
- int maglen;
- const char **argv;
- void *unused;
- } compr[] = {
- #define METH_FROZEN 2
- #define METH_BZIP 7
- #define METH_XZ 9
- #define METH_LZIP 8
- #define METH_ZSTD 12
- #define METH_LZMA 13
- #define METH_ZLIB 14
- { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
- /* Uncompress can get stuck; so use gzip first if we have it
- * Idea from Damien Clark, thanks! */
- { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
- { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
- { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
- { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
- /* the standard pack utilities do not accept standard input */
- { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
- { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */
- /* ...only first file examined */
- { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
- { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
- { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
- { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
- { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
- { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
- { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
- #ifdef ZLIBSUPPORT
- { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
- #endif
- };
- #define OKDATA 0
- #define NODATA 1
- #define ERRDATA 2
- file_private ssize_t swrite(int, const void *, size_t);
- #if HAVE_FORK
- file_private size_t ncompr = __arraycount(compr);
- file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
- unsigned char **, size_t *);
- #ifdef BUILTIN_DECOMPRESS
- file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
- size_t *, int);
- file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
- size_t *, int);
- #endif
- #ifdef BUILTIN_BZLIB
- file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
- size_t *, int);
- #endif
- #ifdef BUILTIN_XZLIB
- file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
- size_t *, int);
- #endif
- #ifdef BUILTIN_ZSTDLIB
- file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
- size_t *, int);
- #endif
- #ifdef BUILTIN_LZLIB
- file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
- size_t *, int);
- #endif
- static int makeerror(unsigned char **, size_t *, const char *, ...)
- __attribute__((__format__(__printf__, 3, 4)));
- file_private const char *methodname(size_t);
- file_private int
- format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
- {
- unsigned char *p;
- int mime = ms->flags & MAGIC_MIME;
- if (!mime)
- return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
- for (p = buf; *p; p++)
- if (!isalnum(*p))
- *p = '-';
- return file_printf(ms, "application/x-decompression-error-%s-%s",
- methodname(i), buf);
- }
- file_protected int
- file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
- {
- unsigned char *newbuf = NULL;
- size_t i, nsz;
- char *rbuf;
- file_pushbuf_t *pb;
- int urv, prv, rv = 0;
- int mime = ms->flags & MAGIC_MIME;
- int fd = b->fd;
- const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
- size_t nbytes = b->flen;
- int sa_saved = 0;
- struct sigaction sig_act;
- if ((ms->flags & MAGIC_COMPRESS) == 0)
- return 0;
- for (i = 0; i < ncompr; i++) {
- int zm;
- if (nbytes < CAST(size_t, abs(compr[i].maglen)))
- continue;
- if (compr[i].maglen < 0) {
- zm = (*compr[i].u.func)(buf);
- } else {
- zm = memcmp(buf, compr[i].u.magic,
- CAST(size_t, compr[i].maglen)) == 0;
- }
- if (!zm)
- continue;
- /* Prevent SIGPIPE death if child dies unexpectedly */
- if (!sa_saved) {
- //We can use sig_act for both new and old, but
- struct sigaction new_act;
- memset(&new_act, 0, sizeof(new_act));
- new_act.sa_handler = SIG_IGN;
- sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
- }
- nsz = nbytes;
- free(newbuf);
- urv = uncompressbuf(fd, ms->bytes_max, i,
- (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
- DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
- (char *)newbuf, nsz);
- switch (urv) {
- case OKDATA:
- case ERRDATA:
- ms->flags &= ~MAGIC_COMPRESS;
- if (urv == ERRDATA)
- prv = format_decompression_error(ms, i, newbuf);
- else
- prv = file_buffer(ms, -1, NULL, name, newbuf,
- nsz);
- if (prv == -1)
- goto error;
- rv = 1;
- if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
- goto out;
- if (mime != MAGIC_MIME && mime != 0)
- goto out;
- if ((file_printf(ms,
- mime ? " compressed-encoding=" : " (")) == -1)
- goto error;
- if ((pb = file_push_buffer(ms)) == NULL)
- goto error;
- /*
- * XXX: If file_buffer fails here, we overwrite
- * the compressed text. FIXME.
- */
- if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
- {
- if (file_pop_buffer(ms, pb) != NULL)
- abort();
- goto error;
- }
- if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
- if (file_printf(ms, "%s", rbuf) == -1) {
- free(rbuf);
- goto error;
- }
- free(rbuf);
- }
- if (!mime && file_printf(ms, ")") == -1)
- goto error;
- /*FALLTHROUGH*/
- case NODATA:
- break;
- default:
- abort();
- /*NOTREACHED*/
- error:
- rv = -1;
- break;
- }
- }
- out:
- DPRINTF("rv = %d\n", rv);
- if (sa_saved && sig_act.sa_handler != SIG_IGN)
- (void)sigaction(SIGPIPE, &sig_act, NULL);
- free(newbuf);
- ms->flags |= MAGIC_COMPRESS;
- DPRINTF("Zmagic returns %d\n", rv);
- return rv;
- }
- #endif
- /*
- * `safe' write for sockets and pipes.
- */
- file_private ssize_t
- swrite(int fd, const void *buf, size_t n)
- {
- ssize_t rv;
- size_t rn = n;
- do
- switch (rv = write(fd, buf, n)) {
- case -1:
- if (errno == EINTR)
- continue;
- return -1;
- default:
- n -= rv;
- buf = CAST(const char *, buf) + rv;
- break;
- }
- while (n > 0);
- return rn;
- }
- /*
- * `safe' read for sockets and pipes.
- */
- file_protected ssize_t
- sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
- {
- ssize_t rv;
- #if defined(FIONREAD) && !defined(__MINGW32__)
- int t = 0;
- #endif
- size_t rn = n;
- if (fd == STDIN_FILENO)
- goto nocheck;
- #if defined(FIONREAD) && !defined(__MINGW32__)
- if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
- #ifdef FD_ZERO
- ssize_t cnt;
- for (cnt = 0;; cnt++) {
- fd_set check;
- struct timeval tout = {0, 100 * 1000};
- int selrv;
- FD_ZERO(&check);
- FD_SET(fd, &check);
- /*
- * Avoid soft deadlock: do not read if there
- * is nothing to read from sockets and pipes.
- */
- selrv = select(fd + 1, &check, NULL, NULL, &tout);
- if (selrv == -1) {
- if (errno == EINTR || errno == EAGAIN)
- continue;
- } else if (selrv == 0 && cnt >= 5) {
- return 0;
- } else
- break;
- }
- #endif
- (void)ioctl(fd, FIONREAD, &t);
- }
- if (t > 0 && CAST(size_t, t) < n) {
- n = t;
- rn = n;
- }
- #endif
- nocheck:
- do
- switch ((rv = read(fd, buf, n))) {
- case -1:
- if (errno == EINTR)
- continue;
- return -1;
- case 0:
- return rn - n;
- default:
- n -= rv;
- buf = CAST(char *, CCAST(void *, buf)) + rv;
- break;
- }
- while (n > 0);
- return rn;
- }
- file_protected int
- file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
- size_t nbytes)
- {
- char buf[4096];
- ssize_t r;
- int tfd;
- #ifdef WIN32
- const char *t;
- buf[0] = '\0';
- if ((t = getenv("TEMP")) != NULL)
- (void)strlcpy(buf, t, sizeof(buf));
- else if ((t = getenv("TMP")) != NULL)
- (void)strlcpy(buf, t, sizeof(buf));
- else if ((t = getenv("TMPDIR")) != NULL)
- (void)strlcpy(buf, t, sizeof(buf));
- if (buf[0] != '\0')
- (void)strlcat(buf, "/", sizeof(buf));
- (void)strlcat(buf, "file.XXXXXX", sizeof(buf));
- #else
- (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
- #endif
- #ifndef HAVE_MKSTEMP
- {
- char *ptr = mktemp(buf);
- tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
- r = errno;
- (void)unlink(ptr);
- errno = r;
- }
- #else
- {
- int te;
- mode_t ou = umask(0);
- tfd = mkstemp(buf);
- (void)umask(ou);
- te = errno;
- (void)unlink(buf);
- errno = te;
- }
- #endif
- if (tfd == -1) {
- file_error(ms, errno,
- "cannot create temporary file for pipe copy");
- return -1;
- }
- if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
- r = 1;
- else {
- while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
- if (swrite(tfd, buf, CAST(size_t, r)) != r)
- break;
- }
- switch (r) {
- case -1:
- file_error(ms, errno, "error copying from pipe to temp file");
- return -1;
- case 0:
- break;
- default:
- file_error(ms, errno, "error while writing to temp file");
- return -1;
- }
- /*
- * We duplicate the file descriptor, because fclose on a
- * tmpfile will delete the file, but any open descriptors
- * can still access the phantom inode.
- */
- if ((fd = dup2(tfd, fd)) == -1) {
- file_error(ms, errno, "could not dup descriptor for temp file");
- return -1;
- }
- (void)close(tfd);
- if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
- file_badseek(ms);
- return -1;
- }
- return fd;
- }
- #if HAVE_FORK
- #ifdef BUILTIN_DECOMPRESS
- #define FHCRC (1 << 1)
- #define FEXTRA (1 << 2)
- #define FNAME (1 << 3)
- #define FCOMMENT (1 << 4)
- file_private int
- uncompressgzipped(const unsigned char *old, unsigned char **newch,
- size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
- {
- unsigned char flg;
- size_t data_start = 10;
- if (*n < 4) {
- goto err;
- }
- flg = old[3];
- if (flg & FEXTRA) {
- if (data_start + 1 >= *n)
- goto err;
- data_start += 2 + old[data_start] + old[data_start + 1] * 256;
- }
- if (flg & FNAME) {
- while(data_start < *n && old[data_start])
- data_start++;
- data_start++;
- }
- if (flg & FCOMMENT) {
- while(data_start < *n && old[data_start])
- data_start++;
- data_start++;
- }
- if (flg & FHCRC)
- data_start += 2;
- if (data_start >= *n)
- goto err;
- *n -= data_start;
- old += data_start;
- return uncompresszlib(old, newch, bytes_max, n, 0);
- err:
- return makeerror(newch, n, "File too short");
- }
- file_private int
- uncompresszlib(const unsigned char *old, unsigned char **newch,
- size_t bytes_max, size_t *n, int zlib)
- {
- int rc;
- z_stream z;
- DPRINTF("builtin zlib decompression\n");
- z.next_in = CCAST(Bytef *, old);
- z.avail_in = CAST(uint32_t, *n);
- z.next_out = *newch;
- z.avail_out = CAST(unsigned int, bytes_max);
- z.zalloc = Z_NULL;
- z.zfree = Z_NULL;
- z.opaque = Z_NULL;
- /* LINTED bug in header macro */
- rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
- if (rc != Z_OK)
- goto err;
- rc = inflate(&z, Z_SYNC_FLUSH);
- if (rc != Z_OK && rc != Z_STREAM_END) {
- inflateEnd(&z);
- goto err;
- }
- *n = CAST(size_t, z.total_out);
- rc = inflateEnd(&z);
- if (rc != Z_OK)
- goto err;
- /* let's keep the nul-terminate tradition */
- (*newch)[*n] = '\0';
- return OKDATA;
- err:
- return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
- }
- #endif
- #ifdef BUILTIN_BZLIB
- file_private int
- uncompressbzlib(const unsigned char *old, unsigned char **newch,
- size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
- {
- int rc;
- bz_stream bz;
- DPRINTF("builtin bzlib decompression\n");
- memset(&bz, 0, sizeof(bz));
- rc = BZ2_bzDecompressInit(&bz, 0, 0);
- if (rc != BZ_OK)
- goto err;
- bz.next_in = CCAST(char *, RCAST(const char *, old));
- bz.avail_in = CAST(uint32_t, *n);
- bz.next_out = RCAST(char *, *newch);
- bz.avail_out = CAST(unsigned int, bytes_max);
- rc = BZ2_bzDecompress(&bz);
- if (rc != BZ_OK && rc != BZ_STREAM_END) {
- BZ2_bzDecompressEnd(&bz);
- goto err;
- }
- /* Assume byte_max is within 32bit */
- /* assert(bz.total_out_hi32 == 0); */
- *n = CAST(size_t, bz.total_out_lo32);
- rc = BZ2_bzDecompressEnd(&bz);
- if (rc != BZ_OK)
- goto err;
- /* let's keep the nul-terminate tradition */
- (*newch)[*n] = '\0';
- return OKDATA;
- err:
- return makeerror(newch, n, "bunzip error %d", rc);
- }
- #endif
- #ifdef BUILTIN_XZLIB
- file_private int
- uncompressxzlib(const unsigned char *old, unsigned char **newch,
- size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
- {
- int rc;
- lzma_stream xz;
- DPRINTF("builtin xzlib decompression\n");
- memset(&xz, 0, sizeof(xz));
- rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
- if (rc != LZMA_OK)
- goto err;
- xz.next_in = CCAST(const uint8_t *, old);
- xz.avail_in = CAST(uint32_t, *n);
- xz.next_out = RCAST(uint8_t *, *newch);
- xz.avail_out = CAST(unsigned int, bytes_max);
- rc = lzma_code(&xz, LZMA_RUN);
- if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
- lzma_end(&xz);
- goto err;
- }
- *n = CAST(size_t, xz.total_out);
- lzma_end(&xz);
- /* let's keep the nul-terminate tradition */
- (*newch)[*n] = '\0';
- return OKDATA;
- err:
- return makeerror(newch, n, "unxz error %d", rc);
- }
- #endif
- #ifdef BUILTIN_ZSTDLIB
- file_private int
- uncompresszstd(const unsigned char *old, unsigned char **newch,
- size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
- {
- size_t rc;
- ZSTD_DStream *zstd;
- ZSTD_inBuffer in;
- ZSTD_outBuffer out;
- DPRINTF("builtin zstd decompression\n");
- if ((zstd = ZSTD_createDStream()) == NULL) {
- return makeerror(newch, n, "No ZSTD decompression stream, %s",
- strerror(errno));
- }
- rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
- if (ZSTD_isError(rc))
- goto err;
- in.src = CCAST(const void *, old);
- in.size = *n;
- in.pos = 0;
- out.dst = RCAST(void *, *newch);
- out.size = bytes_max;
- out.pos = 0;
- rc = ZSTD_decompressStream(zstd, &out, &in);
- if (ZSTD_isError(rc))
- goto err;
- *n = out.pos;
- ZSTD_freeDStream(zstd);
- /* let's keep the nul-terminate tradition */
- (*newch)[*n] = '\0';
- return OKDATA;
- err:
- ZSTD_freeDStream(zstd);
- return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
- }
- #endif
- #ifdef BUILTIN_LZLIB
- file_private int
- uncompresslzlib(const unsigned char *old, unsigned char **newch,
- size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
- {
- enum LZ_Errno err;
- size_t old_remaining = *n;
- size_t new_remaining = bytes_max;
- size_t total_read = 0;
- unsigned char *bufp;
- struct LZ_Decoder *dec;
- bufp = *newch;
- DPRINTF("builtin lzlib decompression\n");
- dec = LZ_decompress_open();
- if (!dec) {
- return makeerror(newch, n, "unable to allocate LZ_Decoder");
- }
- if (LZ_decompress_errno(dec) != LZ_ok)
- goto err;
- for (;;) {
- // LZ_decompress_read() stops at member boundaries, so we may
- // have more than one successful read after writing all data
- // we have.
- if (old_remaining > 0) {
- int wr = LZ_decompress_write(dec, old, old_remaining);
- if (wr < 0)
- goto err;
- old_remaining -= wr;
- old += wr;
- }
- int rd = LZ_decompress_read(dec, bufp, new_remaining);
- if (rd > 0) {
- new_remaining -= rd;
- bufp += rd;
- total_read += rd;
- }
- if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
- goto err;
- if (new_remaining == 0)
- break;
- if (old_remaining == 0 && rd == 0)
- break;
- }
- LZ_decompress_close(dec);
- *n = total_read;
- /* let's keep the nul-terminate tradition */
- *bufp = '\0';
- return OKDATA;
- err:
- err = LZ_decompress_errno(dec);
- LZ_decompress_close(dec);
- return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
- }
- #endif
- static int
- makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
- {
- char *msg;
- va_list ap;
- int rv;
- DPRINTF("Makeerror %s\n", fmt);
- free(*buf);
- va_start(ap, fmt);
- rv = vasprintf(&msg, fmt, ap);
- va_end(ap);
- if (rv < 0) {
- DPRINTF("Makeerror failed");
- *buf = NULL;
- *len = 0;
- return NODATA;
- }
- *buf = RCAST(unsigned char *, msg);
- *len = strlen(msg);
- return ERRDATA;
- }
/*
 * closefd --
 *	Close fd[i] unless it is already marked closed (-1), then mark
 *	it closed.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
/*
 * closep --
 *	Close both ends of the pipe pair fd[0]/fd[1] via closefd().
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
/*
 * movedesc --
 *	Arrange for descriptor fd to become descriptor i, closing the
 *	original fd.  Nothing happens when fd is already i.
 *
 *	With posix_spawnp, v is the posix_spawn_file_actions_t that
 *	records the dup2/close for the child.  Otherwise v is unused and
 *	the dup2/close are performed immediately in the calling process,
 *	which is then the child created by vfork().
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	(void)v;
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		/*
		 * We run in the vfork() child here: use _exit() so that
		 * the parent's stdio buffers and atexit handlers are
		 * left alone.
		 */
		_exit(EXIT_FAILURE);
	}
	close(fd);
#endif
}
/*
 * closedesc --
 *	Close descriptor fd for the child: recorded as a file action in
 *	v when posix_spawnp is used, performed directly otherwise (v is
 *	then ignored).
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	close(fd);
#endif
}
- static void
- handledesc(void *v, int fd, int fdp[3][2])
- {
- if (fd != -1) {
- (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
- movedesc(v, STDIN_FILENO, fd);
- } else {
- movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
- if (fdp[STDIN_FILENO][1] > 2)
- closedesc(v, fdp[STDIN_FILENO][1]);
- }
- file_clear_closexec(STDIN_FILENO);
- ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
- movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
- if (fdp[STDOUT_FILENO][0] > 2)
- closedesc(v, fdp[STDOUT_FILENO][0]);
- file_clear_closexec(STDOUT_FILENO);
- movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
- if (fdp[STDERR_FILENO][0] > 2)
- closedesc(v, fdp[STDERR_FILENO][0]);
- file_clear_closexec(STDERR_FILENO);
- }
- static pid_t
- writechild(int fd, const void *old, size_t n)
- {
- pid_t pid;
- /*
- * fork again, to avoid blocking because both
- * pipes filled
- */
- pid = fork();
- if (pid == -1) {
- DPRINTF("Fork failed (%s)\n", strerror(errno));
- return -1;
- }
- if (pid == 0) {
- /* child */
- if (swrite(fd, old, n) != CAST(ssize_t, n)) {
- DPRINTF("Write failed (%s)\n", strerror(errno));
- exit(EXIT_FAILURE);
- }
- exit(EXIT_SUCCESS);
- }
- /* parent */
- return pid;
- }
- static ssize_t
- filter_error(unsigned char *ubuf, ssize_t n)
- {
- char *p;
- char *buf;
- ubuf[n] = '\0';
- buf = RCAST(char *, ubuf);
- while (isspace(CAST(unsigned char, *buf)))
- buf++;
- DPRINTF("Filter error[[[%s]]]\n", buf);
- if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
- *p = '\0';
- if ((p = strchr(CAST(char *, buf), ';')) != NULL)
- *p = '\0';
- if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
- ++p;
- while (isspace(CAST(unsigned char, *p)))
- p++;
- n = strlen(p);
- memmove(ubuf, p, CAST(size_t, n + 1));
- }
- DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
- if (islower(*ubuf))
- *ubuf = toupper(*ubuf);
- return n;
- }
- file_private const char *
- methodname(size_t method)
- {
- switch (method) {
- #ifdef BUILTIN_DECOMPRESS
- case METH_FROZEN:
- case METH_ZLIB:
- return "zlib";
- #endif
- #ifdef BUILTIN_BZLIB
- case METH_BZIP:
- return "bzlib";
- #endif
- #ifdef BUILTIN_XZLIB
- case METH_XZ:
- case METH_LZMA:
- return "xzlib";
- #endif
- #ifdef BUILTIN_ZSTDLIB
- case METH_ZSTD:
- return "zstd";
- #endif
- #ifdef BUILTIN_LZLIB
- case METH_LZIP:
- return "lzlib";
- #endif
- default:
- return compr[method].argv[0];
- }
- }
- file_private int (*
- getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
- size_t *, int)
- {
- switch (method) {
- #ifdef BUILTIN_DECOMPRESS
- case METH_FROZEN:
- return uncompressgzipped;
- case METH_ZLIB:
- return uncompresszlib;
- #endif
- #ifdef BUILTIN_BZLIB
- case METH_BZIP:
- return uncompressbzlib;
- #endif
- #ifdef BUILTIN_XZLIB
- case METH_XZ:
- case METH_LZMA:
- return uncompressxzlib;
- #endif
- #ifdef BUILTIN_ZSTDLIB
- case METH_ZSTD:
- return uncompresszstd;
- #endif
- #ifdef BUILTIN_LZLIB
- case METH_LZIP:
- return uncompresslzlib;
- #endif
- default:
- return NULL;
- }
- }
- file_private int
- uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
- const unsigned char *old, unsigned char **newch, size_t* n)
- {
- int fdp[3][2];
- int status, rv, w;
- pid_t pid;
- pid_t writepid = -1;
- size_t i;
- ssize_t r, re;
- char *const *args;
- #ifdef HAVE_POSIX_SPAWNP
- posix_spawn_file_actions_t fa;
- #endif
- int (*decompress)(const unsigned char *, unsigned char **,
- size_t, size_t *, int) = getdecompressor(method);
- *newch = CAST(unsigned char *, malloc(bytes_max + 1));
- if (*newch == NULL)
- return makeerror(newch, n, "No buffer, %s", strerror(errno));
- if (decompress) {
- if (nofork) {
- return makeerror(newch, n,
- "Fork is required to uncompress, but disabled");
- }
- return (*decompress)(old, newch, bytes_max, n, 1);
- }
- (void)fflush(stdout);
- (void)fflush(stderr);
- for (i = 0; i < __arraycount(fdp); i++)
- fdp[i][0] = fdp[i][1] = -1;
- /*
- * There are multithreaded users who run magic_file()
- * from dozens of threads. If two parallel magic_file() calls
- * analyze two large compressed files, both will spawn
- * an uncompressing child here, which writes out uncompressed data.
- * We read some portion, then close the pipe, then waitpid() the child.
- * If uncompressed data is larger, child should get EPIPE and exit.
- * However, with *parallel* calls OTHER child may unintentionally
- * inherit pipe fds, thus keeping pipe open and making writes in
- * our child block instead of failing with EPIPE!
- * (For the bug to occur, two threads must mutually inherit their pipes,
- * and both must have large outputs. Thus it happens not that often).
- * To avoid this, be sure to create pipes with O_CLOEXEC.
- */
- if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
- file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
- file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
- closep(fdp[STDIN_FILENO]);
- closep(fdp[STDOUT_FILENO]);
- return makeerror(newch, n, "Cannot create pipe, %s",
- strerror(errno));
- }
- args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
- #ifdef HAVE_POSIX_SPAWNP
- posix_spawn_file_actions_init(&fa);
- handledesc(&fa, fd, fdp);
- DPRINTF("Executing %s\n", compr[method].argv[0]);
- status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
- args, NULL);
- posix_spawn_file_actions_destroy(&fa);
- if (status == -1) {
- return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
- compr[method].argv[0], strerror(errno));
- }
- #else
- /* For processes with large mapped virtual sizes, vfork
- * may be _much_ faster (10-100 times) than fork.
- */
- pid = vfork();
- if (pid == -1) {
- return makeerror(newch, n, "Cannot vfork, %s",
- strerror(errno));
- }
- if (pid == 0) {
- /* child */
- /* Note: we are after vfork, do not modify memory
- * in a way which confuses parent. In particular,
- * do not modify fdp[i][j].
- */
- handledesc(NULL, fd, fdp);
- DPRINTF("Executing %s\n", compr[method].argv[0]);
- (void)execvp(compr[method].argv[0], args);
- dprintf(STDERR_FILENO, "exec `%s' failed, %s",
- compr[method].argv[0], strerror(errno));
- _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
- }
- #endif
- /* parent */
- /* Close write sides of child stdout/err pipes */
- for (i = 1; i < __arraycount(fdp); i++)
- closefd(fdp[i], 1);
- /* Write the buffer data to child stdin, if we don't have fd */
- if (fd == -1) {
- closefd(fdp[STDIN_FILENO], 0);
- writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
- if (writepid == (pid_t)-1) {
- rv = makeerror(newch, n, "Write to child failed, %s",
- strerror(errno));
- DPRINTF("Write to child failed\n");
- goto err;
- }
- closefd(fdp[STDIN_FILENO], 1);
- }
- rv = OKDATA;
- r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
- DPRINTF("read got %zd\n", r);
- if (r < 0) {
- rv = ERRDATA;
- DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
- strerror(errno));
- goto err;
- }
- if (CAST(size_t, r) == bytes_max) {
- /*
- * close fd so that the child exits with sigpipe and ignore
- * errors, otherwise we risk the child blocking and never
- * exiting.
- */
- DPRINTF("Closing stdout for bytes_max\n");
- closefd(fdp[STDOUT_FILENO], 0);
- goto ok;
- }
- if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
- DPRINTF("Got stuff from stderr %s\n", *newch);
- rv = ERRDATA;
- r = filter_error(*newch, r);
- goto ok;
- }
- if (re == 0)
- goto ok;
- rv = makeerror(newch, n, "Read stderr failed, %s",
- strerror(errno));
- goto err;
- ok:
- *n = r;
- /* NUL terminate, as every buffer is handled here. */
- (*newch)[*n] = '\0';
- err:
- closefd(fdp[STDIN_FILENO], 1);
- closefd(fdp[STDOUT_FILENO], 0);
- closefd(fdp[STDERR_FILENO], 0);
- w = waitpid(pid, &status, 0);
- wait_err:
- if (w == -1) {
- rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
- DPRINTF("Child wait return %#x\n", status);
- } else if (!WIFEXITED(status)) {
- DPRINTF("Child not exited (%#x)\n", status);
- } else if (WEXITSTATUS(status) != 0) {
- DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
- }
- if (writepid > 0) {
- /* _After_ we know decompressor has exited, our input writer
- * definitely will exit now (at worst, writing fails in it,
- * since output fd is closed now on the reading size).
- */
- w = waitpid(writepid, &status, 0);
- writepid = -1;
- goto wait_err;
- }
- closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
- DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
- return rv;
- }
- #endif
|