compress.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  28. /*
  29. * compress routines:
  30. * zmagic() - returns 0 if not recognized, uncompresses and prints
  31. * information if recognized
  32. * uncompress(method, old, n, newch) - uncompress old into new,
  33. * using method, return sizeof new
  34. */
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$File: compress.c,v 1.154 2022/12/26 17:35:45 christos Exp $")
  38. #endif
  39. #include "magic.h"
  40. #include <stdlib.h>
  41. #ifdef HAVE_UNISTD_H
  42. #include <unistd.h>
  43. #endif
  44. #ifdef HAVE_SPAWN_H
  45. #include <spawn.h>
  46. #endif
  47. #include <string.h>
  48. #include <errno.h>
  49. #include <ctype.h>
  50. #include <stdarg.h>
  51. #include <signal.h>
  52. #ifndef HAVE_SIG_T
  53. typedef void (*sig_t)(int);
  54. #endif /* HAVE_SIG_T */
  55. #ifdef HAVE_SYS_IOCTL_H
  56. #include <sys/ioctl.h>
  57. #endif
  58. #ifdef HAVE_SYS_WAIT_H
  59. #include <sys/wait.h>
  60. #endif
  61. #if defined(HAVE_SYS_TIME_H)
  62. #include <sys/time.h>
  63. #endif
  64. #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
  65. #define BUILTIN_DECOMPRESS
  66. #include <zlib.h>
  67. #endif
  68. #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
  69. #define BUILTIN_BZLIB
  70. #include <bzlib.h>
  71. #endif
  72. #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
  73. #define BUILTIN_XZLIB
  74. #include <lzma.h>
  75. #endif
  76. #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
  77. #define BUILTIN_ZSTDLIB
  78. #include <zstd.h>
  79. #include <zstd_errors.h>
  80. #endif
  81. #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
  82. #define BUILTIN_LZLIB
  83. #include <lzlib.h>
  84. #endif
  #ifdef DEBUG
  /*
   * Debug tracing.  DPRINTF() writes a printf-style message to /dev/tty.
   * The descriptor is opened lazily on first use and cached in the global
   * `tty'; if the terminal cannot be opened the process aborts.
   */
  int tty = -1;
  #define DPRINTF(...) do { \
  if (tty == -1) \
  tty = open("/dev/tty", O_RDWR); \
  if (tty == -1) \
  abort(); \
  dprintf(tty, __VA_ARGS__); \
  } while (/*CONSTCOND*/0)
  #else
  /* Non-debug builds: DPRINTF() expands to nothing. */
  #define DPRINTF(...)
  #endif
  97. #ifdef ZLIBSUPPORT
  98. /*
  99. * The following python code is not really used because ZLIBSUPPORT is only
  100. * defined if we have a built-in zlib, and the built-in zlib handles that.
  101. * That is not true for android where we have zlib.h and not -lz.
  102. */
  103. static const char zlibcode[] =
  104. "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
  105. static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
  /*
   * Check whether buf starts with a plausible zlib (RFC 1950) stream header.
   *
   * buf must hold at least two bytes: CMF followed by FLG.  The header is
   * accepted when the compression method (low nibble of CMF) is 8, the top
   * bit of CMF is clear, and CMF * 256 + FLG is a multiple of 31.
   *
   * The 16-bit check value is defined on the byte stream (most significant
   * byte first), so it is built arithmetically and does not depend on the
   * byte order of the host.
   *
   * Returns 1 if the header looks like zlib, 0 otherwise.
   */
  static int
  zlibcmp(const unsigned char *buf)
  {
      unsigned int header;

      if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
          return 0;

      header = buf[0] * 256u + buf[1];
      if (header % 31 != 0)
          return 0;

      return 1;
  }
  121. #endif
  /*
   * Heuristic match for a raw LZMA header.
   *
   * Accepts the buffer when byte 0 is 0x5d, bytes 1 and 2 are zero, and
   * byte 12 is either 0x00 or 0xff.  Byte 12 is only examined once the
   * first three bytes have matched.  The caller must supply at least
   * 13 readable bytes.
   *
   * Returns 1 on a match, 0 otherwise.
   */
  static int
  lzmacmp(const unsigned char *buf)
  {
      return buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0 &&
          (buf[12] == 0 || buf[12] == 0xff);
  }
  /*
   * NULL-terminated argv vectors for the external decompression programs.
   * Each vector is referenced from the compr[] table; element 0 is the
   * program name and doubles as the method's display name in methodname().
   * NOTE(review): the flags presumably select "decompress to stdout" for
   * each tool -- confirm against the tools' manuals.
   */
  #define gzip_flags "-cd"
  #define lrzip_flags "-do"
  #define lzip_flags gzip_flags
  static const char *gzip_args[] = {
  "gzip", gzip_flags, NULL
  };
  static const char *uncompress_args[] = {
  "uncompress", "-c", NULL
  };
  static const char *bzip2_args[] = {
  "bzip2", "-cd", NULL
  };
  static const char *lzip_args[] = {
  "lzip", lzip_flags, NULL
  };
  static const char *xz_args[] = {
  "xz", "-cd", NULL
  };
  static const char *lrzip_args[] = {
  "lrzip", lrzip_flags, NULL
  };
  static const char *lz4_args[] = {
  "lz4", "-cd", NULL
  };
  static const char *zstd_args[] = {
  "zstd", "-cd", NULL
  };
  /* Placeholders stored in the `unused' slot of the table below. */
  #define do_zlib NULL
  #define do_bzlib NULL
  /*
   * Table of recognised compression formats, tried in order by file_zmagic().
   *
   *   u.magic  literal signature compared with memcmp() when maglen > 0
   *   u.func   predicate called on the buffer when maglen < 0
   *   maglen   signature length; a negative value selects u.func and its
   *            absolute value is the minimum number of bytes required
   *   argv     command line of the external decompressor
   *   unused   not read by the code visible in this file
   *
   * The METH_* constants are row indices into this table and must be kept
   * in sync with the row order (the trailing comment on each row gives its
   * index).  Note that METH_FROZEN is 2, which is the "gzipped" row, not
   * the "frozen" row (3); methodname() and getdecompressor() rely on that
   * value to route gzip data to the built-in zlib code.
   */
  file_private const struct {
  union {
  const char *magic;
  int (*func)(const unsigned char *);
  } u;
  int maglen;
  const char **argv;
  void *unused;
  } compr[] = {
  #define METH_FROZEN 2
  #define METH_BZIP 7
  #define METH_XZ 9
  #define METH_LZIP 8
  #define METH_ZSTD 12
  #define METH_LZMA 13
  #define METH_ZLIB 14
  { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
  /* Uncompress can get stuck; so use gzip first if we have it
  * Idea from Damien Clark, thanks! */
  { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
  { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
  { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
  { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
  /* the standard pack utilities do not accept standard input */
  { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
  { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */
  /* ...only first file examined */
  { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
  { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
  { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
  { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
  { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
  { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
  { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
  #ifdef ZLIBSUPPORT
  { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
  #endif
  };
  198. #define OKDATA 0
  199. #define NODATA 1
  200. #define ERRDATA 2
  201. file_private ssize_t swrite(int, const void *, size_t);
  202. #if HAVE_FORK
  203. file_private size_t ncompr = __arraycount(compr);
  204. file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
  205. unsigned char **, size_t *);
  206. #ifdef BUILTIN_DECOMPRESS
  207. file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
  208. size_t *, int);
  209. file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
  210. size_t *, int);
  211. #endif
  212. #ifdef BUILTIN_BZLIB
  213. file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
  214. size_t *, int);
  215. #endif
  216. #ifdef BUILTIN_XZLIB
  217. file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
  218. size_t *, int);
  219. #endif
  220. #ifdef BUILTIN_ZSTDLIB
  221. file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
  222. size_t *, int);
  223. #endif
  224. #ifdef BUILTIN_LZLIB
  225. file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
  226. size_t *, int);
  227. #endif
  228. static int makeerror(unsigned char **, size_t *, const char *, ...)
  229. __attribute__((__format__(__printf__, 3, 4)));
  230. file_private const char *methodname(size_t);
  231. file_private int
  232. format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
  233. {
  234. unsigned char *p;
  235. int mime = ms->flags & MAGIC_MIME;
  236. if (!mime)
  237. return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
  238. for (p = buf; *p; p++)
  239. if (!isalnum(*p))
  240. *p = '-';
  241. return file_printf(ms, "application/x-decompression-error-%s-%s",
  242. methodname(i), buf);
  243. }
  /*
   * Try to recognise the buffer as compressed data and, if so, describe the
   * uncompressed contents.
   *
   * ms    magic handle; nothing is done unless MAGIC_COMPRESS is set
   * b     input buffer (b->fbuf / b->flen) and its descriptor (b->fd)
   * name  passed through to file_buffer() for the uncompressed data
   *
   * Every row of compr[] whose signature matches is tried in turn.
   * MAGIC_COMPRESS is cleared while the nested file_buffer() calls run and
   * is set again before returning.  SIGPIPE is ignored while decompressors
   * may be running and the previous disposition is restored on exit.
   *
   * Returns 0 if nothing was recognised, 1 if a description was produced,
   * -1 if an error occurred while producing output.
   */
  file_protected int
  file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
  {
  unsigned char *newbuf = NULL;
  size_t i, nsz;
  char *rbuf;
  file_pushbuf_t *pb;
  int urv, prv, rv = 0;
  int mime = ms->flags & MAGIC_MIME;
  int fd = b->fd;
  const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
  size_t nbytes = b->flen;
  int sa_saved = 0;
  struct sigaction sig_act;
  if ((ms->flags & MAGIC_COMPRESS) == 0)
  return 0;
  for (i = 0; i < ncompr; i++) {
  int zm;
  /* Not enough data for this signature / predicate. */
  if (nbytes < CAST(size_t, abs(compr[i].maglen)))
  continue;
  /* Negative maglen: call the predicate; otherwise compare bytes. */
  if (compr[i].maglen < 0) {
  zm = (*compr[i].u.func)(buf);
  } else {
  zm = memcmp(buf, compr[i].u.magic,
  CAST(size_t, compr[i].maglen)) == 0;
  }
  if (!zm)
  continue;
  /* Prevent SIGPIPE death if child dies unexpectedly */
  if (!sa_saved) {
  /*
   * sig_act receives the previous disposition so it can be
   * restored at `out'; a separate struct describes the new
   * (ignore) action.
   */
  struct sigaction new_act;
  memset(&new_act, 0, sizeof(new_act));
  new_act.sa_handler = SIG_IGN;
  sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
  }
  nsz = nbytes;
  /* Drop the result of a previous iteration before reusing newbuf. */
  free(newbuf);
  urv = uncompressbuf(fd, ms->bytes_max, i,
  (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
  DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
  (char *)newbuf, nsz);
  switch (urv) {
  case OKDATA:
  case ERRDATA:
  /* Do not recurse into compression detection for the payload. */
  ms->flags &= ~MAGIC_COMPRESS;
  if (urv == ERRDATA)
  prv = format_decompression_error(ms, i, newbuf);
  else
  prv = file_buffer(ms, -1, NULL, name, newbuf,
  nsz);
  if (prv == -1)
  goto error;
  rv = 1;
  if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
  goto out;
  /* Only plain text mode or exactly MAGIC_MIME get the suffix. */
  if (mime != MAGIC_MIME && mime != 0)
  goto out;
  if ((file_printf(ms,
  mime ? " compressed-encoding=" : " (")) == -1)
  goto error;
  /* Describe the compressed container itself into a side buffer. */
  if ((pb = file_push_buffer(ms)) == NULL)
  goto error;
  /*
  * XXX: If file_buffer fails here, we overwrite
  * the compressed text. FIXME.
  */
  if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
  {
  if (file_pop_buffer(ms, pb) != NULL)
  abort();
  goto error;
  }
  if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
  if (file_printf(ms, "%s", rbuf) == -1) {
  free(rbuf);
  goto error;
  }
  free(rbuf);
  }
  if (!mime && file_printf(ms, ")") == -1)
  goto error;
  /*FALLTHROUGH*/
  case NODATA:
  break;
  default:
  abort();
  /*NOTREACHED*/
  /*
   * Reached only via `goto error' from the cases above; records the
   * failure and leaves the switch, so the loop moves on to the next
   * table row.
   */
  error:
  rv = -1;
  break;
  }
  }
  out:
  DPRINTF("rv = %d\n", rv);
  /* Restore the caller's SIGPIPE disposition unless it already ignored it. */
  if (sa_saved && sig_act.sa_handler != SIG_IGN)
  (void)sigaction(SIGPIPE, &sig_act, NULL);
  free(newbuf);
  ms->flags |= MAGIC_COMPRESS;
  DPRINTF("Zmagic returns %d\n", rv);
  return rv;
  }
  346. #endif
  347. /*
  348. * `safe' write for sockets and pipes.
  349. */
  350. file_private ssize_t
  351. swrite(int fd, const void *buf, size_t n)
  352. {
  353. ssize_t rv;
  354. size_t rn = n;
  355. do
  356. switch (rv = write(fd, buf, n)) {
  357. case -1:
  358. if (errno == EINTR)
  359. continue;
  360. return -1;
  361. default:
  362. n -= rv;
  363. buf = CAST(const char *, buf) + rv;
  364. break;
  365. }
  366. while (n > 0);
  367. return rn;
  368. }
  369. /*
  370. * `safe' read for sockets and pipes.
  371. */
  372. file_protected ssize_t
  373. sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
  374. {
  375. ssize_t rv;
  376. #ifdef FIONREAD
  377. int t = 0;
  378. #endif
  379. size_t rn = n;
  380. if (fd == STDIN_FILENO)
  381. goto nocheck;
  382. #ifdef FIONREAD
  383. if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
  384. #ifdef FD_ZERO
  385. ssize_t cnt;
  386. for (cnt = 0;; cnt++) {
  387. fd_set check;
  388. struct timeval tout = {0, 100 * 1000};
  389. int selrv;
  390. FD_ZERO(&check);
  391. FD_SET(fd, &check);
  392. /*
  393. * Avoid soft deadlock: do not read if there
  394. * is nothing to read from sockets and pipes.
  395. */
  396. selrv = select(fd + 1, &check, NULL, NULL, &tout);
  397. if (selrv == -1) {
  398. if (errno == EINTR || errno == EAGAIN)
  399. continue;
  400. } else if (selrv == 0 && cnt >= 5) {
  401. return 0;
  402. } else
  403. break;
  404. }
  405. #endif
  406. (void)ioctl(fd, FIONREAD, &t);
  407. }
  408. if (t > 0 && CAST(size_t, t) < n) {
  409. n = t;
  410. rn = n;
  411. }
  412. #endif
  413. nocheck:
  414. do
  415. switch ((rv = read(fd, buf, n))) {
  416. case -1:
  417. if (errno == EINTR)
  418. continue;
  419. return -1;
  420. case 0:
  421. return rn - n;
  422. default:
  423. n -= rv;
  424. buf = CAST(char *, CCAST(void *, buf)) + rv;
  425. break;
  426. }
  427. while (n > 0);
  428. return rn;
  429. }
  430. file_protected int
  431. file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
  432. size_t nbytes)
  433. {
  434. char buf[4096];
  435. ssize_t r;
  436. int tfd;
  437. #ifdef WIN32
  438. const char *t;
  439. buf[0] = '\0';
  440. if ((t = getenv("TEMP")) != NULL)
  441. (void)strlcpy(buf, t, sizeof(buf));
  442. else if ((t = getenv("TMP")) != NULL)
  443. (void)strlcpy(buf, t, sizeof(buf));
  444. else if ((t = getenv("TMPDIR")) != NULL)
  445. (void)strlcpy(buf, t, sizeof(buf));
  446. if (buf[0] != '\0')
  447. (void)strlcat(buf, "/", sizeof(buf));
  448. (void)strlcat(buf, "file.XXXXXX", sizeof(buf));
  449. #else
  450. (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
  451. #endif
  452. #ifndef HAVE_MKSTEMP
  453. {
  454. char *ptr = mktemp(buf);
  455. tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
  456. r = errno;
  457. (void)unlink(ptr);
  458. errno = r;
  459. }
  460. #else
  461. {
  462. int te;
  463. mode_t ou = umask(0);
  464. tfd = mkstemp(buf);
  465. (void)umask(ou);
  466. te = errno;
  467. (void)unlink(buf);
  468. errno = te;
  469. }
  470. #endif
  471. if (tfd == -1) {
  472. file_error(ms, errno,
  473. "cannot create temporary file for pipe copy");
  474. return -1;
  475. }
  476. if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
  477. r = 1;
  478. else {
  479. while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
  480. if (swrite(tfd, buf, CAST(size_t, r)) != r)
  481. break;
  482. }
  483. switch (r) {
  484. case -1:
  485. file_error(ms, errno, "error copying from pipe to temp file");
  486. return -1;
  487. case 0:
  488. break;
  489. default:
  490. file_error(ms, errno, "error while writing to temp file");
  491. return -1;
  492. }
  493. /*
  494. * We duplicate the file descriptor, because fclose on a
  495. * tmpfile will delete the file, but any open descriptors
  496. * can still access the phantom inode.
  497. */
  498. if ((fd = dup2(tfd, fd)) == -1) {
  499. file_error(ms, errno, "could not dup descriptor for temp file");
  500. return -1;
  501. }
  502. (void)close(tfd);
  503. if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
  504. file_badseek(ms);
  505. return -1;
  506. }
  507. return fd;
  508. }
  509. #if HAVE_FORK
  510. #ifdef BUILTIN_DECOMPRESS
  511. #define FHCRC (1 << 1)
  512. #define FEXTRA (1 << 2)
  513. #define FNAME (1 << 3)
  514. #define FCOMMENT (1 << 4)
  515. file_private int
  516. uncompressgzipped(const unsigned char *old, unsigned char **newch,
  517. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  518. {
  519. unsigned char flg;
  520. size_t data_start = 10;
  521. if (*n < 4) {
  522. goto err;
  523. }
  524. flg = old[3];
  525. if (flg & FEXTRA) {
  526. if (data_start + 1 >= *n)
  527. goto err;
  528. data_start += 2 + old[data_start] + old[data_start + 1] * 256;
  529. }
  530. if (flg & FNAME) {
  531. while(data_start < *n && old[data_start])
  532. data_start++;
  533. data_start++;
  534. }
  535. if (flg & FCOMMENT) {
  536. while(data_start < *n && old[data_start])
  537. data_start++;
  538. data_start++;
  539. }
  540. if (flg & FHCRC)
  541. data_start += 2;
  542. if (data_start >= *n)
  543. goto err;
  544. *n -= data_start;
  545. old += data_start;
  546. return uncompresszlib(old, newch, bytes_max, n, 0);
  547. err:
  548. return makeerror(newch, n, "File too short");
  549. }
  550. file_private int
  551. uncompresszlib(const unsigned char *old, unsigned char **newch,
  552. size_t bytes_max, size_t *n, int zlib)
  553. {
  554. int rc;
  555. z_stream z;
  556. z.next_in = CCAST(Bytef *, old);
  557. z.avail_in = CAST(uint32_t, *n);
  558. z.next_out = *newch;
  559. z.avail_out = CAST(unsigned int, bytes_max);
  560. z.zalloc = Z_NULL;
  561. z.zfree = Z_NULL;
  562. z.opaque = Z_NULL;
  563. /* LINTED bug in header macro */
  564. rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
  565. if (rc != Z_OK)
  566. goto err;
  567. rc = inflate(&z, Z_SYNC_FLUSH);
  568. if (rc != Z_OK && rc != Z_STREAM_END) {
  569. inflateEnd(&z);
  570. goto err;
  571. }
  572. *n = CAST(size_t, z.total_out);
  573. rc = inflateEnd(&z);
  574. if (rc != Z_OK)
  575. goto err;
  576. /* let's keep the nul-terminate tradition */
  577. (*newch)[*n] = '\0';
  578. return OKDATA;
  579. err:
  580. return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
  581. }
  582. #endif
  583. #ifdef BUILTIN_BZLIB
  584. file_private int
  585. uncompressbzlib(const unsigned char *old, unsigned char **newch,
  586. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  587. {
  588. int rc;
  589. bz_stream bz;
  590. memset(&bz, 0, sizeof(bz));
  591. rc = BZ2_bzDecompressInit(&bz, 0, 0);
  592. if (rc != BZ_OK)
  593. goto err;
  594. bz.next_in = CCAST(char *, RCAST(const char *, old));
  595. bz.avail_in = CAST(uint32_t, *n);
  596. bz.next_out = RCAST(char *, *newch);
  597. bz.avail_out = CAST(unsigned int, bytes_max);
  598. rc = BZ2_bzDecompress(&bz);
  599. if (rc != BZ_OK && rc != BZ_STREAM_END) {
  600. BZ2_bzDecompressEnd(&bz);
  601. goto err;
  602. }
  603. /* Assume byte_max is within 32bit */
  604. /* assert(bz.total_out_hi32 == 0); */
  605. *n = CAST(size_t, bz.total_out_lo32);
  606. rc = BZ2_bzDecompressEnd(&bz);
  607. if (rc != BZ_OK)
  608. goto err;
  609. /* let's keep the nul-terminate tradition */
  610. (*newch)[*n] = '\0';
  611. return OKDATA;
  612. err:
  613. return makeerror(newch, n, "bunzip error %d", rc);
  614. }
  615. #endif
  616. #ifdef BUILTIN_XZLIB
  617. file_private int
  618. uncompressxzlib(const unsigned char *old, unsigned char **newch,
  619. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  620. {
  621. int rc;
  622. lzma_stream xz;
  623. memset(&xz, 0, sizeof(xz));
  624. rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
  625. if (rc != LZMA_OK)
  626. goto err;
  627. xz.next_in = CCAST(const uint8_t *, old);
  628. xz.avail_in = CAST(uint32_t, *n);
  629. xz.next_out = RCAST(uint8_t *, *newch);
  630. xz.avail_out = CAST(unsigned int, bytes_max);
  631. rc = lzma_code(&xz, LZMA_RUN);
  632. if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
  633. lzma_end(&xz);
  634. goto err;
  635. }
  636. *n = CAST(size_t, xz.total_out);
  637. lzma_end(&xz);
  638. /* let's keep the nul-terminate tradition */
  639. (*newch)[*n] = '\0';
  640. return OKDATA;
  641. err:
  642. return makeerror(newch, n, "unxz error %d", rc);
  643. }
  644. #endif
  645. #ifdef BUILTIN_ZSTDLIB
  646. file_private int
  647. uncompresszstd(const unsigned char *old, unsigned char **newch,
  648. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  649. {
  650. size_t rc;
  651. ZSTD_DStream *zstd;
  652. ZSTD_inBuffer in;
  653. ZSTD_outBuffer out;
  654. if ((zstd = ZSTD_createDStream()) == NULL) {
  655. return makeerror(newch, n, "No ZSTD decompression stream, %s",
  656. strerror(errno));
  657. }
  658. rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
  659. if (ZSTD_isError(rc))
  660. goto err;
  661. in.src = CCAST(const void *, old);
  662. in.size = *n;
  663. in.pos = 0;
  664. out.dst = RCAST(void *, *newch);
  665. out.size = bytes_max;
  666. out.pos = 0;
  667. rc = ZSTD_decompressStream(zstd, &out, &in);
  668. if (ZSTD_isError(rc))
  669. goto err;
  670. *n = out.pos;
  671. ZSTD_freeDStream(zstd);
  672. /* let's keep the nul-terminate tradition */
  673. (*newch)[*n] = '\0';
  674. return OKDATA;
  675. err:
  676. ZSTD_freeDStream(zstd);
  677. return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
  678. }
  679. #endif
  680. #ifdef BUILTIN_LZLIB
  /*
   * Decompress up to bytes_max bytes of lzip data from `old' into *newch
   * using lzlib's streaming decoder.
   *
   * old        compressed input of *n bytes
   * newch      output buffer; the terminating NUL is stored one past the
   *            last byte produced.  Replaced by an error string on failure.
   * bytes_max  maximum number of output bytes to produce
   * n          in: input length; out: output (or error-text) length
   * extra      unused
   *
   * Returns OKDATA on success, otherwise the result of makeerror().
   */
  file_private int
  uncompresslzlib(const unsigned char *old, unsigned char **newch,
  size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  {
  enum LZ_Errno err;
  size_t old_remaining = *n;
  size_t new_remaining = bytes_max;
  size_t total_read = 0;
  unsigned char *bufp;
  struct LZ_Decoder *dec;
  bufp = *newch;
  dec = LZ_decompress_open();
  if (!dec) {
  return makeerror(newch, n, "unable to allocate LZ_Decoder");
  }
  if (LZ_decompress_errno(dec) != LZ_ok)
  goto err;
  /* Alternate between feeding input and draining output. */
  for (;;) {
  // LZ_decompress_read() stops at member boundaries, so we may
  // have more than one successful read after writing all data
  // we have.
  if (old_remaining > 0) {
  int wr = LZ_decompress_write(dec, old, old_remaining);
  if (wr < 0)
  goto err;
  /* the decoder may accept only part of the input */
  old_remaining -= wr;
  old += wr;
  }
  int rd = LZ_decompress_read(dec, bufp, new_remaining);
  if (rd > 0) {
  new_remaining -= rd;
  bufp += rd;
  total_read += rd;
  }
  if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
  goto err;
  /* output budget exhausted */
  if (new_remaining == 0)
  break;
  /* all input consumed and nothing more came out */
  if (old_remaining == 0 && rd == 0)
  break;
  }
  LZ_decompress_close(dec);
  *n = total_read;
  /* let's keep the nul-terminate tradition */
  *bufp = '\0';
  return OKDATA;
  err:
  /* fetch the error code before the decoder is destroyed */
  err = LZ_decompress_errno(dec);
  LZ_decompress_close(dec);
  return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
  }
  732. #endif
  733. static int
  734. makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
  735. {
  736. char *msg;
  737. va_list ap;
  738. int rv;
  739. free(*buf);
  740. va_start(ap, fmt);
  741. rv = vasprintf(&msg, fmt, ap);
  742. va_end(ap);
  743. if (rv < 0) {
  744. *buf = NULL;
  745. *len = 0;
  746. return NODATA;
  747. }
  748. *buf = RCAST(unsigned char *, msg);
  749. *len = strlen(msg);
  750. return ERRDATA;
  751. }
  /*
   * Close descriptor slot i of the array fd, if it is open, and mark the
   * slot as closed by storing -1 in it.  Slots already holding -1 are
   * left alone.
   */
  static void
  closefd(int *fd, size_t i)
  {
      int *slot = &fd[i];

      if (*slot != -1) {
          (void) close(*slot);
          *slot = -1;
      }
  }
  /*
   * Close both ends of a pipe pair stored in fd[0] and fd[1]; each end is
   * handled by closefd(), so ends that are already -1 are skipped.
   */
  static void
  closep(int *fd)
  {
      size_t end = 0;

      while (end < 2) {
          closefd(fd, end);
          end++;
      }
  }
  /*
   * Arrange for descriptor fd to become descriptor i and for the original
   * fd to be closed.  Nothing is done when the two are already equal.
   *
   * With HAVE_POSIX_SPAWNP, v points to a posix_spawn_file_actions_t and
   * the dup2/close are queued on it.  Otherwise they are performed
   * immediately in the calling process, which exits with EXIT_FAILURE if
   * dup2() fails; v is then only referenced to avoid an unused-parameter
   * warning.
   */
  static void
  movedesc(void *v, int i, int fd)
  {
      if (fd != i) {
  #ifdef HAVE_POSIX_SPAWNP
          posix_spawn_file_actions_t *actions =
              RCAST(posix_spawn_file_actions_t *, v);

          posix_spawn_file_actions_adddup2(actions, fd, i);
          posix_spawn_file_actions_addclose(actions, fd);
  #else
          if (dup2(fd, i) == -1) {
              DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
              exit(EXIT_FAILURE);
          }
          close(v ? fd : fd);
  #endif
      }
      /* else: "no dup was necessary" */
  }
  /*
   * Arrange for descriptor fd to be closed.
   *
   * With HAVE_POSIX_SPAWNP, v points to a posix_spawn_file_actions_t and
   * the close is queued on it; otherwise fd is closed immediately and v
   * is only referenced to avoid an unused-parameter warning.
   */
  static void
  closedesc(void *v, int fd)
  {
  #ifdef HAVE_POSIX_SPAWNP
      posix_spawn_file_actions_t *actions =
          RCAST(posix_spawn_file_actions_t *, v);

      posix_spawn_file_actions_addclose(actions, fd);
  #else
      close(v ? fd : fd);
  #endif
  }
  /*
   * Wire up standard input, output and error for a decompressor process.
   *
   * v    opaque handle forwarded to movedesc()/closedesc()
   * fd   input file descriptor, or -1 if input arrives through a pipe
   * fdp  pipe pairs indexed by STDIN_FILENO/STDOUT_FILENO/STDERR_FILENO;
   *      element [0] is used as the read end and [1] as the write end
   *
   * stdin comes either from fd (rewound to offset 0 first) or from the
   * read end of the stdin pipe; stdout and stderr are attached to the
   * write ends of their pipes.  The opposite end of each pipe is closed
   * when its number is above 2.
   * NOTE(review): file_clear_closexec() is defined elsewhere; presumably it
   * clears the close-on-exec flag on the given descriptor -- confirm.
   */
  static void
  handledesc(void *v, int fd, int fdp[3][2])
  {
  if (fd != -1) {
  (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
  movedesc(v, STDIN_FILENO, fd);
  } else {
  movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
  if (fdp[STDIN_FILENO][1] > 2)
  closedesc(v, fdp[STDIN_FILENO][1]);
  }
  file_clear_closexec(STDIN_FILENO);
  ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
  movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
  if (fdp[STDOUT_FILENO][0] > 2)
  closedesc(v, fdp[STDOUT_FILENO][0]);
  file_clear_closexec(STDOUT_FILENO);
  movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
  if (fdp[STDERR_FILENO][0] > 2)
  closedesc(v, fdp[STDERR_FILENO][0]);
  file_clear_closexec(STDERR_FILENO);
  }
  816. static pid_t
  817. writechild(int fd, const void *old, size_t n)
  818. {
  819. pid_t pid;
  820. /*
  821. * fork again, to avoid blocking because both
  822. * pipes filled
  823. */
  824. pid = fork();
  825. if (pid == -1) {
  826. DPRINTF("Fork failed (%s)\n", strerror(errno));
  827. return -1;
  828. }
  829. if (pid == 0) {
  830. /* child */
  831. if (swrite(fd, old, n) != CAST(ssize_t, n)) {
  832. DPRINTF("Write failed (%s)\n", strerror(errno));
  833. exit(EXIT_FAILURE);
  834. }
  835. exit(EXIT_SUCCESS);
  836. }
  837. /* parent */
  838. return pid;
  839. }
  840. static ssize_t
  841. filter_error(unsigned char *ubuf, ssize_t n)
  842. {
  843. char *p;
  844. char *buf;
  845. ubuf[n] = '\0';
  846. buf = RCAST(char *, ubuf);
  847. while (isspace(CAST(unsigned char, *buf)))
  848. buf++;
  849. DPRINTF("Filter error[[[%s]]]\n", buf);
  850. if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
  851. *p = '\0';
  852. if ((p = strchr(CAST(char *, buf), ';')) != NULL)
  853. *p = '\0';
  854. if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
  855. ++p;
  856. while (isspace(CAST(unsigned char, *p)))
  857. p++;
  858. n = strlen(p);
  859. memmove(ubuf, p, CAST(size_t, n + 1));
  860. }
  861. DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
  862. if (islower(*ubuf))
  863. *ubuf = toupper(*ubuf);
  864. return n;
  865. }
  /*
   * Return a display name for compression method `method' (an index into
   * compr[]).
   *
   * Methods handled by a compiled-in library report that library's name;
   * every other method reports the name of its external program, taken
   * from element 0 of the method's argv in compr[].
   * NOTE(review): METH_FROZEN has the value 2, which is the gzip row of
   * compr[], so gzip data is reported as "zlib" when BUILTIN_DECOMPRESS is
   * defined.
   */
  file_private const char *
  methodname(size_t method)
  {
  switch (method) {
  #ifdef BUILTIN_DECOMPRESS
  case METH_FROZEN:
  case METH_ZLIB:
  return "zlib";
  #endif
  #ifdef BUILTIN_BZLIB
  case METH_BZIP:
  return "bzlib";
  #endif
  #ifdef BUILTIN_XZLIB
  case METH_XZ:
  case METH_LZMA:
  return "xzlib";
  #endif
  #ifdef BUILTIN_ZSTDLIB
  case METH_ZSTD:
  return "zstd";
  #endif
  #ifdef BUILTIN_LZLIB
  case METH_LZIP:
  return "lzlib";
  #endif
  default:
  /* no built-in support: fall back to the external program's name */
  return compr[method].argv[0];
  }
  }
  /*
   * Return the compiled-in decompression routine for `method' (an index
   * into compr[]), or NULL when no built-in routine is available for it.
   *
   * The returned function takes (input, &output, bytes_max, &length, flag)
   * like the uncompress*() routines in this file.
   * NOTE(review): METH_FROZEN has the value 2, which is the gzip row of
   * compr[]; it maps to uncompressgzipped().
   */
  file_private int (*
  getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
  size_t *, int)
  {
  switch (method) {
  #ifdef BUILTIN_DECOMPRESS
  case METH_FROZEN:
  return uncompressgzipped;
  case METH_ZLIB:
  return uncompresszlib;
  #endif
  #ifdef BUILTIN_BZLIB
  case METH_BZIP:
  return uncompressbzlib;
  #endif
  #ifdef BUILTIN_XZLIB
  case METH_XZ:
  case METH_LZMA:
  return uncompressxzlib;
  #endif
  #ifdef BUILTIN_ZSTDLIB
  case METH_ZSTD:
  return uncompresszstd;
  #endif
  #ifdef BUILTIN_LZLIB
  case METH_LZIP:
  return uncompresslzlib;
  #endif
  default:
  /* no library support compiled in for this method */
  return NULL;
  }
  }
  928. file_private int
  929. uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
  930. const unsigned char *old, unsigned char **newch, size_t* n)
  931. {
  932. int fdp[3][2];
  933. int status, rv, w;
  934. pid_t pid;
  935. pid_t writepid = -1;
  936. size_t i;
  937. ssize_t r;
  938. char *const *args;
  939. #ifdef HAVE_POSIX_SPAWNP
  940. posix_spawn_file_actions_t fa;
  941. #endif
  942. int (*decompress)(const unsigned char *, unsigned char **,
  943. size_t, size_t *, int) = getdecompressor(method);
  944. *newch = CAST(unsigned char *, malloc(bytes_max + 1));
  945. if (*newch == NULL)
  946. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  947. if (decompress) {
  948. if (nofork) {
  949. return makeerror(newch, n,
  950. "Fork is required to uncompress, but disabled");
  951. }
  952. return (*decompress)(old, newch, bytes_max, n, 1);
  953. }
  954. (void)fflush(stdout);
  955. (void)fflush(stderr);
  956. for (i = 0; i < __arraycount(fdp); i++)
  957. fdp[i][0] = fdp[i][1] = -1;
  958. /*
  959. * There are multithreaded users who run magic_file()
  960. * from dozens of threads. If two parallel magic_file() calls
  961. * analyze two large compressed files, both will spawn
  962. * an uncompressing child here, which writes out uncompressed data.
  963. * We read some portion, then close the pipe, then waitpid() the child.
  964. * If uncompressed data is larger, child should get EPIPE and exit.
  965. * However, with *parallel* calls OTHER child may unintentionally
  966. * inherit pipe fds, thus keeping pipe open and making writes in
  967. * our child block instead of failing with EPIPE!
  968. * (For the bug to occur, two threads must mutually inherit their pipes,
  969. * and both must have large outputs. Thus it happens not that often).
  970. * To avoid this, be sure to create pipes with O_CLOEXEC.
  971. */
  972. if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
  973. file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
  974. file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
  975. closep(fdp[STDIN_FILENO]);
  976. closep(fdp[STDOUT_FILENO]);
  977. return makeerror(newch, n, "Cannot create pipe, %s",
  978. strerror(errno));
  979. }
  980. args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
  981. #ifdef HAVE_POSIX_SPAWNP
  982. posix_spawn_file_actions_init(&fa);
  983. handledesc(&fa, fd, fdp);
  984. status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
  985. args, NULL);
  986. posix_spawn_file_actions_destroy(&fa);
  987. if (status == -1) {
  988. return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
  989. compr[method].argv[0], strerror(errno));
  990. }
  991. #else
  992. /* For processes with large mapped virtual sizes, vfork
  993. * may be _much_ faster (10-100 times) than fork.
  994. */
  995. pid = vfork();
  996. if (pid == -1) {
  997. return makeerror(newch, n, "Cannot vfork, %s",
  998. strerror(errno));
  999. }
  1000. if (pid == 0) {
  1001. /* child */
  1002. /* Note: we are after vfork, do not modify memory
  1003. * in a way which confuses parent. In particular,
  1004. * do not modify fdp[i][j].
  1005. */
  1006. handledesc(NULL, fd, fdp);
  1007. (void)execvp(compr[method].argv[0], args);
  1008. dprintf(STDERR_FILENO, "exec `%s' failed, %s",
  1009. compr[method].argv[0], strerror(errno));
  1010. _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
  1011. }
  1012. #endif
  1013. /* parent */
  1014. /* Close write sides of child stdout/err pipes */
  1015. for (i = 1; i < __arraycount(fdp); i++)
  1016. closefd(fdp[i], 1);
  1017. /* Write the buffer data to child stdin, if we don't have fd */
  1018. if (fd == -1) {
  1019. closefd(fdp[STDIN_FILENO], 0);
  1020. writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
  1021. if (writepid == (pid_t)-1) {
  1022. rv = makeerror(newch, n, "Write to child failed, %s",
  1023. strerror(errno));
  1024. goto err;
  1025. }
  1026. closefd(fdp[STDIN_FILENO], 1);
  1027. }
  1028. rv = OKDATA;
  1029. r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
  1030. if (r < 0) {
  1031. rv = ERRDATA;
  1032. DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
  1033. strerror(errno));
  1034. goto err;
  1035. }
  1036. if (CAST(size_t, r) == bytes_max) {
  1037. /*
  1038. * close fd so that the child exits with sigpipe and ignore
  1039. * errors, otherwise we risk the child blocking and never
  1040. * exiting.
  1041. */
  1042. closefd(fdp[STDOUT_FILENO], 0);
  1043. goto ok;
  1044. }
  1045. if ((r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
  1046. rv = ERRDATA;
  1047. r = filter_error(*newch, r);
  1048. goto ok;
  1049. }
  1050. if (r == 0)
  1051. goto ok;
  1052. rv = makeerror(newch, n, "Read stderr failed, %s",
  1053. strerror(errno));
  1054. goto err;
  1055. ok:
  1056. *n = r;
  1057. /* NUL terminate, as every buffer is handled here. */
  1058. (*newch)[*n] = '\0';
  1059. err:
  1060. closefd(fdp[STDIN_FILENO], 1);
  1061. closefd(fdp[STDOUT_FILENO], 0);
  1062. closefd(fdp[STDERR_FILENO], 0);
  1063. w = waitpid(pid, &status, 0);
  1064. wait_err:
  1065. if (w == -1) {
  1066. rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
  1067. DPRINTF("Child wait return %#x\n", status);
  1068. } else if (!WIFEXITED(status)) {
  1069. DPRINTF("Child not exited (%#x)\n", status);
  1070. } else if (WEXITSTATUS(status) != 0) {
  1071. DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
  1072. }
  1073. if (writepid > 0) {
  1074. /* _After_ we know decompressor has exited, our input writer
  1075. * definitely will exit now (at worst, writing fails in it,
  1076. * since output fd is closed now on the reading size).
  1077. */
  1078. w = waitpid(writepid, &status, 0);
  1079. writepid = -1;
  1080. goto wait_err;
  1081. }
  1082. closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
  1083. DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
  1084. return rv;
  1085. }
  1086. #endif