compress.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  /*
   * compress routines:
   *	file_zmagic() - returns 0 if the data is not recognized as
   *			compressed; otherwise uncompresses it and prints
   *			information about the result
   *	uncompressbuf(fd, bytes_max, method, old, newch, n) - uncompress
   *			old into *newch using method and store the size
   *			of the new data in *n
   */
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$File: compress.c,v 1.136 2022/09/13 16:08:34 christos Exp $")
  38. #endif
  39. #include "magic.h"
  40. #include <stdlib.h>
  41. #ifdef HAVE_UNISTD_H
  42. #include <unistd.h>
  43. #endif
  44. #ifdef HAVE_SPAWN_H
  45. #include <spawn.h>
  46. #endif
  47. #include <string.h>
  48. #include <errno.h>
  49. #include <ctype.h>
  50. #include <stdarg.h>
  51. #include <signal.h>
  52. #ifndef HAVE_SIG_T
  53. typedef void (*sig_t)(int);
  54. #endif /* HAVE_SIG_T */
  55. #ifdef HAVE_SYS_IOCTL_H
  56. #include <sys/ioctl.h>
  57. #endif
  58. #ifdef HAVE_SYS_WAIT_H
  59. #include <sys/wait.h>
  60. #endif
  61. #if defined(HAVE_SYS_TIME_H)
  62. #include <sys/time.h>
  63. #endif
  64. #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
  65. #define BUILTIN_DECOMPRESS
  66. #include <zlib.h>
  67. #endif
  68. #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
  69. #define BUILTIN_BZLIB
  70. #include <bzlib.h>
  71. #endif
  72. #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
  73. #define BUILTIN_XZLIB
  74. #include <lzma.h>
  75. #endif
  /*
   * Debug tracing.  Without DEBUG, DPRINTF() expands to nothing and its
   * arguments are never evaluated.  With DEBUG, it formats its arguments to
   * /dev/tty, opening the terminal on first use and aborting when it cannot
   * be opened.
   */
  #ifndef DEBUG
  #define DPRINTF(...)
  #else
  int tty = -1;
  #define DPRINTF(...) do { \
      if (tty == -1 && (tty = open("/dev/tty", O_RDWR)) == -1) \
          abort(); \
      dprintf(tty, __VA_ARGS__); \
  } while (/*CONSTCOND*/0)
  #endif
  88. #ifdef ZLIBSUPPORT
  /*
   * External fallback for raw zlib streams: a Python one-liner that inflates
   * standard input to standard output.  It is not really used, because
   * ZLIBSUPPORT is only defined when a built-in zlib is available and the
   * built-in zlib handles the stream itself.  The exception is Android,
   * which has zlib.h but no -lz.
   */
  static const char zlibcode[] = "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
  static const char *zlib_args[] = {
      "python",
      "-c",
      zlibcode,
      NULL
  };
  /*
   * Decide whether buf starts with a plausible zlib stream header.
   *
   * buf must hold at least two bytes.  Returns 1 when the low nibble of the
   * first byte is 8, bit 7 of the first byte is clear, and the 16-bit value
   * formed from the two bytes (first byte most significant) is a multiple
   * of 31.  Returns 0 otherwise.
   */
  static int
  zlibcmp(const unsigned char *buf)
  {
      unsigned int check;

      if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
          return 0;
      /*
       * The check value is defined on the bytes in stream order, so it
       * must be assembled the same way on every host.  Swapping the bytes
       * depending on host endianness rejected valid headers on big-endian
       * machines.
       */
      check = ((unsigned int)buf[0] << 8) | buf[1];
      return check % 31 == 0;
  }
  112. #endif
  /*
   * Heuristic match for an LZMA header.  buf must hold at least 13 bytes.
   * Returns 1 when byte 0 is 0x5d, bytes 1 and 2 are zero, and byte 12 is
   * either 0x00 or 0xff; returns 0 otherwise.
   */
  static int
  lzmacmp(const unsigned char *buf)
  {
      const int header_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

      /* byte 12 is only inspected once the first three bytes matched */
      return header_ok && (buf[12] == 0 || buf[12] == 0xff);
  }
  /* Command-line flags shared by several of the argument vectors below. */
  #define gzip_flags "-cd"
  #define lrzip_flags "-do"
  #define lzip_flags gzip_flags

  /*
   * NULL-terminated argument vectors for the external decompressors; each
   * one is referenced from the compr[] table.
   */
  static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
  static const char *uncompress_args[] = { "uncompress", "-c", NULL };
  static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
  static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
  static const char *xz_args[] = { "xz", "-cd", NULL };
  static const char *lrzip_args[] = { "lrzip", lrzip_flags, NULL };
  static const char *lz4_args[] = { "lz4", "-cd", NULL };
  static const char *zstd_args[] = { "zstd", "-cd", NULL };
  149. #define do_zlib NULL
  150. #define do_bzlib NULL
  151. private const struct {
  152. union {
  153. const char *magic;
  154. int (*func)(const unsigned char *);
  155. } u;
  156. int maglen;
  157. const char **argv;
  158. void *unused;
  159. } compr[] = {
  160. #define METH_FROZEN 2
  161. #define METH_BZIP 7
  162. #define METH_XZ 9
  163. #define METH_LZMA 13
  164. #define METH_ZLIB 14
  165. { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
  166. /* Uncompress can get stuck; so use gzip first if we have it
  167. * Idea from Damien Clark, thanks! */
  168. { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
  169. { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
  170. { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
  171. { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
  172. /* the standard pack utilities do not accept standard input */
  173. { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
  174. { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */
  175. /* ...only first file examined */
  176. { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
  177. { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
  178. { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
  179. { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
  180. { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
  181. { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
  182. { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
  183. #ifdef ZLIBSUPPORT
  184. { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
  185. #endif
  186. };
  187. #define OKDATA 0
  188. #define NODATA 1
  189. #define ERRDATA 2
  190. private ssize_t swrite(int, const void *, size_t);
  191. #if HAVE_FORK
  192. private size_t ncompr = __arraycount(compr);
  193. private int uncompressbuf(int, size_t, size_t, const unsigned char *,
  194. unsigned char **, size_t *);
  195. #ifdef BUILTIN_DECOMPRESS
  196. private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
  197. size_t *, int);
  198. private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
  199. size_t *);
  200. #endif
  201. #ifdef BUILTIN_BZLIB
  202. private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
  203. size_t *);
  204. #endif
  205. #ifdef BUILTIN_XZLIB
  206. private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
  207. size_t *);
  208. #endif
  209. static int makeerror(unsigned char **, size_t *, const char *, ...)
  210. __attribute__((__format__(__printf__, 3, 4)));
  211. private const char *methodname(size_t);
  212. private int
  213. format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
  214. {
  215. unsigned char *p;
  216. int mime = ms->flags & MAGIC_MIME;
  217. if (!mime)
  218. return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
  219. for (p = buf; *p; p++)
  220. if (!isalnum(*p))
  221. *p = '-';
  222. return file_printf(ms, "application/x-decompression-error-%s-%s",
  223. methodname(i), buf);
  224. }
  225. protected int
  226. file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
  227. {
  228. unsigned char *newbuf = NULL;
  229. size_t i, nsz;
  230. char *rbuf;
  231. file_pushbuf_t *pb;
  232. int urv, prv, rv = 0;
  233. int mime = ms->flags & MAGIC_MIME;
  234. int fd = b->fd;
  235. const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
  236. size_t nbytes = b->flen;
  237. int sa_saved = 0;
  238. struct sigaction sig_act;
  239. if ((ms->flags & MAGIC_COMPRESS) == 0)
  240. return 0;
  241. for (i = 0; i < ncompr; i++) {
  242. int zm;
  243. if (nbytes < CAST(size_t, abs(compr[i].maglen)))
  244. continue;
  245. if (compr[i].maglen < 0) {
  246. zm = (*compr[i].u.func)(buf);
  247. } else {
  248. zm = memcmp(buf, compr[i].u.magic,
  249. CAST(size_t, compr[i].maglen)) == 0;
  250. }
  251. if (!zm)
  252. continue;
  253. /* Prevent SIGPIPE death if child dies unexpectedly */
  254. if (!sa_saved) {
  255. //We can use sig_act for both new and old, but
  256. struct sigaction new_act;
  257. memset(&new_act, 0, sizeof(new_act));
  258. new_act.sa_handler = SIG_IGN;
  259. sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
  260. }
  261. nsz = nbytes;
  262. urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
  263. DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
  264. (char *)newbuf, nsz);
  265. switch (urv) {
  266. case OKDATA:
  267. case ERRDATA:
  268. ms->flags &= ~MAGIC_COMPRESS;
  269. if (urv == ERRDATA)
  270. prv = format_decompression_error(ms, i, newbuf);
  271. else
  272. prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
  273. if (prv == -1)
  274. goto error;
  275. rv = 1;
  276. if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
  277. goto out;
  278. if (mime != MAGIC_MIME && mime != 0)
  279. goto out;
  280. if ((file_printf(ms,
  281. mime ? " compressed-encoding=" : " (")) == -1)
  282. goto error;
  283. if ((pb = file_push_buffer(ms)) == NULL)
  284. goto error;
  285. /*
  286. * XXX: If file_buffer fails here, we overwrite
  287. * the compressed text. FIXME.
  288. */
  289. if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
  290. if (file_pop_buffer(ms, pb) != NULL)
  291. abort();
  292. goto error;
  293. }
  294. if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
  295. if (file_printf(ms, "%s", rbuf) == -1) {
  296. free(rbuf);
  297. goto error;
  298. }
  299. free(rbuf);
  300. }
  301. if (!mime && file_printf(ms, ")") == -1)
  302. goto error;
  303. /*FALLTHROUGH*/
  304. case NODATA:
  305. break;
  306. default:
  307. abort();
  308. /*NOTREACHED*/
  309. error:
  310. rv = -1;
  311. break;
  312. }
  313. }
  314. out:
  315. DPRINTF("rv = %d\n", rv);
  316. if (sa_saved && sig_act.sa_handler != SIG_IGN)
  317. (void)sigaction(SIGPIPE, &sig_act, NULL);
  318. free(newbuf);
  319. ms->flags |= MAGIC_COMPRESS;
  320. DPRINTF("Zmagic returns %d\n", rv);
  321. return rv;
  322. }
  323. #endif
  324. /*
  325. * `safe' write for sockets and pipes.
  326. */
  327. private ssize_t
  328. swrite(int fd, const void *buf, size_t n)
  329. {
  330. ssize_t rv;
  331. size_t rn = n;
  332. do
  333. switch (rv = write(fd, buf, n)) {
  334. case -1:
  335. if (errno == EINTR)
  336. continue;
  337. return -1;
  338. default:
  339. n -= rv;
  340. buf = CAST(const char *, buf) + rv;
  341. break;
  342. }
  343. while (n > 0);
  344. return rn;
  345. }
  346. /*
  347. * `safe' read for sockets and pipes.
  348. */
  349. protected ssize_t
  350. sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
  351. {
  352. ssize_t rv;
  353. #ifdef FIONREAD
  354. int t = 0;
  355. #endif
  356. size_t rn = n;
  357. if (fd == STDIN_FILENO)
  358. goto nocheck;
  359. #ifdef FIONREAD
  360. if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
  361. #ifdef FD_ZERO
  362. ssize_t cnt;
  363. for (cnt = 0;; cnt++) {
  364. fd_set check;
  365. struct timeval tout = {0, 100 * 1000};
  366. int selrv;
  367. FD_ZERO(&check);
  368. FD_SET(fd, &check);
  369. /*
  370. * Avoid soft deadlock: do not read if there
  371. * is nothing to read from sockets and pipes.
  372. */
  373. selrv = select(fd + 1, &check, NULL, NULL, &tout);
  374. if (selrv == -1) {
  375. if (errno == EINTR || errno == EAGAIN)
  376. continue;
  377. } else if (selrv == 0 && cnt >= 5) {
  378. return 0;
  379. } else
  380. break;
  381. }
  382. #endif
  383. (void)ioctl(fd, FIONREAD, &t);
  384. }
  385. if (t > 0 && CAST(size_t, t) < n) {
  386. n = t;
  387. rn = n;
  388. }
  389. #endif
  390. nocheck:
  391. do
  392. switch ((rv = read(fd, buf, n))) {
  393. case -1:
  394. if (errno == EINTR)
  395. continue;
  396. return -1;
  397. case 0:
  398. return rn - n;
  399. default:
  400. n -= rv;
  401. buf = CAST(char *, CCAST(void *, buf)) + rv;
  402. break;
  403. }
  404. while (n > 0);
  405. return rn;
  406. }
  407. protected int
  408. file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
  409. size_t nbytes)
  410. {
  411. char buf[4096];
  412. ssize_t r;
  413. int tfd;
  414. #ifdef WIN32
  415. const char *t;
  416. buf[0] = '\0';
  417. if ((t = getenv("TEMP")) != NULL)
  418. (void)strlcpy(buf, t, sizeof(buf));
  419. else if ((t = getenv("TMP")) != NULL)
  420. (void)strlcpy(buf, t, sizeof(buf));
  421. else if ((t = getenv("TMPDIR")) != NULL)
  422. (void)strlcpy(buf, t, sizeof(buf));
  423. if (buf[0] != '\0')
  424. (void)strlcat(buf, "/", sizeof(buf));
  425. (void)strlcat(buf, "file.XXXXXX", sizeof(buf));
  426. #else
  427. (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
  428. #endif
  429. #ifndef HAVE_MKSTEMP
  430. {
  431. char *ptr = mktemp(buf);
  432. tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
  433. r = errno;
  434. (void)unlink(ptr);
  435. errno = r;
  436. }
  437. #else
  438. {
  439. int te;
  440. mode_t ou = umask(0);
  441. tfd = mkstemp(buf);
  442. (void)umask(ou);
  443. te = errno;
  444. (void)unlink(buf);
  445. errno = te;
  446. }
  447. #endif
  448. if (tfd == -1) {
  449. file_error(ms, errno,
  450. "cannot create temporary file for pipe copy");
  451. return -1;
  452. }
  453. if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
  454. r = 1;
  455. else {
  456. while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
  457. if (swrite(tfd, buf, CAST(size_t, r)) != r)
  458. break;
  459. }
  460. switch (r) {
  461. case -1:
  462. file_error(ms, errno, "error copying from pipe to temp file");
  463. return -1;
  464. case 0:
  465. break;
  466. default:
  467. file_error(ms, errno, "error while writing to temp file");
  468. return -1;
  469. }
  470. /*
  471. * We duplicate the file descriptor, because fclose on a
  472. * tmpfile will delete the file, but any open descriptors
  473. * can still access the phantom inode.
  474. */
  475. if ((fd = dup2(tfd, fd)) == -1) {
  476. file_error(ms, errno, "could not dup descriptor for temp file");
  477. return -1;
  478. }
  479. (void)close(tfd);
  480. if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
  481. file_badseek(ms);
  482. return -1;
  483. }
  484. return fd;
  485. }
  486. #if HAVE_FORK
  487. #ifdef BUILTIN_DECOMPRESS
  488. #define FHCRC (1 << 1)
  489. #define FEXTRA (1 << 2)
  490. #define FNAME (1 << 3)
  491. #define FCOMMENT (1 << 4)
  492. private int
  493. uncompressgzipped(const unsigned char *old, unsigned char **newch,
  494. size_t bytes_max, size_t *n)
  495. {
  496. unsigned char flg = old[3];
  497. size_t data_start = 10;
  498. if (flg & FEXTRA) {
  499. if (data_start + 1 >= *n)
  500. goto err;
  501. data_start += 2 + old[data_start] + old[data_start + 1] * 256;
  502. }
  503. if (flg & FNAME) {
  504. while(data_start < *n && old[data_start])
  505. data_start++;
  506. data_start++;
  507. }
  508. if (flg & FCOMMENT) {
  509. while(data_start < *n && old[data_start])
  510. data_start++;
  511. data_start++;
  512. }
  513. if (flg & FHCRC)
  514. data_start += 2;
  515. if (data_start >= *n)
  516. goto err;
  517. *n -= data_start;
  518. old += data_start;
  519. return uncompresszlib(old, newch, bytes_max, n, 0);
  520. err:
  521. return makeerror(newch, n, "File too short");
  522. }
  523. private int
  524. uncompresszlib(const unsigned char *old, unsigned char **newch,
  525. size_t bytes_max, size_t *n, int zlib)
  526. {
  527. int rc;
  528. z_stream z;
  529. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  530. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  531. z.next_in = CCAST(Bytef *, old);
  532. z.avail_in = CAST(uint32_t, *n);
  533. z.next_out = *newch;
  534. z.avail_out = CAST(unsigned int, bytes_max);
  535. z.zalloc = Z_NULL;
  536. z.zfree = Z_NULL;
  537. z.opaque = Z_NULL;
  538. /* LINTED bug in header macro */
  539. rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
  540. if (rc != Z_OK)
  541. goto err;
  542. rc = inflate(&z, Z_SYNC_FLUSH);
  543. if (rc != Z_OK && rc != Z_STREAM_END)
  544. goto err;
  545. *n = CAST(size_t, z.total_out);
  546. rc = inflateEnd(&z);
  547. if (rc != Z_OK)
  548. goto err;
  549. /* let's keep the nul-terminate tradition */
  550. (*newch)[*n] = '\0';
  551. return OKDATA;
  552. err:
  553. strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
  554. *n = strlen(RCAST(char *, *newch));
  555. return ERRDATA;
  556. }
  557. #endif
  558. #ifdef BUILTIN_BZLIB
  559. private int
  560. uncompressbzlib(const unsigned char *old, unsigned char **newch,
  561. size_t bytes_max, size_t *n)
  562. {
  563. int rc;
  564. bz_stream bz;
  565. memset(&bz, 0, sizeof(bz));
  566. rc = BZ2_bzDecompressInit(&bz, 0, 0);
  567. if (rc != BZ_OK)
  568. goto err;
  569. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  570. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  571. bz.next_in = CCAST(char *, RCAST(const char *, old));
  572. bz.avail_in = CAST(uint32_t, *n);
  573. bz.next_out = RCAST(char *, *newch);
  574. bz.avail_out = CAST(unsigned int, bytes_max);
  575. rc = BZ2_bzDecompress(&bz);
  576. if (rc != BZ_OK && rc != BZ_STREAM_END)
  577. goto err;
  578. /* Assume byte_max is within 32bit */
  579. /* assert(bz.total_out_hi32 == 0); */
  580. *n = CAST(size_t, bz.total_out_lo32);
  581. rc = BZ2_bzDecompressEnd(&bz);
  582. if (rc != BZ_OK)
  583. goto err;
  584. /* let's keep the nul-terminate tradition */
  585. (*newch)[*n] = '\0';
  586. return OKDATA;
  587. err:
  588. snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
  589. *n = strlen(RCAST(char *, *newch));
  590. return ERRDATA;
  591. }
  592. #endif
  593. #ifdef BUILTIN_XZLIB
  594. private int
  595. uncompressxzlib(const unsigned char *old, unsigned char **newch,
  596. size_t bytes_max, size_t *n)
  597. {
  598. int rc;
  599. lzma_stream xz;
  600. memset(&xz, 0, sizeof(xz));
  601. rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
  602. if (rc != LZMA_OK)
  603. goto err;
  604. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  605. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  606. xz.next_in = CCAST(const uint8_t *, old);
  607. xz.avail_in = CAST(uint32_t, *n);
  608. xz.next_out = RCAST(uint8_t *, *newch);
  609. xz.avail_out = CAST(unsigned int, bytes_max);
  610. rc = lzma_code(&xz, LZMA_RUN);
  611. if (rc != LZMA_OK && rc != LZMA_STREAM_END)
  612. goto err;
  613. *n = CAST(size_t, xz.total_out);
  614. lzma_end(&xz);
  615. /* let's keep the nul-terminate tradition */
  616. (*newch)[*n] = '\0';
  617. return OKDATA;
  618. err:
  619. snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
  620. *n = strlen(RCAST(char *, *newch));
  621. return ERRDATA;
  622. }
  623. #endif
  624. static int
  625. makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
  626. {
  627. char *msg;
  628. va_list ap;
  629. int rv;
  630. va_start(ap, fmt);
  631. rv = vasprintf(&msg, fmt, ap);
  632. va_end(ap);
  633. if (rv < 0) {
  634. *buf = NULL;
  635. *len = 0;
  636. return NODATA;
  637. }
  638. *buf = RCAST(unsigned char *, msg);
  639. *len = strlen(msg);
  640. return ERRDATA;
  641. }
  /*
   * Close descriptor slot i of the array fd and mark the slot as closed
   * (-1).  A slot that already holds -1 is left alone.
   */
  static void
  closefd(int *fd, size_t i)
  {
      if (fd[i] != -1) {
          (void) close(fd[i]);
          fd[i] = -1;
      }
  }
  /*
   * Close both ends of a pipe pair.  Each of the two slots that is still
   * open is closed and set to -1.
   */
  static void
  closep(int *fd)
  {
      int *const end = fd + 2;

      for (; fd != end; fd++) {
          if (*fd == -1)
              continue;
          (void) close(*fd);
          *fd = -1;
      }
  }
  /*
   * Make descriptor fd available as descriptor i and drop the original.
   *
   * With HAVE_POSIX_SPAWNP, v is a posix_spawn_file_actions_t and the
   * dup2/close pair is only recorded in it.  Otherwise the descriptors of
   * the current process are changed directly; in that configuration the
   * function runs in the child created by vfork().  Nothing happens when fd
   * already equals i.
   */
  static void
  movedesc(void *v, int i, int fd)
  {
      if (fd == i)
          return; /* "no dup was necessary" */
  #ifdef HAVE_POSIX_SPAWNP
      posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
      posix_spawn_file_actions_adddup2(fa, fd, i);
      posix_spawn_file_actions_addclose(fa, fd);
  #else
      (void)v;
      if (dup2(fd, i) == -1) {
          DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
          /*
           * _exit(), not exit(), because of vfork: exit() would run the
           * parent's atexit handlers and flush its stdio buffers from
           * the child.
           */
          _exit(1);
      }
      close(fd);
  #endif
  }
  /*
   * Close descriptor fd on behalf of the decompressor process.  With
   * HAVE_POSIX_SPAWNP the close is recorded in the file actions object v;
   * otherwise fd is closed right away and v is ignored.
   */
  static void
  closedesc(void *v, int fd)
  {
  #ifndef HAVE_POSIX_SPAWNP
      (void)v;
      close(fd);
  #else
      posix_spawn_file_actions_t *actions =
          RCAST(posix_spawn_file_actions_t *, v);

      posix_spawn_file_actions_addclose(actions, fd);
  #endif
  }
  684. static void
  685. handledesc(void *v, int fd, int fdp[3][2])
  686. {
  687. if (fd != -1) {
  688. (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
  689. movedesc(v, STDIN_FILENO, fd);
  690. } else {
  691. movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
  692. if (fdp[STDIN_FILENO][1] > 2)
  693. closedesc(v, fdp[STDIN_FILENO][1]);
  694. }
  695. file_clear_closexec(STDIN_FILENO);
  696. ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
  697. movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
  698. if (fdp[STDOUT_FILENO][0] > 2)
  699. closedesc(v, fdp[STDOUT_FILENO][0]);
  700. file_clear_closexec(STDOUT_FILENO);
  701. movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
  702. if (fdp[STDERR_FILENO][0] > 2)
  703. closedesc(v, fdp[STDERR_FILENO][0]);
  704. file_clear_closexec(STDERR_FILENO);
  705. }
  706. static pid_t
  707. writechild(int fd, const void *old, size_t n)
  708. {
  709. pid_t pid;
  710. /*
  711. * fork again, to avoid blocking because both
  712. * pipes filled
  713. */
  714. pid = fork();
  715. if (pid == -1) {
  716. DPRINTF("Fork failed (%s)\n", strerror(errno));
  717. exit(1);
  718. }
  719. if (pid == 0) {
  720. /* child */
  721. if (swrite(fd, old, n) != CAST(ssize_t, n)) {
  722. DPRINTF("Write failed (%s)\n", strerror(errno));
  723. exit(1);
  724. }
  725. exit(0);
  726. }
  727. /* parent */
  728. return pid;
  729. }
  730. static ssize_t
  731. filter_error(unsigned char *ubuf, ssize_t n)
  732. {
  733. char *p;
  734. char *buf;
  735. ubuf[n] = '\0';
  736. buf = RCAST(char *, ubuf);
  737. while (isspace(CAST(unsigned char, *buf)))
  738. buf++;
  739. DPRINTF("Filter error[[[%s]]]\n", buf);
  740. if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
  741. *p = '\0';
  742. if ((p = strchr(CAST(char *, buf), ';')) != NULL)
  743. *p = '\0';
  744. if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
  745. ++p;
  746. while (isspace(CAST(unsigned char, *p)))
  747. p++;
  748. n = strlen(p);
  749. memmove(ubuf, p, CAST(size_t, n + 1));
  750. }
  751. DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
  752. if (islower(*ubuf))
  753. *ubuf = toupper(*ubuf);
  754. return n;
  755. }
  756. private const char *
  757. methodname(size_t method)
  758. {
  759. switch (method) {
  760. #ifdef BUILTIN_DECOMPRESS
  761. case METH_FROZEN:
  762. case METH_ZLIB:
  763. return "zlib";
  764. #endif
  765. #ifdef BUILTIN_BZLIB
  766. case METH_BZIP:
  767. return "bzlib";
  768. #endif
  769. #ifdef BUILTIN_XZLIB
  770. case METH_XZ:
  771. case METH_LZMA:
  772. return "xzlib";
  773. #endif
  774. default:
  775. return compr[method].argv[0];
  776. }
  777. }
  778. private int
  779. uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
  780. unsigned char **newch, size_t* n)
  781. {
  782. int fdp[3][2];
  783. int status, rv, w;
  784. pid_t pid;
  785. pid_t writepid = -1;
  786. size_t i;
  787. ssize_t r;
  788. char *const *args;
  789. #ifdef HAVE_POSIX_SPAWNP
  790. posix_spawn_file_actions_t fa;
  791. #endif
  792. switch (method) {
  793. #ifdef BUILTIN_DECOMPRESS
  794. case METH_FROZEN:
  795. return uncompressgzipped(old, newch, bytes_max, n);
  796. case METH_ZLIB:
  797. return uncompresszlib(old, newch, bytes_max, n, 1);
  798. #endif
  799. #ifdef BUILTIN_BZLIB
  800. case METH_BZIP:
  801. return uncompressbzlib(old, newch, bytes_max, n);
  802. #endif
  803. #ifdef BUILTIN_XZLIB
  804. case METH_XZ:
  805. case METH_LZMA:
  806. return uncompressxzlib(old, newch, bytes_max, n);
  807. #endif
  808. default:
  809. break;
  810. }
  811. (void)fflush(stdout);
  812. (void)fflush(stderr);
  813. for (i = 0; i < __arraycount(fdp); i++)
  814. fdp[i][0] = fdp[i][1] = -1;
  815. /*
  816. * There are multithreaded users who run magic_file()
  817. * from dozens of threads. If two parallel magic_file() calls
  818. * analyze two large compressed files, both will spawn
  819. * an uncompressing child here, which writes out uncompressed data.
  820. * We read some portion, then close the pipe, then waitpid() the child.
  821. * If uncompressed data is larger, child shound get EPIPE and exit.
  822. * However, with *parallel* calls OTHER child may unintentionally
  823. * inherit pipe fds, thus keeping pipe open and making writes in
  824. * our child block instead of failing with EPIPE!
  825. * (For the bug to occur, two threads must mutually inherit their pipes,
  826. * and both must have large outputs. Thus it happens not that often).
  827. * To avoid this, be sure to create pipes with O_CLOEXEC.
  828. */
  829. if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
  830. file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
  831. file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
  832. closep(fdp[STDIN_FILENO]);
  833. closep(fdp[STDOUT_FILENO]);
  834. return makeerror(newch, n, "Cannot create pipe, %s",
  835. strerror(errno));
  836. }
  837. args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
  838. #ifdef HAVE_POSIX_SPAWNP
  839. posix_spawn_file_actions_init(&fa);
  840. handledesc(&fa, fd, fdp);
  841. status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
  842. args, NULL);
  843. posix_spawn_file_actions_destroy(&fa);
  844. if (status == -1) {
  845. return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
  846. compr[method].argv[0], strerror(errno));
  847. }
  848. #else
  849. /* For processes with large mapped virtual sizes, vfork
  850. * may be _much_ faster (10-100 times) than fork.
  851. */
  852. pid = vfork();
  853. if (pid == -1) {
  854. return makeerror(newch, n, "Cannot vfork, %s",
  855. strerror(errno));
  856. }
  857. if (pid == 0) {
  858. /* child */
  859. /* Note: we are after vfork, do not modify memory
  860. * in a way which confuses parent. In particular,
  861. * do not modify fdp[i][j].
  862. */
  863. handledesc(NULL, fd, fdp);
  864. (void)execvp(compr[method].argv[0], args);
  865. dprintf(STDERR_FILENO, "exec `%s' failed, %s",
  866. compr[method].argv[0], strerror(errno));
  867. _exit(1); /* _exit(), not exit(), because of vfork */
  868. }
  869. #endif
  870. /* parent */
  871. /* Close write sides of child stdout/err pipes */
  872. for (i = 1; i < __arraycount(fdp); i++)
  873. closefd(fdp[i], 1);
  874. /* Write the buffer data to child stdin, if we don't have fd */
  875. if (fd == -1) {
  876. closefd(fdp[STDIN_FILENO], 0);
  877. writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
  878. closefd(fdp[STDIN_FILENO], 1);
  879. }
  880. *newch = CAST(unsigned char *, malloc(bytes_max + 1));
  881. if (*newch == NULL) {
  882. rv = makeerror(newch, n, "No buffer, %s",
  883. strerror(errno));
  884. goto err;
  885. }
  886. rv = OKDATA;
  887. errno = 0;
  888. r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
  889. if (r == 0 && errno == 0)
  890. goto ok;
  891. if (r <= 0) {
  892. DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
  893. r != -1 ? strerror(errno) : "no data");
  894. rv = ERRDATA;
  895. if (r == 0 &&
  896. (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
  897. {
  898. r = filter_error(*newch, r);
  899. goto ok;
  900. }
  901. free(*newch);
  902. if (r == 0)
  903. rv = makeerror(newch, n, "Read failed, %s",
  904. strerror(errno));
  905. else
  906. rv = makeerror(newch, n, "No data");
  907. goto err;
  908. }
  909. ok:
  910. *n = r;
  911. /* NUL terminate, as every buffer is handled here. */
  912. (*newch)[*n] = '\0';
  913. err:
  914. closefd(fdp[STDIN_FILENO], 1);
  915. closefd(fdp[STDOUT_FILENO], 0);
  916. closefd(fdp[STDERR_FILENO], 0);
  917. w = waitpid(pid, &status, 0);
  918. wait_err:
  919. if (w == -1) {
  920. free(*newch);
  921. rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
  922. DPRINTF("Child wait return %#x\n", status);
  923. } else if (!WIFEXITED(status)) {
  924. DPRINTF("Child not exited (%#x)\n", status);
  925. } else if (WEXITSTATUS(status) != 0) {
  926. DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
  927. }
  928. if (writepid > 0) {
  929. /* _After_ we know decompressor has exited, our input writer
  930. * definitely will exit now (at worst, writing fails in it,
  931. * since output fd is closed now on the reading size).
  932. */
  933. w = waitpid(writepid, &status, 0);
  934. writepid = -1;
  935. goto wait_err;
  936. }
  937. closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
  938. DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
  939. return rv;
  940. }
  941. #endif