compress.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
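  /*
   * compress routines:
   *    file_zmagic() - returns 0 if the buffer is not recognized as
   *        compressed; otherwise uncompresses it and prints information
   *        about the contents (1 on success, -1 on error)
   *    uncompressbuf(fd, bytes_max, method, nofork, old, newch, n) -
   *        uncompress old into *newch using method; the new size is
   *        stored in *n and OKDATA, NODATA or ERRDATA is returned
   */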
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
  38. #endif
  39. #include "magic.h"
  40. #include <stdlib.h>
  41. #ifdef HAVE_UNISTD_H
  42. #include <unistd.h>
  43. #endif
  44. #ifdef HAVE_SPAWN_H
  45. #include <spawn.h>
  46. #endif
  47. #include <string.h>
  48. #include <errno.h>
  49. #include <ctype.h>
  50. #include <stdarg.h>
  51. #include <signal.h>
  52. #ifndef HAVE_SIG_T
  53. typedef void (*sig_t)(int);
  54. #endif /* HAVE_SIG_T */
  55. #ifdef HAVE_SYS_IOCTL_H
  56. #include <sys/ioctl.h>
  57. #endif
  58. #ifdef HAVE_SYS_WAIT_H
  59. #include <sys/wait.h>
  60. #endif
  61. #if defined(HAVE_SYS_TIME_H)
  62. #include <sys/time.h>
  63. #endif
  64. #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
  65. #define BUILTIN_DECOMPRESS
  66. #include <zlib.h>
  67. #endif
  68. #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
  69. #define BUILTIN_BZLIB
  70. #include <bzlib.h>
  71. #endif
  72. #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
  73. #define BUILTIN_XZLIB
  74. #include <lzma.h>
  75. #endif
  76. #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
  77. #define BUILTIN_ZSTDLIB
  78. #include <zstd.h>
  79. #include <zstd_errors.h>
  80. #endif
  81. #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
  82. #define BUILTIN_LZLIB
  83. #include <lzlib.h>
  84. #endif
  #ifdef DEBUG
  /*
   * Debug tracing: DPRINTF() formats its arguments printf-style and writes
   * them to /dev/tty, which is opened on first use. The process aborts if
   * the terminal cannot be opened. Without DEBUG, DPRINTF() expands to
   * nothing and its arguments are not evaluated.
   */
  int tty = -1;
  #define DPRINTF(...) do { \
      if (tty == -1 && (tty = open("/dev/tty", O_RDWR)) == -1) \
          abort(); \
      dprintf(tty, __VA_ARGS__); \
  } while (/*CONSTCOND*/0)
  #else
  #define DPRINTF(...)
  #endif
  97. #ifdef ZLIBSUPPORT
  98. /*
  99. * The following python code is not really used because ZLIBSUPPORT is only
  100. * defined if we have a built-in zlib, and the built-in zlib handles that.
  101. * That is not true for android where we have zlib.h and not -lz.
  102. */
  /* Python one-liner that reads stdin, zlib-decompresses it and writes stdout. */
  static const char zlibcode[] =
      "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

  /* argv of the external zlib fallback: python -c <zlibcode> */
  static const char *zlib_args[] = {
      "python",
      "-c",
      zlibcode,
      NULL
  };
  /*
   * Check whether buf starts with a plausible zlib (RFC 1950) stream header.
   *
   * buf must point at two or more readable bytes: CMF followed by FLG.
   * Returns 1 when the low nibble of CMF is 8 (deflate), bit 7 of CMF is
   * clear, and CMF * 256 + FLG is a multiple of 31; returns 0 otherwise.
   */
  static int
  zlibcmp(const unsigned char *buf)
  {
      unsigned int check;

      if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
          return 0;
      /*
       * The check value is defined on the byte order of the stream (CMF is
       * the most significant byte). Building it with shifts is independent
       * of host endianness, so no endianness probe is needed; probing and
       * swapping the operands gave the wrong value on big-endian hosts.
       */
      check = (buf[0] << 8) | buf[1];
      return check % 31 == 0;
  }
  121. #endif
  /*
   * Heuristic matcher for the lzma entry of compr[].
   *
   * buf must hold at least 13 readable bytes (index 12 is inspected).
   * Returns 1 when byte 0 is 0x5d, bytes 1 and 2 are zero and byte 12 is
   * either 0x00 or 0xff; returns 0 otherwise.
   */
  static int
  lzmacmp(const unsigned char *buf)
  {
      const int start_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

      if (!start_ok)
          return 0;
      return buf[12] == 0 || buf[12] == 0xff;
  }
  /*
   * Command lines (NULL-terminated argv vectors) of the external
   * decompressors referenced from compr[]. lzip shares gzip's flags.
   */
  #define gzip_flags "-cd"
  #define lzip_flags gzip_flags

  static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
  static const char *uncompress_args[] = { "uncompress", "-c", NULL };
  static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
  static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
  static const char *xz_args[] = { "xz", "-cd", NULL };
  static const char *lrzip_args[] = { "lrzip", "-qdf", "-", NULL };
  static const char *lz4_args[] = { "lz4", "-cd", NULL };
  static const char *zstd_args[] = { "zstd", "-cd", NULL };
  157. #define do_zlib NULL
  158. #define do_bzlib NULL
  159. file_private const struct {
  160. union {
  161. const char *magic;
  162. int (*func)(const unsigned char *);
  163. } u;
  164. int maglen;
  165. const char **argv;
  166. void *unused;
  167. } compr[] = {
  168. #define METH_FROZEN 2
  169. #define METH_BZIP 7
  170. #define METH_XZ 9
  171. #define METH_LZIP 8
  172. #define METH_ZSTD 12
  173. #define METH_LZMA 13
  174. #define METH_ZLIB 14
  175. { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
  176. /* Uncompress can get stuck; so use gzip first if we have it
  177. * Idea from Damien Clark, thanks! */
  178. { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
  179. { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
  180. { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
  181. { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
  182. /* the standard pack utilities do not accept standard input */
  183. { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
  184. { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */
  185. /* ...only first file examined */
  186. { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
  187. { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
  188. { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
  189. { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
  190. { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
  191. { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
  192. { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
  193. #ifdef ZLIBSUPPORT
  194. { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
  195. #endif
  196. };
  197. #define OKDATA 0
  198. #define NODATA 1
  199. #define ERRDATA 2
  200. file_private ssize_t swrite(int, const void *, size_t);
  201. #if HAVE_FORK
  202. file_private size_t ncompr = __arraycount(compr);
  203. file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
  204. unsigned char **, size_t *);
  205. #ifdef BUILTIN_DECOMPRESS
  206. file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
  207. size_t *, int);
  208. file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
  209. size_t *, int);
  210. #endif
  211. #ifdef BUILTIN_BZLIB
  212. file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
  213. size_t *, int);
  214. #endif
  215. #ifdef BUILTIN_XZLIB
  216. file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
  217. size_t *, int);
  218. #endif
  219. #ifdef BUILTIN_ZSTDLIB
  220. file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
  221. size_t *, int);
  222. #endif
  223. #ifdef BUILTIN_LZLIB
  224. file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
  225. size_t *, int);
  226. #endif
  227. static int makeerror(unsigned char **, size_t *, const char *, ...)
  228. __attribute__((__format__(__printf__, 3, 4)));
  229. file_private const char *methodname(size_t);
  230. file_private int
  231. format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
  232. {
  233. unsigned char *p;
  234. int mime = ms->flags & MAGIC_MIME;
  235. if (!mime)
  236. return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
  237. for (p = buf; *p; p++)
  238. if (!isalnum(*p))
  239. *p = '-';
  240. return file_printf(ms, "application/x-decompression-error-%s-%s",
  241. methodname(i), buf);
  242. }
  243. file_protected int
  244. file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
  245. {
  246. unsigned char *newbuf = NULL;
  247. size_t i, nsz;
  248. char *rbuf;
  249. file_pushbuf_t *pb;
  250. int urv, prv, rv = 0;
  251. int mime = ms->flags & MAGIC_MIME;
  252. int fd = b->fd;
  253. const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
  254. size_t nbytes = b->flen;
  255. int sa_saved = 0;
  256. struct sigaction sig_act;
  257. if ((ms->flags & MAGIC_COMPRESS) == 0)
  258. return 0;
  259. for (i = 0; i < ncompr; i++) {
  260. int zm;
  261. if (nbytes < CAST(size_t, abs(compr[i].maglen)))
  262. continue;
  263. if (compr[i].maglen < 0) {
  264. zm = (*compr[i].u.func)(buf);
  265. } else {
  266. zm = memcmp(buf, compr[i].u.magic,
  267. CAST(size_t, compr[i].maglen)) == 0;
  268. }
  269. if (!zm)
  270. continue;
  271. /* Prevent SIGPIPE death if child dies unexpectedly */
  272. if (!sa_saved) {
  273. //We can use sig_act for both new and old, but
  274. struct sigaction new_act;
  275. memset(&new_act, 0, sizeof(new_act));
  276. new_act.sa_handler = SIG_IGN;
  277. sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
  278. }
  279. nsz = nbytes;
  280. free(newbuf);
  281. urv = uncompressbuf(fd, ms->bytes_max, i,
  282. (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
  283. DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
  284. (char *)newbuf, nsz);
  285. switch (urv) {
  286. case OKDATA:
  287. case ERRDATA:
  288. ms->flags &= ~MAGIC_COMPRESS;
  289. if (urv == ERRDATA)
  290. prv = format_decompression_error(ms, i, newbuf);
  291. else
  292. prv = file_buffer(ms, -1, NULL, name, newbuf,
  293. nsz);
  294. if (prv == -1)
  295. goto error;
  296. rv = 1;
  297. if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
  298. goto out;
  299. if (mime != MAGIC_MIME && mime != 0)
  300. goto out;
  301. if ((file_printf(ms,
  302. mime ? " compressed-encoding=" : " (")) == -1)
  303. goto error;
  304. if ((pb = file_push_buffer(ms)) == NULL)
  305. goto error;
  306. /*
  307. * XXX: If file_buffer fails here, we overwrite
  308. * the compressed text. FIXME.
  309. */
  310. if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
  311. {
  312. if (file_pop_buffer(ms, pb) != NULL)
  313. abort();
  314. goto error;
  315. }
  316. if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
  317. if (file_printf(ms, "%s", rbuf) == -1) {
  318. free(rbuf);
  319. goto error;
  320. }
  321. free(rbuf);
  322. }
  323. if (!mime && file_printf(ms, ")") == -1)
  324. goto error;
  325. /*FALLTHROUGH*/
  326. case NODATA:
  327. break;
  328. default:
  329. abort();
  330. /*NOTREACHED*/
  331. error:
  332. rv = -1;
  333. break;
  334. }
  335. }
  336. out:
  337. DPRINTF("rv = %d\n", rv);
  338. if (sa_saved && sig_act.sa_handler != SIG_IGN)
  339. (void)sigaction(SIGPIPE, &sig_act, NULL);
  340. free(newbuf);
  341. ms->flags |= MAGIC_COMPRESS;
  342. DPRINTF("Zmagic returns %d\n", rv);
  343. return rv;
  344. }
  345. #endif
  /*
   * `safe' write for sockets and pipes.
   *
   * Keeps calling write() until all n bytes of buf have been written,
   * retrying when a call is interrupted (EINTR). Returns n on success and
   * -1 on any other write() failure.
   */
  file_private ssize_t
  swrite(int fd, const void *buf, size_t n)
  {
      const char *src = CAST(const char *, buf);
      size_t left = n;

      do {
          ssize_t done = write(fd, src, left);

          if (done == -1) {
              if (errno == EINTR)
                  continue;
              return -1;
          }
          left -= done;
          src += done;
      } while (left > 0);
      return n;
  }
  /*
   * `safe' read for sockets and pipes.
   *
   * Reads up to n bytes from fd into buf, retrying reads interrupted by
   * EINTR. Returns the number of bytes read (short only at end of input),
   * or -1 on a read() error.
   *
   * When FIONREAD is available, fd is not stdin and canbepipe is set, the
   * descriptor is probed first: if no bytes are reported as pending, select()
   * is used with a 100ms timeout and the function may return 0 without
   * reading. If FIONREAD reports fewer than n pending bytes, the request is
   * reduced to that amount.
   */
  file_protected ssize_t
  sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
  {
      size_t want = n;
      ssize_t got;
  #if defined(FIONREAD) && !defined(__MINGW32__)
      int avail = 0;

      if (fd != STDIN_FILENO) {
          if (canbepipe &&
              (ioctl(fd, FIONREAD, &avail) == -1 || avail == 0)) {
  #ifdef FD_ZERO
              ssize_t tries;

              for (tries = 0;; tries++) {
                  struct timeval tout = {0, 100 * 1000};
                  fd_set check;
                  int selrv;

                  FD_ZERO(&check);
                  FD_SET(fd, &check);

                  /*
                   * Avoid soft deadlock: do not read if there
                   * is nothing to read from sockets and pipes.
                   */
                  selrv = select(fd + 1, &check, NULL, NULL, &tout);
                  /*
                   * NOTE(review): a select() failure with any other
                   * errno also falls through to the next iteration,
                   * and a timeout only returns 0 once tries has
                   * reached 5; an earlier timeout leaves the loop.
                   */
                  if (selrv == -1) {
                      if (errno == EINTR || errno == EAGAIN)
                          continue;
                  } else if (selrv == 0 && tries >= 5) {
                      return 0;
                  } else
                      break;
              }
  #endif
              (void)ioctl(fd, FIONREAD, &avail);
          }

          if (avail > 0 && CAST(size_t, avail) < n) {
              n = avail;
              want = n;
          }
      }
  #endif

      do {
          got = read(fd, buf, n);
          if (got == -1) {
              if (errno == EINTR)
                  continue;
              return -1;
          }
          if (got == 0)
              return want - n;    /* end of input: report what we have */
          n -= got;
          buf = CAST(char *, CCAST(void *, buf)) + got;
      } while (n > 0);
      return want;
  }
  429. file_protected int
  430. file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
  431. size_t nbytes)
  432. {
  433. char buf[4096];
  434. ssize_t r;
  435. int tfd;
  436. #ifdef WIN32
  437. const char *t;
  438. buf[0] = '\0';
  439. if ((t = getenv("TEMP")) != NULL)
  440. (void)strlcpy(buf, t, sizeof(buf));
  441. else if ((t = getenv("TMP")) != NULL)
  442. (void)strlcpy(buf, t, sizeof(buf));
  443. else if ((t = getenv("TMPDIR")) != NULL)
  444. (void)strlcpy(buf, t, sizeof(buf));
  445. if (buf[0] != '\0')
  446. (void)strlcat(buf, "/", sizeof(buf));
  447. (void)strlcat(buf, "file.XXXXXX", sizeof(buf));
  448. #else
  449. (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
  450. #endif
  451. #ifndef HAVE_MKSTEMP
  452. {
  453. char *ptr = mktemp(buf);
  454. tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
  455. r = errno;
  456. (void)unlink(ptr);
  457. errno = r;
  458. }
  459. #else
  460. {
  461. int te;
  462. mode_t ou = umask(0);
  463. tfd = mkstemp(buf);
  464. (void)umask(ou);
  465. te = errno;
  466. (void)unlink(buf);
  467. errno = te;
  468. }
  469. #endif
  470. if (tfd == -1) {
  471. file_error(ms, errno,
  472. "cannot create temporary file for pipe copy");
  473. return -1;
  474. }
  475. if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
  476. r = 1;
  477. else {
  478. while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
  479. if (swrite(tfd, buf, CAST(size_t, r)) != r)
  480. break;
  481. }
  482. switch (r) {
  483. case -1:
  484. file_error(ms, errno, "error copying from pipe to temp file");
  485. return -1;
  486. case 0:
  487. break;
  488. default:
  489. file_error(ms, errno, "error while writing to temp file");
  490. return -1;
  491. }
  492. /*
  493. * We duplicate the file descriptor, because fclose on a
  494. * tmpfile will delete the file, but any open descriptors
  495. * can still access the phantom inode.
  496. */
  497. if ((fd = dup2(tfd, fd)) == -1) {
  498. file_error(ms, errno, "could not dup descriptor for temp file");
  499. return -1;
  500. }
  501. (void)close(tfd);
  502. if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
  503. file_badseek(ms);
  504. return -1;
  505. }
  506. return fd;
  507. }
  508. #if HAVE_FORK
  509. #ifdef BUILTIN_DECOMPRESS
  510. #define FHCRC (1 << 1)
  511. #define FEXTRA (1 << 2)
  512. #define FNAME (1 << 3)
  513. #define FCOMMENT (1 << 4)
  514. file_private int
  515. uncompressgzipped(const unsigned char *old, unsigned char **newch,
  516. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  517. {
  518. unsigned char flg;
  519. size_t data_start = 10;
  520. if (*n < 4) {
  521. goto err;
  522. }
  523. flg = old[3];
  524. if (flg & FEXTRA) {
  525. if (data_start + 1 >= *n)
  526. goto err;
  527. data_start += 2 + old[data_start] + old[data_start + 1] * 256;
  528. }
  529. if (flg & FNAME) {
  530. while(data_start < *n && old[data_start])
  531. data_start++;
  532. data_start++;
  533. }
  534. if (flg & FCOMMENT) {
  535. while(data_start < *n && old[data_start])
  536. data_start++;
  537. data_start++;
  538. }
  539. if (flg & FHCRC)
  540. data_start += 2;
  541. if (data_start >= *n)
  542. goto err;
  543. *n -= data_start;
  544. old += data_start;
  545. return uncompresszlib(old, newch, bytes_max, n, 0);
  546. err:
  547. return makeerror(newch, n, "File too short");
  548. }
  549. file_private int
  550. uncompresszlib(const unsigned char *old, unsigned char **newch,
  551. size_t bytes_max, size_t *n, int zlib)
  552. {
  553. int rc;
  554. z_stream z;
  555. DPRINTF("builtin zlib decompression\n");
  556. z.next_in = CCAST(Bytef *, old);
  557. z.avail_in = CAST(uint32_t, *n);
  558. z.next_out = *newch;
  559. z.avail_out = CAST(unsigned int, bytes_max);
  560. z.zalloc = Z_NULL;
  561. z.zfree = Z_NULL;
  562. z.opaque = Z_NULL;
  563. /* LINTED bug in header macro */
  564. rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
  565. if (rc != Z_OK)
  566. goto err;
  567. rc = inflate(&z, Z_SYNC_FLUSH);
  568. if (rc != Z_OK && rc != Z_STREAM_END) {
  569. inflateEnd(&z);
  570. goto err;
  571. }
  572. *n = CAST(size_t, z.total_out);
  573. rc = inflateEnd(&z);
  574. if (rc != Z_OK)
  575. goto err;
  576. /* let's keep the nul-terminate tradition */
  577. (*newch)[*n] = '\0';
  578. return OKDATA;
  579. err:
  580. return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
  581. }
  582. #endif
  583. #ifdef BUILTIN_BZLIB
  584. file_private int
  585. uncompressbzlib(const unsigned char *old, unsigned char **newch,
  586. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  587. {
  588. int rc;
  589. bz_stream bz;
  590. DPRINTF("builtin bzlib decompression\n");
  591. memset(&bz, 0, sizeof(bz));
  592. rc = BZ2_bzDecompressInit(&bz, 0, 0);
  593. if (rc != BZ_OK)
  594. goto err;
  595. bz.next_in = CCAST(char *, RCAST(const char *, old));
  596. bz.avail_in = CAST(uint32_t, *n);
  597. bz.next_out = RCAST(char *, *newch);
  598. bz.avail_out = CAST(unsigned int, bytes_max);
  599. rc = BZ2_bzDecompress(&bz);
  600. if (rc != BZ_OK && rc != BZ_STREAM_END) {
  601. BZ2_bzDecompressEnd(&bz);
  602. goto err;
  603. }
  604. /* Assume byte_max is within 32bit */
  605. /* assert(bz.total_out_hi32 == 0); */
  606. *n = CAST(size_t, bz.total_out_lo32);
  607. rc = BZ2_bzDecompressEnd(&bz);
  608. if (rc != BZ_OK)
  609. goto err;
  610. /* let's keep the nul-terminate tradition */
  611. (*newch)[*n] = '\0';
  612. return OKDATA;
  613. err:
  614. return makeerror(newch, n, "bunzip error %d", rc);
  615. }
  616. #endif
  617. #ifdef BUILTIN_XZLIB
  618. file_private int
  619. uncompressxzlib(const unsigned char *old, unsigned char **newch,
  620. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  621. {
  622. int rc;
  623. lzma_stream xz;
  624. DPRINTF("builtin xzlib decompression\n");
  625. memset(&xz, 0, sizeof(xz));
  626. rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
  627. if (rc != LZMA_OK)
  628. goto err;
  629. xz.next_in = CCAST(const uint8_t *, old);
  630. xz.avail_in = CAST(uint32_t, *n);
  631. xz.next_out = RCAST(uint8_t *, *newch);
  632. xz.avail_out = CAST(unsigned int, bytes_max);
  633. rc = lzma_code(&xz, LZMA_RUN);
  634. if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
  635. lzma_end(&xz);
  636. goto err;
  637. }
  638. *n = CAST(size_t, xz.total_out);
  639. lzma_end(&xz);
  640. /* let's keep the nul-terminate tradition */
  641. (*newch)[*n] = '\0';
  642. return OKDATA;
  643. err:
  644. return makeerror(newch, n, "unxz error %d", rc);
  645. }
  646. #endif
  647. #ifdef BUILTIN_ZSTDLIB
  648. file_private int
  649. uncompresszstd(const unsigned char *old, unsigned char **newch,
  650. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  651. {
  652. size_t rc;
  653. ZSTD_DStream *zstd;
  654. ZSTD_inBuffer in;
  655. ZSTD_outBuffer out;
  656. DPRINTF("builtin zstd decompression\n");
  657. if ((zstd = ZSTD_createDStream()) == NULL) {
  658. return makeerror(newch, n, "No ZSTD decompression stream, %s",
  659. strerror(errno));
  660. }
  661. rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
  662. if (ZSTD_isError(rc))
  663. goto err;
  664. in.src = CCAST(const void *, old);
  665. in.size = *n;
  666. in.pos = 0;
  667. out.dst = RCAST(void *, *newch);
  668. out.size = bytes_max;
  669. out.pos = 0;
  670. rc = ZSTD_decompressStream(zstd, &out, &in);
  671. if (ZSTD_isError(rc))
  672. goto err;
  673. *n = out.pos;
  674. ZSTD_freeDStream(zstd);
  675. /* let's keep the nul-terminate tradition */
  676. (*newch)[*n] = '\0';
  677. return OKDATA;
  678. err:
  679. ZSTD_freeDStream(zstd);
  680. return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
  681. }
  682. #endif
  683. #ifdef BUILTIN_LZLIB
  684. file_private int
  685. uncompresslzlib(const unsigned char *old, unsigned char **newch,
  686. size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
  687. {
  688. enum LZ_Errno err;
  689. size_t old_remaining = *n;
  690. size_t new_remaining = bytes_max;
  691. size_t total_read = 0;
  692. unsigned char *bufp;
  693. struct LZ_Decoder *dec;
  694. bufp = *newch;
  695. DPRINTF("builtin lzlib decompression\n");
  696. dec = LZ_decompress_open();
  697. if (!dec) {
  698. return makeerror(newch, n, "unable to allocate LZ_Decoder");
  699. }
  700. if (LZ_decompress_errno(dec) != LZ_ok)
  701. goto err;
  702. for (;;) {
  703. // LZ_decompress_read() stops at member boundaries, so we may
  704. // have more than one successful read after writing all data
  705. // we have.
  706. if (old_remaining > 0) {
  707. int wr = LZ_decompress_write(dec, old, old_remaining);
  708. if (wr < 0)
  709. goto err;
  710. old_remaining -= wr;
  711. old += wr;
  712. }
  713. int rd = LZ_decompress_read(dec, bufp, new_remaining);
  714. if (rd > 0) {
  715. new_remaining -= rd;
  716. bufp += rd;
  717. total_read += rd;
  718. }
  719. if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
  720. goto err;
  721. if (new_remaining == 0)
  722. break;
  723. if (old_remaining == 0 && rd == 0)
  724. break;
  725. }
  726. LZ_decompress_close(dec);
  727. *n = total_read;
  728. /* let's keep the nul-terminate tradition */
  729. *bufp = '\0';
  730. return OKDATA;
  731. err:
  732. err = LZ_decompress_errno(dec);
  733. LZ_decompress_close(dec);
  734. return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
  735. }
  736. #endif
  737. static int
  738. makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
  739. {
  740. char *msg;
  741. va_list ap;
  742. int rv;
  743. DPRINTF("Makeerror %s\n", fmt);
  744. free(*buf);
  745. va_start(ap, fmt);
  746. rv = vasprintf(&msg, fmt, ap);
  747. va_end(ap);
  748. if (rv < 0) {
  749. DPRINTF("Makeerror failed");
  750. *buf = NULL;
  751. *len = 0;
  752. return NODATA;
  753. }
  754. *buf = RCAST(unsigned char *, msg);
  755. *len = strlen(msg);
  756. return ERRDATA;
  757. }
  /*
   * Close descriptor slot i of the array fd and mark it as closed (-1).
   * A slot that already holds -1 is left alone.
   */
  static void
  closefd(int *fd, size_t i)
  {
      int *slot = &fd[i];

      if (*slot != -1) {
          (void) close(*slot);
          *slot = -1;
      }
  }
  /*
   * Close both ends of the pipe stored in fd[0] and fd[1]; each slot is set
   * to -1 by closefd().
   */
  static void
  closep(int *fd)
  {
      closefd(fd, 0);
      closefd(fd, 1);
  }
  /*
   * Arrange for descriptor fd to become descriptor i, then drop fd.
   *
   * With posix_spawnp() support, v is a posix_spawn_file_actions_t and the
   * dup2/close pair is only recorded there. Otherwise the calls are made
   * directly, v is not used, and a failing dup2() terminates the process.
   * Nothing happens when fd already equals i.
   */
  static void
  movedesc(void *v, int i, int fd)
  {
      if (fd != i) {
  #ifdef HAVE_POSIX_SPAWNP
          posix_spawn_file_actions_t *fa =
              RCAST(posix_spawn_file_actions_t *, v);

          posix_spawn_file_actions_adddup2(fa, fd, i);
          posix_spawn_file_actions_addclose(fa, fd);
  #else
          (void)v;
          if (dup2(fd, i) == -1) {
              DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
              exit(EXIT_FAILURE);
          }
          close(fd);
  #endif
      }
      /* else: "no dup was necessary" */
  }
  /*
   * Close descriptor fd: recorded in the posix_spawn file actions v when
   * posix_spawnp() is available, done immediately otherwise (v is then
   * not used).
   */
  static void
  closedesc(void *v, int fd)
  {
  #ifdef HAVE_POSIX_SPAWNP
      posix_spawn_file_actions_addclose(
          RCAST(posix_spawn_file_actions_t *, v), fd);
  #else
      (void)v;
      close(fd);
  #endif
  }
  800. static void
  801. handledesc(void *v, int fd, int fdp[3][2])
  802. {
  803. if (fd != -1) {
  804. (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
  805. movedesc(v, STDIN_FILENO, fd);
  806. } else {
  807. movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
  808. if (fdp[STDIN_FILENO][1] > 2)
  809. closedesc(v, fdp[STDIN_FILENO][1]);
  810. }
  811. file_clear_closexec(STDIN_FILENO);
  812. ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
  813. movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
  814. if (fdp[STDOUT_FILENO][0] > 2)
  815. closedesc(v, fdp[STDOUT_FILENO][0]);
  816. file_clear_closexec(STDOUT_FILENO);
  817. movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
  818. if (fdp[STDERR_FILENO][0] > 2)
  819. closedesc(v, fdp[STDERR_FILENO][0]);
  820. file_clear_closexec(STDERR_FILENO);
  821. }
  822. static pid_t
  823. writechild(int fd, const void *old, size_t n)
  824. {
  825. pid_t pid;
  826. /*
  827. * fork again, to avoid blocking because both
  828. * pipes filled
  829. */
  830. pid = fork();
  831. if (pid == -1) {
  832. DPRINTF("Fork failed (%s)\n", strerror(errno));
  833. return -1;
  834. }
  835. if (pid == 0) {
  836. /* child */
  837. if (swrite(fd, old, n) != CAST(ssize_t, n)) {
  838. DPRINTF("Write failed (%s)\n", strerror(errno));
  839. exit(EXIT_FAILURE);
  840. }
  841. exit(EXIT_SUCCESS);
  842. }
  843. /* parent */
  844. return pid;
  845. }
  /*
   * Reduce the decompressor output in ubuf to a short one-line message.
   *
   * ubuf holds n bytes and must have room for one more, since ubuf[n] is
   * overwritten with NUL. Leading whitespace is skipped, the text is cut at
   * the first newline and at the first ';', and if a ':' remains only the
   * text after the last ':' (and any whitespace following it) is kept. The
   * result is moved to the start of ubuf and a lower-case first letter is
   * capitalised.
   *
   * Returns the length of the message now stored in ubuf. The message is
   * always moved and measured, also when it contains no ':'; previously that
   * case left the leading whitespace in place and returned the stale input
   * length.
   */
  static ssize_t
  filter_error(unsigned char *ubuf, ssize_t n)
  {
      char *p;
      char *buf;

      ubuf[n] = '\0';
      buf = RCAST(char *, ubuf);
      while (isspace(CAST(unsigned char, *buf)))
          buf++;
      DPRINTF("Filter error[[[%s]]]\n", buf);
      if ((p = strchr(buf, '\n')) != NULL)
          *p = '\0';
      if ((p = strchr(buf, ';')) != NULL)
          *p = '\0';
      if ((p = strrchr(buf, ':')) != NULL) {
          ++p;
          while (isspace(CAST(unsigned char, *p)))
              p++;
          buf = p;
      }
      /* buf may equal ubuf here; memmove() copes with the overlap */
      n = CAST(ssize_t, strlen(buf));
      memmove(ubuf, buf, CAST(size_t, n) + 1);
      DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
      if (islower(*ubuf))
          *ubuf = CAST(unsigned char, toupper(*ubuf));
      return n;
  }
  872. file_private const char *
  873. methodname(size_t method)
  874. {
  875. switch (method) {
  876. #ifdef BUILTIN_DECOMPRESS
  877. case METH_FROZEN:
  878. case METH_ZLIB:
  879. return "zlib";
  880. #endif
  881. #ifdef BUILTIN_BZLIB
  882. case METH_BZIP:
  883. return "bzlib";
  884. #endif
  885. #ifdef BUILTIN_XZLIB
  886. case METH_XZ:
  887. case METH_LZMA:
  888. return "xzlib";
  889. #endif
  890. #ifdef BUILTIN_ZSTDLIB
  891. case METH_ZSTD:
  892. return "zstd";
  893. #endif
  894. #ifdef BUILTIN_LZLIB
  895. case METH_LZIP:
  896. return "lzlib";
  897. #endif
  898. default:
  899. return compr[method].argv[0];
  900. }
  901. }
  902. file_private int (*
  903. getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
  904. size_t *, int)
  905. {
  906. switch (method) {
  907. #ifdef BUILTIN_DECOMPRESS
  908. case METH_FROZEN:
  909. return uncompressgzipped;
  910. case METH_ZLIB:
  911. return uncompresszlib;
  912. #endif
  913. #ifdef BUILTIN_BZLIB
  914. case METH_BZIP:
  915. return uncompressbzlib;
  916. #endif
  917. #ifdef BUILTIN_XZLIB
  918. case METH_XZ:
  919. case METH_LZMA:
  920. return uncompressxzlib;
  921. #endif
  922. #ifdef BUILTIN_ZSTDLIB
  923. case METH_ZSTD:
  924. return uncompresszstd;
  925. #endif
  926. #ifdef BUILTIN_LZLIB
  927. case METH_LZIP:
  928. return uncompresslzlib;
  929. #endif
  930. default:
  931. return NULL;
  932. }
  933. }
  934. file_private int
  935. uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
  936. const unsigned char *old, unsigned char **newch, size_t* n)
  937. {
  938. int fdp[3][2];
  939. int status, rv, w;
  940. pid_t pid;
  941. pid_t writepid = -1;
  942. size_t i;
  943. ssize_t r, re;
  944. char *const *args;
  945. #ifdef HAVE_POSIX_SPAWNP
  946. posix_spawn_file_actions_t fa;
  947. #endif
  948. int (*decompress)(const unsigned char *, unsigned char **,
  949. size_t, size_t *, int) = getdecompressor(method);
  950. *newch = CAST(unsigned char *, malloc(bytes_max + 1));
  951. if (*newch == NULL)
  952. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  953. if (decompress) {
  954. if (nofork) {
  955. return makeerror(newch, n,
  956. "Fork is required to uncompress, but disabled");
  957. }
  958. return (*decompress)(old, newch, bytes_max, n, 1);
  959. }
  960. (void)fflush(stdout);
  961. (void)fflush(stderr);
  962. for (i = 0; i < __arraycount(fdp); i++)
  963. fdp[i][0] = fdp[i][1] = -1;
  964. /*
  965. * There are multithreaded users who run magic_file()
  966. * from dozens of threads. If two parallel magic_file() calls
  967. * analyze two large compressed files, both will spawn
  968. * an uncompressing child here, which writes out uncompressed data.
  969. * We read some portion, then close the pipe, then waitpid() the child.
  970. * If uncompressed data is larger, child should get EPIPE and exit.
  971. * However, with *parallel* calls OTHER child may unintentionally
  972. * inherit pipe fds, thus keeping pipe open and making writes in
  973. * our child block instead of failing with EPIPE!
  974. * (For the bug to occur, two threads must mutually inherit their pipes,
  975. * and both must have large outputs. Thus it happens not that often).
  976. * To avoid this, be sure to create pipes with O_CLOEXEC.
  977. */
  978. if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
  979. file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
  980. file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
  981. closep(fdp[STDIN_FILENO]);
  982. closep(fdp[STDOUT_FILENO]);
  983. return makeerror(newch, n, "Cannot create pipe, %s",
  984. strerror(errno));
  985. }
  986. args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
  987. #ifdef HAVE_POSIX_SPAWNP
  988. posix_spawn_file_actions_init(&fa);
  989. handledesc(&fa, fd, fdp);
  990. DPRINTF("Executing %s\n", compr[method].argv[0]);
  991. status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
  992. args, NULL);
  993. posix_spawn_file_actions_destroy(&fa);
  994. if (status == -1) {
  995. return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
  996. compr[method].argv[0], strerror(errno));
  997. }
  998. #else
  999. /* For processes with large mapped virtual sizes, vfork
  1000. * may be _much_ faster (10-100 times) than fork.
  1001. */
  1002. pid = vfork();
  1003. if (pid == -1) {
  1004. return makeerror(newch, n, "Cannot vfork, %s",
  1005. strerror(errno));
  1006. }
  1007. if (pid == 0) {
  1008. /* child */
  1009. /* Note: we are after vfork, do not modify memory
  1010. * in a way which confuses parent. In particular,
  1011. * do not modify fdp[i][j].
  1012. */
  1013. handledesc(NULL, fd, fdp);
  1014. DPRINTF("Executing %s\n", compr[method].argv[0]);
  1015. (void)execvp(compr[method].argv[0], args);
  1016. dprintf(STDERR_FILENO, "exec `%s' failed, %s",
  1017. compr[method].argv[0], strerror(errno));
  1018. _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
  1019. }
  1020. #endif
  1021. /* parent */
  1022. /* Close write sides of child stdout/err pipes */
  1023. for (i = 1; i < __arraycount(fdp); i++)
  1024. closefd(fdp[i], 1);
  1025. /* Write the buffer data to child stdin, if we don't have fd */
  1026. if (fd == -1) {
  1027. closefd(fdp[STDIN_FILENO], 0);
  1028. writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
  1029. if (writepid == (pid_t)-1) {
  1030. rv = makeerror(newch, n, "Write to child failed, %s",
  1031. strerror(errno));
  1032. DPRINTF("Write to child failed\n");
  1033. goto err;
  1034. }
  1035. closefd(fdp[STDIN_FILENO], 1);
  1036. }
  1037. rv = OKDATA;
  1038. r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
  1039. DPRINTF("read got %zd\n", r);
  1040. if (r < 0) {
  1041. rv = ERRDATA;
  1042. DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
  1043. strerror(errno));
  1044. goto err;
  1045. }
  1046. if (CAST(size_t, r) == bytes_max) {
  1047. /*
  1048. * close fd so that the child exits with sigpipe and ignore
  1049. * errors, otherwise we risk the child blocking and never
  1050. * exiting.
  1051. */
  1052. DPRINTF("Closing stdout for bytes_max\n");
  1053. closefd(fdp[STDOUT_FILENO], 0);
  1054. goto ok;
  1055. }
  1056. if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
  1057. DPRINTF("Got stuff from stderr %s\n", *newch);
  1058. rv = ERRDATA;
  1059. r = filter_error(*newch, r);
  1060. goto ok;
  1061. }
  1062. if (re == 0)
  1063. goto ok;
  1064. rv = makeerror(newch, n, "Read stderr failed, %s",
  1065. strerror(errno));
  1066. goto err;
  1067. ok:
  1068. *n = r;
  1069. /* NUL terminate, as every buffer is handled here. */
  1070. (*newch)[*n] = '\0';
  1071. err:
  1072. closefd(fdp[STDIN_FILENO], 1);
  1073. closefd(fdp[STDOUT_FILENO], 0);
  1074. closefd(fdp[STDERR_FILENO], 0);
  1075. w = waitpid(pid, &status, 0);
  1076. wait_err:
  1077. if (w == -1) {
  1078. rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
  1079. DPRINTF("Child wait return %#x\n", status);
  1080. } else if (!WIFEXITED(status)) {
  1081. DPRINTF("Child not exited (%#x)\n", status);
  1082. } else if (WEXITSTATUS(status) != 0) {
  1083. DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
  1084. }
  1085. if (writepid > 0) {
  1086. /* _After_ we know decompressor has exited, our input writer
  1087. * definitely will exit now (at worst, writing fails in it,
  1088. * since output fd is closed now on the reading size).
  1089. */
  1090. w = waitpid(writepid, &status, 0);
  1091. writepid = -1;
  1092. goto wait_err;
  1093. }
  1094. closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
  1095. DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
  1096. return rv;
  1097. }
  1098. #endif