compress.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  /*
   * compress routines:
   *	file_zmagic() - returns 0 if the buffer is not recognized as
   *		compressed data; otherwise uncompresses it, prints information
   *		about the result and returns 1 (or -1 on error)
   *	uncompressbuf(fd, bytes_max, method, old, newch, n) - uncompress old
   *		into *newch using method, store the new size in *n and return
   *		OKDATA, NODATA or ERRDATA
   */
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$File: compress.c,v 1.124 2019/07/21 11:42:09 christos Exp $")
  38. #endif
  39. #include "magic.h"
  40. #include <stdlib.h>
  41. #ifdef HAVE_UNISTD_H
  42. #include <unistd.h>
  43. #endif
  44. #include <string.h>
  45. #include <errno.h>
  46. #include <ctype.h>
  47. #include <stdarg.h>
  48. #include <signal.h>
  49. #ifndef HAVE_SIG_T
  50. typedef void (*sig_t)(int);
  51. #endif /* HAVE_SIG_T */
  52. #if !defined(__MINGW32__) && !defined(WIN32)
  53. #include <sys/ioctl.h>
  54. #endif
  55. #ifdef HAVE_SYS_WAIT_H
  56. #include <sys/wait.h>
  57. #endif
  58. #if defined(HAVE_SYS_TIME_H)
  59. #include <sys/time.h>
  60. #endif
  61. #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
  62. #define BUILTIN_DECOMPRESS
  63. #include <zlib.h>
  64. #endif
  65. #if defined(HAVE_BZLIB_H) || defined(BZLIBSUPPORT)
  66. #define BUILTIN_BZLIB
  67. #include <bzlib.h>
  68. #endif
  69. #if defined(HAVE_XZLIB_H) || defined(XZLIBSUPPORT)
  70. #define BUILTIN_XZLIB
  71. #include <lzma.h>
  72. #endif
  /*
   * Debug tracing.  When DEBUG is defined, DPRINTF() formats its arguments
   * onto /dev/tty (opened on first use; abort() if that fails).  Without
   * DEBUG it expands to nothing, so its arguments are never evaluated.
   */
  #ifndef DEBUG
  #define DPRINTF(...)
  #else
  int tty = -1;
  #define DPRINTF(...) \
      do { \
          if (tty == -1) \
              tty = open("/dev/tty", O_RDWR); \
          if (tty == -1) \
              abort(); \
          dprintf(tty, __VA_ARGS__); \
      } while (/*CONSTCOND*/0)
  #endif
  85. #ifdef ZLIBSUPPORT
  86. /*
  87. * The following python code is not really used because ZLIBSUPPORT is only
  88. * defined if we have a built-in zlib, and the built-in zlib handles that.
  89. * That is not true for android where we have zlib.h and not -lz.
  90. */
  /* Python one-liner that inflates stdin onto stdout, and the argv to run it. */
  static const char zlibcode[] =
      "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
  static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };

  /*
   * Check whether buf[0..1] look like a zlib stream header.
   *
   * The low nibble of the first byte must be 8 and its top bit clear, and
   * the two bytes read as a big-endian 16-bit number (first byte * 256 +
   * second byte) must be a multiple of 31.
   *
   * The value is built from the stream bytes with shifts, so the result is
   * the same on every host; no host-endianness test is needed (the previous
   * one swapped the bytes on big-endian machines and rejected valid input).
   *
   * Returns 1 on a match, 0 otherwise.  The caller must supply at least two
   * readable bytes.
   */
  static int
  zlibcmp(const unsigned char *buf)
  {
      unsigned int check;

      if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
          return 0;
      check = ((unsigned int)buf[0] << 8) | buf[1];
      if (check % 31)
          return 0;
      return 1;
  }
  109. #endif
  /*
   * Matcher for the "lzma" table entry: accepts a buffer whose first byte is
   * 0x5d, whose next two bytes are zero and whose byte at offset 12 is either
   * 0x00 or 0xff.  Returns 1 on a match, 0 otherwise.  The caller must supply
   * at least 13 readable bytes.
   */
  static int
  lzmacmp(const unsigned char *buf)
  {
      const int head_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

      if (!head_ok)
          return 0;
      return buf[12] == 0 || buf[12] == 0xff;
  }
  /* Command-line flags shared between several of the argv vectors below. */
  #define gzip_flags "-cd"
  #define lrzip_flags "-do"
  #define lzip_flags gzip_flags

  /*
   * NULL-terminated argv vectors for the external decompressor programs
   * referenced from the compr[] table.
   */
  static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
  static const char *uncompress_args[] = { "uncompress", "-c", NULL };
  static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
  static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
  static const char *xz_args[] = { "xz", "-cd", NULL };
  static const char *lrzip_args[] = { "lrzip", lrzip_flags, NULL };
  static const char *lz4_args[] = { "lz4", "-cd", NULL };
  static const char *zstd_args[] = { "zstd", "-cd", NULL };
  146. #define do_zlib NULL
  147. #define do_bzlib NULL
  148. private const struct {
  149. const void *magic;
  150. int maglen;
  151. const char **argv;
  152. void *unused;
  153. } compr[] = {
  154. #define METH_FROZEN 2
  155. #define METH_BZIP 7
  156. #define METH_XZ 9
  157. #define METH_LZMA 13
  158. #define METH_ZLIB 14
  159. { "\037\235", 2, gzip_args, NULL }, /* 0, compressed */
  160. /* Uncompress can get stuck; so use gzip first if we have it
  161. * Idea from Damien Clark, thanks! */
  162. { "\037\235", 2, uncompress_args, NULL }, /* 1, compressed */
  163. { "\037\213", 2, gzip_args, do_zlib }, /* 2, gzipped */
  164. { "\037\236", 2, gzip_args, NULL }, /* 3, frozen */
  165. { "\037\240", 2, gzip_args, NULL }, /* 4, SCO LZH */
  166. /* the standard pack utilities do not accept standard input */
  167. { "\037\036", 2, gzip_args, NULL }, /* 5, packed */
  168. { "PK\3\4", 4, gzip_args, NULL }, /* 6, pkzipped, */
  169. /* ...only first file examined */
  170. { "BZh", 3, bzip2_args, do_bzlib }, /* 7, bzip2-ed */
  171. { "LZIP", 4, lzip_args, NULL }, /* 8, lzip-ed */
  172. { "\3757zXZ\0", 6, xz_args, NULL }, /* 9, XZ Utils */
  173. { "LRZI", 4, lrzip_args, NULL }, /* 10, LRZIP */
  174. { "\004\"M\030",4, lz4_args, NULL }, /* 11, LZ4 */
  175. { "\x28\xB5\x2F\xFD", 4, zstd_args, NULL }, /* 12, zstd */
  176. { RCAST(const void *, lzmacmp), -13, xz_args, NULL }, /* 13, lzma */
  177. #ifdef ZLIBSUPPORT
  178. { RCAST(const void *, zlibcmp), -2, zlib_args, NULL }, /* 14, zlib */
  179. #endif
  180. };
  181. #define OKDATA 0
  182. #define NODATA 1
  183. #define ERRDATA 2
  184. private ssize_t swrite(int, const void *, size_t);
  185. #if HAVE_FORK
  186. private size_t ncompr = __arraycount(compr);
  187. private int uncompressbuf(int, size_t, size_t, const unsigned char *,
  188. unsigned char **, size_t *);
  189. #ifdef BUILTIN_DECOMPRESS
  190. private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
  191. size_t *, int);
  192. private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
  193. size_t *);
  194. #endif
  195. #ifdef BUILTIN_BZLIB
  196. private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
  197. size_t *);
  198. #endif
  199. #ifdef BUILTIN_XZLIB
  200. private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
  201. size_t *);
  202. #endif
  203. static int makeerror(unsigned char **, size_t *, const char *, ...)
  204. __attribute__((__format__(__printf__, 3, 4)));
  205. private const char *methodname(size_t);
  206. private int
  207. format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
  208. {
  209. unsigned char *p;
  210. int mime = ms->flags & MAGIC_MIME;
  211. if (!mime)
  212. return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
  213. for (p = buf; *p; p++)
  214. if (!isalnum(*p))
  215. *p = '-';
  216. return file_printf(ms, "application/x-decompression-error-%s-%s",
  217. methodname(i), buf);
  218. }
  219. protected int
  220. file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
  221. {
  222. unsigned char *newbuf = NULL;
  223. size_t i, nsz;
  224. char *rbuf;
  225. file_pushbuf_t *pb;
  226. int urv, prv, rv = 0;
  227. int mime = ms->flags & MAGIC_MIME;
  228. int fd = b->fd;
  229. const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
  230. size_t nbytes = b->flen;
  231. int sa_saved = 0;
  232. struct sigaction sig_act;
  233. if ((ms->flags & MAGIC_COMPRESS) == 0)
  234. return 0;
  235. for (i = 0; i < ncompr; i++) {
  236. int zm;
  237. if (nbytes < CAST(size_t, abs(compr[i].maglen)))
  238. continue;
  239. if (compr[i].maglen < 0) {
  240. zm = (RCAST(int (*)(const unsigned char *),
  241. CCAST(void *, compr[i].magic)))(buf);
  242. } else {
  243. zm = memcmp(buf, compr[i].magic,
  244. CAST(size_t, compr[i].maglen)) == 0;
  245. }
  246. if (!zm)
  247. continue;
  248. /* Prevent SIGPIPE death if child dies unexpectedly */
  249. if (!sa_saved) {
  250. //We can use sig_act for both new and old, but
  251. struct sigaction new_act;
  252. memset(&new_act, 0, sizeof(new_act));
  253. new_act.sa_handler = SIG_IGN;
  254. sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
  255. }
  256. nsz = nbytes;
  257. urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
  258. DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
  259. (char *)newbuf, nsz);
  260. switch (urv) {
  261. case OKDATA:
  262. case ERRDATA:
  263. ms->flags &= ~MAGIC_COMPRESS;
  264. if (urv == ERRDATA)
  265. prv = format_decompression_error(ms, i, newbuf);
  266. else
  267. prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
  268. if (prv == -1)
  269. goto error;
  270. rv = 1;
  271. if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
  272. goto out;
  273. if (mime != MAGIC_MIME && mime != 0)
  274. goto out;
  275. if ((file_printf(ms,
  276. mime ? " compressed-encoding=" : " (")) == -1)
  277. goto error;
  278. if ((pb = file_push_buffer(ms)) == NULL)
  279. goto error;
  280. /*
  281. * XXX: If file_buffer fails here, we overwrite
  282. * the compressed text. FIXME.
  283. */
  284. if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
  285. if (file_pop_buffer(ms, pb) != NULL)
  286. abort();
  287. goto error;
  288. }
  289. if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
  290. if (file_printf(ms, "%s", rbuf) == -1) {
  291. free(rbuf);
  292. goto error;
  293. }
  294. free(rbuf);
  295. }
  296. if (!mime && file_printf(ms, ")") == -1)
  297. goto error;
  298. /*FALLTHROUGH*/
  299. case NODATA:
  300. break;
  301. default:
  302. abort();
  303. /*NOTREACHED*/
  304. error:
  305. rv = -1;
  306. break;
  307. }
  308. }
  309. out:
  310. DPRINTF("rv = %d\n", rv);
  311. if (sa_saved && sig_act.sa_handler != SIG_IGN)
  312. (void)sigaction(SIGPIPE, &sig_act, NULL);
  313. free(newbuf);
  314. ms->flags |= MAGIC_COMPRESS;
  315. DPRINTF("Zmagic returns %d\n", rv);
  316. return rv;
  317. }
  318. #endif
  319. /*
  320. * `safe' write for sockets and pipes.
  321. */
  322. private ssize_t
  323. swrite(int fd, const void *buf, size_t n)
  324. {
  325. ssize_t rv;
  326. size_t rn = n;
  327. do
  328. switch (rv = write(fd, buf, n)) {
  329. case -1:
  330. if (errno == EINTR)
  331. continue;
  332. return -1;
  333. default:
  334. n -= rv;
  335. buf = CAST(const char *, buf) + rv;
  336. break;
  337. }
  338. while (n > 0);
  339. return rn;
  340. }
  341. /*
  342. * `safe' read for sockets and pipes.
  343. */
  344. protected ssize_t
  345. sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
  346. {
  347. ssize_t rv;
  348. #ifdef FIONREAD
  349. int t = 0;
  350. #endif
  351. size_t rn = n;
  352. if (fd == STDIN_FILENO)
  353. goto nocheck;
  354. #ifdef FIONREAD
  355. if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
  356. #ifdef FD_ZERO
  357. ssize_t cnt;
  358. for (cnt = 0;; cnt++) {
  359. fd_set check;
  360. struct timeval tout = {0, 100 * 1000};
  361. int selrv;
  362. FD_ZERO(&check);
  363. FD_SET(fd, &check);
  364. /*
  365. * Avoid soft deadlock: do not read if there
  366. * is nothing to read from sockets and pipes.
  367. */
  368. selrv = select(fd + 1, &check, NULL, NULL, &tout);
  369. if (selrv == -1) {
  370. if (errno == EINTR || errno == EAGAIN)
  371. continue;
  372. } else if (selrv == 0 && cnt >= 5) {
  373. return 0;
  374. } else
  375. break;
  376. }
  377. #endif
  378. (void)ioctl(fd, FIONREAD, &t);
  379. }
  380. if (t > 0 && CAST(size_t, t) < n) {
  381. n = t;
  382. rn = n;
  383. }
  384. #endif
  385. nocheck:
  386. do
  387. switch ((rv = read(fd, buf, n))) {
  388. case -1:
  389. if (errno == EINTR)
  390. continue;
  391. return -1;
  392. case 0:
  393. return rn - n;
  394. default:
  395. n -= rv;
  396. buf = CAST(char *, CCAST(void *, buf)) + rv;
  397. break;
  398. }
  399. while (n > 0);
  400. return rn;
  401. }
  402. protected int
  403. file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
  404. size_t nbytes)
  405. {
  406. char buf[4096];
  407. ssize_t r;
  408. int tfd;
  409. (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
  410. #ifndef HAVE_MKSTEMP
  411. {
  412. char *ptr = mktemp(buf);
  413. tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
  414. r = errno;
  415. (void)unlink(ptr);
  416. errno = r;
  417. }
  418. #else
  419. {
  420. int te;
  421. mode_t ou = umask(0);
  422. tfd = mkstemp(buf);
  423. (void)umask(ou);
  424. te = errno;
  425. (void)unlink(buf);
  426. errno = te;
  427. }
  428. #endif
  429. if (tfd == -1) {
  430. file_error(ms, errno,
  431. "cannot create temporary file for pipe copy");
  432. return -1;
  433. }
  434. if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
  435. r = 1;
  436. else {
  437. while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
  438. if (swrite(tfd, buf, CAST(size_t, r)) != r)
  439. break;
  440. }
  441. switch (r) {
  442. case -1:
  443. file_error(ms, errno, "error copying from pipe to temp file");
  444. return -1;
  445. case 0:
  446. break;
  447. default:
  448. file_error(ms, errno, "error while writing to temp file");
  449. return -1;
  450. }
  451. /*
  452. * We duplicate the file descriptor, because fclose on a
  453. * tmpfile will delete the file, but any open descriptors
  454. * can still access the phantom inode.
  455. */
  456. if ((fd = dup2(tfd, fd)) == -1) {
  457. file_error(ms, errno, "could not dup descriptor for temp file");
  458. return -1;
  459. }
  460. (void)close(tfd);
  461. if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
  462. file_badseek(ms);
  463. return -1;
  464. }
  465. return fd;
  466. }
  467. #if HAVE_FORK
  468. #ifdef BUILTIN_DECOMPRESS
  469. #define FHCRC (1 << 1)
  470. #define FEXTRA (1 << 2)
  471. #define FNAME (1 << 3)
  472. #define FCOMMENT (1 << 4)
  473. private int
  474. uncompressgzipped(const unsigned char *old, unsigned char **newch,
  475. size_t bytes_max, size_t *n)
  476. {
  477. unsigned char flg = old[3];
  478. size_t data_start = 10;
  479. if (flg & FEXTRA) {
  480. if (data_start + 1 >= *n)
  481. goto err;
  482. data_start += 2 + old[data_start] + old[data_start + 1] * 256;
  483. }
  484. if (flg & FNAME) {
  485. while(data_start < *n && old[data_start])
  486. data_start++;
  487. data_start++;
  488. }
  489. if (flg & FCOMMENT) {
  490. while(data_start < *n && old[data_start])
  491. data_start++;
  492. data_start++;
  493. }
  494. if (flg & FHCRC)
  495. data_start += 2;
  496. if (data_start >= *n)
  497. goto err;
  498. *n -= data_start;
  499. old += data_start;
  500. return uncompresszlib(old, newch, bytes_max, n, 0);
  501. err:
  502. return makeerror(newch, n, "File too short");
  503. }
  504. private int
  505. uncompresszlib(const unsigned char *old, unsigned char **newch,
  506. size_t bytes_max, size_t *n, int zlib)
  507. {
  508. int rc;
  509. z_stream z;
  510. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  511. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  512. z.next_in = CCAST(Bytef *, old);
  513. z.avail_in = CAST(uint32_t, *n);
  514. z.next_out = *newch;
  515. z.avail_out = CAST(unsigned int, bytes_max);
  516. z.zalloc = Z_NULL;
  517. z.zfree = Z_NULL;
  518. z.opaque = Z_NULL;
  519. /* LINTED bug in header macro */
  520. rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
  521. if (rc != Z_OK)
  522. goto err;
  523. rc = inflate(&z, Z_SYNC_FLUSH);
  524. if (rc != Z_OK && rc != Z_STREAM_END)
  525. goto err;
  526. *n = CAST(size_t, z.total_out);
  527. rc = inflateEnd(&z);
  528. if (rc != Z_OK)
  529. goto err;
  530. /* let's keep the nul-terminate tradition */
  531. (*newch)[*n] = '\0';
  532. return OKDATA;
  533. err:
  534. strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
  535. *n = strlen(RCAST(char *, *newch));
  536. return ERRDATA;
  537. }
  538. #endif
  539. #ifdef BUILTIN_BZLIB
  540. private int
  541. uncompressbzlib(const unsigned char *old, unsigned char **newch,
  542. size_t bytes_max, size_t *n)
  543. {
  544. int rc;
  545. bz_stream bz;
  546. memset(&bz, 0, sizeof(bz));
  547. rc = BZ2_bzDecompressInit(&bz, 0, 0);
  548. if (rc != BZ_OK)
  549. goto err;
  550. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  551. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  552. bz.next_in = CCAST(char *, RCAST(const char *, old));
  553. bz.avail_in = CAST(uint32_t, *n);
  554. bz.next_out = RCAST(char *, *newch);
  555. bz.avail_out = CAST(unsigned int, bytes_max);
  556. rc = BZ2_bzDecompress(&bz);
  557. if (rc != BZ_OK && rc != BZ_STREAM_END)
  558. goto err;
  559. /* Assume byte_max is within 32bit */
  560. /* assert(bz.total_out_hi32 == 0); */
  561. *n = CAST(size_t, bz.total_out_lo32);
  562. rc = BZ2_bzDecompressEnd(&bz);
  563. if (rc != BZ_OK)
  564. goto err;
  565. /* let's keep the nul-terminate tradition */
  566. (*newch)[*n] = '\0';
  567. return OKDATA;
  568. err:
  569. snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
  570. *n = strlen(RCAST(char *, *newch));
  571. return ERRDATA;
  572. }
  573. #endif
  574. #ifdef BUILTIN_XZLIB
  575. private int
  576. uncompressxzlib(const unsigned char *old, unsigned char **newch,
  577. size_t bytes_max, size_t *n)
  578. {
  579. int rc;
  580. lzma_stream xz;
  581. memset(&xz, 0, sizeof(xz));
  582. rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
  583. if (rc != LZMA_OK)
  584. goto err;
  585. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  586. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  587. xz.next_in = CCAST(const uint8_t *, old);
  588. xz.avail_in = CAST(uint32_t, *n);
  589. xz.next_out = RCAST(uint8_t *, *newch);
  590. xz.avail_out = CAST(unsigned int, bytes_max);
  591. rc = lzma_code(&xz, LZMA_RUN);
  592. if (rc != LZMA_OK && rc != LZMA_STREAM_END)
  593. goto err;
  594. *n = CAST(size_t, xz.total_out);
  595. lzma_end(&xz);
  596. /* let's keep the nul-terminate tradition */
  597. (*newch)[*n] = '\0';
  598. return OKDATA;
  599. err:
  600. snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
  601. *n = strlen(RCAST(char *, *newch));
  602. return ERRDATA;
  603. }
  604. #endif
  605. static int
  606. makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
  607. {
  608. char *msg;
  609. va_list ap;
  610. int rv;
  611. va_start(ap, fmt);
  612. rv = vasprintf(&msg, fmt, ap);
  613. va_end(ap);
  614. if (rv < 0) {
  615. *buf = NULL;
  616. *len = 0;
  617. return NODATA;
  618. }
  619. *buf = RCAST(unsigned char *, msg);
  620. *len = strlen(msg);
  621. return ERRDATA;
  622. }
  /*
   * Close descriptor fd[i] unless it is already marked closed (-1), and mark
   * it closed afterwards so that a second call is a no-op.
   */
  static void
  closefd(int *fd, size_t i)
  {
      int *slot = &fd[i];

      if (*slot != -1) {
          (void) close(*slot);
          *slot = -1;
      }
  }
  /* Close both ends of the pipe pair fd[0], fd[1] that are still open. */
  static void
  closep(int *fd)
  {
      closefd(fd, 0);
      closefd(fd, 1);
  }
  /*
   * Make descriptor number i refer to the same open file as fd.
   *
   * Returns 0 when fd already is i (nothing to do) and 1 after a successful
   * dup2(), in which case the caller may close the original fd.  Does not
   * return when dup2() fails.
   *
   * This runs in the child created by vfork() in uncompressbuf(), so failure
   * terminates with _exit() rather than exit(): exit() would run the
   * parent's atexit handlers and flush its stdio buffers from the child.
   */
  static int
  copydesc(int i, int fd)
  {
      if (fd == i)
          return 0; /* "no dup was necessary" */
      if (dup2(fd, i) == -1) {
          DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
          _exit(1);
      }
      return 1;
  }
  649. static pid_t
  650. writechild(int fd, const void *old, size_t n)
  651. {
  652. pid_t pid;
  653. /*
  654. * fork again, to avoid blocking because both
  655. * pipes filled
  656. */
  657. pid = fork();
  658. if (pid == -1) {
  659. DPRINTF("Fork failed (%s)\n", strerror(errno));
  660. exit(1);
  661. }
  662. if (pid == 0) {
  663. /* child */
  664. if (swrite(fd, old, n) != CAST(ssize_t, n)) {
  665. DPRINTF("Write failed (%s)\n", strerror(errno));
  666. exit(1);
  667. }
  668. exit(0);
  669. }
  670. /* parent */
  671. return pid;
  672. }
  673. static ssize_t
  674. filter_error(unsigned char *ubuf, ssize_t n)
  675. {
  676. char *p;
  677. char *buf;
  678. ubuf[n] = '\0';
  679. buf = RCAST(char *, ubuf);
  680. while (isspace(CAST(unsigned char, *buf)))
  681. buf++;
  682. DPRINTF("Filter error[[[%s]]]\n", buf);
  683. if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
  684. *p = '\0';
  685. if ((p = strchr(CAST(char *, buf), ';')) != NULL)
  686. *p = '\0';
  687. if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
  688. ++p;
  689. while (isspace(CAST(unsigned char, *p)))
  690. p++;
  691. n = strlen(p);
  692. memmove(ubuf, p, CAST(size_t, n + 1));
  693. }
  694. DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
  695. if (islower(*ubuf))
  696. *ubuf = toupper(*ubuf);
  697. return n;
  698. }
  699. private const char *
  700. methodname(size_t method)
  701. {
  702. switch (method) {
  703. #ifdef BUILTIN_DECOMPRESS
  704. case METH_FROZEN:
  705. case METH_ZLIB:
  706. return "zlib";
  707. #endif
  708. #ifdef BUILTIN_BZLIB
  709. case METH_BZIP:
  710. return "bzlib";
  711. #endif
  712. #ifdef BUILTIN_XZLIB
  713. case METH_XZ:
  714. case METH_LZMA:
  715. return "xzlib";
  716. #endif
  717. default:
  718. return compr[method].argv[0];
  719. }
  720. }
  721. private int
  722. uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
  723. unsigned char **newch, size_t* n)
  724. {
  725. int fdp[3][2];
  726. int status, rv, w;
  727. pid_t pid;
  728. pid_t writepid = -1;
  729. size_t i;
  730. ssize_t r;
  731. switch (method) {
  732. #ifdef BUILTIN_DECOMPRESS
  733. case METH_FROZEN:
  734. return uncompressgzipped(old, newch, bytes_max, n);
  735. case METH_ZLIB:
  736. return uncompresszlib(old, newch, bytes_max, n, 1);
  737. #endif
  738. #ifdef BUILTIN_BZLIB
  739. case METH_BZIP:
  740. return uncompressbzlib(old, newch, bytes_max, n);
  741. #endif
  742. #ifdef BUILTIN_XZLIB
  743. case METH_XZ:
  744. case METH_LZMA:
  745. return uncompressxzlib(old, newch, bytes_max, n);
  746. #endif
  747. default:
  748. break;
  749. }
  750. (void)fflush(stdout);
  751. (void)fflush(stderr);
  752. for (i = 0; i < __arraycount(fdp); i++)
  753. fdp[i][0] = fdp[i][1] = -1;
  754. if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
  755. pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
  756. closep(fdp[STDIN_FILENO]);
  757. closep(fdp[STDOUT_FILENO]);
  758. return makeerror(newch, n, "Cannot create pipe, %s",
  759. strerror(errno));
  760. }
  761. /* For processes with large mapped virtual sizes, vfork
  762. * may be _much_ faster (10-100 times) than fork.
  763. */
  764. pid = vfork();
  765. if (pid == -1) {
  766. return makeerror(newch, n, "Cannot vfork, %s",
  767. strerror(errno));
  768. }
  769. if (pid == 0) {
  770. /* child */
  771. /* Note: we are after vfork, do not modify memory
  772. * in a way which confuses parent. In particular,
  773. * do not modify fdp[i][j].
  774. */
  775. if (fd != -1) {
  776. (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
  777. if (copydesc(STDIN_FILENO, fd))
  778. (void) close(fd);
  779. } else {
  780. if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0]))
  781. (void) close(fdp[STDIN_FILENO][0]);
  782. if (fdp[STDIN_FILENO][1] > 2)
  783. (void) close(fdp[STDIN_FILENO][1]);
  784. }
  785. ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
  786. if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
  787. (void) close(fdp[STDOUT_FILENO][1]);
  788. if (fdp[STDOUT_FILENO][0] > 2)
  789. (void) close(fdp[STDOUT_FILENO][0]);
  790. if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
  791. (void) close(fdp[STDERR_FILENO][1]);
  792. if (fdp[STDERR_FILENO][0] > 2)
  793. (void) close(fdp[STDERR_FILENO][0]);
  794. (void)execvp(compr[method].argv[0],
  795. RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
  796. dprintf(STDERR_FILENO, "exec `%s' failed, %s",
  797. compr[method].argv[0], strerror(errno));
  798. _exit(1); /* _exit(), not exit(), because of vfork */
  799. }
  800. /* parent */
  801. /* Close write sides of child stdout/err pipes */
  802. for (i = 1; i < __arraycount(fdp); i++)
  803. closefd(fdp[i], 1);
  804. /* Write the buffer data to child stdin, if we don't have fd */
  805. if (fd == -1) {
  806. closefd(fdp[STDIN_FILENO], 0);
  807. writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
  808. closefd(fdp[STDIN_FILENO], 1);
  809. }
  810. *newch = CAST(unsigned char *, malloc(bytes_max + 1));
  811. if (*newch == NULL) {
  812. rv = makeerror(newch, n, "No buffer, %s",
  813. strerror(errno));
  814. goto err;
  815. }
  816. rv = OKDATA;
  817. r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
  818. if (r <= 0) {
  819. DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
  820. r != -1 ? strerror(errno) : "no data");
  821. rv = ERRDATA;
  822. if (r == 0 &&
  823. (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
  824. {
  825. r = filter_error(*newch, r);
  826. goto ok;
  827. }
  828. free(*newch);
  829. if (r == 0)
  830. rv = makeerror(newch, n, "Read failed, %s",
  831. strerror(errno));
  832. else
  833. rv = makeerror(newch, n, "No data");
  834. goto err;
  835. }
  836. ok:
  837. *n = r;
  838. /* NUL terminate, as every buffer is handled here. */
  839. (*newch)[*n] = '\0';
  840. err:
  841. closefd(fdp[STDIN_FILENO], 1);
  842. closefd(fdp[STDOUT_FILENO], 0);
  843. closefd(fdp[STDERR_FILENO], 0);
  844. w = waitpid(pid, &status, 0);
  845. wait_err:
  846. if (w == -1) {
  847. free(*newch);
  848. rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
  849. DPRINTF("Child wait return %#x\n", status);
  850. } else if (!WIFEXITED(status)) {
  851. DPRINTF("Child not exited (%#x)\n", status);
  852. } else if (WEXITSTATUS(status) != 0) {
  853. DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
  854. }
  855. if (writepid > 0) {
  856. /* _After_ we know decompressor has exited, our input writer
  857. * definitely will exit now (at worst, writing fails in it,
  858. * since output fd is closed now on the reading size).
  859. */
  860. w = waitpid(writepid, &status, 0);
  861. writepid = -1;
  862. goto wait_err;
  863. }
  864. closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
  865. DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
  866. return rv;
  867. }
  868. #endif