compress.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  28. /*
  29. * compress routines:
  30. * zmagic() - returns 0 if not recognized, uncompresses and prints
  31. * information if recognized
  32. * uncompress(method, old, n, newch) - uncompress old into new,
  33. * using method, return sizeof new
  34. */
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$File: compress.c,v 1.127 2020/05/31 00:11:06 christos Exp $")
  38. #endif
  39. #include "magic.h"
  40. #include <stdlib.h>
  41. #ifdef HAVE_UNISTD_H
  42. #include <unistd.h>
  43. #endif
  44. #include <string.h>
  45. #include <errno.h>
  46. #include <ctype.h>
  47. #include <stdarg.h>
  48. #include <signal.h>
  49. #ifndef HAVE_SIG_T
  50. typedef void (*sig_t)(int);
  51. #endif /* HAVE_SIG_T */
  52. #if !defined(__MINGW32__) && !defined(WIN32) && !defined(__MINGW64__)
  53. #include <sys/ioctl.h>
  54. #endif
  55. #ifdef HAVE_SYS_WAIT_H
  56. #include <sys/wait.h>
  57. #endif
  58. #if defined(HAVE_SYS_TIME_H)
  59. #include <sys/time.h>
  60. #endif
  61. #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
  62. #define BUILTIN_DECOMPRESS
  63. #include <zlib.h>
  64. #endif
  65. #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
  66. #define BUILTIN_BZLIB
  67. #include <bzlib.h>
  68. #endif
  69. #if defined(HAVE_XZLIB_H) && defined(XZLIBSUPPORT)
  70. #define BUILTIN_XZLIB
  71. #include <lzma.h>
  72. #endif
  73. #ifdef DEBUG
  74. int tty = -1;
  75. #define DPRINTF(...) do { \
  76. if (tty == -1) \
  77. tty = open("/dev/tty", O_RDWR); \
  78. if (tty == -1) \
  79. abort(); \
  80. dprintf(tty, __VA_ARGS__); \
  81. } while (/*CONSTCOND*/0)
  82. #else
  83. #define DPRINTF(...)
  84. #endif
  85. #ifdef ZLIBSUPPORT
  86. /*
  87. * The following python code is not really used because ZLIBSUPPORT is only
  88. * defined if we have a built-in zlib, and the built-in zlib handles that.
  89. * That is not true for android where we have zlib.h and not -lz.
  90. */
  91. static const char zlibcode[] =
  92. "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
  93. static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
  94. static int
  95. zlibcmp(const unsigned char *buf)
  96. {
  97. unsigned short x = 1;
  98. unsigned char *s = CAST(unsigned char *, CAST(void *, &x));
  99. if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
  100. return 0;
  101. if (s[0] != 1) /* endianness test */
  102. x = buf[0] | (buf[1] << 8);
  103. else
  104. x = buf[1] | (buf[0] << 8);
  105. if (x % 31)
  106. return 0;
  107. return 1;
  108. }
  109. #endif
  110. static int
  111. lzmacmp(const unsigned char *buf)
  112. {
  113. if (buf[0] != 0x5d || buf[1] || buf[2])
  114. return 0;
  115. if (buf[12] && buf[12] != 0xff)
  116. return 0;
  117. return 1;
  118. }
  119. #define gzip_flags "-cd"
  120. #define lrzip_flags "-do"
  121. #define lzip_flags gzip_flags
  122. static const char *gzip_args[] = {
  123. "gzip", gzip_flags, NULL
  124. };
  125. static const char *uncompress_args[] = {
  126. "uncompress", "-c", NULL
  127. };
  128. static const char *bzip2_args[] = {
  129. "bzip2", "-cd", NULL
  130. };
  131. static const char *lzip_args[] = {
  132. "lzip", lzip_flags, NULL
  133. };
  134. static const char *xz_args[] = {
  135. "xz", "-cd", NULL
  136. };
  137. static const char *lrzip_args[] = {
  138. "lrzip", lrzip_flags, NULL
  139. };
  140. static const char *lz4_args[] = {
  141. "lz4", "-cd", NULL
  142. };
  143. static const char *zstd_args[] = {
  144. "zstd", "-cd", NULL
  145. };
  146. #define do_zlib NULL
  147. #define do_bzlib NULL
  148. private const struct {
  149. union {
  150. const char *magic;
  151. int (*func)(const unsigned char *);
  152. } u;
  153. int maglen;
  154. const char **argv;
  155. void *unused;
  156. } compr[] = {
  157. #define METH_FROZEN 2
  158. #define METH_BZIP 7
  159. #define METH_XZ 9
  160. #define METH_LZMA 13
  161. #define METH_ZLIB 14
  162. { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
  163. /* Uncompress can get stuck; so use gzip first if we have it
  164. * Idea from Damien Clark, thanks! */
  165. { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
  166. { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
  167. { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
  168. { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
  169. /* the standard pack utilities do not accept standard input */
  170. { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
  171. { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */
  172. /* ...only first file examined */
  173. { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
  174. { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
  175. { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
  176. { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
  177. { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
  178. { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
  179. { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
  180. #ifdef ZLIBSUPPORT
  181. { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
  182. #endif
  183. };
  184. #define OKDATA 0
  185. #define NODATA 1
  186. #define ERRDATA 2
  187. private ssize_t swrite(int, const void *, size_t);
  188. #if HAVE_FORK
  189. private size_t ncompr = __arraycount(compr);
  190. private int uncompressbuf(int, size_t, size_t, const unsigned char *,
  191. unsigned char **, size_t *);
  192. #ifdef BUILTIN_DECOMPRESS
  193. private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
  194. size_t *, int);
  195. private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
  196. size_t *);
  197. #endif
  198. #ifdef BUILTIN_BZLIB
  199. private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
  200. size_t *);
  201. #endif
  202. #ifdef BUILTIN_XZLIB
  203. private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
  204. size_t *);
  205. #endif
  206. static int makeerror(unsigned char **, size_t *, const char *, ...)
  207. __attribute__((__format__(__printf__, 3, 4)));
  208. private const char *methodname(size_t);
  209. private int
  210. format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
  211. {
  212. unsigned char *p;
  213. int mime = ms->flags & MAGIC_MIME;
  214. if (!mime)
  215. return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
  216. for (p = buf; *p; p++)
  217. if (!isalnum(*p))
  218. *p = '-';
  219. return file_printf(ms, "application/x-decompression-error-%s-%s",
  220. methodname(i), buf);
  221. }
  222. protected int
  223. file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
  224. {
  225. unsigned char *newbuf = NULL;
  226. size_t i, nsz;
  227. char *rbuf;
  228. file_pushbuf_t *pb;
  229. int urv, prv, rv = 0;
  230. int mime = ms->flags & MAGIC_MIME;
  231. int fd = b->fd;
  232. const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
  233. size_t nbytes = b->flen;
  234. int sa_saved = 0;
  235. struct sigaction sig_act;
  236. if ((ms->flags & MAGIC_COMPRESS) == 0)
  237. return 0;
  238. for (i = 0; i < ncompr; i++) {
  239. int zm;
  240. if (nbytes < CAST(size_t, abs(compr[i].maglen)))
  241. continue;
  242. if (compr[i].maglen < 0) {
  243. zm = (*compr[i].u.func)(buf);
  244. } else {
  245. zm = memcmp(buf, compr[i].u.magic,
  246. CAST(size_t, compr[i].maglen)) == 0;
  247. }
  248. if (!zm)
  249. continue;
  250. /* Prevent SIGPIPE death if child dies unexpectedly */
  251. if (!sa_saved) {
  252. //We can use sig_act for both new and old, but
  253. struct sigaction new_act;
  254. memset(&new_act, 0, sizeof(new_act));
  255. new_act.sa_handler = SIG_IGN;
  256. sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
  257. }
  258. nsz = nbytes;
  259. urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
  260. DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
  261. (char *)newbuf, nsz);
  262. switch (urv) {
  263. case OKDATA:
  264. case ERRDATA:
  265. ms->flags &= ~MAGIC_COMPRESS;
  266. if (urv == ERRDATA)
  267. prv = format_decompression_error(ms, i, newbuf);
  268. else
  269. prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
  270. if (prv == -1)
  271. goto error;
  272. rv = 1;
  273. if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
  274. goto out;
  275. if (mime != MAGIC_MIME && mime != 0)
  276. goto out;
  277. if ((file_printf(ms,
  278. mime ? " compressed-encoding=" : " (")) == -1)
  279. goto error;
  280. if ((pb = file_push_buffer(ms)) == NULL)
  281. goto error;
  282. /*
  283. * XXX: If file_buffer fails here, we overwrite
  284. * the compressed text. FIXME.
  285. */
  286. if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
  287. if (file_pop_buffer(ms, pb) != NULL)
  288. abort();
  289. goto error;
  290. }
  291. if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
  292. if (file_printf(ms, "%s", rbuf) == -1) {
  293. free(rbuf);
  294. goto error;
  295. }
  296. free(rbuf);
  297. }
  298. if (!mime && file_printf(ms, ")") == -1)
  299. goto error;
  300. /*FALLTHROUGH*/
  301. case NODATA:
  302. break;
  303. default:
  304. abort();
  305. /*NOTREACHED*/
  306. error:
  307. rv = -1;
  308. break;
  309. }
  310. }
  311. out:
  312. DPRINTF("rv = %d\n", rv);
  313. if (sa_saved && sig_act.sa_handler != SIG_IGN)
  314. (void)sigaction(SIGPIPE, &sig_act, NULL);
  315. free(newbuf);
  316. ms->flags |= MAGIC_COMPRESS;
  317. DPRINTF("Zmagic returns %d\n", rv);
  318. return rv;
  319. }
  320. #endif
  321. /*
  322. * `safe' write for sockets and pipes.
  323. */
  324. private ssize_t
  325. swrite(int fd, const void *buf, size_t n)
  326. {
  327. ssize_t rv;
  328. size_t rn = n;
  329. do
  330. switch (rv = write(fd, buf, n)) {
  331. case -1:
  332. if (errno == EINTR)
  333. continue;
  334. return -1;
  335. default:
  336. n -= rv;
  337. buf = CAST(const char *, buf) + rv;
  338. break;
  339. }
  340. while (n > 0);
  341. return rn;
  342. }
  343. /*
  344. * `safe' read for sockets and pipes.
  345. */
  346. protected ssize_t
  347. sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
  348. {
  349. ssize_t rv;
  350. #ifdef FIONREAD
  351. int t = 0;
  352. #endif
  353. size_t rn = n;
  354. if (fd == STDIN_FILENO)
  355. goto nocheck;
  356. #ifdef FIONREAD
  357. if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
  358. #ifdef FD_ZERO
  359. ssize_t cnt;
  360. for (cnt = 0;; cnt++) {
  361. fd_set check;
  362. struct timeval tout = {0, 100 * 1000};
  363. int selrv;
  364. FD_ZERO(&check);
  365. FD_SET(fd, &check);
  366. /*
  367. * Avoid soft deadlock: do not read if there
  368. * is nothing to read from sockets and pipes.
  369. */
  370. selrv = select(fd + 1, &check, NULL, NULL, &tout);
  371. if (selrv == -1) {
  372. if (errno == EINTR || errno == EAGAIN)
  373. continue;
  374. } else if (selrv == 0 && cnt >= 5) {
  375. return 0;
  376. } else
  377. break;
  378. }
  379. #endif
  380. (void)ioctl(fd, FIONREAD, &t);
  381. }
  382. if (t > 0 && CAST(size_t, t) < n) {
  383. n = t;
  384. rn = n;
  385. }
  386. #endif
  387. nocheck:
  388. do
  389. switch ((rv = read(fd, buf, n))) {
  390. case -1:
  391. if (errno == EINTR)
  392. continue;
  393. return -1;
  394. case 0:
  395. return rn - n;
  396. default:
  397. n -= rv;
  398. buf = CAST(char *, CCAST(void *, buf)) + rv;
  399. break;
  400. }
  401. while (n > 0);
  402. return rn;
  403. }
  404. protected int
  405. file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
  406. size_t nbytes)
  407. {
  408. char buf[4096];
  409. ssize_t r;
  410. int tfd;
  411. (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
  412. #ifndef HAVE_MKSTEMP
  413. {
  414. char *ptr = mktemp(buf);
  415. tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
  416. r = errno;
  417. (void)unlink(ptr);
  418. errno = r;
  419. }
  420. #else
  421. {
  422. int te;
  423. mode_t ou = umask(0);
  424. tfd = mkstemp(buf);
  425. (void)umask(ou);
  426. te = errno;
  427. (void)unlink(buf);
  428. errno = te;
  429. }
  430. #endif
  431. if (tfd == -1) {
  432. file_error(ms, errno,
  433. "cannot create temporary file for pipe copy");
  434. return -1;
  435. }
  436. if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
  437. r = 1;
  438. else {
  439. while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
  440. if (swrite(tfd, buf, CAST(size_t, r)) != r)
  441. break;
  442. }
  443. switch (r) {
  444. case -1:
  445. file_error(ms, errno, "error copying from pipe to temp file");
  446. return -1;
  447. case 0:
  448. break;
  449. default:
  450. file_error(ms, errno, "error while writing to temp file");
  451. return -1;
  452. }
  453. /*
  454. * We duplicate the file descriptor, because fclose on a
  455. * tmpfile will delete the file, but any open descriptors
  456. * can still access the phantom inode.
  457. */
  458. if ((fd = dup2(tfd, fd)) == -1) {
  459. file_error(ms, errno, "could not dup descriptor for temp file");
  460. return -1;
  461. }
  462. (void)close(tfd);
  463. if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
  464. file_badseek(ms);
  465. return -1;
  466. }
  467. return fd;
  468. }
  469. #if HAVE_FORK
  470. #ifdef BUILTIN_DECOMPRESS
  471. #define FHCRC (1 << 1)
  472. #define FEXTRA (1 << 2)
  473. #define FNAME (1 << 3)
  474. #define FCOMMENT (1 << 4)
  475. private int
  476. uncompressgzipped(const unsigned char *old, unsigned char **newch,
  477. size_t bytes_max, size_t *n)
  478. {
  479. unsigned char flg = old[3];
  480. size_t data_start = 10;
  481. if (flg & FEXTRA) {
  482. if (data_start + 1 >= *n)
  483. goto err;
  484. data_start += 2 + old[data_start] + old[data_start + 1] * 256;
  485. }
  486. if (flg & FNAME) {
  487. while(data_start < *n && old[data_start])
  488. data_start++;
  489. data_start++;
  490. }
  491. if (flg & FCOMMENT) {
  492. while(data_start < *n && old[data_start])
  493. data_start++;
  494. data_start++;
  495. }
  496. if (flg & FHCRC)
  497. data_start += 2;
  498. if (data_start >= *n)
  499. goto err;
  500. *n -= data_start;
  501. old += data_start;
  502. return uncompresszlib(old, newch, bytes_max, n, 0);
  503. err:
  504. return makeerror(newch, n, "File too short");
  505. }
  506. private int
  507. uncompresszlib(const unsigned char *old, unsigned char **newch,
  508. size_t bytes_max, size_t *n, int zlib)
  509. {
  510. int rc;
  511. z_stream z;
  512. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  513. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  514. z.next_in = CCAST(Bytef *, old);
  515. z.avail_in = CAST(uint32_t, *n);
  516. z.next_out = *newch;
  517. z.avail_out = CAST(unsigned int, bytes_max);
  518. z.zalloc = Z_NULL;
  519. z.zfree = Z_NULL;
  520. z.opaque = Z_NULL;
  521. /* LINTED bug in header macro */
  522. rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
  523. if (rc != Z_OK)
  524. goto err;
  525. rc = inflate(&z, Z_SYNC_FLUSH);
  526. if (rc != Z_OK && rc != Z_STREAM_END)
  527. goto err;
  528. *n = CAST(size_t, z.total_out);
  529. rc = inflateEnd(&z);
  530. if (rc != Z_OK)
  531. goto err;
  532. /* let's keep the nul-terminate tradition */
  533. (*newch)[*n] = '\0';
  534. return OKDATA;
  535. err:
  536. strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
  537. *n = strlen(RCAST(char *, *newch));
  538. return ERRDATA;
  539. }
  540. #endif
  541. #ifdef BUILTIN_BZLIB
  542. private int
  543. uncompressbzlib(const unsigned char *old, unsigned char **newch,
  544. size_t bytes_max, size_t *n)
  545. {
  546. int rc;
  547. bz_stream bz;
  548. memset(&bz, 0, sizeof(bz));
  549. rc = BZ2_bzDecompressInit(&bz, 0, 0);
  550. if (rc != BZ_OK)
  551. goto err;
  552. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  553. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  554. bz.next_in = CCAST(char *, RCAST(const char *, old));
  555. bz.avail_in = CAST(uint32_t, *n);
  556. bz.next_out = RCAST(char *, *newch);
  557. bz.avail_out = CAST(unsigned int, bytes_max);
  558. rc = BZ2_bzDecompress(&bz);
  559. if (rc != BZ_OK && rc != BZ_STREAM_END)
  560. goto err;
  561. /* Assume byte_max is within 32bit */
  562. /* assert(bz.total_out_hi32 == 0); */
  563. *n = CAST(size_t, bz.total_out_lo32);
  564. rc = BZ2_bzDecompressEnd(&bz);
  565. if (rc != BZ_OK)
  566. goto err;
  567. /* let's keep the nul-terminate tradition */
  568. (*newch)[*n] = '\0';
  569. return OKDATA;
  570. err:
  571. snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
  572. *n = strlen(RCAST(char *, *newch));
  573. return ERRDATA;
  574. }
  575. #endif
  576. #ifdef BUILTIN_XZLIB
  577. private int
  578. uncompressxzlib(const unsigned char *old, unsigned char **newch,
  579. size_t bytes_max, size_t *n)
  580. {
  581. int rc;
  582. lzma_stream xz;
  583. memset(&xz, 0, sizeof(xz));
  584. rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
  585. if (rc != LZMA_OK)
  586. goto err;
  587. if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
  588. return makeerror(newch, n, "No buffer, %s", strerror(errno));
  589. xz.next_in = CCAST(const uint8_t *, old);
  590. xz.avail_in = CAST(uint32_t, *n);
  591. xz.next_out = RCAST(uint8_t *, *newch);
  592. xz.avail_out = CAST(unsigned int, bytes_max);
  593. rc = lzma_code(&xz, LZMA_RUN);
  594. if (rc != LZMA_OK && rc != LZMA_STREAM_END)
  595. goto err;
  596. *n = CAST(size_t, xz.total_out);
  597. lzma_end(&xz);
  598. /* let's keep the nul-terminate tradition */
  599. (*newch)[*n] = '\0';
  600. return OKDATA;
  601. err:
  602. snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
  603. *n = strlen(RCAST(char *, *newch));
  604. return ERRDATA;
  605. }
  606. #endif
  607. static int
  608. makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
  609. {
  610. char *msg;
  611. va_list ap;
  612. int rv;
  613. va_start(ap, fmt);
  614. rv = vasprintf(&msg, fmt, ap);
  615. va_end(ap);
  616. if (rv < 0) {
  617. *buf = NULL;
  618. *len = 0;
  619. return NODATA;
  620. }
  621. *buf = RCAST(unsigned char *, msg);
  622. *len = strlen(msg);
  623. return ERRDATA;
  624. }
  625. static void
  626. closefd(int *fd, size_t i)
  627. {
  628. if (fd[i] == -1)
  629. return;
  630. (void) close(fd[i]);
  631. fd[i] = -1;
  632. }
  633. static void
  634. closep(int *fd)
  635. {
  636. size_t i;
  637. for (i = 0; i < 2; i++)
  638. closefd(fd, i);
  639. }
  640. static int
  641. copydesc(int i, int fd)
  642. {
  643. if (fd == i)
  644. return 0; /* "no dup was necessary" */
  645. if (dup2(fd, i) == -1) {
  646. DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
  647. exit(1);
  648. }
  649. return 1;
  650. }
  651. static pid_t
  652. writechild(int fd, const void *old, size_t n)
  653. {
  654. pid_t pid;
  655. /*
  656. * fork again, to avoid blocking because both
  657. * pipes filled
  658. */
  659. pid = fork();
  660. if (pid == -1) {
  661. DPRINTF("Fork failed (%s)\n", strerror(errno));
  662. exit(1);
  663. }
  664. if (pid == 0) {
  665. /* child */
  666. if (swrite(fd, old, n) != CAST(ssize_t, n)) {
  667. DPRINTF("Write failed (%s)\n", strerror(errno));
  668. exit(1);
  669. }
  670. exit(0);
  671. }
  672. /* parent */
  673. return pid;
  674. }
  675. static ssize_t
  676. filter_error(unsigned char *ubuf, ssize_t n)
  677. {
  678. char *p;
  679. char *buf;
  680. ubuf[n] = '\0';
  681. buf = RCAST(char *, ubuf);
  682. while (isspace(CAST(unsigned char, *buf)))
  683. buf++;
  684. DPRINTF("Filter error[[[%s]]]\n", buf);
  685. if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
  686. *p = '\0';
  687. if ((p = strchr(CAST(char *, buf), ';')) != NULL)
  688. *p = '\0';
  689. if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
  690. ++p;
  691. while (isspace(CAST(unsigned char, *p)))
  692. p++;
  693. n = strlen(p);
  694. memmove(ubuf, p, CAST(size_t, n + 1));
  695. }
  696. DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
  697. if (islower(*ubuf))
  698. *ubuf = toupper(*ubuf);
  699. return n;
  700. }
  701. private const char *
  702. methodname(size_t method)
  703. {
  704. switch (method) {
  705. #ifdef BUILTIN_DECOMPRESS
  706. case METH_FROZEN:
  707. case METH_ZLIB:
  708. return "zlib";
  709. #endif
  710. #ifdef BUILTIN_BZLIB
  711. case METH_BZIP:
  712. return "bzlib";
  713. #endif
  714. #ifdef BUILTIN_XZLIB
  715. case METH_XZ:
  716. case METH_LZMA:
  717. return "xzlib";
  718. #endif
  719. default:
  720. return compr[method].argv[0];
  721. }
  722. }
  723. private int
  724. uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
  725. unsigned char **newch, size_t* n)
  726. {
  727. int fdp[3][2];
  728. int status, rv, w;
  729. pid_t pid;
  730. pid_t writepid = -1;
  731. size_t i;
  732. ssize_t r;
  733. switch (method) {
  734. #ifdef BUILTIN_DECOMPRESS
  735. case METH_FROZEN:
  736. return uncompressgzipped(old, newch, bytes_max, n);
  737. case METH_ZLIB:
  738. return uncompresszlib(old, newch, bytes_max, n, 1);
  739. #endif
  740. #ifdef BUILTIN_BZLIB
  741. case METH_BZIP:
  742. return uncompressbzlib(old, newch, bytes_max, n);
  743. #endif
  744. #ifdef BUILTIN_XZLIB
  745. case METH_XZ:
  746. case METH_LZMA:
  747. return uncompressxzlib(old, newch, bytes_max, n);
  748. #endif
  749. default:
  750. break;
  751. }
  752. (void)fflush(stdout);
  753. (void)fflush(stderr);
  754. for (i = 0; i < __arraycount(fdp); i++)
  755. fdp[i][0] = fdp[i][1] = -1;
  756. if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
  757. pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
  758. closep(fdp[STDIN_FILENO]);
  759. closep(fdp[STDOUT_FILENO]);
  760. return makeerror(newch, n, "Cannot create pipe, %s",
  761. strerror(errno));
  762. }
  763. /* For processes with large mapped virtual sizes, vfork
  764. * may be _much_ faster (10-100 times) than fork.
  765. */
  766. pid = vfork();
  767. if (pid == -1) {
  768. return makeerror(newch, n, "Cannot vfork, %s",
  769. strerror(errno));
  770. }
  771. if (pid == 0) {
  772. /* child */
  773. /* Note: we are after vfork, do not modify memory
  774. * in a way which confuses parent. In particular,
  775. * do not modify fdp[i][j].
  776. */
  777. if (fd != -1) {
  778. (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
  779. if (copydesc(STDIN_FILENO, fd))
  780. (void) close(fd);
  781. } else {
  782. if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0]))
  783. (void) close(fdp[STDIN_FILENO][0]);
  784. if (fdp[STDIN_FILENO][1] > 2)
  785. (void) close(fdp[STDIN_FILENO][1]);
  786. }
  787. ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
  788. if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
  789. (void) close(fdp[STDOUT_FILENO][1]);
  790. if (fdp[STDOUT_FILENO][0] > 2)
  791. (void) close(fdp[STDOUT_FILENO][0]);
  792. if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
  793. (void) close(fdp[STDERR_FILENO][1]);
  794. if (fdp[STDERR_FILENO][0] > 2)
  795. (void) close(fdp[STDERR_FILENO][0]);
  796. (void)execvp(compr[method].argv[0],
  797. RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
  798. dprintf(STDERR_FILENO, "exec `%s' failed, %s",
  799. compr[method].argv[0], strerror(errno));
  800. _exit(1); /* _exit(), not exit(), because of vfork */
  801. }
  802. /* parent */
  803. /* Close write sides of child stdout/err pipes */
  804. for (i = 1; i < __arraycount(fdp); i++)
  805. closefd(fdp[i], 1);
  806. /* Write the buffer data to child stdin, if we don't have fd */
  807. if (fd == -1) {
  808. closefd(fdp[STDIN_FILENO], 0);
  809. writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
  810. closefd(fdp[STDIN_FILENO], 1);
  811. }
  812. *newch = CAST(unsigned char *, malloc(bytes_max + 1));
  813. if (*newch == NULL) {
  814. rv = makeerror(newch, n, "No buffer, %s",
  815. strerror(errno));
  816. goto err;
  817. }
  818. rv = OKDATA;
  819. r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
  820. if (r <= 0) {
  821. DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
  822. r != -1 ? strerror(errno) : "no data");
  823. rv = ERRDATA;
  824. if (r == 0 &&
  825. (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
  826. {
  827. r = filter_error(*newch, r);
  828. goto ok;
  829. }
  830. free(*newch);
  831. if (r == 0)
  832. rv = makeerror(newch, n, "Read failed, %s",
  833. strerror(errno));
  834. else
  835. rv = makeerror(newch, n, "No data");
  836. goto err;
  837. }
  838. ok:
  839. *n = r;
  840. /* NUL terminate, as every buffer is handled here. */
  841. (*newch)[*n] = '\0';
  842. err:
  843. closefd(fdp[STDIN_FILENO], 1);
  844. closefd(fdp[STDOUT_FILENO], 0);
  845. closefd(fdp[STDERR_FILENO], 0);
  846. w = waitpid(pid, &status, 0);
  847. wait_err:
  848. if (w == -1) {
  849. free(*newch);
  850. rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
  851. DPRINTF("Child wait return %#x\n", status);
  852. } else if (!WIFEXITED(status)) {
  853. DPRINTF("Child not exited (%#x)\n", status);
  854. } else if (WEXITSTATUS(status) != 0) {
  855. DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
  856. }
  857. if (writepid > 0) {
  858. /* _After_ we know decompressor has exited, our input writer
  859. * definitely will exit now (at worst, writing fails in it,
  860. * since output fd is closed now on the reading size).
  861. */
  862. w = waitpid(writepid, &status, 0);
  863. writepid = -1;
  864. goto wait_err;
  865. }
  866. closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
  867. DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
  868. return rv;
  869. }
  870. #endif