compress.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if the data is not recognized as compressed;
 *		otherwise uncompresses it and prints information about it
 *	uncompressbuf(ms, fd, method, old, newch, n) - uncompress old into
 *		*newch using the given method, return the size of the new data
 */
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$File: compress.c,v 1.64 2009/05/08 17:41:58 christos Exp $")
  38. #endif
  39. #include "magic.h"
  40. #include <stdlib.h>
  41. #ifdef HAVE_UNISTD_H
  42. #include <unistd.h>
  43. #endif
  44. #include <string.h>
  45. #include <errno.h>
  46. #include <sys/ioctl.h>
  47. #ifdef HAVE_SYS_WAIT_H
  48. #include <sys/wait.h>
  49. #endif
  50. #if defined(HAVE_SYS_TIME_H)
  51. #include <sys/time.h>
  52. #endif
  53. #if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
  54. #define BUILTIN_DECOMPRESS
  55. #include <zlib.h>
  56. #endif
  57. private const struct {
  58. const char magic[8];
  59. size_t maglen;
  60. const char *argv[3];
  61. int silent;
  62. } compr[] = {
  63. { "\037\235", 2, { "gzip", "-cdq", NULL }, 1 }, /* compressed */
  64. /* Uncompress can get stuck; so use gzip first if we have it
  65. * Idea from Damien Clark, thanks! */
  66. { "\037\235", 2, { "uncompress", "-c", NULL }, 1 }, /* compressed */
  67. { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */
  68. { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */
  69. { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */
  70. /* the standard pack utilities do not accept standard input */
  71. { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */
  72. { "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */
  73. /* ...only first file examined */
  74. { "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */
  75. { "LZIP", 4, { "lzip", "-cdq", NULL }, 1 },
  76. { "\3757zXZ\0",6,{ "xz", "-cd", NULL }, 1 }, /* XZ Utils */
  77. };
  78. private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
  79. #define NODATA ((size_t)~0)
  80. private ssize_t swrite(int, const void *, size_t);
  81. private size_t uncompressbuf(struct magic_set *, int, size_t,
  82. const unsigned char *, unsigned char **, size_t);
  83. #ifdef BUILTIN_DECOMPRESS
  84. private size_t uncompressgzipped(struct magic_set *, const unsigned char *,
  85. unsigned char **, size_t);
  86. #endif
  87. protected int
  88. file_zmagic(struct magic_set *ms, int fd, const char *name,
  89. const unsigned char *buf, size_t nbytes)
  90. {
  91. unsigned char *newbuf = NULL;
  92. size_t i, nsz;
  93. int rv = 0;
  94. int mime = ms->flags & MAGIC_MIME;
  95. if ((ms->flags & MAGIC_COMPRESS) == 0)
  96. return 0;
  97. for (i = 0; i < ncompr; i++) {
  98. if (nbytes < compr[i].maglen)
  99. continue;
  100. if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
  101. (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
  102. nbytes)) != NODATA) {
  103. ms->flags &= ~MAGIC_COMPRESS;
  104. rv = -1;
  105. if (file_buffer(ms, -1, name, newbuf, nsz) == -1)
  106. goto error;
  107. if (mime == MAGIC_MIME || mime == 0) {
  108. if (file_printf(ms, mime ?
  109. " compressed-encoding=" : " (") == -1)
  110. goto error;
  111. }
  112. if ((mime == 0 || mime & MAGIC_MIME_ENCODING) &&
  113. file_buffer(ms, -1, NULL, buf, nbytes) == -1)
  114. goto error;
  115. if (!mime && file_printf(ms, ")") == -1)
  116. goto error;
  117. rv = 1;
  118. break;
  119. }
  120. }
  121. error:
  122. if (newbuf)
  123. free(newbuf);
  124. ms->flags |= MAGIC_COMPRESS;
  125. return rv;
  126. }
  127. /*
  128. * `safe' write for sockets and pipes.
  129. */
  130. private ssize_t
  131. swrite(int fd, const void *buf, size_t n)
  132. {
  133. ssize_t rv;
  134. size_t rn = n;
  135. do
  136. switch (rv = write(fd, buf, n)) {
  137. case -1:
  138. if (errno == EINTR)
  139. continue;
  140. return -1;
  141. default:
  142. n -= rv;
  143. buf = CAST(const char *, buf) + rv;
  144. break;
  145. }
  146. while (n > 0);
  147. return rn;
  148. }
  149. /*
  150. * `safe' read for sockets and pipes.
  151. */
  152. protected ssize_t
  153. sread(int fd, void *buf, size_t n, int canbepipe)
  154. {
  155. ssize_t rv, cnt;
  156. #ifdef FIONREAD
  157. int t = 0;
  158. #endif
  159. size_t rn = n;
  160. if (fd == STDIN_FILENO)
  161. goto nocheck;
  162. #ifdef FIONREAD
  163. if ((canbepipe && (ioctl(fd, FIONREAD, &t) == -1)) || (t == 0)) {
  164. #ifdef FD_ZERO
  165. for (cnt = 0;; cnt++) {
  166. fd_set check;
  167. struct timeval tout = {0, 100 * 1000};
  168. int selrv;
  169. FD_ZERO(&check);
  170. FD_SET(fd, &check);
  171. /*
  172. * Avoid soft deadlock: do not read if there
  173. * is nothing to read from sockets and pipes.
  174. */
  175. selrv = select(fd + 1, &check, NULL, NULL, &tout);
  176. if (selrv == -1) {
  177. if (errno == EINTR || errno == EAGAIN)
  178. continue;
  179. } else if (selrv == 0 && cnt >= 5) {
  180. return 0;
  181. } else
  182. break;
  183. }
  184. #endif
  185. (void)ioctl(fd, FIONREAD, &t);
  186. }
  187. if (t > 0 && (size_t)t < n) {
  188. n = t;
  189. rn = n;
  190. }
  191. #endif
  192. nocheck:
  193. do
  194. switch ((rv = read(fd, buf, n))) {
  195. case -1:
  196. if (errno == EINTR)
  197. continue;
  198. return -1;
  199. case 0:
  200. return rn - n;
  201. default:
  202. n -= rv;
  203. buf = ((char *)buf) + rv;
  204. break;
  205. }
  206. while (n > 0);
  207. return rn;
  208. }
  209. protected int
  210. file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
  211. size_t nbytes)
  212. {
  213. char buf[4096];
  214. ssize_t r;
  215. int tfd, te;
  216. (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
  217. #ifndef HAVE_MKSTEMP
  218. {
  219. char *ptr = mktemp(buf);
  220. tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
  221. r = errno;
  222. (void)unlink(ptr);
  223. errno = r;
  224. }
  225. #else
  226. tfd = mkstemp(buf);
  227. te = errno;
  228. (void)unlink(buf);
  229. errno = te;
  230. #endif
  231. if (tfd == -1) {
  232. file_error(ms, errno,
  233. "cannot create temporary file for pipe copy");
  234. return -1;
  235. }
  236. if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
  237. r = 1;
  238. else {
  239. while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
  240. if (swrite(tfd, buf, (size_t)r) != r)
  241. break;
  242. }
  243. switch (r) {
  244. case -1:
  245. file_error(ms, errno, "error copying from pipe to temp file");
  246. return -1;
  247. case 0:
  248. break;
  249. default:
  250. file_error(ms, errno, "error while writing to temp file");
  251. return -1;
  252. }
  253. /*
  254. * We duplicate the file descriptor, because fclose on a
  255. * tmpfile will delete the file, but any open descriptors
  256. * can still access the phantom inode.
  257. */
  258. if ((fd = dup2(tfd, fd)) == -1) {
  259. file_error(ms, errno, "could not dup descriptor for temp file");
  260. return -1;
  261. }
  262. (void)close(tfd);
  263. if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
  264. file_badseek(ms);
  265. return -1;
  266. }
  267. return fd;
  268. }
  269. #ifdef BUILTIN_DECOMPRESS
  270. #define FHCRC (1 << 1)
  271. #define FEXTRA (1 << 2)
  272. #define FNAME (1 << 3)
  273. #define FCOMMENT (1 << 4)
  274. private size_t
  275. uncompressgzipped(struct magic_set *ms, const unsigned char *old,
  276. unsigned char **newch, size_t n)
  277. {
  278. unsigned char flg = old[3];
  279. size_t data_start = 10;
  280. z_stream z;
  281. int rc;
  282. if (flg & FEXTRA) {
  283. if (data_start+1 >= n)
  284. return 0;
  285. data_start += 2 + old[data_start] + old[data_start + 1] * 256;
  286. }
  287. if (flg & FNAME) {
  288. while(data_start < n && old[data_start])
  289. data_start++;
  290. data_start++;
  291. }
  292. if(flg & FCOMMENT) {
  293. while(data_start < n && old[data_start])
  294. data_start++;
  295. data_start++;
  296. }
  297. if(flg & FHCRC)
  298. data_start += 2;
  299. if (data_start >= n)
  300. return 0;
  301. if ((*newch = CAST(unsigned char *, malloc(HOWMANY + 1))) == NULL) {
  302. return 0;
  303. }
  304. /* XXX: const castaway, via strchr */
  305. z.next_in = (Bytef *)strchr((const char *)old + data_start,
  306. old[data_start]);
  307. z.avail_in = CAST(uint32_t, (n - data_start));
  308. z.next_out = *newch;
  309. z.avail_out = HOWMANY;
  310. z.zalloc = Z_NULL;
  311. z.zfree = Z_NULL;
  312. z.opaque = Z_NULL;
  313. /* LINTED bug in header macro */
  314. rc = inflateInit2(&z, -15);
  315. if (rc != Z_OK) {
  316. file_error(ms, 0, "zlib: %s", z.msg);
  317. return 0;
  318. }
  319. rc = inflate(&z, Z_SYNC_FLUSH);
  320. if (rc != Z_OK && rc != Z_STREAM_END) {
  321. file_error(ms, 0, "zlib: %s", z.msg);
  322. return 0;
  323. }
  324. n = (size_t)z.total_out;
  325. (void)inflateEnd(&z);
  326. /* let's keep the nul-terminate tradition */
  327. (*newch)[n] = '\0';
  328. return n;
  329. }
  330. #endif
  331. private size_t
  332. uncompressbuf(struct magic_set *ms, int fd, size_t method,
  333. const unsigned char *old, unsigned char **newch, size_t n)
  334. {
  335. int fdin[2], fdout[2];
  336. ssize_t r;
  337. #ifdef BUILTIN_DECOMPRESS
  338. /* FIXME: This doesn't cope with bzip2 */
  339. if (method == 2)
  340. return uncompressgzipped(ms, old, newch, n);
  341. #endif
  342. (void)fflush(stdout);
  343. (void)fflush(stderr);
  344. if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
  345. file_error(ms, errno, "cannot create pipe");
  346. return NODATA;
  347. }
  348. switch (fork()) {
  349. case 0: /* child */
  350. (void) close(0);
  351. if (fd != -1) {
  352. (void) dup(fd);
  353. (void) lseek(0, (off_t)0, SEEK_SET);
  354. } else {
  355. (void) dup(fdin[0]);
  356. (void) close(fdin[0]);
  357. (void) close(fdin[1]);
  358. }
  359. (void) close(1);
  360. (void) dup(fdout[1]);
  361. (void) close(fdout[0]);
  362. (void) close(fdout[1]);
  363. #ifndef DEBUG
  364. if (compr[method].silent)
  365. (void)close(2);
  366. #endif
  367. (void)execvp(compr[method].argv[0],
  368. (char *const *)(intptr_t)compr[method].argv);
  369. #ifdef DEBUG
  370. (void)fprintf(stderr, "exec `%s' failed (%s)\n",
  371. compr[method].argv[0], strerror(errno));
  372. #endif
  373. exit(1);
  374. /*NOTREACHED*/
  375. case -1:
  376. file_error(ms, errno, "could not fork");
  377. return NODATA;
  378. default: /* parent */
  379. (void) close(fdout[1]);
  380. if (fd == -1) {
  381. (void) close(fdin[0]);
  382. /*
  383. * fork again, to avoid blocking because both
  384. * pipes filled
  385. */
  386. switch (fork()) {
  387. case 0: /* child */
  388. (void)close(fdout[0]);
  389. if (swrite(fdin[1], old, n) != (ssize_t)n) {
  390. #ifdef DEBUG
  391. (void)fprintf(stderr,
  392. "Write failed (%s)\n",
  393. strerror(errno));
  394. #endif
  395. exit(1);
  396. }
  397. exit(0);
  398. /*NOTREACHED*/
  399. case -1:
  400. #ifdef DEBUG
  401. (void)fprintf(stderr, "Fork failed (%s)\n",
  402. strerror(errno));
  403. #endif
  404. exit(1);
  405. /*NOTREACHED*/
  406. default: /* parent */
  407. break;
  408. }
  409. (void) close(fdin[1]);
  410. fdin[1] = -1;
  411. }
  412. if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) {
  413. #ifdef DEBUG
  414. (void)fprintf(stderr, "Malloc failed (%s)\n",
  415. strerror(errno));
  416. #endif
  417. n = 0;
  418. goto err;
  419. }
  420. if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
  421. #ifdef DEBUG
  422. (void)fprintf(stderr, "Read failed (%s)\n",
  423. strerror(errno));
  424. #endif
  425. free(*newch);
  426. n = 0;
  427. newch[0] = '\0';
  428. goto err;
  429. } else {
  430. n = r;
  431. }
  432. /* NUL terminate, as every buffer is handled here. */
  433. (*newch)[n] = '\0';
  434. err:
  435. if (fdin[1] != -1)
  436. (void) close(fdin[1]);
  437. (void) close(fdout[0]);
  438. #ifdef WNOHANG
  439. while (waitpid(-1, NULL, WNOHANG) != -1)
  440. continue;
  441. #else
  442. (void)wait(NULL);
  443. #endif
  444. (void) close(fdin[0]);
  445. return n;
  446. }
  447. }