apprentice.c 26 KB


  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  28. /*
  29. * apprentice - make one pass through /etc/magic, learning its secrets.
  30. */
  31. #include "file.h"
  32. #include "magic.h"
  33. #include <stdlib.h>
  34. #ifdef HAVE_UNISTD_H
  35. #include <unistd.h>
  36. #endif
  37. #include <string.h>
  38. #include <ctype.h>
  39. #include <fcntl.h>
  40. #include <sys/stat.h>
  41. #include <sys/param.h>
  42. #ifdef QUICK
  43. #include <sys/mman.h>
  44. #endif
  45. #ifndef lint
  46. FILE_RCSID("@(#)$Id: apprentice.c,v 1.84 2005/03/25 18:03:18 christos Exp $")
  47. #endif /* lint */
  /*
   * Skip ASCII white space at the scan pointer.  The macro operates on a
   * variable named `l' that must be in scope at the point of use.
   * Wrapped in do/while so that `EATAB;' is a single statement and stays
   * correct inside an unbraced if/else.
   */
  #define EATAB do { \
      while (isascii((unsigned char) *l) && \
          isspace((unsigned char) *l)) \
          ++l; \
  } while (/*CONSTCOND*/0)

  /* Lower-case an upper-case character; any other value is returned as is. */
  #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
      tolower((unsigned char) (l)) : (l))

  /*
   * Work around a bug in headers on Digital Unix.
   * At least confirmed for: OSF1 V4.0 878
   */
  #if defined(__osf__) && defined(__DECC)
  #ifdef MAP_FAILED
  #undef MAP_FAILED
  #endif
  #endif

  /* Fallbacks for systems whose headers do not provide these names. */
  #ifndef MAP_FAILED
  #define MAP_FAILED ((void *) -1)
  #endif
  #ifndef MAP_FILE
  #define MAP_FILE 0
  #endif
  #ifndef MAXPATHLEN
  #define MAXPATHLEN 1024
  #endif

  /* True for the string types that accept the `/' string-option syntax. */
  #define IS_PLAINSTRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \
      (t) == FILE_BESTRING16 || (t) == FILE_LESTRING16)

  /* True for every type whose value is stored as a string, not a number. */
  #define IS_STRING(t) (IS_PLAINSTRING(t) || (t) == FILE_REGEX || \
      (t) == FILE_SEARCH)
  74. private int getvalue(struct magic_set *ms, struct magic *, char **);
  75. private int hextoint(int);
  76. private char *getstr(struct magic_set *, char *, char *, int, int *);
  77. private int parse(struct magic_set *, struct magic **, uint32_t *, char *, int);
  78. private void eatsize(char **);
  79. private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
  80. private int apprentice_file(struct magic_set *, struct magic **, uint32_t *,
  81. const char *, int);
  82. private void byteswap(struct magic *, uint32_t);
  83. private void bs1(struct magic *);
  84. private uint16_t swap2(uint16_t);
  85. private uint32_t swap4(uint32_t);
  86. private char *mkdbname(const char *, char *, size_t, int);
  87. private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
  88. const char *);
  89. private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
  90. const char *);
  91. private int check_format(struct magic_set *, struct magic *);
  92. private size_t maxmagic = 0;
  93. private size_t magicsize = sizeof(struct magic);
  #ifdef COMPILE_ONLY
  int main(int, char *[]);

  /*
   * Stand-alone entry point: compile the magic file named by the single
   * command-line argument.  Exit status is 0 on success and 1 on a usage
   * error, on failure to open a magic handle, or on a compile failure.
   */
  int
  main(int argc, char *argv[])
  {
      struct magic_set *ms;
      int ret = 0;
      /* Report errors under the base name of the executable. */
      char *progname = strrchr(argv[0], '/');

      progname = (progname != NULL) ? progname + 1 : argv[0];

      if (argc != 2) {
          (void)fprintf(stderr, "Usage: %s file\n", progname);
          return 1;
      }

      ms = magic_open(MAGIC_CHECK);
      if (ms == NULL) {
          (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
          return 1;
      }

      if (magic_compile(ms, argv[1]) == -1) {
          ret = 1;
          (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
      }

      magic_close(ms);
      return ret;
  }
  #endif /* COMPILE_ONLY */
  121. /*
  122. * Handle one file.
  123. */
  124. private int
  125. apprentice_1(struct magic_set *ms, const char *fn, int action,
  126. struct mlist *mlist)
  127. {
  128. struct magic *magic = NULL;
  129. uint32_t nmagic = 0;
  130. struct mlist *ml;
  131. int rv = -1;
  132. int mapped;
  133. if (magicsize != FILE_MAGICSIZE) {
  134. file_error(ms, 0, "magic element size %lu != %lu",
  135. (unsigned long)sizeof(*magic),
  136. (unsigned long)FILE_MAGICSIZE);
  137. return -1;
  138. }
  139. if (action == FILE_COMPILE) {
  140. rv = apprentice_file(ms, &magic, &nmagic, fn, action);
  141. if (rv != 0)
  142. return -1;
  143. rv = apprentice_compile(ms, &magic, &nmagic, fn);
  144. free(magic);
  145. return rv;
  146. }
  147. #ifndef COMPILE_ONLY
  148. if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
  149. if (ms->flags & MAGIC_CHECK)
  150. file_magwarn(ms, "using regular magic file `%s'", fn);
  151. rv = apprentice_file(ms, &magic, &nmagic, fn, action);
  152. if (rv != 0)
  153. return -1;
  154. mapped = 0;
  155. }
  156. if (rv == -1)
  157. return rv;
  158. mapped = rv;
  159. if (magic == NULL || nmagic == 0) {
  160. file_delmagic(magic, mapped, nmagic);
  161. return -1;
  162. }
  163. if ((ml = malloc(sizeof(*ml))) == NULL) {
  164. file_delmagic(magic, mapped, nmagic);
  165. file_oomem(ms);
  166. return -1;
  167. }
  168. ml->magic = magic;
  169. ml->nmagic = nmagic;
  170. ml->mapped = mapped;
  171. mlist->prev->next = ml;
  172. ml->prev = mlist->prev;
  173. ml->next = mlist;
  174. mlist->prev = ml;
  175. return 0;
  176. #endif /* COMPILE_ONLY */
  177. }
  178. protected void
  179. file_delmagic(struct magic *p, int type, size_t entries)
  180. {
  181. if (p == NULL)
  182. return;
  183. switch (type) {
  184. case 2:
  185. p--;
  186. (void)munmap((void *)p, sizeof(*p) * (entries + 1));
  187. break;
  188. case 1:
  189. p--;
  190. /*FALLTHROUGH*/
  191. case 0:
  192. free(p);
  193. break;
  194. default:
  195. abort();
  196. }
  197. }
  198. /* const char *fn: list of magic files */
  199. protected struct mlist *
  200. file_apprentice(struct magic_set *ms, const char *fn, int action)
  201. {
  202. char *p, *mfn, *afn = NULL;
  203. int file_err, errs = -1;
  204. struct mlist *mlist;
  205. if (fn == NULL)
  206. fn = getenv("MAGIC");
  207. if (fn == NULL)
  208. fn = MAGIC;
  209. if ((fn = mfn = strdup(fn)) == NULL) {
  210. file_oomem(ms);
  211. return NULL;
  212. }
  213. if ((mlist = malloc(sizeof(*mlist))) == NULL) {
  214. free(mfn);
  215. file_oomem(ms);
  216. return NULL;
  217. }
  218. mlist->next = mlist->prev = mlist;
  219. while (fn) {
  220. p = strchr(fn, PATHSEP);
  221. if (p)
  222. *p++ = '\0';
  223. if (*fn == '\0')
  224. break;
  225. if (ms->flags & MAGIC_MIME) {
  226. if ((afn = malloc(strlen(fn) + 5 + 1)) == NULL) {
  227. free(mfn);
  228. free(mlist);
  229. file_oomem(ms);
  230. return NULL;
  231. }
  232. (void)strcpy(afn, fn);
  233. (void)strcat(afn, ".mime");
  234. fn = afn;
  235. }
  236. file_err = apprentice_1(ms, fn, action, mlist);
  237. if (file_err > errs)
  238. errs = file_err;
  239. if (afn) {
  240. free(afn);
  241. afn = NULL;
  242. }
  243. fn = p;
  244. }
  245. if (errs == -1) {
  246. free(mfn);
  247. free(mlist);
  248. mlist = NULL;
  249. file_error(ms, 0, "could not find any magic files!");
  250. return NULL;
  251. }
  252. free(mfn);
  253. return mlist;
  254. }
  255. /*
  256. * parse from a file
  257. * const char *fn: name of magic file
  258. */
  259. private int
  260. apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
  261. const char *fn, int action)
  262. {
  263. private const char hdr[] =
  264. "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  265. FILE *f;
  266. char line[BUFSIZ+1];
  267. int errs = 0;
  268. f = fopen(ms->file = fn, "r");
  269. if (f == NULL) {
  270. if (errno != ENOENT)
  271. file_error(ms, errno, "cannot read magic file `%s'",
  272. fn);
  273. return -1;
  274. }
  275. maxmagic = MAXMAGIS;
  276. *magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
  277. if (*magicp == NULL) {
  278. (void)fclose(f);
  279. file_oomem(ms);
  280. return -1;
  281. }
  282. /* print silly verbose header for USG compat. */
  283. if (action == FILE_CHECK)
  284. (void)fprintf(stderr, "%s\n", hdr);
  285. /* parse it */
  286. for (ms->line = 1; fgets(line, BUFSIZ, f) != NULL; ms->line++) {
  287. size_t len;
  288. if (line[0]=='#') /* comment, do not parse */
  289. continue;
  290. len = strlen(line);
  291. if (len < 2) /* null line, garbage, etc */
  292. continue;
  293. line[len - 1] = '\0'; /* delete newline */
  294. if (parse(ms, magicp, nmagicp, line, action) != 0)
  295. errs = 1;
  296. }
  297. (void)fclose(f);
  298. if (errs) {
  299. free(*magicp);
  300. *magicp = NULL;
  301. *nmagicp = 0;
  302. }
  303. return errs;
  304. }
  305. /*
  306. * extend the sign bit if the comparison is to be signed
  307. */
  308. protected uint32_t
  309. file_signextend(struct magic_set *ms, struct magic *m, uint32_t v)
  310. {
  311. if (!(m->flag & UNSIGNED))
  312. switch(m->type) {
  313. /*
  314. * Do not remove the casts below. They are
  315. * vital. When later compared with the data,
  316. * the sign extension must have happened.
  317. */
  318. case FILE_BYTE:
  319. v = (char) v;
  320. break;
  321. case FILE_SHORT:
  322. case FILE_BESHORT:
  323. case FILE_LESHORT:
  324. v = (short) v;
  325. break;
  326. case FILE_DATE:
  327. case FILE_BEDATE:
  328. case FILE_LEDATE:
  329. case FILE_LDATE:
  330. case FILE_BELDATE:
  331. case FILE_LELDATE:
  332. case FILE_LONG:
  333. case FILE_BELONG:
  334. case FILE_LELONG:
  335. v = (int32_t) v;
  336. break;
  337. case FILE_STRING:
  338. case FILE_PSTRING:
  339. case FILE_BESTRING16:
  340. case FILE_LESTRING16:
  341. case FILE_REGEX:
  342. case FILE_SEARCH:
  343. break;
  344. default:
  345. if (ms->flags & MAGIC_CHECK)
  346. file_magwarn(ms, "cannot happen: m->type=%d\n",
  347. m->type);
  348. return ~0U;
  349. }
  350. return v;
  351. }
  352. /*
  353. * parse one line from magic file, put into magic[index++] if valid
  354. */
  355. private int
  356. parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l,
  357. int action)
  358. {
  359. int i = 0;
  360. struct magic *m;
  361. char *t;
  362. private const char *fops = FILE_OPS;
  363. uint32_t val;
  364. #define ALLOC_INCR 200
  365. if (*nmagicp + 1 >= maxmagic){
  366. maxmagic += ALLOC_INCR;
  367. if ((m = (struct magic *) realloc(*magicp,
  368. sizeof(struct magic) * maxmagic)) == NULL) {
  369. file_oomem(ms);
  370. if (*magicp)
  371. free(*magicp);
  372. return -1;
  373. }
  374. *magicp = m;
  375. memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
  376. * ALLOC_INCR);
  377. }
  378. m = &(*magicp)[*nmagicp];
  379. m->flag = 0;
  380. m->cont_level = 0;
  381. while (*l == '>') {
  382. ++l; /* step over */
  383. m->cont_level++;
  384. }
  385. if (m->cont_level != 0 && *l == '&') {
  386. ++l; /* step over */
  387. m->flag |= OFFADD;
  388. }
  389. if (m->cont_level != 0 && *l == '(') {
  390. ++l; /* step over */
  391. m->flag |= INDIR;
  392. if (m->flag & OFFADD)
  393. m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
  394. }
  395. if (m->cont_level != 0 && *l == '&') {
  396. ++l; /* step over */
  397. m->flag |= OFFADD;
  398. }
  399. /* get offset, then skip over it */
  400. m->offset = (uint32_t)strtoul(l, &t, 0);
  401. if (l == t)
  402. if (ms->flags & MAGIC_CHECK)
  403. file_magwarn(ms, "offset `%s' invalid", l);
  404. l = t;
  405. if (m->flag & INDIR) {
  406. m->in_type = FILE_LONG;
  407. m->in_offset = 0;
  408. /*
  409. * read [.lbs][+-]nnnnn)
  410. */
  411. if (*l == '.') {
  412. l++;
  413. switch (*l) {
  414. case 'l':
  415. m->in_type = FILE_LELONG;
  416. break;
  417. case 'L':
  418. m->in_type = FILE_BELONG;
  419. break;
  420. case 'h':
  421. case 's':
  422. m->in_type = FILE_LESHORT;
  423. break;
  424. case 'H':
  425. case 'S':
  426. m->in_type = FILE_BESHORT;
  427. break;
  428. case 'c':
  429. case 'b':
  430. case 'C':
  431. case 'B':
  432. m->in_type = FILE_BYTE;
  433. break;
  434. default:
  435. if (ms->flags & MAGIC_CHECK)
  436. file_magwarn(ms,
  437. "indirect offset type `%c' invalid",
  438. *l);
  439. break;
  440. }
  441. l++;
  442. }
  443. if (*l == '~') {
  444. m->in_op |= FILE_OPINVERSE;
  445. l++;
  446. }
  447. switch (*l) {
  448. case '&':
  449. m->in_op |= FILE_OPAND;
  450. l++;
  451. break;
  452. case '|':
  453. m->in_op |= FILE_OPOR;
  454. l++;
  455. break;
  456. case '^':
  457. m->in_op |= FILE_OPXOR;
  458. l++;
  459. break;
  460. case '+':
  461. m->in_op |= FILE_OPADD;
  462. l++;
  463. break;
  464. case '-':
  465. m->in_op |= FILE_OPMINUS;
  466. l++;
  467. break;
  468. case '*':
  469. m->in_op |= FILE_OPMULTIPLY;
  470. l++;
  471. break;
  472. case '/':
  473. m->in_op |= FILE_OPDIVIDE;
  474. l++;
  475. break;
  476. case '%':
  477. m->in_op |= FILE_OPMODULO;
  478. l++;
  479. break;
  480. }
  481. if (*l == '(') {
  482. m->in_op |= FILE_OPINDIRECT;
  483. l++;
  484. }
  485. if (isdigit((unsigned char)*l) || *l == '-')
  486. m->in_offset = (int32_t)strtol(l, &t, 0);
  487. else
  488. t = l;
  489. if (*t++ != ')' ||
  490. ((m->in_op & FILE_OPINDIRECT) && *t++ != ')'))
  491. if (ms->flags & MAGIC_CHECK)
  492. file_magwarn(ms,
  493. "missing ')' in indirect offset");
  494. l = t;
  495. }
  496. while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
  497. ++l;
  498. EATAB;
  499. #define NBYTE 4
  500. #define NSHORT 5
  501. #define NLONG 4
  502. #define NSTRING 6
  503. #define NDATE 4
  504. #define NBESHORT 7
  505. #define NBELONG 6
  506. #define NBEDATE 6
  507. #define NLESHORT 7
  508. #define NLELONG 6
  509. #define NLEDATE 6
  510. #define NPSTRING 7
  511. #define NLDATE 5
  512. #define NBELDATE 7
  513. #define NLELDATE 7
  514. #define NREGEX 5
  515. #define NBESTRING16 10
  516. #define NLESTRING16 10
  517. #define NSEARCH 6
  518. if (*l == 'u') {
  519. ++l;
  520. m->flag |= UNSIGNED;
  521. }
  522. /* get type, skip it */
  523. if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
  524. m->type = FILE_BYTE;
  525. l += NBYTE;
  526. } else if (strncmp(l, "byte", NBYTE)==0) {
  527. m->type = FILE_BYTE;
  528. l += NBYTE;
  529. } else if (strncmp(l, "short", NSHORT)==0) {
  530. m->type = FILE_SHORT;
  531. l += NSHORT;
  532. } else if (strncmp(l, "long", NLONG)==0) {
  533. m->type = FILE_LONG;
  534. l += NLONG;
  535. } else if (strncmp(l, "string", NSTRING)==0) {
  536. m->type = FILE_STRING;
  537. l += NSTRING;
  538. } else if (strncmp(l, "date", NDATE)==0) {
  539. m->type = FILE_DATE;
  540. l += NDATE;
  541. } else if (strncmp(l, "beshort", NBESHORT)==0) {
  542. m->type = FILE_BESHORT;
  543. l += NBESHORT;
  544. } else if (strncmp(l, "belong", NBELONG)==0) {
  545. m->type = FILE_BELONG;
  546. l += NBELONG;
  547. } else if (strncmp(l, "bedate", NBEDATE)==0) {
  548. m->type = FILE_BEDATE;
  549. l += NBEDATE;
  550. } else if (strncmp(l, "leshort", NLESHORT)==0) {
  551. m->type = FILE_LESHORT;
  552. l += NLESHORT;
  553. } else if (strncmp(l, "lelong", NLELONG)==0) {
  554. m->type = FILE_LELONG;
  555. l += NLELONG;
  556. } else if (strncmp(l, "ledate", NLEDATE)==0) {
  557. m->type = FILE_LEDATE;
  558. l += NLEDATE;
  559. } else if (strncmp(l, "pstring", NPSTRING)==0) {
  560. m->type = FILE_PSTRING;
  561. l += NPSTRING;
  562. } else if (strncmp(l, "ldate", NLDATE)==0) {
  563. m->type = FILE_LDATE;
  564. l += NLDATE;
  565. } else if (strncmp(l, "beldate", NBELDATE)==0) {
  566. m->type = FILE_BELDATE;
  567. l += NBELDATE;
  568. } else if (strncmp(l, "leldate", NLELDATE)==0) {
  569. m->type = FILE_LELDATE;
  570. l += NLELDATE;
  571. } else if (strncmp(l, "regex", NREGEX)==0) {
  572. m->type = FILE_REGEX;
  573. l += NREGEX;
  574. } else if (strncmp(l, "bestring16", NBESTRING16)==0) {
  575. m->type = FILE_BESTRING16;
  576. l += NBESTRING16;
  577. } else if (strncmp(l, "lestring16", NLESTRING16)==0) {
  578. m->type = FILE_LESTRING16;
  579. l += NLESTRING16;
  580. } else if (strncmp(l, "search", NSEARCH)==0) {
  581. m->type = FILE_SEARCH;
  582. l += NSEARCH;
  583. } else {
  584. if (ms->flags & MAGIC_CHECK)
  585. file_magwarn(ms, "type `%s' invalid", l);
  586. return -1;
  587. }
  588. /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
  589. /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
  590. if (*l == '~') {
  591. if (!IS_STRING(m->type))
  592. m->mask_op |= FILE_OPINVERSE;
  593. ++l;
  594. }
  595. if ((t = strchr(fops, *l)) != NULL) {
  596. uint32_t op = (uint32_t)(t - fops);
  597. if (op != FILE_OPDIVIDE || !IS_PLAINSTRING(m->type)) {
  598. ++l;
  599. m->mask_op |= op;
  600. val = (uint32_t)strtoul(l, &l, 0);
  601. m->mask = file_signextend(ms, m, val);
  602. eatsize(&l);
  603. } else {
  604. m->mask = 0L;
  605. while (!isspace((unsigned char)*++l)) {
  606. switch (*l) {
  607. case CHAR_IGNORE_LOWERCASE:
  608. m->mask |= STRING_IGNORE_LOWERCASE;
  609. break;
  610. case CHAR_COMPACT_BLANK:
  611. m->mask |= STRING_COMPACT_BLANK;
  612. break;
  613. case CHAR_COMPACT_OPTIONAL_BLANK:
  614. m->mask |=
  615. STRING_COMPACT_OPTIONAL_BLANK;
  616. break;
  617. default:
  618. if (ms->flags & MAGIC_CHECK)
  619. file_magwarn(ms,
  620. "string extension `%c' invalid",
  621. *l);
  622. return -1;
  623. }
  624. }
  625. ++l;
  626. }
  627. }
  628. /*
  629. * We used to set mask to all 1's here, instead let's just not do
  630. * anything if mask = 0 (unless you have a better idea)
  631. */
  632. EATAB;
  633. switch (*l) {
  634. case '>':
  635. case '<':
  636. /* Old-style anding: "0 byte &0x80 dynamically linked" */
  637. case '&':
  638. case '^':
  639. case '=':
  640. m->reln = *l;
  641. ++l;
  642. if (*l == '=') {
  643. /* HP compat: ignore &= etc. */
  644. ++l;
  645. }
  646. break;
  647. case '!':
  648. m->reln = *l;
  649. ++l;
  650. break;
  651. default:
  652. if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
  653. isspace((unsigned char)l[1])) || !l[1])) {
  654. m->reln = *l;
  655. ++l;
  656. goto GetDesc; /* Bill The Cat */
  657. }
  658. m->reln = '=';
  659. break;
  660. }
  661. EATAB;
  662. if (getvalue(ms, m, &l))
  663. return -1;
  664. /*
  665. * TODO finish this macro and start using it!
  666. * #define offsetcheck {if (offset > HOWMANY-1)
  667. * magwarn("offset too big"); }
  668. */
  669. /*
  670. * now get last part - the description
  671. */
  672. GetDesc:
  673. EATAB;
  674. if (l[0] == '\b') {
  675. ++l;
  676. m->nospflag = 1;
  677. } else if ((l[0] == '\\') && (l[1] == 'b')) {
  678. ++l;
  679. ++l;
  680. m->nospflag = 1;
  681. } else
  682. m->nospflag = 0;
  683. while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC)
  684. /* NULLBODY */;
  685. if (ms->flags & MAGIC_CHECK) {
  686. if (!check_format(ms, m))
  687. return -1;
  688. }
  689. #ifndef COMPILE_ONLY
  690. if (action == FILE_CHECK) {
  691. file_mdump(m);
  692. }
  693. #endif
  694. ++(*nmagicp); /* make room for next */
  695. return 0;
  696. }
  697. /*
  698. * Check that the optional printf format in description matches
  699. * the type of the magic.
  700. */
  701. private int
  702. check_format(struct magic_set *ms, struct magic *m)
  703. {
  704. static const char *formats[] = { FILE_FORMAT_STRING };
  705. static const char *names[] = { FILE_FORMAT_NAME };
  706. char *ptr;
  707. for (ptr = m->desc; *ptr; ptr++)
  708. if (*ptr == '%')
  709. break;
  710. if (*ptr == '\0') {
  711. /* No format string; ok */
  712. return 1;
  713. }
  714. if (m->type >= sizeof(formats)/sizeof(formats[0])) {
  715. file_magwarn(ms, "Internal error inconsistency between m->type"
  716. " and format strings");
  717. return 0;
  718. }
  719. if (formats[m->type] == NULL) {
  720. file_magwarn(ms, "No format string for `%s' with description "
  721. "`%s'", m->desc, names[m->type]);
  722. return 0;
  723. }
  724. for (; *ptr; ptr++) {
  725. if (*ptr == 'l' || *ptr == 'h') {
  726. /* XXX: we should really fix this one day */
  727. continue;
  728. }
  729. if (islower((unsigned char)*ptr) || *ptr == 'X')
  730. break;
  731. }
  732. if (*ptr == '\0') {
  733. /* Missing format string; bad */
  734. file_magwarn(ms, "Invalid format `%s' for type `%s'",
  735. m->desc, names[m->type]);
  736. return 0;
  737. }
  738. if (strchr(formats[m->type], *ptr) == NULL) {
  739. file_magwarn(ms, "Printf format `%c' is not valid for type `%s'"
  740. " in description `%s'",
  741. *ptr, names[m->type], m->desc);
  742. return 0;
  743. }
  744. return 1;
  745. }
  746. /*
  747. * Read a numeric value from a pointer, into the value union of a magic
  748. * pointer, according to the magic type. Update the string pointer to point
  749. * just after the number read. Return 0 for success, non-zero for failure.
  750. */
  751. private int
  752. getvalue(struct magic_set *ms, struct magic *m, char **p)
  753. {
  754. int slen;
  755. switch (m->type) {
  756. case FILE_BESTRING16:
  757. case FILE_LESTRING16:
  758. case FILE_STRING:
  759. case FILE_PSTRING:
  760. case FILE_REGEX:
  761. case FILE_SEARCH:
  762. *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen);
  763. if (*p == NULL) {
  764. if (ms->flags & MAGIC_CHECK)
  765. file_magwarn(ms, "cannot get string from `%s'",
  766. m->value.s);
  767. return -1;
  768. }
  769. m->vallen = slen;
  770. return 0;
  771. default:
  772. if (m->reln != 'x') {
  773. m->value.l = file_signextend(ms, m,
  774. (uint32_t)strtoul(*p, p, 0));
  775. eatsize(p);
  776. }
  777. return 0;
  778. }
  779. }
  780. /*
  781. * Convert a string containing C character escapes. Stop at an unescaped
  782. * space or tab.
  783. * Copy the converted version to "p", returning its length in *slen.
  784. * Return updated scan pointer as function result.
  785. */
  786. private char *
  787. getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen)
  788. {
  789. char *origs = s, *origp = p;
  790. char *pmax = p + plen - 1;
  791. int c;
  792. int val;
  793. while ((c = *s++) != '\0') {
  794. if (isspace((unsigned char) c))
  795. break;
  796. if (p >= pmax) {
  797. file_error(ms, 0, "string too long: `%s'", origs);
  798. return NULL;
  799. }
  800. if(c == '\\') {
  801. switch(c = *s++) {
  802. case '\0':
  803. goto out;
  804. default:
  805. *p++ = (char) c;
  806. break;
  807. case 'n':
  808. *p++ = '\n';
  809. break;
  810. case 'r':
  811. *p++ = '\r';
  812. break;
  813. case 'b':
  814. *p++ = '\b';
  815. break;
  816. case 't':
  817. *p++ = '\t';
  818. break;
  819. case 'f':
  820. *p++ = '\f';
  821. break;
  822. case 'v':
  823. *p++ = '\v';
  824. break;
  825. /* \ and up to 3 octal digits */
  826. case '0':
  827. case '1':
  828. case '2':
  829. case '3':
  830. case '4':
  831. case '5':
  832. case '6':
  833. case '7':
  834. val = c - '0';
  835. c = *s++; /* try for 2 */
  836. if(c >= '0' && c <= '7') {
  837. val = (val<<3) | (c - '0');
  838. c = *s++; /* try for 3 */
  839. if(c >= '0' && c <= '7')
  840. val = (val<<3) | (c-'0');
  841. else
  842. --s;
  843. }
  844. else
  845. --s;
  846. *p++ = (char)val;
  847. break;
  848. /* \x and up to 2 hex digits */
  849. case 'x':
  850. val = 'x'; /* Default if no digits */
  851. c = hextoint(*s++); /* Get next char */
  852. if (c >= 0) {
  853. val = c;
  854. c = hextoint(*s++);
  855. if (c >= 0)
  856. val = (val << 4) + c;
  857. else
  858. --s;
  859. } else
  860. --s;
  861. *p++ = (char)val;
  862. break;
  863. }
  864. } else
  865. *p++ = (char)c;
  866. }
  867. out:
  868. *p = '\0';
  869. *slen = p - origp;
  870. return s;
  871. }
  /*
   * Value of a single hexadecimal digit character, or -1 when `c' is not
   * one.  Both upper- and lower-case letters are accepted.
   */
  private int
  hextoint(int c)
  {
      const unsigned char uc = (unsigned char) c;

      if (!isascii(uc))
          return -1;
      if (isdigit(uc))
          return c - '0';
      if (c >= 'a' && c <= 'f')
          return 10 + (c - 'a');
      if (c >= 'A' && c <= 'F')
          return 10 + (c - 'A');
      return -1;
  }
  /*
   * Print a string containing C character escapes.
   *
   * Writes `s' to `fp'.  Bytes in the range 040..0176 are written as they
   * are; newline, carriage return, backspace, tab, form feed and vertical
   * tab are written as \n \r \b \t \f \v, and every other byte as a
   * backslash followed by three octal digits.  When `len' equals ~0U the
   * string is taken to be NUL-terminated; otherwise exactly `len' bytes
   * are printed and no byte beyond them is read.
   */
  protected void
  file_showstr(FILE *fp, const char *s, size_t len)
  {
      char c;

      for (;;) {
          if (len == ~0U) {
              c = *s++;
              if (c == '\0')
                  break;
          }
          else {
              /* Test the count first so s[len] is never touched. */
              if (len-- == 0)
                  break;
              c = *s++;
          }
          if (c >= 040 && c <= 0176)    /* TODO isprint && !iscntrl */
              (void) fputc(c, fp);
          else {
              (void) fputc('\\', fp);
              switch (c) {

              case '\n':
                  (void) fputc('n', fp);
                  break;

              case '\r':
                  (void) fputc('r', fp);
                  break;

              case '\b':
                  (void) fputc('b', fp);
                  break;

              case '\t':
                  (void) fputc('t', fp);
                  break;

              case '\f':
                  (void) fputc('f', fp);
                  break;

              case '\v':
                  (void) fputc('v', fp);
                  break;

              default:
                  (void) fprintf(fp, "%.3o", c & 0377);
                  break;
              }
          }
      }
  }
  933. /*
  934. * eatsize(): Eat the size spec from a number [eg. 10UL]
  935. */
  936. private void
  937. eatsize(char **p)
  938. {
  939. char *l = *p;
  940. if (LOWCASE(*l) == 'u')
  941. l++;
  942. switch (LOWCASE(*l)) {
  943. case 'l': /* long */
  944. case 's': /* short */
  945. case 'h': /* short */
  946. case 'b': /* char/byte */
  947. case 'c': /* char/byte */
  948. l++;
  949. /*FALLTHROUGH*/
  950. default:
  951. break;
  952. }
  953. *p = l;
  954. }
  955. /*
  956. * handle a compiled file.
  957. */
  958. private int
  959. apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
  960. const char *fn)
  961. {
  962. int fd;
  963. struct stat st;
  964. uint32_t *ptr;
  965. uint32_t version;
  966. int needsbyteswap;
  967. char buf[MAXPATHLEN];
  968. char *dbname = mkdbname(fn, buf, sizeof(buf), 0);
  969. void *mm = NULL;
  970. if (dbname == NULL)
  971. return -1;
  972. if ((fd = open(dbname, O_RDONLY)) == -1)
  973. return -1;
  974. if (fstat(fd, &st) == -1) {
  975. file_error(ms, errno, "cannot stat `%s'", dbname);
  976. goto error;
  977. }
  978. if (st.st_size < 16) {
  979. file_error(ms, 0, "file `%s' is too small", dbname);
  980. goto error;
  981. }
  982. #ifdef QUICK
  983. if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
  984. MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
  985. file_error(ms, errno, "cannot map `%s'", dbname);
  986. goto error;
  987. }
  988. #define RET 2
  989. #else
  990. if ((mm = malloc((size_t)st.st_size)) == NULL) {
  991. file_oomem(ms);
  992. goto error;
  993. }
  994. if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
  995. file_badread(ms);
  996. goto error;
  997. }
  998. #define RET 1
  999. #endif
  1000. *magicp = mm;
  1001. (void)close(fd);
  1002. fd = -1;
  1003. ptr = (uint32_t *)(void *)*magicp;
  1004. if (*ptr != MAGICNO) {
  1005. if (swap4(*ptr) != MAGICNO) {
  1006. file_error(ms, 0, "bad magic in `%s'");
  1007. goto error;
  1008. }
  1009. needsbyteswap = 1;
  1010. } else
  1011. needsbyteswap = 0;
  1012. if (needsbyteswap)
  1013. version = swap4(ptr[1]);
  1014. else
  1015. version = ptr[1];
  1016. if (version != VERSIONNO) {
  1017. file_error(ms, 0, "version mismatch (%d != %d) in `%s'",
  1018. version, VERSIONNO, dbname);
  1019. goto error;
  1020. }
  1021. *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
  1022. (*magicp)++;
  1023. if (needsbyteswap)
  1024. byteswap(*magicp, *nmagicp);
  1025. return RET;
  1026. error:
  1027. if (fd != -1)
  1028. (void)close(fd);
  1029. if (mm) {
  1030. #ifdef QUICK
  1031. (void)munmap((void *)mm, (size_t)st.st_size);
  1032. #else
  1033. free(mm);
  1034. #endif
  1035. } else {
  1036. *magicp = NULL;
  1037. *nmagicp = 0;
  1038. }
  1039. return -1;
  1040. }
  1041. private const uint32_t ar[] = {
  1042. MAGICNO, VERSIONNO
  1043. };
  1044. /*
  1045. * handle an mmaped file.
  1046. */
  1047. private int
  1048. apprentice_compile(struct magic_set *ms, struct magic **magicp,
  1049. uint32_t *nmagicp, const char *fn)
  1050. {
  1051. int fd;
  1052. char buf[MAXPATHLEN];
  1053. char *dbname = mkdbname(fn, buf, sizeof(buf), 1);
  1054. if (dbname == NULL)
  1055. return -1;
  1056. if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
  1057. file_error(ms, errno, "cannot open `%s'", dbname);
  1058. return -1;
  1059. }
  1060. if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
  1061. file_error(ms, errno, "error writing `%s'", dbname);
  1062. return -1;
  1063. }
  1064. if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
  1065. != sizeof(struct magic)) {
  1066. file_error(ms, errno, "error seeking `%s'", dbname);
  1067. return -1;
  1068. }
  1069. if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
  1070. != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
  1071. file_error(ms, errno, "error writing `%s'", dbname);
  1072. return -1;
  1073. }
  1074. (void)close(fd);
  1075. return 0;
  1076. }
  /* Suffix appended to a magic file name to form its database name. */
  private const char ext[] = ".mgc";

  /*
   * make a dbname
   *
   * Writes `fn' followed by the database suffix into `buf' (of `bufsiz'
   * bytes).  When `strip' is non-zero, everything up to and including the
   * last '/' in `fn' is dropped first.  Returns `buf', or NULL when the
   * name does not fit, so that a truncated name is never used.
   */
  private char *
  mkdbname(const char *fn, char *buf, size_t bufsiz, int strip)
  {
      int n;

      if (strip) {
          const char *p;
          if ((p = strrchr(fn, '/')) != NULL)
              fn = ++p;
      }

      n = snprintf(buf, bufsiz, "%s%s", fn, ext);
      if (n < 0 || (size_t)n >= bufsiz)
          return NULL;
      return buf;
  }
  1092. /*
  1093. * Byteswap an mmap'ed file if needed
  1094. */
  1095. private void
  1096. byteswap(struct magic *magic, uint32_t nmagic)
  1097. {
  1098. uint32_t i;
  1099. for (i = 0; i < nmagic; i++)
  1100. bs1(&magic[i]);
  1101. }
  /*
   * swap a short
   *
   * Returns `sv' with its two bytes exchanged.
   */
  private uint16_t
  swap2(uint16_t sv)
  {
      const unsigned int lo = sv & 0x00ffU;
      const unsigned int hi = (sv >> 8) & 0x00ffU;

      return (uint16_t)((lo << 8) | hi);
  }
  /*
   * swap an int
   *
   * Returns `sv' with the order of its four bytes reversed.
   */
  private uint32_t
  swap4(uint32_t sv)
  {
      const uint32_t b0 = sv & 0x000000ffU;
      const uint32_t b1 = sv & 0x0000ff00U;
      const uint32_t b2 = sv & 0x00ff0000U;
      const uint32_t b3 = sv & 0xff000000U;

      return (b0 << 24) | (b1 << 8) | (b2 >> 8) | (b3 >> 24);
  }
  1130. /*
  1131. * byteswap a single magic entry
  1132. */
  1133. private void
  1134. bs1(struct magic *m)
  1135. {
  1136. m->cont_level = swap2(m->cont_level);
  1137. m->offset = swap4((uint32_t)m->offset);
  1138. m->in_offset = swap4((uint32_t)m->in_offset);
  1139. if (!IS_STRING(m->type))
  1140. m->value.l = swap4(m->value.l);
  1141. m->mask = swap4(m->mask);
  1142. }