apprentice.c 25 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231
  1. /*
  2. * Copyright (c) Ian F. Darwin 1986-1995.
  3. * Software written by Ian F. Darwin and others;
  4. * maintained 1995-present by Christos Zoulas and others.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice immediately at the beginning of the file, without modification,
  11. * this list of conditions, and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  20. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  28. /*
  29. * apprentice - make one pass through /etc/magic, learning its secrets.
  30. */
  31. #include "file.h"
  32. #include "magic.h"
  33. #include <stdlib.h>
  34. #ifdef HAVE_UNISTD_H
  35. #include <unistd.h>
  36. #endif
  37. #include <string.h>
  38. #include <ctype.h>
  39. #include <fcntl.h>
  40. #include <sys/stat.h>
  41. #include <sys/param.h>
  42. #ifdef QUICK
  43. #include <sys/mman.h>
  44. #endif
  45. #ifndef lint
  46. FILE_RCSID("@(#)$Id: apprentice.c,v 1.82 2004/11/24 18:56:04 christos Exp $")
  47. #endif /* lint */
  48. #define EATAB {while (isascii((unsigned char) *l) && \
  49. isspace((unsigned char) *l)) ++l;}
  50. #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
  51. tolower((unsigned char) (l)) : (l))
  52. /*
  53. * Work around a bug in headers on Digital Unix.
  54. * At least confirmed for: OSF1 V4.0 878
  55. */
  56. #if defined(__osf__) && defined(__DECC)
  57. #ifdef MAP_FAILED
  58. #undef MAP_FAILED
  59. #endif
  60. #endif
  61. #ifndef MAP_FAILED
  62. #define MAP_FAILED (void *) -1
  63. #endif
  64. #ifndef MAP_FILE
  65. #define MAP_FILE 0
  66. #endif
  67. #ifndef MAXPATHLEN
  68. #define MAXPATHLEN 1024
  69. #endif
  70. #define IS_STRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \
  71. (t) == FILE_BESTRING16 || (t) == FILE_LESTRING16)
  72. private int getvalue(struct magic_set *ms, struct magic *, char **);
  73. private int hextoint(int);
  74. private char *getstr(struct magic_set *, char *, char *, int, int *);
  75. private int parse(struct magic_set *, struct magic **, uint32_t *, char *, int);
  76. private void eatsize(char **);
  77. private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
  78. private int apprentice_file(struct magic_set *, struct magic **, uint32_t *,
  79. const char *, int);
  80. private void byteswap(struct magic *, uint32_t);
  81. private void bs1(struct magic *);
  82. private uint16_t swap2(uint16_t);
  83. private uint32_t swap4(uint32_t);
  84. private char *mkdbname(const char *, char *, size_t, int);
  85. private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
  86. const char *);
  87. private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
  88. const char *);
  89. private int check_format(struct magic_set *, struct magic *);
  90. private size_t maxmagic = 0;
  91. private size_t magicsize = sizeof(struct magic);
  92. #ifdef COMPILE_ONLY
  93. int main(int, char *[]);
  94. int
  95. main(int argc, char *argv[])
  96. {
  97. int ret;
  98. struct magic_set *ms;
  99. char *progname;
  100. if ((progname = strrchr(argv[0], '/')) != NULL)
  101. progname++;
  102. else
  103. progname = argv[0];
  104. if (argc != 2) {
  105. (void)fprintf(stderr, "Usage: %s file\n", progname);
  106. return 1;
  107. }
  108. if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
  109. (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
  110. return 1;
  111. }
  112. ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
  113. if (ret == 1)
  114. (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
  115. magic_close(ms);
  116. return ret;
  117. }
  118. #endif /* COMPILE_ONLY */
  119. /*
  120. * Handle one file.
  121. */
  122. private int
  123. apprentice_1(struct magic_set *ms, const char *fn, int action,
  124. struct mlist *mlist)
  125. {
  126. struct magic *magic = NULL;
  127. uint32_t nmagic = 0;
  128. struct mlist *ml;
  129. int rv = -1;
  130. int mapped;
  131. if (magicsize != FILE_MAGICSIZE) {
  132. file_error(ms, 0, "magic element size %lu != %lu",
  133. (unsigned long)sizeof(*magic),
  134. (unsigned long)FILE_MAGICSIZE);
  135. return -1;
  136. }
  137. if (action == FILE_COMPILE) {
  138. rv = apprentice_file(ms, &magic, &nmagic, fn, action);
  139. if (rv != 0)
  140. return -1;
  141. rv = apprentice_compile(ms, &magic, &nmagic, fn);
  142. free(magic);
  143. return rv;
  144. }
  145. #ifndef COMPILE_ONLY
  146. if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
  147. if (ms->flags & MAGIC_CHECK)
  148. file_magwarn(ms, "using regular magic file `%s'", fn);
  149. rv = apprentice_file(ms, &magic, &nmagic, fn, action);
  150. if (rv != 0)
  151. return -1;
  152. mapped = 0;
  153. }
  154. if (rv == -1)
  155. return rv;
  156. mapped = rv;
  157. if (magic == NULL || nmagic == 0) {
  158. file_delmagic(magic, mapped, nmagic);
  159. return -1;
  160. }
  161. if ((ml = malloc(sizeof(*ml))) == NULL) {
  162. file_delmagic(magic, mapped, nmagic);
  163. file_oomem(ms);
  164. return -1;
  165. }
  166. ml->magic = magic;
  167. ml->nmagic = nmagic;
  168. ml->mapped = mapped;
  169. mlist->prev->next = ml;
  170. ml->prev = mlist->prev;
  171. ml->next = mlist;
  172. mlist->prev = ml;
  173. return 0;
  174. #endif /* COMPILE_ONLY */
  175. }
  176. protected void
  177. file_delmagic(struct magic *p, int type, size_t entries)
  178. {
  179. if (p == NULL)
  180. return;
  181. switch (type) {
  182. case 2:
  183. p--;
  184. (void)munmap((void *)p, sizeof(*p) * (entries + 1));
  185. break;
  186. case 1:
  187. p--;
  188. /*FALLTHROUGH*/
  189. case 0:
  190. free(p);
  191. break;
  192. default:
  193. abort();
  194. }
  195. }
  196. /* const char *fn: list of magic files */
  197. protected struct mlist *
  198. file_apprentice(struct magic_set *ms, const char *fn, int action)
  199. {
  200. char *p, *mfn, *afn = NULL;
  201. int file_err, errs = -1;
  202. struct mlist *mlist;
  203. if (fn == NULL)
  204. fn = getenv("MAGIC");
  205. if (fn == NULL)
  206. fn = MAGIC;
  207. if ((fn = mfn = strdup(fn)) == NULL) {
  208. file_oomem(ms);
  209. return NULL;
  210. }
  211. if ((mlist = malloc(sizeof(*mlist))) == NULL) {
  212. free(mfn);
  213. file_oomem(ms);
  214. return NULL;
  215. }
  216. mlist->next = mlist->prev = mlist;
  217. while (fn) {
  218. p = strchr(fn, PATHSEP);
  219. if (p)
  220. *p++ = '\0';
  221. if (*fn == '\0')
  222. break;
  223. if (ms->flags & MAGIC_MIME) {
  224. if ((afn = malloc(strlen(fn) + 5 + 1)) == NULL) {
  225. free(mfn);
  226. free(mlist);
  227. file_oomem(ms);
  228. return NULL;
  229. }
  230. (void)strcpy(afn, fn);
  231. (void)strcat(afn, ".mime");
  232. fn = afn;
  233. }
  234. file_err = apprentice_1(ms, fn, action, mlist);
  235. if (file_err > errs)
  236. errs = file_err;
  237. if (afn) {
  238. free(afn);
  239. afn = NULL;
  240. }
  241. fn = p;
  242. }
  243. if (errs == -1) {
  244. free(mfn);
  245. free(mlist);
  246. mlist = NULL;
  247. file_error(ms, 0, "could not find any magic files!");
  248. return NULL;
  249. }
  250. free(mfn);
  251. return mlist;
  252. }
  253. /*
  254. * parse from a file
  255. * const char *fn: name of magic file
  256. */
  257. private int
  258. apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
  259. const char *fn, int action)
  260. {
  261. private const char hdr[] =
  262. "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  263. FILE *f;
  264. char line[BUFSIZ+1];
  265. int errs = 0;
  266. f = fopen(ms->file = fn, "r");
  267. if (f == NULL) {
  268. if (errno != ENOENT)
  269. file_error(ms, errno, "cannot read magic file `%s'",
  270. fn);
  271. return -1;
  272. }
  273. maxmagic = MAXMAGIS;
  274. *magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
  275. if (*magicp == NULL) {
  276. (void)fclose(f);
  277. file_oomem(ms);
  278. return -1;
  279. }
  280. /* print silly verbose header for USG compat. */
  281. if (action == FILE_CHECK)
  282. (void)fprintf(stderr, "%s\n", hdr);
  283. /* parse it */
  284. for (ms->line = 1; fgets(line, BUFSIZ, f) != NULL; ms->line++) {
  285. size_t len;
  286. if (line[0]=='#') /* comment, do not parse */
  287. continue;
  288. len = strlen(line);
  289. if (len < 2) /* null line, garbage, etc */
  290. continue;
  291. line[len - 1] = '\0'; /* delete newline */
  292. if (parse(ms, magicp, nmagicp, line, action) != 0)
  293. errs = 1;
  294. }
  295. (void)fclose(f);
  296. if (errs) {
  297. free(*magicp);
  298. *magicp = NULL;
  299. *nmagicp = 0;
  300. }
  301. return errs;
  302. }
  303. /*
  304. * extend the sign bit if the comparison is to be signed
  305. */
  306. protected uint32_t
  307. file_signextend(struct magic_set *ms, struct magic *m, uint32_t v)
  308. {
  309. if (!(m->flag & UNSIGNED))
  310. switch(m->type) {
  311. /*
  312. * Do not remove the casts below. They are
  313. * vital. When later compared with the data,
  314. * the sign extension must have happened.
  315. */
  316. case FILE_BYTE:
  317. v = (char) v;
  318. break;
  319. case FILE_SHORT:
  320. case FILE_BESHORT:
  321. case FILE_LESHORT:
  322. v = (short) v;
  323. break;
  324. case FILE_DATE:
  325. case FILE_BEDATE:
  326. case FILE_LEDATE:
  327. case FILE_LDATE:
  328. case FILE_BELDATE:
  329. case FILE_LELDATE:
  330. case FILE_LONG:
  331. case FILE_BELONG:
  332. case FILE_LELONG:
  333. v = (int32_t) v;
  334. break;
  335. case FILE_STRING:
  336. case FILE_PSTRING:
  337. case FILE_BESTRING16:
  338. case FILE_LESTRING16:
  339. break;
  340. case FILE_REGEX:
  341. break;
  342. default:
  343. if (ms->flags & MAGIC_CHECK)
  344. file_magwarn(ms, "cannot happen: m->type=%d\n",
  345. m->type);
  346. return ~0U;
  347. }
  348. return v;
  349. }
  350. /*
  351. * parse one line from magic file, put into magic[index++] if valid
  352. */
  353. private int
  354. parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l,
  355. int action)
  356. {
  357. int i = 0;
  358. struct magic *m;
  359. char *t;
  360. private const char *fops = FILE_OPS;
  361. uint32_t val;
  362. #define ALLOC_INCR 200
  363. if (*nmagicp + 1 >= maxmagic){
  364. maxmagic += ALLOC_INCR;
  365. if ((m = (struct magic *) realloc(*magicp,
  366. sizeof(struct magic) * maxmagic)) == NULL) {
  367. file_oomem(ms);
  368. if (*magicp)
  369. free(*magicp);
  370. return -1;
  371. }
  372. *magicp = m;
  373. memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
  374. * ALLOC_INCR);
  375. }
  376. m = &(*magicp)[*nmagicp];
  377. m->flag = 0;
  378. m->cont_level = 0;
  379. while (*l == '>') {
  380. ++l; /* step over */
  381. m->cont_level++;
  382. }
  383. if (m->cont_level != 0 && *l == '(') {
  384. ++l; /* step over */
  385. m->flag |= INDIR;
  386. }
  387. if (m->cont_level != 0 && *l == '&') {
  388. ++l; /* step over */
  389. m->flag |= OFFADD;
  390. }
  391. /* get offset, then skip over it */
  392. m->offset = (uint32_t)strtoul(l, &t, 0);
  393. if (l == t)
  394. if (ms->flags & MAGIC_CHECK)
  395. file_magwarn(ms, "offset `%s' invalid", l);
  396. l = t;
  397. if (m->flag & INDIR) {
  398. m->in_type = FILE_LONG;
  399. m->in_offset = 0;
  400. /*
  401. * read [.lbs][+-]nnnnn)
  402. */
  403. if (*l == '.') {
  404. l++;
  405. switch (*l) {
  406. case 'l':
  407. m->in_type = FILE_LELONG;
  408. break;
  409. case 'L':
  410. m->in_type = FILE_BELONG;
  411. break;
  412. case 'h':
  413. case 's':
  414. m->in_type = FILE_LESHORT;
  415. break;
  416. case 'H':
  417. case 'S':
  418. m->in_type = FILE_BESHORT;
  419. break;
  420. case 'c':
  421. case 'b':
  422. case 'C':
  423. case 'B':
  424. m->in_type = FILE_BYTE;
  425. break;
  426. default:
  427. if (ms->flags & MAGIC_CHECK)
  428. file_magwarn(ms,
  429. "indirect offset type `%c' invalid",
  430. *l);
  431. break;
  432. }
  433. l++;
  434. }
  435. if (*l == '~') {
  436. m->in_op = FILE_OPINVERSE;
  437. l++;
  438. }
  439. switch (*l) {
  440. case '&':
  441. m->in_op |= FILE_OPAND;
  442. l++;
  443. break;
  444. case '|':
  445. m->in_op |= FILE_OPOR;
  446. l++;
  447. break;
  448. case '^':
  449. m->in_op |= FILE_OPXOR;
  450. l++;
  451. break;
  452. case '+':
  453. m->in_op |= FILE_OPADD;
  454. l++;
  455. break;
  456. case '-':
  457. m->in_op |= FILE_OPMINUS;
  458. l++;
  459. break;
  460. case '*':
  461. m->in_op |= FILE_OPMULTIPLY;
  462. l++;
  463. break;
  464. case '/':
  465. m->in_op |= FILE_OPDIVIDE;
  466. l++;
  467. break;
  468. case '%':
  469. m->in_op |= FILE_OPMODULO;
  470. l++;
  471. break;
  472. }
  473. if (isdigit((unsigned char)*l))
  474. m->in_offset = (uint32_t)strtoul(l, &t, 0);
  475. else
  476. t = l;
  477. if (*t++ != ')')
  478. if (ms->flags & MAGIC_CHECK)
  479. file_magwarn(ms,
  480. "missing ')' in indirect offset");
  481. l = t;
  482. }
  483. while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
  484. ++l;
  485. EATAB;
  486. #define NBYTE 4
  487. #define NSHORT 5
  488. #define NLONG 4
  489. #define NSTRING 6
  490. #define NDATE 4
  491. #define NBESHORT 7
  492. #define NBELONG 6
  493. #define NBEDATE 6
  494. #define NLESHORT 7
  495. #define NLELONG 6
  496. #define NLEDATE 6
  497. #define NPSTRING 7
  498. #define NLDATE 5
  499. #define NBELDATE 7
  500. #define NLELDATE 7
  501. #define NREGEX 5
  502. #define NBESTRING16 10
  503. #define NLESTRING16 10
  504. if (*l == 'u') {
  505. ++l;
  506. m->flag |= UNSIGNED;
  507. }
  508. /* get type, skip it */
  509. if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
  510. m->type = FILE_BYTE;
  511. l += NBYTE;
  512. } else if (strncmp(l, "byte", NBYTE)==0) {
  513. m->type = FILE_BYTE;
  514. l += NBYTE;
  515. } else if (strncmp(l, "short", NSHORT)==0) {
  516. m->type = FILE_SHORT;
  517. l += NSHORT;
  518. } else if (strncmp(l, "long", NLONG)==0) {
  519. m->type = FILE_LONG;
  520. l += NLONG;
  521. } else if (strncmp(l, "string", NSTRING)==0) {
  522. m->type = FILE_STRING;
  523. l += NSTRING;
  524. } else if (strncmp(l, "date", NDATE)==0) {
  525. m->type = FILE_DATE;
  526. l += NDATE;
  527. } else if (strncmp(l, "beshort", NBESHORT)==0) {
  528. m->type = FILE_BESHORT;
  529. l += NBESHORT;
  530. } else if (strncmp(l, "belong", NBELONG)==0) {
  531. m->type = FILE_BELONG;
  532. l += NBELONG;
  533. } else if (strncmp(l, "bedate", NBEDATE)==0) {
  534. m->type = FILE_BEDATE;
  535. l += NBEDATE;
  536. } else if (strncmp(l, "leshort", NLESHORT)==0) {
  537. m->type = FILE_LESHORT;
  538. l += NLESHORT;
  539. } else if (strncmp(l, "lelong", NLELONG)==0) {
  540. m->type = FILE_LELONG;
  541. l += NLELONG;
  542. } else if (strncmp(l, "ledate", NLEDATE)==0) {
  543. m->type = FILE_LEDATE;
  544. l += NLEDATE;
  545. } else if (strncmp(l, "pstring", NPSTRING)==0) {
  546. m->type = FILE_PSTRING;
  547. l += NPSTRING;
  548. } else if (strncmp(l, "ldate", NLDATE)==0) {
  549. m->type = FILE_LDATE;
  550. l += NLDATE;
  551. } else if (strncmp(l, "beldate", NBELDATE)==0) {
  552. m->type = FILE_BELDATE;
  553. l += NBELDATE;
  554. } else if (strncmp(l, "leldate", NLELDATE)==0) {
  555. m->type = FILE_LELDATE;
  556. l += NLELDATE;
  557. } else if (strncmp(l, "regex", NREGEX)==0) {
  558. m->type = FILE_REGEX;
  559. l += NREGEX;
  560. } else if (strncmp(l, "bestring16", NBESTRING16)==0) {
  561. m->type = FILE_BESTRING16;
  562. l += NBESTRING16;
  563. } else if (strncmp(l, "lestring16", NLESTRING16)==0) {
  564. m->type = FILE_LESTRING16;
  565. l += NLESTRING16;
  566. } else {
  567. if (ms->flags & MAGIC_CHECK)
  568. file_magwarn(ms, "type `%s' invalid", l);
  569. return -1;
  570. }
  571. /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
  572. /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
  573. if (*l == '~') {
  574. if (!IS_STRING(m->type))
  575. m->mask_op = FILE_OPINVERSE;
  576. ++l;
  577. }
  578. if ((t = strchr(fops, *l)) != NULL) {
  579. uint32_t op = (uint32_t)(t - fops);
  580. if (op != FILE_OPDIVIDE || !IS_STRING(m->type)) {
  581. ++l;
  582. m->mask_op |= op;
  583. val = (uint32_t)strtoul(l, &l, 0);
  584. m->mask = file_signextend(ms, m, val);
  585. eatsize(&l);
  586. } else {
  587. m->mask = 0L;
  588. while (!isspace((unsigned char)*++l)) {
  589. switch (*l) {
  590. case CHAR_IGNORE_LOWERCASE:
  591. m->mask |= STRING_IGNORE_LOWERCASE;
  592. break;
  593. case CHAR_COMPACT_BLANK:
  594. m->mask |= STRING_COMPACT_BLANK;
  595. break;
  596. case CHAR_COMPACT_OPTIONAL_BLANK:
  597. m->mask |=
  598. STRING_COMPACT_OPTIONAL_BLANK;
  599. break;
  600. default:
  601. if (ms->flags & MAGIC_CHECK)
  602. file_magwarn(ms,
  603. "string extension `%c' invalid",
  604. *l);
  605. return -1;
  606. }
  607. }
  608. }
  609. }
  610. /*
  611. * We used to set mask to all 1's here, instead let's just not do
  612. * anything if mask = 0 (unless you have a better idea)
  613. */
  614. EATAB;
  615. switch (*l) {
  616. case '>':
  617. case '<':
  618. /* Old-style anding: "0 byte &0x80 dynamically linked" */
  619. case '&':
  620. case '^':
  621. case '=':
  622. m->reln = *l;
  623. ++l;
  624. if (*l == '=') {
  625. /* HP compat: ignore &= etc. */
  626. ++l;
  627. }
  628. break;
  629. case '!':
  630. if (!IS_STRING(m->type)) {
  631. m->reln = *l;
  632. ++l;
  633. break;
  634. }
  635. /*FALLTHROUGH*/
  636. default:
  637. if (*l == 'x' && isascii((unsigned char)l[1]) &&
  638. isspace((unsigned char)l[1])) {
  639. m->reln = *l;
  640. ++l;
  641. goto GetDesc; /* Bill The Cat */
  642. }
  643. m->reln = '=';
  644. break;
  645. }
  646. EATAB;
  647. if (getvalue(ms, m, &l))
  648. return -1;
  649. /*
  650. * TODO finish this macro and start using it!
  651. * #define offsetcheck {if (offset > HOWMANY-1)
  652. * magwarn("offset too big"); }
  653. */
  654. /*
  655. * now get last part - the description
  656. */
  657. GetDesc:
  658. EATAB;
  659. if (l[0] == '\b') {
  660. ++l;
  661. m->nospflag = 1;
  662. } else if ((l[0] == '\\') && (l[1] == 'b')) {
  663. ++l;
  664. ++l;
  665. m->nospflag = 1;
  666. } else
  667. m->nospflag = 0;
  668. while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC)
  669. /* NULLBODY */;
  670. if (ms->flags & MAGIC_CHECK) {
  671. if (!check_format(ms, m))
  672. return -1;
  673. }
  674. #ifndef COMPILE_ONLY
  675. if (action == FILE_CHECK) {
  676. file_mdump(m);
  677. }
  678. #endif
  679. ++(*nmagicp); /* make room for next */
  680. return 0;
  681. }
  682. /*
  683. * Check that the optional printf format in description matches
  684. * the type of the magic.
  685. */
  686. private int
  687. check_format(struct magic_set *ms, struct magic *m)
  688. {
  689. static const char *formats[] = { FILE_FORMAT_STRING };
  690. static const char *names[] = { FILE_FORMAT_NAME };
  691. char *ptr;
  692. for (ptr = m->desc; *ptr; ptr++)
  693. if (*ptr == '%')
  694. break;
  695. if (*ptr == '\0') {
  696. /* No format string; ok */
  697. return 1;
  698. }
  699. if (m->type >= sizeof(formats)/sizeof(formats[0])) {
  700. file_magwarn(ms, "Internal error inconsistency between m->type"
  701. " and format strings");
  702. return 0;
  703. }
  704. if (formats[m->type] == NULL) {
  705. file_magwarn(ms, "No format string for `%s' with description "
  706. "`%s'", m->desc, names[m->type]);
  707. return 0;
  708. }
  709. for (; *ptr; ptr++) {
  710. if (*ptr == 'l' || *ptr == 'h') {
  711. /* XXX: we should really fix this one day */
  712. continue;
  713. }
  714. if (islower((unsigned char)*ptr) || *ptr == 'X')
  715. break;
  716. }
  717. if (*ptr == '\0') {
  718. /* Missing format string; bad */
  719. file_magwarn(ms, "Invalid format `%s' for type `%s'",
  720. m->desc, names[m->type]);
  721. return 0;
  722. }
  723. if (strchr(formats[m->type], *ptr) == NULL) {
  724. file_magwarn(ms, "Printf format `%c' is not valid for type `%s'"
  725. " in description `%s'",
  726. *ptr, names[m->type], m->desc);
  727. return 0;
  728. }
  729. return 1;
  730. }
  731. /*
  732. * Read a numeric value from a pointer, into the value union of a magic
  733. * pointer, according to the magic type. Update the string pointer to point
  734. * just after the number read. Return 0 for success, non-zero for failure.
  735. */
  736. private int
  737. getvalue(struct magic_set *ms, struct magic *m, char **p)
  738. {
  739. int slen;
  740. switch (m->type) {
  741. case FILE_BESTRING16:
  742. case FILE_LESTRING16:
  743. case FILE_STRING:
  744. case FILE_PSTRING:
  745. case FILE_REGEX:
  746. *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen);
  747. if (*p == NULL) {
  748. if (ms->flags & MAGIC_CHECK)
  749. file_magwarn(ms, "cannot get string from `%s'",
  750. m->value.s);
  751. return -1;
  752. }
  753. m->vallen = slen;
  754. return 0;
  755. default:
  756. if (m->reln != 'x') {
  757. m->value.l = file_signextend(ms, m,
  758. (uint32_t)strtoul(*p, p, 0));
  759. eatsize(p);
  760. }
  761. return 0;
  762. }
  763. }
  764. /*
  765. * Convert a string containing C character escapes. Stop at an unescaped
  766. * space or tab.
  767. * Copy the converted version to "p", returning its length in *slen.
  768. * Return updated scan pointer as function result.
  769. */
  770. private char *
  771. getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen)
  772. {
  773. char *origs = s, *origp = p;
  774. char *pmax = p + plen - 1;
  775. int c;
  776. int val;
  777. while ((c = *s++) != '\0') {
  778. if (isspace((unsigned char) c))
  779. break;
  780. if (p >= pmax) {
  781. file_error(ms, 0, "string too long: `%s'", origs);
  782. return NULL;
  783. }
  784. if(c == '\\') {
  785. switch(c = *s++) {
  786. case '\0':
  787. goto out;
  788. default:
  789. *p++ = (char) c;
  790. break;
  791. case 'n':
  792. *p++ = '\n';
  793. break;
  794. case 'r':
  795. *p++ = '\r';
  796. break;
  797. case 'b':
  798. *p++ = '\b';
  799. break;
  800. case 't':
  801. *p++ = '\t';
  802. break;
  803. case 'f':
  804. *p++ = '\f';
  805. break;
  806. case 'v':
  807. *p++ = '\v';
  808. break;
  809. /* \ and up to 3 octal digits */
  810. case '0':
  811. case '1':
  812. case '2':
  813. case '3':
  814. case '4':
  815. case '5':
  816. case '6':
  817. case '7':
  818. val = c - '0';
  819. c = *s++; /* try for 2 */
  820. if(c >= '0' && c <= '7') {
  821. val = (val<<3) | (c - '0');
  822. c = *s++; /* try for 3 */
  823. if(c >= '0' && c <= '7')
  824. val = (val<<3) | (c-'0');
  825. else
  826. --s;
  827. }
  828. else
  829. --s;
  830. *p++ = (char)val;
  831. break;
  832. /* \x and up to 2 hex digits */
  833. case 'x':
  834. val = 'x'; /* Default if no digits */
  835. c = hextoint(*s++); /* Get next char */
  836. if (c >= 0) {
  837. val = c;
  838. c = hextoint(*s++);
  839. if (c >= 0)
  840. val = (val << 4) + c;
  841. else
  842. --s;
  843. } else
  844. --s;
  845. *p++ = (char)val;
  846. break;
  847. }
  848. } else
  849. *p++ = (char)c;
  850. }
  851. out:
  852. *p = '\0';
  853. *slen = p - origp;
  854. return s;
  855. }
  856. /* Single hex char to int; -1 if not a hex char. */
  857. private int
  858. hextoint(int c)
  859. {
  860. if (!isascii((unsigned char) c))
  861. return -1;
  862. if (isdigit((unsigned char) c))
  863. return c - '0';
  864. if ((c >= 'a')&&(c <= 'f'))
  865. return c + 10 - 'a';
  866. if (( c>= 'A')&&(c <= 'F'))
  867. return c + 10 - 'A';
  868. return -1;
  869. }
  870. /*
  871. * Print a string containing C character escapes.
  872. */
  873. protected void
  874. file_showstr(FILE *fp, const char *s, size_t len)
  875. {
  876. char c;
  877. for (;;) {
  878. c = *s++;
  879. if (len == ~0U) {
  880. if (c == '\0')
  881. break;
  882. }
  883. else {
  884. if (len-- == 0)
  885. break;
  886. }
  887. if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
  888. (void) fputc(c, fp);
  889. else {
  890. (void) fputc('\\', fp);
  891. switch (c) {
  892. case '\n':
  893. (void) fputc('n', fp);
  894. break;
  895. case '\r':
  896. (void) fputc('r', fp);
  897. break;
  898. case '\b':
  899. (void) fputc('b', fp);
  900. break;
  901. case '\t':
  902. (void) fputc('t', fp);
  903. break;
  904. case '\f':
  905. (void) fputc('f', fp);
  906. break;
  907. case '\v':
  908. (void) fputc('v', fp);
  909. break;
  910. default:
  911. (void) fprintf(fp, "%.3o", c & 0377);
  912. break;
  913. }
  914. }
  915. }
  916. }
  917. /*
  918. * eatsize(): Eat the size spec from a number [eg. 10UL]
  919. */
  920. private void
  921. eatsize(char **p)
  922. {
  923. char *l = *p;
  924. if (LOWCASE(*l) == 'u')
  925. l++;
  926. switch (LOWCASE(*l)) {
  927. case 'l': /* long */
  928. case 's': /* short */
  929. case 'h': /* short */
  930. case 'b': /* char/byte */
  931. case 'c': /* char/byte */
  932. l++;
  933. /*FALLTHROUGH*/
  934. default:
  935. break;
  936. }
  937. *p = l;
  938. }
  939. /*
  940. * handle a compiled file.
  941. */
  942. private int
  943. apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
  944. const char *fn)
  945. {
  946. int fd;
  947. struct stat st;
  948. uint32_t *ptr;
  949. uint32_t version;
  950. int needsbyteswap;
  951. char buf[MAXPATHLEN];
  952. char *dbname = mkdbname(fn, buf, sizeof(buf), 0);
  953. void *mm = NULL;
  954. if (dbname == NULL)
  955. return -1;
  956. if ((fd = open(dbname, O_RDONLY)) == -1)
  957. return -1;
  958. if (fstat(fd, &st) == -1) {
  959. file_error(ms, errno, "cannot stat `%s'", dbname);
  960. goto error;
  961. }
  962. if (st.st_size < 16) {
  963. file_error(ms, 0, "file `%s' is too small", dbname);
  964. goto error;
  965. }
  966. #ifdef QUICK
  967. if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
  968. MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
  969. file_error(ms, errno, "cannot map `%s'", dbname);
  970. goto error;
  971. }
  972. #define RET 2
  973. #else
  974. if ((mm = malloc((size_t)st.st_size)) == NULL) {
  975. file_oomem(ms);
  976. goto error;
  977. }
  978. if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
  979. file_badread(ms);
  980. goto error;
  981. }
  982. #define RET 1
  983. #endif
  984. *magicp = mm;
  985. (void)close(fd);
  986. fd = -1;
  987. ptr = (uint32_t *)(void *)*magicp;
  988. if (*ptr != MAGICNO) {
  989. if (swap4(*ptr) != MAGICNO) {
  990. file_error(ms, 0, "bad magic in `%s'");
  991. goto error;
  992. }
  993. needsbyteswap = 1;
  994. } else
  995. needsbyteswap = 0;
  996. if (needsbyteswap)
  997. version = swap4(ptr[1]);
  998. else
  999. version = ptr[1];
  1000. if (version != VERSIONNO) {
  1001. file_error(ms, 0, "version mismatch (%d != %d) in `%s'",
  1002. version, VERSIONNO, dbname);
  1003. goto error;
  1004. }
  1005. *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
  1006. (*magicp)++;
  1007. if (needsbyteswap)
  1008. byteswap(*magicp, *nmagicp);
  1009. return RET;
  1010. error:
  1011. if (fd != -1)
  1012. (void)close(fd);
  1013. if (mm) {
  1014. #ifdef QUICK
  1015. (void)munmap((void *)mm, (size_t)st.st_size);
  1016. #else
  1017. free(mm);
  1018. #endif
  1019. } else {
  1020. *magicp = NULL;
  1021. *nmagicp = 0;
  1022. }
  1023. return -1;
  1024. }
  1025. private const uint32_t ar[] = {
  1026. MAGICNO, VERSIONNO
  1027. };
  1028. /*
  1029. * handle an mmaped file.
  1030. */
  1031. private int
  1032. apprentice_compile(struct magic_set *ms, struct magic **magicp,
  1033. uint32_t *nmagicp, const char *fn)
  1034. {
  1035. int fd;
  1036. char buf[MAXPATHLEN];
  1037. char *dbname = mkdbname(fn, buf, sizeof(buf), 1);
  1038. if (dbname == NULL)
  1039. return -1;
  1040. if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
  1041. file_error(ms, errno, "cannot open `%s'", dbname);
  1042. return -1;
  1043. }
  1044. if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
  1045. file_error(ms, errno, "error writing `%s'", dbname);
  1046. return -1;
  1047. }
  1048. if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
  1049. != sizeof(struct magic)) {
  1050. file_error(ms, errno, "error seeking `%s'", dbname);
  1051. return -1;
  1052. }
  1053. if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
  1054. != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
  1055. file_error(ms, errno, "error writing `%s'", dbname);
  1056. return -1;
  1057. }
  1058. (void)close(fd);
  1059. return 0;
  1060. }
  1061. private const char ext[] = ".mgc";
  1062. /*
  1063. * make a dbname
  1064. */
  1065. private char *
  1066. mkdbname(const char *fn, char *buf, size_t bufsiz, int strip)
  1067. {
  1068. if (strip) {
  1069. const char *p;
  1070. if ((p = strrchr(fn, '/')) != NULL)
  1071. fn = ++p;
  1072. }
  1073. (void)snprintf(buf, bufsiz, "%s%s", fn, ext);
  1074. return buf;
  1075. }
  1076. /*
  1077. * Byteswap an mmap'ed file if needed
  1078. */
  1079. private void
  1080. byteswap(struct magic *magic, uint32_t nmagic)
  1081. {
  1082. uint32_t i;
  1083. for (i = 0; i < nmagic; i++)
  1084. bs1(&magic[i]);
  1085. }
  1086. /*
  1087. * swap a short
  1088. */
  1089. private uint16_t
  1090. swap2(uint16_t sv)
  1091. {
  1092. uint16_t rv;
  1093. uint8_t *s = (uint8_t *)(void *)&sv;
  1094. uint8_t *d = (uint8_t *)(void *)&rv;
  1095. d[0] = s[1];
  1096. d[1] = s[0];
  1097. return rv;
  1098. }
  1099. /*
  1100. * swap an int
  1101. */
  1102. private uint32_t
  1103. swap4(uint32_t sv)
  1104. {
  1105. uint32_t rv;
  1106. uint8_t *s = (uint8_t *)(void *)&sv;
  1107. uint8_t *d = (uint8_t *)(void *)&rv;
  1108. d[0] = s[3];
  1109. d[1] = s[2];
  1110. d[2] = s[1];
  1111. d[3] = s[0];
  1112. return rv;
  1113. }
  1114. /*
  1115. * byteswap a single magic entry
  1116. */
  1117. private void
  1118. bs1(struct magic *m)
  1119. {
  1120. m->cont_level = swap2(m->cont_level);
  1121. m->offset = swap4((uint32_t)m->offset);
  1122. m->in_offset = swap4((uint32_t)m->in_offset);
  1123. if (IS_STRING(m->type))
  1124. m->value.l = swap4(m->value.l);
  1125. m->mask = swap4(m->mask);
  1126. }