apprentice.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. /*
  2. * apprentice - make one pass through /etc/magic, learning its secrets.
  3. *
  4. * Copyright (c) Ian F. Darwin, 1987.
  5. * Written by Ian F. Darwin.
  6. *
  7. * This software is not subject to any license of the American Telephone
  8. * and Telegraph Company or of the Regents of the University of California.
  9. *
  10. * Permission is granted to anyone to use this software for any purpose on
  11. * any computer system, and to alter it and redistribute it freely, subject
  12. * to the following restrictions:
  13. *
  14. * 1. The author is not responsible for the consequences of use of this
  15. * software, no matter how awful, even if they arise from flaws in it.
  16. *
  17. * 2. The origin of this software must not be misrepresented, either by
  18. * explicit claim or by omission. Since few users ever read sources,
  19. * credits must appear in the documentation.
  20. *
  21. * 3. Altered versions must be plainly marked as such, and must not be
  22. * misrepresented as being the original software. Since few users
  23. * ever read sources, credits must appear in the documentation.
  24. *
  25. * 4. This notice may not be removed or altered.
  26. */
  27. #ifdef HAVE_CONFIG_H
  28. #include <config.h>
  29. #endif
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <ctype.h>
  34. #include <errno.h>
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$Id: apprentice.c,v 1.29 1999/10/31 22:23:03 christos Exp $")
  38. #endif /* lint */
  39. #define EATAB {while (isascii((unsigned char) *l) && \
  40. isspace((unsigned char) *l)) ++l;}
  41. #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
  42. tolower((unsigned char) (l)) : (l))
  43. static int getvalue __P((struct magic *, char **));
  44. static int hextoint __P((int));
  45. static char *getstr __P((char *, char *, int, int *));
  46. static int parse __P((char *, int *, int));
  47. static void eatsize __P((char **));
  48. static int maxmagic = 0;
  49. static int apprentice_1 __P((const char *, int));
  50. int
  51. apprentice(fn, check)
  52. const char *fn; /* list of magic files */
  53. int check; /* non-zero? checking-only run. */
  54. {
  55. char *p, *mfn;
  56. int file_err, errs = -1;
  57. maxmagic = MAXMAGIS;
  58. magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
  59. mfn = malloc(strlen(fn)+1);
  60. if (magic == NULL || mfn == NULL) {
  61. (void) fprintf(stderr, "%s: Out of memory.\n", progname);
  62. if (check)
  63. return -1;
  64. else
  65. exit(1);
  66. }
  67. fn = strcpy(mfn, fn);
  68. while (fn) {
  69. p = strchr(fn, ':');
  70. if (p)
  71. *p++ = '\0';
  72. file_err = apprentice_1(fn, check);
  73. if (file_err > errs)
  74. errs = file_err;
  75. fn = p;
  76. }
  77. if (errs == -1)
  78. (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
  79. progname);
  80. if (!check && errs)
  81. exit(1);
  82. free(mfn);
  83. return errs;
  84. }
  85. static int
  86. apprentice_1(fn, check)
  87. const char *fn; /* name of magic file */
  88. int check; /* non-zero? checking-only run. */
  89. {
  90. static const char hdr[] =
  91. "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  92. FILE *f;
  93. char line[BUFSIZ+1];
  94. int errs = 0;
  95. f = fopen(fn, "r");
  96. if (f==NULL) {
  97. if (errno != ENOENT)
  98. (void) fprintf(stderr,
  99. "%s: can't read magic file %s (%m)\n",
  100. progname, fn);
  101. return -1;
  102. }
  103. /* parse it */
  104. if (check) /* print silly verbose header for USG compat. */
  105. (void) printf("%s\n", hdr);
  106. for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
  107. if (line[0]=='#') /* comment, do not parse */
  108. continue;
  109. if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
  110. continue;
  111. line[strlen(line)-1] = '\0'; /* delete newline */
  112. if (parse(line, &nmagic, check) != 0)
  113. errs = 1;
  114. }
  115. (void) fclose(f);
  116. return errs;
  117. }
  118. /*
  119. * extend the sign bit if the comparison is to be signed
  120. */
  121. uint32
  122. signextend(m, v)
  123. struct magic *m;
  124. uint32 v;
  125. {
  126. if (!(m->flag & UNSIGNED))
  127. switch(m->type) {
  128. /*
  129. * Do not remove the casts below. They are
  130. * vital. When later compared with the data,
  131. * the sign extension must have happened.
  132. */
  133. case BYTE:
  134. v = (char) v;
  135. break;
  136. case SHORT:
  137. case BESHORT:
  138. case LESHORT:
  139. v = (short) v;
  140. break;
  141. case DATE:
  142. case BEDATE:
  143. case LEDATE:
  144. case LONG:
  145. case BELONG:
  146. case LELONG:
  147. v = (int32) v;
  148. break;
  149. case STRING:
  150. break;
  151. default:
  152. magwarn("can't happen: m->type=%d\n",
  153. m->type);
  154. return -1;
  155. }
  156. return v;
  157. }
  158. /*
  159. * parse one line from magic file, put into magic[index++] if valid
  160. */
  161. static int
  162. parse(l, ndx, check)
  163. char *l;
  164. int *ndx, check;
  165. {
  166. int i = 0, nd = *ndx;
  167. struct magic *m;
  168. char *t, *s;
  169. #define ALLOC_INCR 200
  170. if (nd+1 >= maxmagic){
  171. maxmagic += ALLOC_INCR;
  172. if ((magic = (struct magic *) realloc(magic,
  173. sizeof(struct magic) *
  174. maxmagic)) == NULL) {
  175. (void) fprintf(stderr, "%s: Out of memory.\n", progname);
  176. if (check)
  177. return -1;
  178. else
  179. exit(1);
  180. }
  181. memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
  182. }
  183. m = &magic[*ndx];
  184. m->flag = 0;
  185. m->cont_level = 0;
  186. while (*l == '>') {
  187. ++l; /* step over */
  188. m->cont_level++;
  189. }
  190. if (m->cont_level != 0 && *l == '(') {
  191. ++l; /* step over */
  192. m->flag |= INDIR;
  193. }
  194. if (m->cont_level != 0 && *l == '&') {
  195. ++l; /* step over */
  196. m->flag |= ADD;
  197. }
  198. /* get offset, then skip over it */
  199. m->offset = (int) strtoul(l,&t,0);
  200. if (l == t)
  201. magwarn("offset %s invalid", l);
  202. l = t;
  203. if (m->flag & INDIR) {
  204. m->in.type = LONG;
  205. m->in.offset = 0;
  206. /*
  207. * read [.lbs][+-]nnnnn)
  208. */
  209. if (*l == '.') {
  210. l++;
  211. switch (*l) {
  212. case 'l':
  213. m->in.type = LELONG;
  214. break;
  215. case 'L':
  216. m->in.type = BELONG;
  217. break;
  218. case 'h':
  219. case 's':
  220. m->in.type = LESHORT;
  221. break;
  222. case 'H':
  223. case 'S':
  224. m->in.type = BESHORT;
  225. break;
  226. case 'c':
  227. case 'b':
  228. case 'C':
  229. case 'B':
  230. m->in.type = BYTE;
  231. break;
  232. default:
  233. magwarn("indirect offset type %c invalid", *l);
  234. break;
  235. }
  236. l++;
  237. }
  238. s = l;
  239. if (*l == '+' || *l == '-') l++;
  240. if (isdigit((unsigned char)*l)) {
  241. m->in.offset = strtoul(l, &t, 0);
  242. if (*s == '-') m->in.offset = - m->in.offset;
  243. }
  244. else
  245. t = l;
  246. if (*t++ != ')')
  247. magwarn("missing ')' in indirect offset");
  248. l = t;
  249. }
  250. while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
  251. ++l;
  252. EATAB;
  253. #define NBYTE 4
  254. #define NSHORT 5
  255. #define NLONG 4
  256. #define NSTRING 6
  257. #define NDATE 4
  258. #define NBESHORT 7
  259. #define NBELONG 6
  260. #define NBEDATE 6
  261. #define NLESHORT 7
  262. #define NLELONG 6
  263. #define NLEDATE 6
  264. if (*l == 'u') {
  265. ++l;
  266. m->flag |= UNSIGNED;
  267. }
  268. /* get type, skip it */
  269. if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
  270. m->type = BYTE;
  271. l += NBYTE;
  272. } else if (strncmp(l, "byte", NBYTE)==0) {
  273. m->type = BYTE;
  274. l += NBYTE;
  275. } else if (strncmp(l, "short", NSHORT)==0) {
  276. m->type = SHORT;
  277. l += NSHORT;
  278. } else if (strncmp(l, "long", NLONG)==0) {
  279. m->type = LONG;
  280. l += NLONG;
  281. } else if (strncmp(l, "string", NSTRING)==0) {
  282. m->type = STRING;
  283. l += NSTRING;
  284. } else if (strncmp(l, "date", NDATE)==0) {
  285. m->type = DATE;
  286. l += NDATE;
  287. } else if (strncmp(l, "beshort", NBESHORT)==0) {
  288. m->type = BESHORT;
  289. l += NBESHORT;
  290. } else if (strncmp(l, "belong", NBELONG)==0) {
  291. m->type = BELONG;
  292. l += NBELONG;
  293. } else if (strncmp(l, "bedate", NBEDATE)==0) {
  294. m->type = BEDATE;
  295. l += NBEDATE;
  296. } else if (strncmp(l, "leshort", NLESHORT)==0) {
  297. m->type = LESHORT;
  298. l += NLESHORT;
  299. } else if (strncmp(l, "lelong", NLELONG)==0) {
  300. m->type = LELONG;
  301. l += NLELONG;
  302. } else if (strncmp(l, "ledate", NLEDATE)==0) {
  303. m->type = LEDATE;
  304. l += NLEDATE;
  305. } else {
  306. magwarn("type %s invalid", l);
  307. return -1;
  308. }
  309. /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
  310. if (*l == '&') {
  311. ++l;
  312. m->mask = signextend(m, strtoul(l, &l, 0));
  313. eatsize(&l);
  314. } else
  315. m->mask = ~0L;
  316. EATAB;
  317. switch (*l) {
  318. case '>':
  319. case '<':
  320. /* Old-style anding: "0 byte &0x80 dynamically linked" */
  321. case '&':
  322. case '^':
  323. case '=':
  324. m->reln = *l;
  325. ++l;
  326. if (*l == '=') {
  327. /* HP compat: ignore &= etc. */
  328. ++l;
  329. }
  330. break;
  331. case '!':
  332. if (m->type != STRING) {
  333. m->reln = *l;
  334. ++l;
  335. break;
  336. }
  337. /* FALL THROUGH */
  338. default:
  339. if (*l == 'x' && isascii((unsigned char)l[1]) &&
  340. isspace((unsigned char)l[1])) {
  341. m->reln = *l;
  342. ++l;
  343. goto GetDesc; /* Bill The Cat */
  344. }
  345. m->reln = '=';
  346. break;
  347. }
  348. EATAB;
  349. if (getvalue(m, &l))
  350. return -1;
  351. /*
  352. * TODO finish this macro and start using it!
  353. * #define offsetcheck {if (offset > HOWMANY-1)
  354. * magwarn("offset too big"); }
  355. */
  356. /*
  357. * now get last part - the description
  358. */
  359. GetDesc:
  360. EATAB;
  361. if (l[0] == '\b') {
  362. ++l;
  363. m->nospflag = 1;
  364. } else if ((l[0] == '\\') && (l[1] == 'b')) {
  365. ++l;
  366. ++l;
  367. m->nospflag = 1;
  368. } else
  369. m->nospflag = 0;
  370. while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
  371. /* NULLBODY */;
  372. if (check) {
  373. mdump(m);
  374. }
  375. ++(*ndx); /* make room for next */
  376. return 0;
  377. }
  378. /*
  379. * Read a numeric value from a pointer, into the value union of a magic
  380. * pointer, according to the magic type. Update the string pointer to point
  381. * just after the number read. Return 0 for success, non-zero for failure.
  382. */
  383. static int
  384. getvalue(m, p)
  385. struct magic *m;
  386. char **p;
  387. {
  388. int slen;
  389. if (m->type == STRING) {
  390. *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
  391. m->vallen = slen;
  392. } else
  393. if (m->reln != 'x') {
  394. m->value.l = signextend(m, strtoul(*p, p, 0));
  395. eatsize(p);
  396. }
  397. return 0;
  398. }
  399. /*
  400. * Convert a string containing C character escapes. Stop at an unescaped
  401. * space or tab.
  402. * Copy the converted version to "p", returning its length in *slen.
  403. * Return updated scan pointer as function result.
  404. */
  405. static char *
  406. getstr(s, p, plen, slen)
  407. register char *s;
  408. register char *p;
  409. int plen, *slen;
  410. {
  411. char *origs = s, *origp = p;
  412. char *pmax = p + plen - 1;
  413. register int c;
  414. register int val;
  415. while ((c = *s++) != '\0') {
  416. if (isspace((unsigned char) c))
  417. break;
  418. if (p >= pmax) {
  419. fprintf(stderr, "String too long: %s\n", origs);
  420. break;
  421. }
  422. if(c == '\\') {
  423. switch(c = *s++) {
  424. case '\0':
  425. goto out;
  426. default:
  427. *p++ = (char) c;
  428. break;
  429. case 'n':
  430. *p++ = '\n';
  431. break;
  432. case 'r':
  433. *p++ = '\r';
  434. break;
  435. case 'b':
  436. *p++ = '\b';
  437. break;
  438. case 't':
  439. *p++ = '\t';
  440. break;
  441. case 'f':
  442. *p++ = '\f';
  443. break;
  444. case 'v':
  445. *p++ = '\v';
  446. break;
  447. /* \ and up to 3 octal digits */
  448. case '0':
  449. case '1':
  450. case '2':
  451. case '3':
  452. case '4':
  453. case '5':
  454. case '6':
  455. case '7':
  456. val = c - '0';
  457. c = *s++; /* try for 2 */
  458. if(c >= '0' && c <= '7') {
  459. val = (val<<3) | (c - '0');
  460. c = *s++; /* try for 3 */
  461. if(c >= '0' && c <= '7')
  462. val = (val<<3) | (c-'0');
  463. else
  464. --s;
  465. }
  466. else
  467. --s;
  468. *p++ = (char)val;
  469. break;
  470. /* \x and up to 2 hex digits */
  471. case 'x':
  472. val = 'x'; /* Default if no digits */
  473. c = hextoint(*s++); /* Get next char */
  474. if (c >= 0) {
  475. val = c;
  476. c = hextoint(*s++);
  477. if (c >= 0)
  478. val = (val << 4) + c;
  479. else
  480. --s;
  481. } else
  482. --s;
  483. *p++ = (char)val;
  484. break;
  485. }
  486. } else
  487. *p++ = (char)c;
  488. }
  489. out:
  490. *p = '\0';
  491. *slen = p - origp;
  492. return s;
  493. }
  494. /* Single hex char to int; -1 if not a hex char. */
  495. static int
  496. hextoint(c)
  497. int c;
  498. {
  499. if (!isascii((unsigned char) c)) return -1;
  500. if (isdigit((unsigned char) c)) return c - '0';
  501. if ((c>='a')&&(c<='f')) return c + 10 - 'a';
  502. if ((c>='A')&&(c<='F')) return c + 10 - 'A';
  503. return -1;
  504. }
  505. /*
  506. * Print a string containing C character escapes.
  507. */
  508. void
  509. showstr(fp, s, len)
  510. FILE *fp;
  511. const char *s;
  512. int len;
  513. {
  514. register char c;
  515. for (;;) {
  516. c = *s++;
  517. if (len == -1) {
  518. if (c == '\0')
  519. break;
  520. }
  521. else {
  522. if (len-- == 0)
  523. break;
  524. }
  525. if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
  526. (void) fputc(c, fp);
  527. else {
  528. (void) fputc('\\', fp);
  529. switch (c) {
  530. case '\n':
  531. (void) fputc('n', fp);
  532. break;
  533. case '\r':
  534. (void) fputc('r', fp);
  535. break;
  536. case '\b':
  537. (void) fputc('b', fp);
  538. break;
  539. case '\t':
  540. (void) fputc('t', fp);
  541. break;
  542. case '\f':
  543. (void) fputc('f', fp);
  544. break;
  545. case '\v':
  546. (void) fputc('v', fp);
  547. break;
  548. default:
  549. (void) fprintf(fp, "%.3o", c & 0377);
  550. break;
  551. }
  552. }
  553. }
  554. }
  555. /*
  556. * eatsize(): Eat the size spec from a number [eg. 10UL]
  557. */
  558. static void
  559. eatsize(p)
  560. char **p;
  561. {
  562. char *l = *p;
  563. if (LOWCASE(*l) == 'u')
  564. l++;
  565. switch (LOWCASE(*l)) {
  566. case 'l': /* long */
  567. case 's': /* short */
  568. case 'h': /* short */
  569. case 'b': /* char/byte */
  570. case 'c': /* char/byte */
  571. l++;
  572. /*FALLTHROUGH*/
  573. default:
  574. break;
  575. }
  576. *p = l;
  577. }