apprentice.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. /*
  2. * apprentice - make one pass through /etc/magic, learning its secrets.
  3. *
  4. * Copyright (c) Ian F. Darwin, 1987.
  5. * Written by Ian F. Darwin.
  6. *
  7. * This software is not subject to any license of the American Telephone
  8. * and Telegraph Company or of the Regents of the University of California.
  9. *
  10. * Permission is granted to anyone to use this software for any purpose on
  11. * any computer system, and to alter it and redistribute it freely, subject
  12. * to the following restrictions:
  13. *
  14. * 1. The author is not responsible for the consequences of use of this
  15. * software, no matter how awful, even if they arise from flaws in it.
  16. *
  17. * 2. The origin of this software must not be misrepresented, either by
  18. * explicit claim or by omission. Since few users ever read sources,
  19. * credits must appear in the documentation.
  20. *
  21. * 3. Altered versions must be plainly marked as such, and must not be
  22. * misrepresented as being the original software. Since few users
  23. * ever read sources, credits must appear in the documentation.
  24. *
  25. * 4. This notice may not be removed or altered.
  26. */
  27. #ifdef HAVE_CONFIG_H
  28. #include <config.h>
  29. #endif
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <ctype.h>
  34. #include <errno.h>
  35. #include "file.h"
  36. #ifndef lint
  37. FILE_RCSID("@(#)$Id: apprentice.c,v 1.28 1998/09/12 13:17:52 christos Exp $")
  38. #endif /* lint */
  /*
   * EATAB - advance the scan pointer `l' (a char * that must be in scope at
   * the point of use) past any run of ASCII whitespace.
   *
   * Wrapped in do { } while (0) so that "EATAB;" is exactly one statement
   * and can be used as the body of an if/else without breaking the else.
   */
  #define EATAB do { \
          while (isascii((unsigned char) *l) && \
              isspace((unsigned char) *l)) \
              ++l; \
      } while (0)

  /*
   * LOWCASE - yield the lower-case form of an upper-case character; any
   * other value is returned unchanged.  The argument is evaluated more than
   * once, so it must not have side effects.
   */
  #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
      tolower((unsigned char) (l)) : (l))
  43. static int getvalue __P((struct magic *, char **));
  44. static int hextoint __P((int));
  45. static char *getstr __P((char *, char *, int, int *));
  46. static int parse __P((char *, int *, int));
  47. static void eatsize __P((char **));
  48. static int maxmagic = 0;
  49. static int apprentice_1 __P((const char *, int));
  50. int
  51. apprentice(fn, check)
  52. const char *fn; /* list of magic files */
  53. int check; /* non-zero? checking-only run. */
  54. {
  55. char *p, *mfn;
  56. int file_err, errs = -1;
  57. maxmagic = MAXMAGIS;
  58. magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
  59. mfn = malloc(strlen(fn)+1);
  60. if (magic == NULL || mfn == NULL) {
  61. (void) fprintf(stderr, "%s: Out of memory.\n", progname);
  62. if (check)
  63. return -1;
  64. else
  65. exit(1);
  66. }
  67. fn = strcpy(mfn, fn);
  68. while (fn) {
  69. p = strchr(fn, ':');
  70. if (p)
  71. *p++ = '\0';
  72. file_err = apprentice_1(fn, check);
  73. if (file_err > errs)
  74. errs = file_err;
  75. fn = p;
  76. }
  77. if (errs == -1)
  78. (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
  79. progname);
  80. if (!check && errs)
  81. exit(1);
  82. free(mfn);
  83. return errs;
  84. }
  85. static int
  86. apprentice_1(fn, check)
  87. const char *fn; /* name of magic file */
  88. int check; /* non-zero? checking-only run. */
  89. {
  90. static const char hdr[] =
  91. "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  92. FILE *f;
  93. char line[BUFSIZ+1];
  94. int errs = 0;
  95. f = fopen(fn, "r");
  96. if (f==NULL) {
  97. if (errno != ENOENT)
  98. (void) fprintf(stderr,
  99. "%s: can't read magic file %s (%m)\n",
  100. progname, fn);
  101. return -1;
  102. }
  103. /* parse it */
  104. if (check) /* print silly verbose header for USG compat. */
  105. (void) printf("%s\n", hdr);
  106. for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
  107. if (line[0]=='#') /* comment, do not parse */
  108. continue;
  109. if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
  110. continue;
  111. line[strlen(line)-1] = '\0'; /* delete newline */
  112. if (parse(line, &nmagic, check) != 0)
  113. errs = 1;
  114. }
  115. (void) fclose(f);
  116. return errs;
  117. }
  118. /*
  119. * extend the sign bit if the comparison is to be signed
  120. */
  121. uint32
  122. signextend(m, v)
  123. struct magic *m;
  124. uint32 v;
  125. {
  126. if (!(m->flag & UNSIGNED))
  127. switch(m->type) {
  128. /*
  129. * Do not remove the casts below. They are
  130. * vital. When later compared with the data,
  131. * the sign extension must have happened.
  132. */
  133. case BYTE:
  134. v = (char) v;
  135. break;
  136. case SHORT:
  137. case BESHORT:
  138. case LESHORT:
  139. v = (short) v;
  140. break;
  141. case DATE:
  142. case BEDATE:
  143. case LEDATE:
  144. case LONG:
  145. case BELONG:
  146. case LELONG:
  147. v = (int32) v;
  148. break;
  149. case STRING:
  150. break;
  151. default:
  152. magwarn("can't happen: m->type=%d\n",
  153. m->type);
  154. return -1;
  155. }
  156. return v;
  157. }
  158. /*
  159. * parse one line from magic file, put into magic[index++] if valid
  160. */
  161. static int
  162. parse(l, ndx, check)
  163. char *l;
  164. int *ndx, check;
  165. {
  166. int i = 0, nd = *ndx;
  167. struct magic *m;
  168. char *t, *s;
  169. #define ALLOC_INCR 20
  170. if (nd+1 >= maxmagic){
  171. maxmagic += ALLOC_INCR;
  172. if ((magic = (struct magic *) realloc(magic,
  173. sizeof(struct magic) *
  174. maxmagic)) == NULL) {
  175. (void) fprintf(stderr, "%s: Out of memory.\n", progname);
  176. if (check)
  177. return -1;
  178. else
  179. exit(1);
  180. }
  181. memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
  182. }
  183. m = &magic[*ndx];
  184. m->flag = 0;
  185. m->cont_level = 0;
  186. while (*l == '>') {
  187. ++l; /* step over */
  188. m->cont_level++;
  189. }
  190. if (m->cont_level != 0 && *l == '(') {
  191. ++l; /* step over */
  192. m->flag |= INDIR;
  193. }
  194. if (m->cont_level != 0 && *l == '&') {
  195. ++l; /* step over */
  196. m->flag |= ADD;
  197. }
  198. /* get offset, then skip over it */
  199. m->offset = (int) strtoul(l,&t,0);
  200. if (l == t)
  201. magwarn("offset %s invalid", l);
  202. l = t;
  203. if (m->flag & INDIR) {
  204. m->in.type = LONG;
  205. m->in.offset = 0;
  206. /*
  207. * read [.lbs][+-]nnnnn)
  208. */
  209. if (*l == '.') {
  210. l++;
  211. switch (*l) {
  212. case 'l':
  213. m->in.type = LELONG;
  214. break;
  215. case 'L':
  216. m->in.type = BELONG;
  217. break;
  218. case 'h':
  219. case 's':
  220. m->in.type = LESHORT;
  221. break;
  222. case 'H':
  223. case 'S':
  224. m->in.type = BESHORT;
  225. break;
  226. case 'c':
  227. case 'b':
  228. case 'C':
  229. case 'B':
  230. m->in.type = BYTE;
  231. break;
  232. default:
  233. magwarn("indirect offset type %c invalid", *l);
  234. break;
  235. }
  236. l++;
  237. }
  238. s = l;
  239. if (*l == '+' || *l == '-') l++;
  240. if (isdigit((unsigned char)*l)) {
  241. m->in.offset = strtoul(l, &t, 0);
  242. if (*s == '-') m->in.offset = - m->in.offset;
  243. }
  244. else
  245. t = l;
  246. if (*t++ != ')')
  247. magwarn("missing ')' in indirect offset");
  248. l = t;
  249. }
  250. while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
  251. ++l;
  252. EATAB;
  253. #define NBYTE 4
  254. #define NSHORT 5
  255. #define NLONG 4
  256. #define NSTRING 6
  257. #define NDATE 4
  258. #define NBESHORT 7
  259. #define NBELONG 6
  260. #define NBEDATE 6
  261. #define NLESHORT 7
  262. #define NLELONG 6
  263. #define NLEDATE 6
  264. if (*l == 'u') {
  265. ++l;
  266. m->flag |= UNSIGNED;
  267. }
  268. /* get type, skip it */
  269. if (strncmp(l, "byte", NBYTE)==0) {
  270. m->type = BYTE;
  271. l += NBYTE;
  272. } else if (strncmp(l, "short", NSHORT)==0) {
  273. m->type = SHORT;
  274. l += NSHORT;
  275. } else if (strncmp(l, "long", NLONG)==0) {
  276. m->type = LONG;
  277. l += NLONG;
  278. } else if (strncmp(l, "string", NSTRING)==0) {
  279. m->type = STRING;
  280. l += NSTRING;
  281. } else if (strncmp(l, "date", NDATE)==0) {
  282. m->type = DATE;
  283. l += NDATE;
  284. } else if (strncmp(l, "beshort", NBESHORT)==0) {
  285. m->type = BESHORT;
  286. l += NBESHORT;
  287. } else if (strncmp(l, "belong", NBELONG)==0) {
  288. m->type = BELONG;
  289. l += NBELONG;
  290. } else if (strncmp(l, "bedate", NBEDATE)==0) {
  291. m->type = BEDATE;
  292. l += NBEDATE;
  293. } else if (strncmp(l, "leshort", NLESHORT)==0) {
  294. m->type = LESHORT;
  295. l += NLESHORT;
  296. } else if (strncmp(l, "lelong", NLELONG)==0) {
  297. m->type = LELONG;
  298. l += NLELONG;
  299. } else if (strncmp(l, "ledate", NLEDATE)==0) {
  300. m->type = LEDATE;
  301. l += NLEDATE;
  302. } else {
  303. magwarn("type %s invalid", l);
  304. return -1;
  305. }
  306. /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
  307. if (*l == '&') {
  308. ++l;
  309. m->mask = signextend(m, strtoul(l, &l, 0));
  310. eatsize(&l);
  311. } else
  312. m->mask = ~0L;
  313. EATAB;
  314. switch (*l) {
  315. case '>':
  316. case '<':
  317. /* Old-style anding: "0 byte &0x80 dynamically linked" */
  318. case '&':
  319. case '^':
  320. case '=':
  321. m->reln = *l;
  322. ++l;
  323. break;
  324. case '!':
  325. if (m->type != STRING) {
  326. m->reln = *l;
  327. ++l;
  328. break;
  329. }
  330. /* FALL THROUGH */
  331. default:
  332. if (*l == 'x' && isascii((unsigned char)l[1]) &&
  333. isspace((unsigned char)l[1])) {
  334. m->reln = *l;
  335. ++l;
  336. goto GetDesc; /* Bill The Cat */
  337. }
  338. m->reln = '=';
  339. break;
  340. }
  341. EATAB;
  342. if (getvalue(m, &l))
  343. return -1;
  344. /*
  345. * TODO finish this macro and start using it!
  346. * #define offsetcheck {if (offset > HOWMANY-1)
  347. * magwarn("offset too big"); }
  348. */
  349. /*
  350. * now get last part - the description
  351. */
  352. GetDesc:
  353. EATAB;
  354. if (l[0] == '\b') {
  355. ++l;
  356. m->nospflag = 1;
  357. } else if ((l[0] == '\\') && (l[1] == 'b')) {
  358. ++l;
  359. ++l;
  360. m->nospflag = 1;
  361. } else
  362. m->nospflag = 0;
  363. while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
  364. /* NULLBODY */;
  365. if (check) {
  366. mdump(m);
  367. }
  368. ++(*ndx); /* make room for next */
  369. return 0;
  370. }
  371. /*
  372. * Read a numeric value from a pointer, into the value union of a magic
  373. * pointer, according to the magic type. Update the string pointer to point
  374. * just after the number read. Return 0 for success, non-zero for failure.
  375. */
  376. static int
  377. getvalue(m, p)
  378. struct magic *m;
  379. char **p;
  380. {
  381. int slen;
  382. if (m->type == STRING) {
  383. *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
  384. m->vallen = slen;
  385. } else
  386. if (m->reln != 'x') {
  387. m->value.l = signextend(m, strtoul(*p, p, 0));
  388. eatsize(p);
  389. }
  390. return 0;
  391. }
  /*
   * hextoint - single hex char to int; -1 if not a hex char.
   *
   * Uses explicit ranges so the result does not depend on the locale.
   */
  static int
  hextoint(c)
  int c;
  {
      if (c >= '0' && c <= '9')
          return c - '0';
      if (c >= 'a' && c <= 'f')
          return c + 10 - 'a';
      if (c >= 'A' && c <= 'F')
          return c + 10 - 'A';
      return -1;
  }

  /*
   * getstr - convert a string containing C character escapes.
   *
   * s     text to scan
   * p     destination buffer for the converted bytes
   * plen  size of the destination, including room for the final NUL
   * slen  out: number of converted bytes stored, not counting the NUL
   *
   * Scanning stops at an unescaped whitespace character, at the end of the
   * input, or when the destination is full (reported on stderr).
   * Recognised escapes are \n \r \b \t \f \v, \ plus up to three octal
   * digits and \x plus up to two hex digits; any other escaped character
   * stands for itself, and "\x" without digits yields 'x'.
   *
   * Returns the updated scan pointer: just past the character that ended
   * the scan, or pointing AT the terminating NUL when the input ran out,
   * so the caller never reads beyond the end of the line.
   */
  static char *
  getstr(s, p, plen, slen)
  register char *s;
  register char *p;
  int plen, *slen;
  {
      char *origs = s, *origp = p;
      char *pmax = p + plen - 1;
      register int c;
      register int val;

      /* Peek first: the terminator must never be consumed. */
      while ((c = *s) != '\0') {
          s++;
          if (isspace((unsigned char) c))
              break;
          if (p >= pmax) {
              fprintf(stderr, "String too long: %s\n", origs);
              break;
          }
          if (c != '\\') {
              *p++ = (char) c;
              continue;
          }

          c = *s;
          if (c == '\0')
              break; /* dangling backslash: stay on the NUL */
          s++;

          switch (c) {
          case 'n':
              *p++ = '\n';
              break;
          case 'r':
              *p++ = '\r';
              break;
          case 'b':
              *p++ = '\b';
              break;
          case 't':
              *p++ = '\t';
              break;
          case 'f':
              *p++ = '\f';
              break;
          case 'v':
              *p++ = '\v';
              break;

          /* \ and up to 3 octal digits */
          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
              val = c - '0';
              if (*s >= '0' && *s <= '7') { /* try for 2 */
                  val = (val << 3) | (*s++ - '0');
                  if (*s >= '0' && *s <= '7') /* try for 3 */
                      val = (val << 3) | (*s++ - '0');
              }
              *p++ = (char) val;
              break;

          /* \x and up to 2 hex digits */
          case 'x':
              val = 'x'; /* Default if no digits */
              c = hextoint(*s);
              if (c >= 0) {
                  s++;
                  val = c;
                  c = hextoint(*s);
                  if (c >= 0) {
                      s++;
                      val = (val << 4) + c;
                  }
              }
              *p++ = (char) val;
              break;

          default:
              *p++ = (char) c;
              break;
          }
      }

      *p = '\0';
      *slen = (int) (p - origp);
      return s;
  }
  /*
   * showstr - print a string, rendering unprintable bytes as C escapes.
   *
   * fp   stream to write to
   * s    bytes to print
   * len  number of bytes to print, or -1 to print up to the first NUL
   *
   * Bytes in the range 040..0176 are written as they are.  Newline,
   * carriage return, backspace, tab, form feed and vertical tab are written
   * as \n \r \b \t \f \v; every other byte is written as a backslash
   * followed by three octal digits.
   */
  void
  showstr(fp, s, len)
  FILE *fp;
  const char *s;
  int len;
  {
      register char c;

      for (;;) {
          if (len == -1) {
              c = *s++;
              if (c == '\0')
                  break;
          } else {
              /*
               * Test the count before fetching, so that no byte beyond
               * the requested length is ever read.
               */
              if (len-- == 0)
                  break;
              c = *s++;
          }

          if (c >= 040 && c <= 0176) { /* TODO isprint && !iscntrl */
              (void) fputc(c, fp);
              continue;
          }

          (void) fputc('\\', fp);
          switch (c) {
          case '\n':
              (void) fputc('n', fp);
              break;
          case '\r':
              (void) fputc('r', fp);
              break;
          case '\b':
              (void) fputc('b', fp);
              break;
          case '\t':
              (void) fputc('t', fp);
              break;
          case '\f':
              (void) fputc('f', fp);
              break;
          case '\v':
              (void) fputc('v', fp);
              break;
          default:
              (void) fprintf(fp, "%.3o", c & 0377);
              break;
          }
      }
  }
  /*
   * eatsize(): skip the size suffix that may follow a number [eg. 10UL].
   *
   * p  in/out: scan pointer; advanced past an optional 'u' and then past at
   *    most one of 'l', 's', 'h', 'b' or 'c', all matched without regard
   *    to case.  Left unchanged when no suffix is present.
   */
  static void
  eatsize(p)
  char **p;
  {
      char *cur = *p;
      int ch;

      /* optional unsigned marker */
      if (LOWCASE(*cur) == 'u')
          cur++;

      /* optional width letter: long, short (s/h) or char/byte (b/c) */
      ch = LOWCASE(*cur);
      if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
          cur++;

      *p = cur;
  }