apprentice.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
  /*
   * apprentice - make one pass through /etc/magic, learning its secrets.
   *
   * Copyright (c) Ian F. Darwin, 1987.
   * Written by Ian F. Darwin.
   *
   * This software is not subject to any license of the American Telephone
   * and Telegraph Company or of the Regents of the University of California.
   *
   * Permission is granted to anyone to use this software for any purpose on
   * any computer system, and to alter it and redistribute it freely, subject
   * to the following restrictions:
   *
   * 1. The author is not responsible for the consequences of use of this
   *    software, no matter how awful, even if they arise from flaws in it.
   *
   * 2. The origin of this software must not be misrepresented, either by
   *    explicit claim or by omission.  Since few users ever read sources,
   *    credits must appear in the documentation.
   *
   * 3. Altered versions must be plainly marked as such, and must not be
   *    misrepresented as being the original software.  Since few users
   *    ever read sources, credits must appear in the documentation.
   *
   * 4. This notice may not be removed or altered.
   */
  27. #include <stdio.h>
  28. #include <stdlib.h>
  29. #include <string.h>
  30. #include <ctype.h>
  31. #include <errno.h>
  32. #include "file.h"
  #ifndef lint
  /* Version identification string kept in the compiled object. */
  static char *moduleid =
      "@(#)$Id: apprentice.c,v 1.25 1997/01/15 17:23:24 christos Exp $";
  #endif  /* lint */

  /*
   * EATAB - advance the scan pointer named `l' in the enclosing scope past
   * any ASCII white space.  Wrapped in do { } while (0) so that `EATAB;'
   * is exactly one statement and stays correct inside an unbraced if/else.
   */
  #define EATAB do { \
          while (isascii((unsigned char) *l) && \
                 isspace((unsigned char) *l)) \
              ++l; \
      } while (0)

  /*
   * LOWCASE - yield the lower-case form of an upper-case character and any
   * other character unchanged.  The argument is evaluated more than once.
   */
  #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
                      tolower((unsigned char) (l)) : (l))

  /* Helpers private to this file. */
  static int getvalue __P((struct magic *, char **));
  static int hextoint __P((int));
  static char *getstr __P((char *, char *, int, int *));
  static int parse __P((char *, int *, int));
  static void eatsize __P((char **));

  /* Number of entries currently allocated for the global magic table. */
  static int maxmagic = 0;

  static int apprentice_1 __P((char *, int));
  48. int
  49. apprentice(fn, check)
  50. char *fn; /* list of magic files */
  51. int check; /* non-zero? checking-only run. */
  52. {
  53. char *p, *mfn;
  54. int file_err, errs = -1;
  55. maxmagic = MAXMAGIS;
  56. magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
  57. mfn = malloc(strlen(fn)+1);
  58. if (magic == NULL || mfn == NULL) {
  59. (void) fprintf(stderr, "%s: Out of memory.\n", progname);
  60. if (check)
  61. return -1;
  62. else
  63. exit(1);
  64. }
  65. fn = strcpy(mfn, fn);
  66. while (fn) {
  67. p = strchr(fn, ':');
  68. if (p)
  69. *p++ = '\0';
  70. file_err = apprentice_1(fn, check);
  71. if (file_err > errs)
  72. errs = file_err;
  73. fn = p;
  74. }
  75. if (errs == -1)
  76. (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
  77. progname);
  78. if (!check && errs)
  79. exit(1);
  80. free(mfn);
  81. return errs;
  82. }
  83. static int
  84. apprentice_1(fn, check)
  85. char *fn; /* name of magic file */
  86. int check; /* non-zero? checking-only run. */
  87. {
  88. static const char hdr[] =
  89. "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  90. FILE *f;
  91. char line[BUFSIZ+1];
  92. int errs = 0;
  93. f = fopen(fn, "r");
  94. if (f==NULL) {
  95. if (errno != ENOENT)
  96. (void) fprintf(stderr,
  97. "%s: can't read magic file %s (%s)\n",
  98. progname, fn, strerror(errno));
  99. return -1;
  100. }
  101. /* parse it */
  102. if (check) /* print silly verbose header for USG compat. */
  103. (void) printf("%s\n", hdr);
  104. for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
  105. if (line[0]=='#') /* comment, do not parse */
  106. continue;
  107. if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
  108. continue;
  109. line[strlen(line)-1] = '\0'; /* delete newline */
  110. if (parse(line, &nmagic, check) != 0)
  111. errs = 1;
  112. }
  113. (void) fclose(f);
  114. return errs;
  115. }
  116. /*
  117. * extend the sign bit if the comparison is to be signed
  118. */
  119. uint32
  120. signextend(m, v)
  121. struct magic *m;
  122. uint32 v;
  123. {
  124. if (!(m->flag & UNSIGNED))
  125. switch(m->type) {
  126. /*
  127. * Do not remove the casts below. They are
  128. * vital. When later compared with the data,
  129. * the sign extension must have happened.
  130. */
  131. case BYTE:
  132. v = (char) v;
  133. break;
  134. case SHORT:
  135. case BESHORT:
  136. case LESHORT:
  137. v = (short) v;
  138. break;
  139. case DATE:
  140. case BEDATE:
  141. case LEDATE:
  142. case LONG:
  143. case BELONG:
  144. case LELONG:
  145. v = (int32) v;
  146. break;
  147. case STRING:
  148. break;
  149. default:
  150. magwarn("can't happen: m->type=%d\n",
  151. m->type);
  152. return -1;
  153. }
  154. return v;
  155. }
  156. /*
  157. * parse one line from magic file, put into magic[index++] if valid
  158. */
  159. static int
  160. parse(l, ndx, check)
  161. char *l;
  162. int *ndx, check;
  163. {
  164. int i = 0, nd = *ndx;
  165. struct magic *m;
  166. char *t, *s;
  167. #define ALLOC_INCR 20
  168. if (nd+1 >= maxmagic){
  169. maxmagic += ALLOC_INCR;
  170. if ((magic = (struct magic *) realloc(magic,
  171. sizeof(struct magic) *
  172. maxmagic)) == NULL) {
  173. (void) fprintf(stderr, "%s: Out of memory.\n", progname);
  174. if (check)
  175. return -1;
  176. else
  177. exit(1);
  178. }
  179. memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
  180. }
  181. m = &magic[*ndx];
  182. m->flag = 0;
  183. m->cont_level = 0;
  184. while (*l == '>') {
  185. ++l; /* step over */
  186. m->cont_level++;
  187. }
  188. if (m->cont_level != 0 && *l == '(') {
  189. ++l; /* step over */
  190. m->flag |= INDIR;
  191. }
  192. if (m->cont_level != 0 && *l == '&') {
  193. ++l; /* step over */
  194. m->flag |= ADD;
  195. }
  196. /* get offset, then skip over it */
  197. m->offset = (int) strtoul(l,&t,0);
  198. if (l == t)
  199. magwarn("offset %s invalid", l);
  200. l = t;
  201. if (m->flag & INDIR) {
  202. m->in.type = LONG;
  203. m->in.offset = 0;
  204. /*
  205. * read [.lbs][+-]nnnnn)
  206. */
  207. if (*l == '.') {
  208. l++;
  209. switch (LOWCASE(*l)) {
  210. case 'l':
  211. m->in.type = LONG;
  212. break;
  213. case 'h':
  214. case 's':
  215. m->in.type = SHORT;
  216. break;
  217. case 'c':
  218. case 'b':
  219. m->in.type = BYTE;
  220. break;
  221. default:
  222. magwarn("indirect offset type %c invalid", *l);
  223. break;
  224. }
  225. l++;
  226. }
  227. s = l;
  228. if (*l == '+' || *l == '-') l++;
  229. if (isdigit((unsigned char)*l)) {
  230. m->in.offset = strtoul(l, &t, 0);
  231. if (*s == '-') m->in.offset = - m->in.offset;
  232. }
  233. else
  234. t = l;
  235. if (*t++ != ')')
  236. magwarn("missing ')' in indirect offset");
  237. l = t;
  238. }
  239. while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
  240. ++l;
  241. EATAB;
  242. #define NBYTE 4
  243. #define NSHORT 5
  244. #define NLONG 4
  245. #define NSTRING 6
  246. #define NDATE 4
  247. #define NBESHORT 7
  248. #define NBELONG 6
  249. #define NBEDATE 6
  250. #define NLESHORT 7
  251. #define NLELONG 6
  252. #define NLEDATE 6
  253. if (*l == 'u') {
  254. ++l;
  255. m->flag |= UNSIGNED;
  256. }
  257. /* get type, skip it */
  258. if (strncmp(l, "byte", NBYTE)==0) {
  259. m->type = BYTE;
  260. l += NBYTE;
  261. } else if (strncmp(l, "short", NSHORT)==0) {
  262. m->type = SHORT;
  263. l += NSHORT;
  264. } else if (strncmp(l, "long", NLONG)==0) {
  265. m->type = LONG;
  266. l += NLONG;
  267. } else if (strncmp(l, "string", NSTRING)==0) {
  268. m->type = STRING;
  269. l += NSTRING;
  270. } else if (strncmp(l, "date", NDATE)==0) {
  271. m->type = DATE;
  272. l += NDATE;
  273. } else if (strncmp(l, "beshort", NBESHORT)==0) {
  274. m->type = BESHORT;
  275. l += NBESHORT;
  276. } else if (strncmp(l, "belong", NBELONG)==0) {
  277. m->type = BELONG;
  278. l += NBELONG;
  279. } else if (strncmp(l, "bedate", NBEDATE)==0) {
  280. m->type = BEDATE;
  281. l += NBEDATE;
  282. } else if (strncmp(l, "leshort", NLESHORT)==0) {
  283. m->type = LESHORT;
  284. l += NLESHORT;
  285. } else if (strncmp(l, "lelong", NLELONG)==0) {
  286. m->type = LELONG;
  287. l += NLELONG;
  288. } else if (strncmp(l, "ledate", NLEDATE)==0) {
  289. m->type = LEDATE;
  290. l += NLEDATE;
  291. } else {
  292. magwarn("type %s invalid", l);
  293. return -1;
  294. }
  295. /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
  296. if (*l == '&') {
  297. ++l;
  298. m->mask = signextend(m, strtoul(l, &l, 0));
  299. eatsize(&l);
  300. } else
  301. m->mask = ~0L;
  302. EATAB;
  303. switch (*l) {
  304. case '>':
  305. case '<':
  306. /* Old-style anding: "0 byte &0x80 dynamically linked" */
  307. case '&':
  308. case '^':
  309. case '=':
  310. m->reln = *l;
  311. ++l;
  312. break;
  313. case '!':
  314. if (m->type != STRING) {
  315. m->reln = *l;
  316. ++l;
  317. break;
  318. }
  319. /* FALL THROUGH */
  320. default:
  321. if (*l == 'x' && isascii((unsigned char)l[1]) &&
  322. isspace((unsigned char)l[1])) {
  323. m->reln = *l;
  324. ++l;
  325. goto GetDesc; /* Bill The Cat */
  326. }
  327. m->reln = '=';
  328. break;
  329. }
  330. EATAB;
  331. if (getvalue(m, &l))
  332. return -1;
  333. /*
  334. * TODO finish this macro and start using it!
  335. * #define offsetcheck {if (offset > HOWMANY-1)
  336. * magwarn("offset too big"); }
  337. */
  338. /*
  339. * now get last part - the description
  340. */
  341. GetDesc:
  342. EATAB;
  343. if (l[0] == '\b') {
  344. ++l;
  345. m->nospflag = 1;
  346. } else if ((l[0] == '\\') && (l[1] == 'b')) {
  347. ++l;
  348. ++l;
  349. m->nospflag = 1;
  350. } else
  351. m->nospflag = 0;
  352. while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
  353. /* NULLBODY */;
  354. if (check) {
  355. mdump(m);
  356. }
  357. ++(*ndx); /* make room for next */
  358. return 0;
  359. }
  360. /*
  361. * Read a numeric value from a pointer, into the value union of a magic
  362. * pointer, according to the magic type. Update the string pointer to point
  363. * just after the number read. Return 0 for success, non-zero for failure.
  364. */
  365. static int
  366. getvalue(m, p)
  367. struct magic *m;
  368. char **p;
  369. {
  370. int slen;
  371. if (m->type == STRING) {
  372. *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
  373. m->vallen = slen;
  374. } else
  375. if (m->reln != 'x') {
  376. m->value.l = signextend(m, strtoul(*p, p, 0));
  377. eatsize(p);
  378. }
  379. return 0;
  380. }
  /*
   * Convert a string containing C character escapes.  Stop at an unescaped
   * space or tab.
   * Copy the converted version to "p", returning its length in *slen.
   * Return updated scan pointer as function result.
   *
   * s     text to convert
   * p     destination buffer
   * plen  size of the destination; at most plen-1 characters are stored,
   *       followed by a NUL (a "String too long" message goes to stderr
   *       if the input does not fit)
   * slen  out: number of characters stored, not counting the NUL
   *
   * The returned pointer is just past the white space that ended the
   * string, or on the terminating NUL when the input ran out.  It never
   * points beyond that NUL, so the caller can keep scanning safely.
   */
  static char *
  getstr(s, p, plen, slen)
  register char *s;
  register char *p;
  int plen, *slen;
  {
      char *origs = s, *origp = p;
      char *pmax = p + plen - 1;
      register int c;
      register int val;

      while ((c = *s++) != '\0') {
          if (isspace((unsigned char) c))
              break;
          if (p >= pmax) {
              fprintf(stderr, "String too long: %s\n", origs);
              break;
          }
          if (c == '\\') {
              switch (c = *s++) {

              case '\0':
                  --s;    /* stay on the terminator */
                  goto out;

              default:
                  *p++ = (char) c;
                  break;

              case 'n':
                  *p++ = '\n';
                  break;

              case 'r':
                  *p++ = '\r';
                  break;

              case 'b':
                  *p++ = '\b';
                  break;

              case 't':
                  *p++ = '\t';
                  break;

              case 'f':
                  *p++ = '\f';
                  break;

              case 'v':
                  *p++ = '\v';
                  break;

              /* \ and up to 3 octal digits */
              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
                  val = c - '0';
                  c = *s++;   /* try for 2 */
                  if (c >= '0' && c <= '7') {
                      val = (val<<3) | (c - '0');
                      c = *s++;   /* try for 3 */
                      if (c >= '0' && c <= '7')
                          val = (val<<3) | (c-'0');
                      else
                          --s;
                  }
                  else
                      --s;
                  *p++ = (char)val;
                  break;

              /* \x and up to 2 hex digits */
              case 'x':
                  val = 'x';  /* Default if no digits */
                  c = hextoint(*s++); /* Get next char */
                  if (c >= 0) {
                      val = c;
                      c = hextoint(*s++);
                      if (c >= 0)
                          val = (val << 4) + c;
                      else
                          --s;
                  } else
                      --s;
                  *p++ = (char)val;
                  break;
              }
          } else
              *p++ = (char)c;
      }
      /*
       * c is NUL here only when the loop condition read the terminator;
       * both break paths leave a non-NUL c.  Undo the increment so the
       * result points at the NUL rather than one past it.
       */
      if (c == '\0')
          --s;
  out:
      *p = '\0';
      *slen = (int) (p - origp);
      return s;
  }
  /*
   * hextoint - value of one hexadecimal digit character.
   *
   * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
   * else, including any character outside the 7-bit ASCII range.
   */
  static int
  hextoint(c)
  int c;
  {
      unsigned char uc = (unsigned char) c;

      if ((uc & ~0177) != 0)      /* not 7-bit ASCII */
          return -1;
      if (isdigit(uc))
          return c - '0';
      if (c >= 'a' && c <= 'f')
          return c - 'a' + 10;
      if (c >= 'A' && c <= 'F')
          return c - 'A' + 10;
      return -1;
  }
  /*
   * Print a string containing C character escapes.
   *
   * fp   stream to write to
   * s    characters to print
   * len  number of characters to print, or -1 to print up to (not
   *      including) the first NUL
   *
   * Characters 040..0176 are written as they are.  Newline, carriage
   * return, backspace, tab, form feed and vertical tab are written as
   * \n \r \b \t \f \v; every other character as a backslash followed by
   * three octal digits.
   */
  void
  showstr(fp, s, len)
  FILE *fp;
  const char *s;
  int len;
  {
      register char c;

      for (;;) {
          /*
           * Test the count before fetching, so that a counted buffer is
           * never read beyond its last character.
           */
          if (len == -1) {
              c = *s++;
              if (c == '\0')
                  break;
          }
          else {
              if (len-- == 0)
                  break;
              c = *s++;
          }
          if (c >= 040 && c <= 0176)  /* TODO isprint && !iscntrl */
              (void) fputc(c, fp);
          else {
              (void) fputc('\\', fp);
              switch (c) {

              case '\n':
                  (void) fputc('n', fp);
                  break;

              case '\r':
                  (void) fputc('r', fp);
                  break;

              case '\b':
                  (void) fputc('b', fp);
                  break;

              case '\t':
                  (void) fputc('t', fp);
                  break;

              case '\f':
                  (void) fputc('f', fp);
                  break;

              case '\v':
                  (void) fputc('v', fp);
                  break;

              default:
                  (void) fprintf(fp, "%.3o", c & 0377);
                  break;
              }
          }
      }
  }
  /*
   * eatsize(): Eat the size spec from a number [eg. 10UL]
   *
   * p  in/out scan pointer.  An optional 'u' and then an optional one of
   *    'l', 's', 'h', 'b', 'c' (either case) are skipped; anything else
   *    leaves the pointer where it is.
   */
  static void
  eatsize(p)
  char **p;
  {
      char *cur = *p;
      int ch = tolower((unsigned char) *cur);

      if (ch == 'u')
          ch = tolower((unsigned char) *++cur);

      /* long, short (s or h), char/byte (b or c) */
      if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
          ++cur;

      *p = cur;
  }