apprentice.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. /*
  2. * apprentice - make one pass through /etc/magic, learning its secrets.
  3. *
  4. * Copyright (c) Ian F. Darwin, 1987.
  5. * Written by Ian F. Darwin.
  6. *
  7. * This software is not subject to any license of the American Telephone
  8. * and Telegraph Company or of the Regents of the University of California.
  9. *
  10. * Permission is granted to anyone to use this software for any purpose on
  11. * any computer system, and to alter it and redistribute it freely, subject
  12. * to the following restrictions:
  13. *
  14. * 1. The author is not responsible for the consequences of use of this
  15. * software, no matter how awful, even if they arise from flaws in it.
  16. *
  17. * 2. The origin of this software must not be misrepresented, either by
  18. * explicit claim or by omission. Since few users ever read sources,
  19. * credits must appear in the documentation.
  20. *
  21. * 3. Altered versions must be plainly marked as such, and must not be
  22. * misrepresented as being the original software. Since few users
  23. * ever read sources, credits must appear in the documentation.
  24. *
  25. * 4. This notice may not be removed or altered.
  26. */
  27. #include <stdio.h>
  28. #include <stdlib.h>
  29. #include <string.h>
  30. #include <ctype.h>
  31. #include <errno.h>
  32. #include "file.h"
  33. #ifndef lint
  34. FILE_RCSID("@(#)$Id: apprentice.c,v 1.29 1999/10/31 22:23:03 christos Exp $")
  35. #endif /* lint */
  36. #define EATAB {while (isascii((unsigned char) *l) && \
  37. isspace((unsigned char) *l)) ++l;}
  38. #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
  39. tolower((unsigned char) (l)) : (l))
/* Forward declarations for the file-local helpers defined further down. */
static int getvalue __P((struct magic *, char **));
static int hextoint __P((int));
static char *getstr __P((char *, char *, int, int *));
static int parse __P((char *, int *, int));
static void eatsize __P((char **));
/* Number of entries currently allocated for the global magic[] array. */
static int maxmagic = 0;
static int apprentice_1 __P((const char *, int));
  47. int
  48. apprentice(fn, check)
  49. const char *fn; /* list of magic files */
  50. int check; /* non-zero? checking-only run. */
  51. {
  52. char *p, *mfn;
  53. int file_err, errs = -1;
  54. maxmagic = MAXMAGIS;
  55. magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
  56. mfn = malloc(strlen(fn)+1);
  57. if (magic == NULL || mfn == NULL) {
  58. (void) fprintf(stderr, "%s: Out of memory.\n", progname);
  59. if (check)
  60. return -1;
  61. else
  62. exit(1);
  63. }
  64. fn = strcpy(mfn, fn);
  65. while (fn) {
  66. p = strchr(fn, ':');
  67. if (p)
  68. *p++ = '\0';
  69. file_err = apprentice_1(fn, check);
  70. if (file_err > errs)
  71. errs = file_err;
  72. fn = p;
  73. }
  74. if (errs == -1)
  75. (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
  76. progname);
  77. if (!check && errs)
  78. exit(1);
  79. free(mfn);
  80. return errs;
  81. }
  82. static int
  83. apprentice_1(fn, check)
  84. const char *fn; /* name of magic file */
  85. int check; /* non-zero? checking-only run. */
  86. {
  87. static const char hdr[] =
  88. "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  89. FILE *f;
  90. char line[BUFSIZ+1];
  91. int errs = 0;
  92. f = fopen(fn, "r");
  93. if (f==NULL) {
  94. if (errno != ENOENT)
  95. (void) fprintf(stderr,
  96. "%s: can't read magic file %s (%s)\n",
  97. progname, fn, strerror(errno));
  98. return -1;
  99. }
  100. /* parse it */
  101. if (check) /* print silly verbose header for USG compat. */
  102. (void) printf("%s\n", hdr);
  103. for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
  104. if (line[0]=='#') /* comment, do not parse */
  105. continue;
  106. if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
  107. continue;
  108. line[strlen(line)-1] = '\0'; /* delete newline */
  109. if (parse(line, &nmagic, check) != 0)
  110. errs = 1;
  111. }
  112. (void) fclose(f);
  113. return errs;
  114. }
  115. /*
  116. * extend the sign bit if the comparison is to be signed
  117. */
  118. uint32
  119. signextend(m, v)
  120. struct magic *m;
  121. uint32 v;
  122. {
  123. if (!(m->flag & UNSIGNED))
  124. switch(m->type) {
  125. /*
  126. * Do not remove the casts below. They are
  127. * vital. When later compared with the data,
  128. * the sign extension must have happened.
  129. */
  130. case BYTE:
  131. v = (char) v;
  132. break;
  133. case SHORT:
  134. case BESHORT:
  135. case LESHORT:
  136. v = (short) v;
  137. break;
  138. case DATE:
  139. case BEDATE:
  140. case LEDATE:
  141. case LONG:
  142. case BELONG:
  143. case LELONG:
  144. v = (int32) v;
  145. break;
  146. case STRING:
  147. break;
  148. default:
  149. magwarn("can't happen: m->type=%d\n",
  150. m->type);
  151. return -1;
  152. }
  153. return v;
  154. }
  155. /*
  156. * parse one line from magic file, put into magic[index++] if valid
  157. */
  158. static int
  159. parse(l, ndx, check)
  160. char *l;
  161. int *ndx, check;
  162. {
  163. int i = 0, nd = *ndx;
  164. struct magic *m;
  165. char *t, *s;
  166. #define ALLOC_INCR 200
  167. if (nd+1 >= maxmagic){
  168. maxmagic += ALLOC_INCR;
  169. if ((magic = (struct magic *) realloc(magic,
  170. sizeof(struct magic) *
  171. maxmagic)) == NULL) {
  172. (void) fprintf(stderr, "%s: Out of memory.\n", progname);
  173. if (check)
  174. return -1;
  175. else
  176. exit(1);
  177. }
  178. memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
  179. }
  180. m = &magic[*ndx];
  181. m->flag = 0;
  182. m->cont_level = 0;
  183. while (*l == '>') {
  184. ++l; /* step over */
  185. m->cont_level++;
  186. }
  187. if (m->cont_level != 0 && *l == '(') {
  188. ++l; /* step over */
  189. m->flag |= INDIR;
  190. }
  191. if (m->cont_level != 0 && *l == '&') {
  192. ++l; /* step over */
  193. m->flag |= ADD;
  194. }
  195. /* get offset, then skip over it */
  196. m->offset = (int) strtoul(l,&t,0);
  197. if (l == t)
  198. magwarn("offset %s invalid", l);
  199. l = t;
  200. if (m->flag & INDIR) {
  201. m->in.type = LONG;
  202. m->in.offset = 0;
  203. /*
  204. * read [.lbs][+-]nnnnn)
  205. */
  206. if (*l == '.') {
  207. l++;
  208. switch (*l) {
  209. case 'l':
  210. m->in.type = LELONG;
  211. break;
  212. case 'L':
  213. m->in.type = BELONG;
  214. break;
  215. case 'h':
  216. case 's':
  217. m->in.type = LESHORT;
  218. break;
  219. case 'H':
  220. case 'S':
  221. m->in.type = BESHORT;
  222. break;
  223. case 'c':
  224. case 'b':
  225. case 'C':
  226. case 'B':
  227. m->in.type = BYTE;
  228. break;
  229. default:
  230. magwarn("indirect offset type %c invalid", *l);
  231. break;
  232. }
  233. l++;
  234. }
  235. s = l;
  236. if (*l == '+' || *l == '-') l++;
  237. if (isdigit((unsigned char)*l)) {
  238. m->in.offset = strtoul(l, &t, 0);
  239. if (*s == '-') m->in.offset = - m->in.offset;
  240. }
  241. else
  242. t = l;
  243. if (*t++ != ')')
  244. magwarn("missing ')' in indirect offset");
  245. l = t;
  246. }
  247. while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
  248. ++l;
  249. EATAB;
  250. #define NBYTE 4
  251. #define NSHORT 5
  252. #define NLONG 4
  253. #define NSTRING 6
  254. #define NDATE 4
  255. #define NBESHORT 7
  256. #define NBELONG 6
  257. #define NBEDATE 6
  258. #define NLESHORT 7
  259. #define NLELONG 6
  260. #define NLEDATE 6
  261. if (*l == 'u') {
  262. ++l;
  263. m->flag |= UNSIGNED;
  264. }
  265. /* get type, skip it */
  266. if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
  267. m->type = BYTE;
  268. l += NBYTE;
  269. } else if (strncmp(l, "byte", NBYTE)==0) {
  270. m->type = BYTE;
  271. l += NBYTE;
  272. } else if (strncmp(l, "short", NSHORT)==0) {
  273. m->type = SHORT;
  274. l += NSHORT;
  275. } else if (strncmp(l, "long", NLONG)==0) {
  276. m->type = LONG;
  277. l += NLONG;
  278. } else if (strncmp(l, "string", NSTRING)==0) {
  279. m->type = STRING;
  280. l += NSTRING;
  281. } else if (strncmp(l, "date", NDATE)==0) {
  282. m->type = DATE;
  283. l += NDATE;
  284. } else if (strncmp(l, "beshort", NBESHORT)==0) {
  285. m->type = BESHORT;
  286. l += NBESHORT;
  287. } else if (strncmp(l, "belong", NBELONG)==0) {
  288. m->type = BELONG;
  289. l += NBELONG;
  290. } else if (strncmp(l, "bedate", NBEDATE)==0) {
  291. m->type = BEDATE;
  292. l += NBEDATE;
  293. } else if (strncmp(l, "leshort", NLESHORT)==0) {
  294. m->type = LESHORT;
  295. l += NLESHORT;
  296. } else if (strncmp(l, "lelong", NLELONG)==0) {
  297. m->type = LELONG;
  298. l += NLELONG;
  299. } else if (strncmp(l, "ledate", NLEDATE)==0) {
  300. m->type = LEDATE;
  301. l += NLEDATE;
  302. } else {
  303. magwarn("type %s invalid", l);
  304. return -1;
  305. }
  306. /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
  307. if (*l == '&') {
  308. ++l;
  309. m->mask = signextend(m, strtoul(l, &l, 0));
  310. eatsize(&l);
  311. } else
  312. m->mask = ~0L;
  313. EATAB;
  314. switch (*l) {
  315. case '>':
  316. case '<':
  317. /* Old-style anding: "0 byte &0x80 dynamically linked" */
  318. case '&':
  319. case '^':
  320. case '=':
  321. m->reln = *l;
  322. ++l;
  323. if (*l == '=') {
  324. /* HP compat: ignore &= etc. */
  325. ++l;
  326. }
  327. break;
  328. case '!':
  329. if (m->type != STRING) {
  330. m->reln = *l;
  331. ++l;
  332. break;
  333. }
  334. /* FALL THROUGH */
  335. default:
  336. if (*l == 'x' && isascii((unsigned char)l[1]) &&
  337. isspace((unsigned char)l[1])) {
  338. m->reln = *l;
  339. ++l;
  340. goto GetDesc; /* Bill The Cat */
  341. }
  342. m->reln = '=';
  343. break;
  344. }
  345. EATAB;
  346. if (getvalue(m, &l))
  347. return -1;
  348. /*
  349. * TODO finish this macro and start using it!
  350. * #define offsetcheck {if (offset > HOWMANY-1)
  351. * magwarn("offset too big"); }
  352. */
  353. /*
  354. * now get last part - the description
  355. */
  356. GetDesc:
  357. EATAB;
  358. if (l[0] == '\b') {
  359. ++l;
  360. m->nospflag = 1;
  361. } else if ((l[0] == '\\') && (l[1] == 'b')) {
  362. ++l;
  363. ++l;
  364. m->nospflag = 1;
  365. } else
  366. m->nospflag = 0;
  367. while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
  368. /* NULLBODY */;
  369. if (check) {
  370. mdump(m);
  371. }
  372. ++(*ndx); /* make room for next */
  373. return 0;
  374. }
  375. /*
  376. * Read a numeric value from a pointer, into the value union of a magic
  377. * pointer, according to the magic type. Update the string pointer to point
  378. * just after the number read. Return 0 for success, non-zero for failure.
  379. */
  380. static int
  381. getvalue(m, p)
  382. struct magic *m;
  383. char **p;
  384. {
  385. int slen;
  386. if (m->type == STRING) {
  387. *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
  388. m->vallen = slen;
  389. } else
  390. if (m->reln != 'x') {
  391. m->value.l = signextend(m, strtoul(*p, p, 0));
  392. eatsize(p);
  393. }
  394. return 0;
  395. }
  396. /*
  397. * Convert a string containing C character escapes. Stop at an unescaped
  398. * space or tab.
  399. * Copy the converted version to "p", returning its length in *slen.
  400. * Return updated scan pointer as function result.
  401. */
  402. static char *
  403. getstr(s, p, plen, slen)
  404. register char *s;
  405. register char *p;
  406. int plen, *slen;
  407. {
  408. char *origs = s, *origp = p;
  409. char *pmax = p + plen - 1;
  410. register int c;
  411. register int val;
  412. while ((c = *s++) != '\0') {
  413. if (isspace((unsigned char) c))
  414. break;
  415. if (p >= pmax) {
  416. fprintf(stderr, "String too long: %s\n", origs);
  417. break;
  418. }
  419. if(c == '\\') {
  420. switch(c = *s++) {
  421. case '\0':
  422. goto out;
  423. default:
  424. *p++ = (char) c;
  425. break;
  426. case 'n':
  427. *p++ = '\n';
  428. break;
  429. case 'r':
  430. *p++ = '\r';
  431. break;
  432. case 'b':
  433. *p++ = '\b';
  434. break;
  435. case 't':
  436. *p++ = '\t';
  437. break;
  438. case 'f':
  439. *p++ = '\f';
  440. break;
  441. case 'v':
  442. *p++ = '\v';
  443. break;
  444. /* \ and up to 3 octal digits */
  445. case '0':
  446. case '1':
  447. case '2':
  448. case '3':
  449. case '4':
  450. case '5':
  451. case '6':
  452. case '7':
  453. val = c - '0';
  454. c = *s++; /* try for 2 */
  455. if(c >= '0' && c <= '7') {
  456. val = (val<<3) | (c - '0');
  457. c = *s++; /* try for 3 */
  458. if(c >= '0' && c <= '7')
  459. val = (val<<3) | (c-'0');
  460. else
  461. --s;
  462. }
  463. else
  464. --s;
  465. *p++ = (char)val;
  466. break;
  467. /* \x and up to 2 hex digits */
  468. case 'x':
  469. val = 'x'; /* Default if no digits */
  470. c = hextoint(*s++); /* Get next char */
  471. if (c >= 0) {
  472. val = c;
  473. c = hextoint(*s++);
  474. if (c >= 0)
  475. val = (val << 4) + c;
  476. else
  477. --s;
  478. } else
  479. --s;
  480. *p++ = (char)val;
  481. break;
  482. }
  483. } else
  484. *p++ = (char)c;
  485. }
  486. out:
  487. *p = '\0';
  488. *slen = p - origp;
  489. return s;
  490. }
  491. /* Single hex char to int; -1 if not a hex char. */
  492. static int
  493. hextoint(c)
  494. int c;
  495. {
  496. if (!isascii((unsigned char) c)) return -1;
  497. if (isdigit((unsigned char) c)) return c - '0';
  498. if ((c>='a')&&(c<='f')) return c + 10 - 'a';
  499. if ((c>='A')&&(c<='F')) return c + 10 - 'A';
  500. return -1;
  501. }
  502. /*
  503. * Print a string containing C character escapes.
  504. */
  505. void
  506. showstr(fp, s, len)
  507. FILE *fp;
  508. const char *s;
  509. int len;
  510. {
  511. register char c;
  512. for (;;) {
  513. c = *s++;
  514. if (len == -1) {
  515. if (c == '\0')
  516. break;
  517. }
  518. else {
  519. if (len-- == 0)
  520. break;
  521. }
  522. if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
  523. (void) fputc(c, fp);
  524. else {
  525. (void) fputc('\\', fp);
  526. switch (c) {
  527. case '\n':
  528. (void) fputc('n', fp);
  529. break;
  530. case '\r':
  531. (void) fputc('r', fp);
  532. break;
  533. case '\b':
  534. (void) fputc('b', fp);
  535. break;
  536. case '\t':
  537. (void) fputc('t', fp);
  538. break;
  539. case '\f':
  540. (void) fputc('f', fp);
  541. break;
  542. case '\v':
  543. (void) fputc('v', fp);
  544. break;
  545. default:
  546. (void) fprintf(fp, "%.3o", c & 0377);
  547. break;
  548. }
  549. }
  550. }
  551. }
  552. /*
  553. * eatsize(): Eat the size spec from a number [eg. 10UL]
  554. */
  555. static void
  556. eatsize(p)
  557. char **p;
  558. {
  559. char *l = *p;
  560. if (LOWCASE(*l) == 'u')
  561. l++;
  562. switch (LOWCASE(*l)) {
  563. case 'l': /* long */
  564. case 's': /* short */
  565. case 'h': /* short */
  566. case 'b': /* char/byte */
  567. case 'c': /* char/byte */
  568. l++;
  569. /*FALLTHROUGH*/
  570. default:
  571. break;
  572. }
  573. *p = l;
  574. }