apprentice.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145
  1. /*
  2. * apprentice - make one pass through /etc/magic, learning its secrets.
  3. *
  4. * Copyright (c) Ian F. Darwin, 1987.
  5. * Written by Ian F. Darwin.
  6. *
  7. * This software is not subject to any license of the American Telephone
  8. * and Telegraph Company or of the Regents of the University of California.
  9. *
  10. * Permission is granted to anyone to use this software for any purpose on
  11. * any computer system, and to alter it and redistribute it freely, subject
  12. * to the following restrictions:
  13. *
  14. * 1. The author is not responsible for the consequences of use of this
  15. * software, no matter how awful, even if they arise from flaws in it.
  16. *
  17. * 2. The origin of this software must not be misrepresented, either by
  18. * explicit claim or by omission. Since few users ever read sources,
  19. * credits must appear in the documentation.
  20. *
  21. * 3. Altered versions must be plainly marked as such, and must not be
  22. * misrepresented as being the original software. Since few users
  23. * ever read sources, credits must appear in the documentation.
  24. *
  25. * 4. This notice may not be removed or altered.
  26. */
  27. #include "file.h"
  28. #include <stdio.h>
  29. #include <stdlib.h>
  30. #ifdef HAVE_UNISTD_H
  31. #include <unistd.h>
  32. #endif
  33. #include <string.h>
  34. #include <ctype.h>
  35. #include <errno.h>
  36. #ifdef QUICK
  37. #include <fcntl.h>
  38. #include <sys/types.h>
  39. #include <sys/stat.h>
  40. #include <sys/mman.h>
  41. #endif
  42. #ifndef lint
  43. FILE_RCSID("@(#)$Id: apprentice.c,v 1.44 2001/08/01 14:03:19 christos Exp $")
  44. #endif /* lint */
  45. #define EATAB {while (isascii((unsigned char) *l) && \
  46. isspace((unsigned char) *l)) ++l;}
  47. #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
  48. tolower((unsigned char) (l)) : (l))
  49. /*
  50. * Work around a bug in headers on Digital Unix.
  51. * At least confirmed for: OSF1 V4.0 878
  52. */
  53. #if defined(__osf__) && defined(__DECC)
  54. #ifdef MAP_FAILED
  55. #undef MAP_FAILED
  56. #endif
  57. #endif
  58. #ifndef MAP_FAILED
  59. #define MAP_FAILED (void *) -1
  60. #endif
  61. #ifndef MAP_FILE
  62. #define MAP_FILE 0
  63. #endif
  64. #ifdef __EMX__
  65. char PATHSEP=';';
  66. #else
  67. char PATHSEP=':';
  68. #endif
  69. static int getvalue __P((struct magic *, char **));
  70. static int hextoint __P((int));
  71. static char *getstr __P((char *, char *, int, int *));
  72. static int parse __P((struct magic **, uint32 *, char *, int));
  73. static void eatsize __P((char **));
  74. static int apprentice_1 __P((const char *, int));
  75. static int apprentice_file __P((struct magic **, uint32 *,
  76. const char *, int));
  77. static void byteswap __P((struct magic *, uint32));
  78. static void bs1 __P((struct magic *));
  79. static uint16 swap2 __P((uint16));
  80. static uint32 swap4 __P((uint32));
  81. static char *mkdbname __P((const char *));
  82. static int apprentice_map __P((struct magic **, uint32 *,
  83. const char *, int));
  84. static int apprentice_compile __P((struct magic **, uint32 *,
  85. const char *, int));
  86. static int maxmagic = 0;
  87. struct mlist mlist;
  88. #ifdef COMPILE_ONLY
  89. const char *magicfile;
  90. char *progname;
  91. int lineno;
  92. int main __P((int, char *[]));
  93. int
  94. main(argc, argv)
  95. int argc;
  96. char *argv[];
  97. {
  98. int ret;
  99. if ((progname = strrchr(argv[0], '/')) != NULL)
  100. progname++;
  101. else
  102. progname = argv[0];
  103. if (argc != 2) {
  104. (void)fprintf(stderr, "usage: %s file\n", progname);
  105. exit(1);
  106. }
  107. magicfile = argv[1];
  108. exit(apprentice(magicfile, COMPILE));
  109. }
  110. #endif /* COMPILE_ONLY */
  111. /*
  112. * Handle one file.
  113. */
  114. static int
  115. apprentice_1(fn, action)
  116. const char *fn;
  117. int action;
  118. {
  119. struct magic *magic = NULL;
  120. uint32 nmagic = 0;
  121. struct mlist *ml;
  122. int rv = -1;
  123. if (action == COMPILE) {
  124. rv = apprentice_file(&magic, &nmagic, fn, action);
  125. if (rv == 0)
  126. return apprentice_compile(&magic, &nmagic, fn, action);
  127. else
  128. return rv;
  129. }
  130. #ifndef COMPILE_ONLY
  131. if ((rv = apprentice_map(&magic, &nmagic, fn, action)) != 0)
  132. (void)fprintf(stderr, "%s: Using regular magic file `%s'\n",
  133. progname, fn);
  134. if (rv != 0)
  135. rv = apprentice_file(&magic, &nmagic, fn, action);
  136. if (rv != 0)
  137. return rv;
  138. if ((ml = malloc(sizeof(*ml))) == NULL) {
  139. (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
  140. strerror(errno));
  141. if (action == CHECK)
  142. return -1;
  143. }
  144. if (magic == NULL || nmagic == 0)
  145. return rv;
  146. ml->magic = magic;
  147. ml->nmagic = nmagic;
  148. mlist.prev->next = ml;
  149. ml->prev = mlist.prev;
  150. ml->next = &mlist;
  151. mlist.prev = ml;
  152. return rv;
  153. #endif /* COMPILE_ONLY */
  154. }
  155. int
  156. apprentice(fn, action)
  157. const char *fn; /* list of magic files */
  158. int action;
  159. {
  160. char *p, *mfn;
  161. int file_err, errs = -1;
  162. mlist.next = mlist.prev = &mlist;
  163. mfn = malloc(strlen(fn)+1);
  164. if (mfn == NULL) {
  165. (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
  166. strerror(errno));
  167. if (action == CHECK)
  168. return -1;
  169. else
  170. exit(1);
  171. }
  172. fn = strcpy(mfn, fn);
  173. while (fn) {
  174. p = strchr(fn, PATHSEP);
  175. if (p)
  176. *p++ = '\0';
  177. file_err = apprentice_1(fn, action);
  178. if (file_err > errs)
  179. errs = file_err;
  180. fn = p;
  181. }
  182. if (errs == -1)
  183. (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
  184. progname);
  185. if (action == CHECK && errs)
  186. exit(1);
  187. free(mfn);
  188. return errs;
  189. }
  190. /*
  191. * parse from a file
  192. */
  193. static int
  194. apprentice_file(magicp, nmagicp, fn, action)
  195. struct magic **magicp;
  196. uint32 *nmagicp;
  197. const char *fn; /* name of magic file */
  198. int action;
  199. {
  200. static const char hdr[] =
  201. "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  202. FILE *f;
  203. char line[BUFSIZ+1];
  204. int errs = 0;
  205. f = fopen(fn, "r");
  206. if (f == NULL) {
  207. if (errno != ENOENT)
  208. (void) fprintf(stderr,
  209. "%s: can't read magic file %s (%s)\n",
  210. progname, fn, strerror(errno));
  211. return -1;
  212. }
  213. maxmagic = MAXMAGIS;
  214. *magicp = (struct magic *) calloc(sizeof(struct magic), maxmagic);
  215. if (*magicp == NULL) {
  216. (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
  217. strerror(errno));
  218. if (action == CHECK)
  219. return -1;
  220. }
  221. /* parse it */
  222. if (action == CHECK) /* print silly verbose header for USG compat. */
  223. (void) printf("%s\n", hdr);
  224. for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
  225. if (line[0]=='#') /* comment, do not parse */
  226. continue;
  227. if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
  228. continue;
  229. line[strlen(line)-1] = '\0'; /* delete newline */
  230. if (parse(magicp, nmagicp, line, action) != 0)
  231. errs = 1;
  232. }
  233. (void) fclose(f);
  234. if (errs) {
  235. free(*magicp);
  236. *magicp = NULL;
  237. *nmagicp = 0;
  238. }
  239. return errs;
  240. }
  241. /*
  242. * extend the sign bit if the comparison is to be signed
  243. */
  244. uint32
  245. signextend(m, v)
  246. struct magic *m;
  247. uint32 v;
  248. {
  249. if (!(m->flag & UNSIGNED))
  250. switch(m->type) {
  251. /*
  252. * Do not remove the casts below. They are
  253. * vital. When later compared with the data,
  254. * the sign extension must have happened.
  255. */
  256. case BYTE:
  257. v = (char) v;
  258. break;
  259. case SHORT:
  260. case BESHORT:
  261. case LESHORT:
  262. v = (short) v;
  263. break;
  264. case DATE:
  265. case BEDATE:
  266. case LEDATE:
  267. case LDATE:
  268. case BELDATE:
  269. case LELDATE:
  270. case LONG:
  271. case BELONG:
  272. case LELONG:
  273. v = (int32) v;
  274. break;
  275. case STRING:
  276. case PSTRING:
  277. break;
  278. default:
  279. magwarn("can't happen: m->type=%d\n",
  280. m->type);
  281. return -1;
  282. }
  283. return v;
  284. }
  285. /*
  286. * parse one line from magic file, put into magic[index++] if valid
  287. */
  288. static int
  289. parse(magicp, nmagicp, l, action)
  290. struct magic **magicp;
  291. uint32 *nmagicp;
  292. char *l;
  293. int action;
  294. {
  295. int i = 0;
  296. struct magic *m;
  297. char *t;
  298. #define ALLOC_INCR 200
  299. if (*nmagicp + 1 >= maxmagic){
  300. maxmagic += ALLOC_INCR;
  301. if ((m = (struct magic *) realloc(*magicp,
  302. sizeof(struct magic) * maxmagic)) == NULL) {
  303. (void) fprintf(stderr, "%s: Out of memory (%s).\n",
  304. progname, strerror(errno));
  305. if (*magicp)
  306. free(*magicp);
  307. if (action == CHECK)
  308. return -1;
  309. else
  310. exit(1);
  311. }
  312. *magicp = m;
  313. memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
  314. * ALLOC_INCR);
  315. }
  316. m = &(*magicp)[*nmagicp];
  317. m->flag = 0;
  318. m->cont_level = 0;
  319. while (*l == '>') {
  320. ++l; /* step over */
  321. m->cont_level++;
  322. }
  323. if (m->cont_level != 0 && *l == '(') {
  324. ++l; /* step over */
  325. m->flag |= INDIR;
  326. }
  327. if (m->cont_level != 0 && *l == '&') {
  328. ++l; /* step over */
  329. m->flag |= OFFADD;
  330. }
  331. /* get offset, then skip over it */
  332. m->offset = (int) strtoul(l,&t,0);
  333. if (l == t)
  334. magwarn("offset %s invalid", l);
  335. l = t;
  336. if (m->flag & INDIR) {
  337. m->in_type = LONG;
  338. m->in_offset = 0;
  339. /*
  340. * read [.lbs][+-]nnnnn)
  341. */
  342. if (*l == '.') {
  343. l++;
  344. switch (*l) {
  345. case 'l':
  346. m->in_type = LELONG;
  347. break;
  348. case 'L':
  349. m->in_type = BELONG;
  350. break;
  351. case 'h':
  352. case 's':
  353. m->in_type = LESHORT;
  354. break;
  355. case 'H':
  356. case 'S':
  357. m->in_type = BESHORT;
  358. break;
  359. case 'c':
  360. case 'b':
  361. case 'C':
  362. case 'B':
  363. m->in_type = BYTE;
  364. break;
  365. default:
  366. magwarn("indirect offset type %c invalid", *l);
  367. break;
  368. }
  369. l++;
  370. }
  371. if (*l == '~') {
  372. m->in_op = OPINVERSE;
  373. l++;
  374. }
  375. switch (*l) {
  376. case '&':
  377. m->in_op |= OPAND;
  378. l++;
  379. break;
  380. case '|':
  381. m->in_op |= OPOR;
  382. l++;
  383. break;
  384. case '^':
  385. m->in_op |= OPXOR;
  386. l++;
  387. break;
  388. case '+':
  389. m->in_op |= OPADD;
  390. l++;
  391. break;
  392. case '-':
  393. m->in_op |= OPMINUS;
  394. l++;
  395. break;
  396. case '*':
  397. m->in_op |= OPMULTIPLY;
  398. l++;
  399. break;
  400. case '/':
  401. m->in_op |= OPDIVIDE;
  402. l++;
  403. break;
  404. case '%':
  405. m->in_op |= OPMODULO;
  406. l++;
  407. break;
  408. }
  409. if (isdigit((unsigned char)*l))
  410. m->in_offset = strtoul(l, &t, 0);
  411. else
  412. t = l;
  413. if (*t++ != ')')
  414. magwarn("missing ')' in indirect offset");
  415. l = t;
  416. }
  417. while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
  418. ++l;
  419. EATAB;
  420. #define NBYTE 4
  421. #define NSHORT 5
  422. #define NLONG 4
  423. #define NSTRING 6
  424. #define NDATE 4
  425. #define NBESHORT 7
  426. #define NBELONG 6
  427. #define NBEDATE 6
  428. #define NLESHORT 7
  429. #define NLELONG 6
  430. #define NLEDATE 6
  431. #define NPSTRING 7
  432. #define NLDATE 5
  433. #define NBELDATE 7
  434. #define NLELDATE 7
  435. if (*l == 'u') {
  436. ++l;
  437. m->flag |= UNSIGNED;
  438. }
  439. /* get type, skip it */
  440. if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
  441. m->type = BYTE;
  442. l += NBYTE;
  443. } else if (strncmp(l, "byte", NBYTE)==0) {
  444. m->type = BYTE;
  445. l += NBYTE;
  446. } else if (strncmp(l, "short", NSHORT)==0) {
  447. m->type = SHORT;
  448. l += NSHORT;
  449. } else if (strncmp(l, "long", NLONG)==0) {
  450. m->type = LONG;
  451. l += NLONG;
  452. } else if (strncmp(l, "string", NSTRING)==0) {
  453. m->type = STRING;
  454. l += NSTRING;
  455. } else if (strncmp(l, "date", NDATE)==0) {
  456. m->type = DATE;
  457. l += NDATE;
  458. } else if (strncmp(l, "beshort", NBESHORT)==0) {
  459. m->type = BESHORT;
  460. l += NBESHORT;
  461. } else if (strncmp(l, "belong", NBELONG)==0) {
  462. m->type = BELONG;
  463. l += NBELONG;
  464. } else if (strncmp(l, "bedate", NBEDATE)==0) {
  465. m->type = BEDATE;
  466. l += NBEDATE;
  467. } else if (strncmp(l, "leshort", NLESHORT)==0) {
  468. m->type = LESHORT;
  469. l += NLESHORT;
  470. } else if (strncmp(l, "lelong", NLELONG)==0) {
  471. m->type = LELONG;
  472. l += NLELONG;
  473. } else if (strncmp(l, "ledate", NLEDATE)==0) {
  474. m->type = LEDATE;
  475. l += NLEDATE;
  476. } else if (strncmp(l, "pstring", NPSTRING)==0) {
  477. m->type = PSTRING;
  478. l += NPSTRING;
  479. } else if (strncmp(l, "ldate", NLDATE)==0) {
  480. m->type = LDATE;
  481. l += NLDATE;
  482. } else if (strncmp(l, "beldate", NBELDATE)==0) {
  483. m->type = BELDATE;
  484. l += NBELDATE;
  485. } else if (strncmp(l, "leldate", NLELDATE)==0) {
  486. m->type = LELDATE;
  487. l += NLELDATE;
  488. } else {
  489. magwarn("type %s invalid", l);
  490. return -1;
  491. }
  492. /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
  493. /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
  494. if (*l == '~') {
  495. if (STRING != m->type && PSTRING != m->type)
  496. m->mask_op = OPINVERSE;
  497. ++l;
  498. }
  499. switch (*l) {
  500. case '&':
  501. m->mask_op |= OPAND;
  502. ++l;
  503. m->mask = signextend(m, strtoul(l, &l, 0));
  504. eatsize(&l);
  505. break;
  506. case '|':
  507. m->mask_op |= OPOR;
  508. ++l;
  509. m->mask = signextend(m, strtoul(l, &l, 0));
  510. eatsize(&l);
  511. break;
  512. case '^':
  513. m->mask_op |= OPXOR;
  514. ++l;
  515. m->mask = signextend(m, strtoul(l, &l, 0));
  516. eatsize(&l);
  517. break;
  518. case '+':
  519. m->mask_op |= OPADD;
  520. ++l;
  521. m->mask = signextend(m, strtoul(l, &l, 0));
  522. eatsize(&l);
  523. break;
  524. case '-':
  525. m->mask_op |= OPMINUS;
  526. ++l;
  527. m->mask = signextend(m, strtoul(l, &l, 0));
  528. eatsize(&l);
  529. break;
  530. case '*':
  531. m->mask_op |= OPMULTIPLY;
  532. ++l;
  533. m->mask = signextend(m, strtoul(l, &l, 0));
  534. eatsize(&l);
  535. break;
  536. case '%':
  537. m->mask_op |= OPMODULO;
  538. ++l;
  539. m->mask = signextend(m, strtoul(l, &l, 0));
  540. eatsize(&l);
  541. break;
  542. case '/':
  543. if (STRING != m->type && PSTRING != m->type) {
  544. m->mask_op |= OPDIVIDE;
  545. ++l;
  546. m->mask = signextend(m, strtoul(l, &l, 0));
  547. eatsize(&l);
  548. } else {
  549. m->mask = 0L;
  550. while (!isspace(*++l)) {
  551. switch (*l) {
  552. case CHAR_IGNORE_LOWERCASE:
  553. m->mask |= STRING_IGNORE_LOWERCASE;
  554. break;
  555. case CHAR_COMPACT_BLANK:
  556. m->mask |= STRING_COMPACT_BLANK;
  557. break;
  558. case CHAR_COMPACT_OPTIONAL_BLANK:
  559. m->mask |=
  560. STRING_COMPACT_OPTIONAL_BLANK;
  561. break;
  562. default:
  563. magwarn("string extension %c invalid",
  564. *l);
  565. return -1;
  566. }
  567. }
  568. }
  569. break;
  570. }
  571. /* We used to set mask to all 1's here, instead let's just not do anything
  572. if mask = 0 (unless you have a better idea) */
  573. EATAB;
  574. switch (*l) {
  575. case '>':
  576. case '<':
  577. /* Old-style anding: "0 byte &0x80 dynamically linked" */
  578. case '&':
  579. case '^':
  580. case '=':
  581. m->reln = *l;
  582. ++l;
  583. if (*l == '=') {
  584. /* HP compat: ignore &= etc. */
  585. ++l;
  586. }
  587. break;
  588. case '!':
  589. if (m->type != STRING && m->type != PSTRING) {
  590. m->reln = *l;
  591. ++l;
  592. break;
  593. }
  594. /* FALL THROUGH */
  595. default:
  596. if (*l == 'x' && isascii((unsigned char)l[1]) &&
  597. isspace((unsigned char)l[1])) {
  598. m->reln = *l;
  599. ++l;
  600. goto GetDesc; /* Bill The Cat */
  601. }
  602. m->reln = '=';
  603. break;
  604. }
  605. EATAB;
  606. if (getvalue(m, &l))
  607. return -1;
  608. /*
  609. * TODO finish this macro and start using it!
  610. * #define offsetcheck {if (offset > HOWMANY-1)
  611. * magwarn("offset too big"); }
  612. */
  613. /*
  614. * now get last part - the description
  615. */
  616. GetDesc:
  617. EATAB;
  618. if (l[0] == '\b') {
  619. ++l;
  620. m->nospflag = 1;
  621. } else if ((l[0] == '\\') && (l[1] == 'b')) {
  622. ++l;
  623. ++l;
  624. m->nospflag = 1;
  625. } else
  626. m->nospflag = 0;
  627. while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
  628. /* NULLBODY */;
  629. if (action == CHECK) {
  630. mdump(m);
  631. }
  632. ++(*nmagicp); /* make room for next */
  633. return 0;
  634. }
  635. /*
  636. * Read a numeric value from a pointer, into the value union of a magic
  637. * pointer, according to the magic type. Update the string pointer to point
  638. * just after the number read. Return 0 for success, non-zero for failure.
  639. */
  640. static int
  641. getvalue(m, p)
  642. struct magic *m;
  643. char **p;
  644. {
  645. int slen;
  646. if (m->type == STRING || m->type == PSTRING) {
  647. *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
  648. m->vallen = slen;
  649. } else
  650. if (m->reln != 'x') {
  651. m->value.l = signextend(m, strtoul(*p, p, 0));
  652. eatsize(p);
  653. }
  654. return 0;
  655. }
  /*
   * Convert a string containing C character escapes.  Stop at an unescaped
   * space or tab, at the end of the input, or when the output buffer is
   * full (a message is then printed on stderr).
   *
   * Copy the converted version to "p" (room for "plen" bytes including the
   * terminating NUL), returning its length in *slen.
   *
   * Return the updated scan pointer: just past the whitespace character
   * that ended the string, or at the terminating NUL if the input ran out.
   * It never points beyond the NUL, so the caller can go on scanning.
   */
  static char *
  getstr(s, p, plen, slen)
      char *s;
      char *p;
      int plen, *slen;
  {
      char *origs = s, *origp = p;
      char *pmax = p + plen - 1;
      int c;
      int val;

      for (;;) {
          c = *s++;
          if (c == '\0') {
              --s;		/* stay on the terminator */
              break;
          }
          if (isspace((unsigned char) c))
              break;
          if (p >= pmax) {
              fprintf(stderr, "String too long: %s\n", origs);
              break;
          }
          if (c != '\\') {
              *p++ = (char)c;
              continue;
          }

          switch (c = *s++) {
          case '\0':
              /* Backslash at end of input: nothing to escape. */
              --s;
              goto out;

          default:
              *p++ = (char) c;
              break;

          case 'n':
              *p++ = '\n';
              break;

          case 'r':
              *p++ = '\r';
              break;

          case 'b':
              *p++ = '\b';
              break;

          case 't':
              *p++ = '\t';
              break;

          case 'f':
              *p++ = '\f';
              break;

          case 'v':
              *p++ = '\v';
              break;

          /* \ and up to 3 octal digits */
          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
              val = c - '0';
              c = *s++;  /* try for 2 */
              if (c >= '0' && c <= '7') {
                  val = (val << 3) | (c - '0');
                  c = *s++;  /* try for 3 */
                  if (c >= '0' && c <= '7')
                      val = (val << 3) | (c - '0');
                  else
                      --s;
              } else
                  --s;
              *p++ = (char)val;
              break;

          /* \x and up to 2 hex digits */
          case 'x':
              val = 'x';	/* Default if no digits */
              c = hextoint(*s++);	/* Get next char */
              if (c >= 0) {
                  val = c;
                  c = hextoint(*s++);
                  if (c >= 0)
                      val = (val << 4) + c;
                  else
                      --s;
              } else
                  --s;
              *p++ = (char)val;
              break;
          }
      }
  out:
      *p = '\0';
      *slen = p - origp;
      return s;
  }
  /*
   * Value of a single hexadecimal digit character, or -1 if `c' is not
   * one.  No separate ASCII test is needed: the three checks below only
   * ever accept '0'-'9', 'a'-'f' and 'A'-'F'.
   */
  static int
  hextoint(c)
      int c;
  {
      int value = -1;

      if (isdigit((unsigned char) c))
          value = c - '0';
      else if (c >= 'a' && c <= 'f')
          value = c - 'a' + 10;
      else if (c >= 'A' && c <= 'F')
          value = c - 'A' + 10;

      return value;
  }
  /*
   * Print a string containing C character escapes.
   *
   * Writes `s' to `fp'.  Bytes in the range 040..0176 are written as-is;
   * newline, carriage return, backspace, tab, form feed and vertical tab
   * are written as their two-character C escapes; anything else is written
   * as a backslash followed by three octal digits.
   *
   * If `len' is -1 the string is NUL-terminated; otherwise exactly `len'
   * bytes are printed.  In the counted case the count is checked before
   * each byte is fetched, so nothing past s[len - 1] is read.
   */
  void
  showstr(fp, s, len)
      FILE *fp;
      const char *s;
      int len;
  {
      char c;

      for (;;) {
          if (len == -1) {
              c = *s++;
              if (c == '\0')
                  break;
          } else {
              if (len-- == 0)
                  break;
              c = *s++;
          }

          if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
              (void) fputc(c, fp);
              continue;
          }

          (void) fputc('\\', fp);
          switch (c) {
          case '\n':
              (void) fputc('n', fp);
              break;

          case '\r':
              (void) fputc('r', fp);
              break;

          case '\b':
              (void) fputc('b', fp);
              break;

          case '\t':
              (void) fputc('t', fp);
              break;

          case '\f':
              (void) fputc('f', fp);
              break;

          case '\v':
              (void) fputc('v', fp);
              break;

          default:
              (void) fprintf(fp, "%.3o", c & 0377);
              break;
          }
      }
  }
  /*
   * eatsize(): step *p over the size suffix of a number [eg. 10UL].
   * An optional 'u' is skipped first, then at most one of l, s, h, b or c;
   * both are matched without regard to case.
   */
  static void
  eatsize(p)
      char **p;
  {
      char *cur = *p;

      if (LOWCASE(*cur) == 'u')
          cur++;

      /* strchr() would also match the NUL, hence the explicit test. */
      if (*cur != '\0' && strchr("lshbc", LOWCASE(*cur)) != NULL)
          cur++;

      *p = cur;
  }
  839. /*
  840. * handle an mmaped file.
  841. */
  842. static int
  843. apprentice_map(magicp, nmagicp, fn, action)
  844. struct magic **magicp;
  845. uint32 *nmagicp;
  846. const char *fn;
  847. int action;
  848. {
  849. int fd;
  850. struct stat st;
  851. uint32 *ptr;
  852. uint32 version;
  853. int needsbyteswap;
  854. char *dbname = mkdbname(fn);
  855. if (dbname == NULL)
  856. return -1;
  857. if ((fd = open(dbname, O_RDONLY)) == -1)
  858. return -1;
  859. if (fstat(fd, &st) == -1) {
  860. (void)fprintf(stderr, "%s: Cannot stat `%s' (%s)\n",
  861. progname, dbname, strerror(errno));
  862. goto error;
  863. }
  864. #ifdef QUICK
  865. if ((*magicp = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
  866. MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
  867. (void)fprintf(stderr, "%s: Cannot map `%s' (%s)\n",
  868. progname, dbname, strerror(errno));
  869. goto error;
  870. }
  871. #else
  872. if ((*magicp = malloc((size_t)st.st_size)) == NULL) {
  873. (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
  874. strerror(errno));
  875. goto error;
  876. }
  877. if (read(fd, *magicp, (size_t)st.st_size) != (size_t)st.st_size) {
  878. (void) fprintf(stderr, "%s: Read failed (%s).\n", progname,
  879. strerror(errno));
  880. goto error;
  881. }
  882. #endif
  883. (void)close(fd);
  884. fd = -1;
  885. ptr = (uint32 *) *magicp;
  886. if (*ptr != MAGICNO) {
  887. if (swap4(*ptr) != MAGICNO) {
  888. (void)fprintf(stderr, "%s: Bad magic in `%s'\n",
  889. progname, dbname);
  890. goto error;
  891. }
  892. needsbyteswap = 1;
  893. } else
  894. needsbyteswap = 0;
  895. if (needsbyteswap)
  896. version = swap4(ptr[1]);
  897. else
  898. version = ptr[1];
  899. if (version != VERSIONNO) {
  900. (void)fprintf(stderr,
  901. "%s: version mismatch (%d != %d) in `%s'\n",
  902. progname, version, VERSIONNO, dbname);
  903. goto error;
  904. }
  905. *nmagicp = (st.st_size / sizeof(struct magic)) - 1;
  906. (*magicp)++;
  907. if (needsbyteswap)
  908. byteswap(*magicp, *nmagicp);
  909. return 0;
  910. error:
  911. if (fd != -1)
  912. (void)close(fd);
  913. if (*magicp) {
  914. #ifdef QUICK
  915. (void)munmap(*magicp, (size_t)st.st_size);
  916. #else
  917. free(*magicp);
  918. #endif
  919. } else {
  920. *magicp = NULL;
  921. *nmagicp = 0;
  922. }
  923. return -1;
  924. }
  925. /*
  926. * handle an mmaped file.
  927. */
  928. static int
  929. apprentice_compile(magicp, nmagicp, fn, action)
  930. struct magic **magicp;
  931. uint32 *nmagicp;
  932. const char *fn;
  933. int action;
  934. {
  935. int fd;
  936. char *dbname = mkdbname(fn);
  937. static const uint32 ar[] = {
  938. MAGICNO, VERSIONNO
  939. };
  940. if (dbname == NULL)
  941. return -1;
  942. if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
  943. (void)fprintf(stderr, "%s: Cannot open `%s' (%s)\n",
  944. progname, dbname, strerror(errno));
  945. return -1;
  946. }
  947. if (write(fd, ar, sizeof(ar)) != sizeof(ar)) {
  948. (void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
  949. progname, dbname, strerror(errno));
  950. return -1;
  951. }
  952. if (lseek(fd, sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
  953. (void)fprintf(stderr, "%s: error seeking `%s' (%s)\n",
  954. progname, dbname, strerror(errno));
  955. return -1;
  956. }
  957. if (write(fd, *magicp, sizeof(struct magic) * *nmagicp)
  958. != sizeof(struct magic) * *nmagicp) {
  959. (void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
  960. progname, dbname, strerror(errno));
  961. return -1;
  962. }
  963. (void)close(fd);
  964. return 0;
  965. }
  966. /*
  967. * make a dbname
  968. */
  969. char *
  970. mkdbname(fn)
  971. const char *fn;
  972. {
  973. static const char ext[] = ".mgc";
  974. static char *buf = NULL;
  975. size_t len = strlen(fn) + sizeof(ext) + 1;
  976. if (buf == NULL)
  977. buf = malloc(len);
  978. else
  979. buf = realloc(buf, len);
  980. if (buf == NULL) {
  981. (void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
  982. strerror(errno));
  983. return NULL;
  984. }
  985. (void)strcpy(buf, fn);
  986. (void)strcat(buf, ext);
  987. return buf;
  988. }
  989. /*
  990. * Byteswap an mmap'ed file if needed
  991. */
  992. static void
  993. byteswap(magic, nmagic)
  994. struct magic *magic;
  995. uint32 nmagic;
  996. {
  997. uint32 i;
  998. for (i = 0; i < nmagic; i++)
  999. bs1(&magic[i]);
  1000. }
  1001. /*
  1002. * swap a short
  1003. */
  1004. static uint16
  1005. swap2(sv)
  1006. uint16 sv;
  1007. {
  1008. uint16 rv;
  1009. uint8 *s = (uint8 *) &sv;
  1010. uint8 *d = (uint8 *) &rv;
  1011. d[0] = s[1];
  1012. d[1] = s[0];
  1013. return rv;
  1014. }
  1015. /*
  1016. * swap an int
  1017. */
  1018. static uint32
  1019. swap4(sv)
  1020. uint32 sv;
  1021. {
  1022. uint32 rv;
  1023. uint8 *s = (uint8 *) &sv;
  1024. uint8 *d = (uint8 *) &rv;
  1025. d[0] = s[3];
  1026. d[1] = s[2];
  1027. d[2] = s[1];
  1028. d[3] = s[0];
  1029. return rv;
  1030. }
  1031. /*
  1032. * byteswap a single magic entry
  1033. */
  1034. static
  1035. void bs1(m)
  1036. struct magic *m;
  1037. {
  1038. m->cont_level = swap2(m->cont_level);
  1039. m->offset = swap4(m->offset);
  1040. m->in_offset = swap4(m->in_offset);
  1041. if (m->type != STRING)
  1042. m->value.l = swap4(m->value.l);
  1043. m->mask = swap4(m->mask);
  1044. }