man2html.c 10 KB


  1. /* Copyright (c) 1995 DJ Delorie, 334 North Road, Deerfield NH USA
  2. Distributed under the terms of the GNU GPL, version 2 or later.
  3. Note: The HTML output of this program is NOT considered a derived
  4. work of this program. */
  5. /*
  6. Original work by dj@delorie.com
  7. Usage: man2html < program.man > program.html
  8. */
  9. #include <stdio.h>
  10. #include <ctype.h>
  11. #include <string.h>
/* Converter state shared by the functions below.  */

/* Nonzero while a <tt> opened by clean() for a \f(CW font escape is still
   open; the next \f escape emits </tt> and clears it.  */
int in_tt = 0;
/* When nonzero, bol() prints <dd> before the next regular line and then
   clears the flag.  Set after the term of a .TP/.Tp/.IP/.TQ item.  */
int next_line_dd = 0;
/* Nonzero while the <dl compact> opened by .TP/.Tp/.IP is still open; the
   break, paragraph and section requests emit </dl> and clear it.  */
int need_undl = 0;
/* Set whenever a line of regular text has been output (bol(), twoggle(),
   .SH/.SS, .FN).  process_line_til_regular() clears it and keeps calling
   process_line() until it is set again.  */
int got_regular_line = 0;
/* Number of <ul> elements opened by .RS that have not yet been closed by
   .RE or by a section heading.  */
int need_re = 0;
/* 1 in normal (filled) text.  .nf sets it to 0 and opens <pre>; .fi and
   section headings restore it.  While 0, eol() emits <br>.  */
int fill_mode = 1;
/* 'b' or 'i' while a <b>/<i> opened by a \fB/\fI escape is still open,
   0 otherwise.  Closed by the next \f escape in clean() or by un_bi().  */
int current_BI = 0;
/* Set by clean() when it sees \c immediately followed by a newline; reset
   by process_line() for every input line.  twoggle() omits its trailing
   newline while it is set.  */
int skip_nl = 0;
/* Forward declaration: process_line_til_regular() calls process_line()
   before its definition.  */
int process_line(void);
  21. char *
  22. get_token(char *inp, char *buf)
  23. {
  24. int quoted = 0;
  25. /* skip whitespace */
  26. while (*inp && isspace(*inp))
  27. inp++;
  28. if (*inp == 0)
  29. return 0;
  30. while (*inp)
  31. {
  32. switch (*inp)
  33. {
  34. case '"':
  35. quoted = 1-quoted;
  36. break;
  37. case '\\':
  38. *buf++ = *inp;
  39. *buf++ = *++inp;
  40. break;
  41. default:
  42. if (isspace(*inp) && !quoted)
  43. {
  44. *buf = 0;
  45. return inp;
  46. }
  47. *buf++ = *inp;
  48. break;
  49. }
  50. inp++;
  51. }
  52. *buf = 0;
  53. return inp;
  54. }
  55. void
  56. clean(char *cp)
  57. {
  58. char foo[1000];
  59. char *rp = foo;
  60. char *ocp = cp;
  61. if (strncmp(cp, ".if t ", 6) == 0)
  62. cp += 6;
  63. while (*cp)
  64. {
  65. switch (*cp)
  66. {
  67. case '\\':
  68. cp++;
  69. switch (*cp)
  70. {
  71. case 'E':
  72. case 'F':
  73. case 'g':
  74. case 'b':
  75. case 'r':
  76. case 'B':
  77. *rp++ = '\\';
  78. *rp++ = *cp++;
  79. break;
  80. case '/':
  81. case '-':
  82. case '\\':
  83. case '+':
  84. case '.':
  85. case 10:
  86. case 0:
  87. case ' ':
  88. case '=':
  89. case '\'':
  90. case '`':
  91. case '[':
  92. case ']':
  93. case ':':
  94. case '}':
  95. case '{':
  96. *rp++ = *cp++;
  97. break;
  98. case '|':
  99. case '^':
  100. case '"':
  101. case 'd':
  102. case 'u':
  103. case 'n':
  104. case '&':
  105. case 'w':
  106. case '%':
  107. case 'v':
  108. case 'k':
  109. cp++;
  110. break;
  111. case 't':
  112. *rp++ = ' ';
  113. cp++;
  114. break;
  115. case '0':
  116. *rp++ = ' ';
  117. cp++;
  118. break;
  119. case 'c':
  120. if (cp[1] == '\n')
  121. {
  122. skip_nl = 1;
  123. cp++;
  124. }
  125. cp++;
  126. break;
  127. case 'e':
  128. *rp++ = '\\';
  129. cp++;
  130. break;
  131. case 's':
  132. cp++;
  133. cp++;
  134. while (isdigit(*cp))
  135. cp++;
  136. break;
  137. case 'f':
  138. if (current_BI)
  139. {
  140. *rp++ = '<';
  141. *rp++ = '/';
  142. *rp++ = current_BI;
  143. *rp++ = '>';
  144. current_BI = 0;
  145. }
  146. if (in_tt)
  147. {
  148. strcpy(rp, "</tt>");
  149. rp += 5;
  150. in_tt = 0;
  151. }
  152. switch (*++cp)
  153. {
  154. case '(':
  155. if (cp[1] == 'C' && cp[2] == 'W')
  156. {
  157. strcpy(rp, "<tt>");
  158. rp += 4;
  159. in_tt = 1;
  160. cp += 2;
  161. }
  162. else
  163. fprintf(stderr, "unknown font %.3s\n", cp);
  164. break;
  165. case 'B':
  166. current_BI = 'b';
  167. *rp++ = '<';
  168. *rp++ = 'b';
  169. *rp++ = '>';
  170. break;
  171. case 'R':
  172. case 'P':
  173. break;
  174. case 'I':
  175. current_BI = 'i';
  176. *rp++ = '<';
  177. *rp++ = 'i';
  178. *rp++ = '>';
  179. break;
  180. }
  181. cp++;
  182. break;
  183. case '*':
  184. cp++;
  185. if (cp[0] == '(')
  186. {
  187. cp++;
  188. if (cp[0] == 'l' && cp[1] == 'q')
  189. *rp++ = '`';
  190. else if (cp[0] == 'r' && cp[1] == 'q')
  191. *rp++ = '\'';
  192. else
  193. {
  194. sprintf(rp, "[%.2s]", cp);
  195. rp += 4;
  196. }
  197. cp += 2;
  198. }
  199. else if (cp[0] == 'r')
  200. {
  201. cp++;
  202. strcpy(rp, "RCS");
  203. rp += 3;
  204. }
  205. else
  206. {
  207. sprintf(rp, "[%c]", *cp);
  208. rp += 3;
  209. }
  210. break;
  211. case '(':
  212. if (cp[1] == 'c' && cp[2] == 'o')
  213. *rp++ = 0xa9;
  214. else if (cp[1] == 'b' && cp[2] == 'v')
  215. *rp++ = '|';
  216. else if (cp[1] == 'e' && cp[2] == 'm')
  217. *rp++ = ' ';
  218. else if (cp[1] == '+' && cp[2] == '-')
  219. *rp++ = 0xb1;
  220. else if (cp[1] == 't' && cp[2] == 'i')
  221. *rp++ = '~';
  222. else if (cp[1] == 't' && cp[2] == 's')
  223. *rp++ = '"';
  224. else if (cp[1] == 'p' && cp[2] == 'l')
  225. *rp++ = '+';
  226. else if (cp[1] == 'm' && cp[2] == 'i')
  227. *rp++ = '-';
  228. else if (cp[1] == 'f' && cp[2] == 'm')
  229. *rp++ = '\'';
  230. else if (cp[1] == 'm' && cp[2] == 'u')
  231. *rp++ = 'x';
  232. else if (cp[1] == 'b' && cp[2] == 'u')
  233. {
  234. strcpy(rp, "<li>");
  235. rp += 4;
  236. }
  237. else if (cp[1] == '>' && cp[2] == '=')
  238. {
  239. *rp++ = '>';
  240. *rp++ = '=';
  241. }
  242. else if (cp[1] == '*' && cp[2] == '*')
  243. {
  244. *rp++ = '*';
  245. *rp++ = '*';
  246. }
  247. else
  248. fprintf(stderr, "unknown meta-character (%c%c\n", cp[1], cp[2]);
  249. cp += 3;
  250. break;
  251. default:
  252. fprintf(stderr, "unknown escape \\%c (%d)\n", *cp, *cp);
  253. break;
  254. }
  255. break;
  256. case '&':
  257. *rp++ = '&';
  258. *rp++ = 'a';
  259. *rp++ = 'm';
  260. *rp++ = 'p';
  261. *rp++ = ';';
  262. cp++;
  263. break;
  264. case '<':
  265. *rp++ = '&';
  266. *rp++ = 'l';
  267. *rp++ = 't';
  268. *rp++ = ';';
  269. cp++;
  270. break;
  271. case '>':
  272. *rp++ = '&';
  273. *rp++ = 'g';
  274. *rp++ = 't';
  275. *rp++ = ';';
  276. cp++;
  277. break;
  278. default:
  279. *rp++ = *cp++;
  280. break;
  281. }
  282. }
  283. *rp = 0;
  284. strcpy(ocp, foo);
  285. }
  286. void
  287. un_bi(void)
  288. {
  289. if (current_BI)
  290. {
  291. printf("</%c>", current_BI);
  292. current_BI = 0;
  293. }
  294. }
  295. void
  296. process_line_til_regular(void)
  297. {
  298. got_regular_line = 0;
  299. while (!got_regular_line)
  300. process_line();
  301. }
  302. void
  303. bol(void)
  304. {
  305. got_regular_line = 1;
  306. if (next_line_dd)
  307. printf("<dd>");
  308. next_line_dd = 0;
  309. }
  310. void
  311. eol(void)
  312. {
  313. if (!fill_mode)
  314. printf("<br>");
  315. }
  316. void
  317. twoggle(char *a, char *b, char *l)
  318. {
  319. int first = 1;
  320. char *c;
  321. char buf[1000];
  322. bol();
  323. while ((l = get_token(l, buf)))
  324. {
  325. clean(buf);
  326. c = first ? a : b;
  327. if (c)
  328. printf("<%s>%s</%s>", c, buf, c);
  329. else
  330. printf("%s", buf);
  331. if (a && b && strcmp(a, b) == 0)
  332. putchar(' ');
  333. first = 1-first;
  334. }
  335. un_bi();
  336. if (!skip_nl)
  337. printf("\n");
  338. eol();
  339. got_regular_line = 1;
  340. }
  341. int
  342. process_line(void)
  343. {
  344. char buf[1000], cmd[10];
  345. char token[1000];
  346. if (fgets(buf, 1000, stdin) == 0)
  347. return 0;
  348. skip_nl = 0;
  349. if (buf[0] != '.')
  350. {
  351. if (strncmp(buf, "'\\\"", 3) == 0)
  352. return 1;
  353. clean(buf);
  354. bol();
  355. fputs(buf, stdout);
  356. if (buf[0] == 0 || buf[0] == '\n')
  357. printf("<p>");
  358. eol();
  359. return 1;
  360. }
  361. if (sscanf(buf, "%s %[^\n]", cmd, buf) == 1)
  362. buf[0] = 0;
  363. if (strcmp(cmd, "..") == 0)
  364. {
  365. }
  366. else if (strcmp(cmd, ".B") == 0)
  367. {
  368. if (buf[0])
  369. {
  370. twoggle("b", "b", buf);
  371. }
  372. else
  373. {
  374. printf("<b>");
  375. process_line_til_regular();
  376. printf("</b>");
  377. }
  378. }
  379. else if (strcmp(cmd, ".I") == 0)
  380. {
  381. if (buf[0])
  382. {
  383. twoggle("i", "i", buf);
  384. }
  385. else
  386. {
  387. printf("<i>");
  388. process_line_til_regular();
  389. printf("</i>");
  390. }
  391. }
  392. else if (strcmp(cmd, ".BI") == 0)
  393. {
  394. twoggle("b", "i", buf);
  395. }
  396. else if (strcmp(cmd, ".IB") == 0)
  397. {
  398. twoggle("i", "b", buf);
  399. }
  400. else if (strcmp(cmd, ".BR") == 0)
  401. {
  402. twoggle("b", 0, buf);
  403. }
  404. else if (strcmp(cmd, ".RB") == 0)
  405. {
  406. twoggle(0, "b", buf);
  407. }
  408. else if (strcmp(cmd, ".IR") == 0)
  409. {
  410. twoggle("i", 0, buf);
  411. }
  412. else if (strcmp(cmd, ".RI") == 0)
  413. {
  414. twoggle(0, "i", buf);
  415. }
  416. else if (strcmp(cmd, ".nf") == 0)
  417. {
  418. if (fill_mode)
  419. printf("<pre>\n");
  420. fill_mode = 0;
  421. }
  422. else if (strcmp(cmd, ".fi") == 0)
  423. {
  424. if (!fill_mode)
  425. printf("</pre>\n");
  426. fill_mode = 1;
  427. }
  428. else if (strcmp(cmd, ".br") == 0
  429. || strcmp(cmd, ".Sp") == 0
  430. || strcmp(cmd, ".ti") == 0)
  431. {
  432. if (need_undl)
  433. {
  434. need_undl = 0;
  435. printf("</dl>");
  436. }
  437. printf("<br>\n");
  438. }
  439. else if (strcmp(cmd, ".LP") == 0
  440. || strcmp(cmd, ".PP") == 0
  441. || strcmp(cmd, ".sp") == 0
  442. || strcmp(cmd, ".P") == 0)
  443. {
  444. if (need_undl)
  445. {
  446. need_undl = 0;
  447. printf("</dl>");
  448. }
  449. printf("\n<p>\n");
  450. }
  451. else if (strcmp(cmd, ".RS") == 0)
  452. {
  453. printf("<ul>");
  454. need_re ++;
  455. }
  456. else if (strcmp(cmd, ".RE") == 0)
  457. {
  458. if (need_re)
  459. {
  460. printf("</ul>");
  461. need_re --;
  462. }
  463. }
  464. else if (strcmp(cmd, ".SH") == 0
  465. || strcmp(cmd, ".SS") == 0)
  466. {
  467. char *cp = buf;
  468. int got_token = 0;
  469. while (need_re)
  470. {
  471. printf("</ul>");
  472. need_re--;
  473. }
  474. if (need_undl)
  475. {
  476. printf("</dl>");
  477. need_undl = 0;
  478. }
  479. printf("\n</ul><H2>");
  480. while ((cp = get_token(cp, token)))
  481. {
  482. got_token = 1;
  483. clean(token);
  484. printf("%s ", token);
  485. }
  486. if (!got_token)
  487. {
  488. if (fgets(buf, 1000, stdin) == 0)
  489. return 0;
  490. printf("%s", buf);
  491. }
  492. printf("</H2><ul>\n\n");
  493. un_bi();
  494. got_regular_line = 1;
  495. if (!fill_mode)
  496. printf("</pre>");
  497. fill_mode = 1;
  498. }
  499. else if (strcmp(cmd, ".SM") == 0)
  500. {
  501. if (buf[0])
  502. {
  503. bol();
  504. clean(buf);
  505. printf("<code>%s</code>\n", buf);
  506. eol();
  507. }
  508. else
  509. {
  510. printf("<code>");
  511. process_line_til_regular();
  512. printf("</code>");
  513. }
  514. }
  515. else if (strcmp(cmd, ".TH") == 0)
  516. {
  517. int all_upper = 1, i;
  518. get_token(buf, buf);
  519. for (i=0; buf[i]; i++)
  520. if (islower(buf[i]))
  521. all_upper = 0;
  522. if (all_upper)
  523. for (i=0; buf[i]; i++)
  524. if (isupper(buf[i]))
  525. buf[i] = tolower(buf[i]);
  526. printf("<!--#exec cmd=\"header %s\" -->\n", buf);
  527. printf("<ul>");
  528. }
  529. else if (strcmp(cmd, ".TP") == 0
  530. || strcmp(cmd, ".Tp") == 0)
  531. {
  532. if (!need_undl)
  533. {
  534. printf("<p><dl compact>");
  535. need_undl = 1;
  536. }
  537. printf("<dt>");
  538. next_line_dd = 0;
  539. process_line_til_regular();
  540. next_line_dd = 1;
  541. }
  542. else if (strcmp(cmd, ".IP") == 0)
  543. {
  544. if (!need_undl)
  545. {
  546. printf("<p><dl compact>");
  547. need_undl = 1;
  548. }
  549. get_token(buf, buf);
  550. clean(buf);
  551. printf("<dt>%s", buf);
  552. next_line_dd = 1;
  553. }
  554. else if (strcmp(cmd, ".TQ") == 0)
  555. {
  556. printf("<dt>");
  557. next_line_dd = 0;
  558. process_line_til_regular();
  559. next_line_dd = 1;
  560. }
  561. else if (strcmp(cmd, ".FN") == 0)
  562. {
  563. bol();
  564. get_token(buf, buf);
  565. printf("<code>%s</code>\n", buf);
  566. got_regular_line = 1;
  567. eol();
  568. }
  569. /* Tcl macros */
  570. else if (strcmp(cmd, ".AP") == 0)
  571. {
  572. char *cp = buf;
  573. cp = get_token(cp, token);
  574. printf("<p>%s", token);
  575. cp = get_token(cp, token);
  576. printf(" <b>%s</b>", token);
  577. cp = get_token(cp, token);
  578. printf(" (<i>%s</i>) -\n", token);
  579. }
  580. else if (strcmp(cmd, ".DS") == 0)
  581. {
  582. printf("<pre>\n");
  583. }
  584. else if (strcmp(cmd, ".DE") == 0)
  585. {
  586. printf("</pre>\n");
  587. }
  588. /* end of Tcl macros */
  589. else if (strcmp(cmd, ".\"") == 0)
  590. {
  591. }
  592. else if (strcmp(cmd, ".de") == 0)
  593. {
  594. do {
  595. if (fgets(buf, 1000, stdin) == 0)
  596. return 0;
  597. } while (buf[0] != '.' || buf[1] != '.');
  598. }
  599. return 1;
  600. }
  601. int
  602. main()
  603. {
  604. while (process_line());
  605. printf("</ul>\n<!--#exec cmd=\"trailer\" -->\n");
  606. return 0;
  607. }