cook.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. /**
  2. * \file cook.c
  3. *
  4. * This file contains the routines that deal with processing quoted strings
  5. * into an internal format.
  6. *
  7. * @addtogroup autoopts
  8. * @{
  9. */
  10. /*
  11. * This file is part of AutoOpts, a companion to AutoGen.
  12. * AutoOpts is free software.
  13. * AutoOpts is Copyright (C) 1992-2016 by Bruce Korb - all rights reserved
  14. *
  15. * AutoOpts is available under any one of two licenses. The license
  16. * in use must be one of these two and the choice is under the control
  17. * of the user of the license.
  18. *
  19. * The GNU Lesser General Public License, version 3 or later
  20. * See the files "COPYING.lgplv3" and "COPYING.gplv3"
  21. *
  22. * The Modified Berkeley Software Distribution License
  23. * See the file "COPYING.mbsd"
  24. *
  25. * These files have the following sha256 sums:
  26. *
  27. * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
  28. * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
  29. * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
  30. */
  31. /* = = = START-STATIC-FORWARD = = = */
  32. static char *
  33. nl_count(char * start, char * end, int * lnct_p);
  34. static bool
  35. contiguous_quote(char ** pps, char * pq, int * lnct_p);
  36. /* = = = END-STATIC-FORWARD = = = */
  37. /*=export_func ao_string_cook_escape_char
  38. * private:
  39. *
  40. * what: escape-process a string fragment
  41. * arg: + char const * + pzScan + points to character after the escape +
  42. * arg: + char * + pRes + Where to put the result byte +
  43. * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
  44. *
  45. * ret-type: unsigned int
  46. * ret-desc: The number of bytes consumed processing the escaped character.
  47. *
  48. * doc:
  49. *
  50. * This function converts "t" into "\t" and all your other favorite
  51. * escapes, including numeric ones: hex and octal, too.
  52. * The returned result tells the caller how far to advance the
  53. * scan pointer (passed in). The default is to just pass through the
  54. * escaped character and advance the scan by one.
  55. *
  56. * Some applications need to keep an escaped newline, others need to
  57. * suppress it. This is accomplished by supplying a '\n' replacement
  58. * character that is different from \n, if need be. For example, use
  59. * 0x7F and never emit a 0x7F.
  60. *
  61. * err: Zero is returned if the escape is followed directly by the NUL that ends the input.
  62. =*/
  63. unsigned int
  64. ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
  65. {
  66. unsigned int res = 1;
  67. switch (*pRes = *pzIn++) {
  68. case NUL: /* NUL - end of input string */
  69. return 0;
  70. case '\r':
  71. if (*pzIn != NL)
  72. return 1;
  73. res++;
  74. /* FALLTHROUGH */
  75. case NL: /* NL - emit newline */
  76. *pRes = (char)nl;
  77. return res;
  78. case 'a': *pRes = '\a'; break;
  79. case 'b': *pRes = '\b'; break;
  80. case 'f': *pRes = '\f'; break;
  81. case 'n': *pRes = NL; break;
  82. case 'r': *pRes = '\r'; break;
  83. case 't': *pRes = '\t'; break;
  84. case 'v': *pRes = '\v'; break;
  85. case 'x':
  86. case 'X': /* HEX Escape */
  87. if (IS_HEX_DIGIT_CHAR(*pzIn)) {
  88. char z[4];
  89. unsigned int ct = 0;
  90. do {
  91. z[ct] = pzIn[ct];
  92. if (++ct >= 2)
  93. break;
  94. } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
  95. z[ct] = NUL;
  96. *pRes = (char)strtoul(z, NULL, 16);
  97. return ct + 1;
  98. }
  99. break;
  100. case '0': case '1': case '2': case '3':
  101. case '4': case '5': case '6': case '7':
  102. {
  103. /*
  104. * IF the character copied was an octal digit,
  105. * THEN set the output character to an octal value.
  106. * The 3 octal digit result might exceed 0xFF, so check it.
  107. */
  108. char z[4];
  109. unsigned long val;
  110. unsigned int ct = 0;
  111. z[ct++] = *--pzIn;
  112. while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
  113. z[ct] = pzIn[ct];
  114. if (++ct >= 3)
  115. break;
  116. }
  117. z[ct] = NUL;
  118. val = strtoul(z, NULL, 8);
  119. if (val > 0xFF)
  120. val = 0xFF;
  121. *pRes = (char)val;
  122. return ct;
  123. }
  124. default: /* quoted character is result character */;
  125. }
  126. return res;
  127. }
  128. /**
  129. * count newlines between start and end
  130. */
  131. static char *
  132. nl_count(char * start, char * end, int * lnct_p)
  133. {
  134. while (start < end) {
  135. if (*(start++) == NL)
  136. (*lnct_p)++;
  137. }
  138. return end;
  139. }
  140. /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  141. *
  142. * A quoted string has been found.
  143. * Find the end of it and compress any escape sequences.
  144. */
  145. static bool
  146. contiguous_quote(char ** pps, char * pq, int * lnct_p)
  147. {
  148. char * ps = *pps + 1;
  149. for (;;) {
  150. while (IS_WHITESPACE_CHAR(*ps))
  151. if (*(ps++) == NL)
  152. (*lnct_p)++;
  153. /*
  154. * IF the next character is a quote character,
  155. * THEN we will concatenate the strings.
  156. */
  157. switch (*ps) {
  158. case '"':
  159. case '\'':
  160. *pq = *(ps++); /* assign new quote character and return */
  161. *pps = ps;
  162. return true;
  163. case '/':
  164. /*
  165. * Allow for a comment embedded in the concatenated string.
  166. */
  167. switch (ps[1]) {
  168. default:
  169. goto fail_return;
  170. case '/':
  171. /*
  172. * Skip to end of line
  173. */
  174. ps = strchr(ps, NL);
  175. if (ps == NULL)
  176. goto fail_return;
  177. break;
  178. case '*':
  179. ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p);
  180. if (ps == NULL)
  181. goto fail_return;
  182. ps += 2;
  183. }
  184. continue;
  185. default:
  186. /*
  187. * The next non-whitespace character is not a quote.
  188. * The series of quoted strings has come to an end.
  189. */
  190. *pps = ps;
  191. return false;
  192. }
  193. }
  194. fail_return:
  195. *pps = NULL;
  196. return false;
  197. }
  198. /*=export_func ao_string_cook
  199. * private:
  200. *
  201. * what: concatenate and escape-process strings
  202. * arg: + char * + pzScan + The *MODIFIABLE* input buffer +
  203. * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count +
  204. *
  205. * ret-type: char *
  206. * ret-desc: The address of the text following the processed strings.
  207. * The return value is NULL if the strings are ill-formed.
  208. *
  209. * doc:
  210. *
  211. * A series of one or more quoted strings are concatenated together.
  212. * If they are quoted with double quotes (@code{"}), then backslash
  213. * escapes are processed per the C programming language. If they are
  214. * single quote strings, then the backslashes are honored only when they
  215. * precede a backslash, a single quote, a hash mark or a newline character.
  216. *
  217. * err: @code{NULL} is returned if the string(s) is/are mal-formed.
  218. =*/
  219. char *
  220. ao_string_cook(char * pzScan, int * lnct_p)
  221. {
  222. int l = 0;
  223. char q = *pzScan;
  224. /*
  225. * It is a quoted string. Process the escape sequence characters
  226. * (in the set "abfnrtv") and make sure we find a closing quote.
  227. */
  228. char * pzD = pzScan++;
  229. char * pzS = pzScan;
  230. if (lnct_p == NULL)
  231. lnct_p = &l;
  232. for (;;) {
  233. /*
  234. * IF the next character is the quote character, THEN we may end the
  235. * string. We end it unless the next non-blank character *after* the
  236. * string happens to also be a quote. If it is, then we will change
  237. * our quote character to the new quote character and continue
  238. * condensing text.
  239. */
  240. while (*pzS == q) {
  241. *pzD = NUL; /* This is probably the end of the line */
  242. if (! contiguous_quote(&pzS, &q, lnct_p))
  243. return pzS;
  244. }
  245. /*
  246. * We are inside a quoted string. Copy text.
  247. */
  248. switch (*(pzD++) = *(pzS++)) {
  249. case NUL:
  250. return NULL;
  251. case NL:
  252. (*lnct_p)++;
  253. break;
  254. case '\\':
  255. /*
  256. * IF we are escaping a new line,
  257. * THEN drop both the escape and the newline from
  258. * the result string.
  259. */
  260. if (*pzS == NL) {
  261. pzS++;
  262. pzD--;
  263. (*lnct_p)++;
  264. }
  265. /*
  266. * ELSE IF the quote character is '"' or '`',
  267. * THEN we do the full escape character processing
  268. */
  269. else if (q != '\'') {
  270. unsigned int ct;
  271. ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
  272. if (ct == 0)
  273. return NULL;
  274. pzS += ct;
  275. } /* if (q != '\'') */
  276. /*
  277. * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
  278. * The latter only to easily hide preprocessing directives.
  279. */
  280. else switch (*pzS) {
  281. case '\\':
  282. case '\'':
  283. case '#':
  284. pzD[-1] = *pzS++;
  285. }
  286. } /* switch (*(pzD++) = *(pzS++)) */
  287. } /* for (;;) */
  288. }
  289. /** @}
  290. *
  291. * Local Variables:
  292. * mode: C
  293. * c-file-style: "stroustrup"
  294. * indent-tabs-mode: nil
  295. * End:
  296. * end of autoopts/cook.c */