cook.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. /**
  2. * \file cook.c
  3. *
  4. * This file contains the routines that deal with processing quoted strings
  5. * into an internal format.
  6. *
  7. * @addtogroup autoopts
  8. * @{
  9. */
  10. /*
  11. * This file is part of AutoOpts, a companion to AutoGen.
  12. * AutoOpts is free software.
  13. * AutoOpts is Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
  14. *
  15. * AutoOpts is available under any one of two licenses. The license
  16. * in use must be one of these two and the choice is under the control
  17. * of the user of the license.
  18. *
  19. * The GNU Lesser General Public License, version 3 or later
  20. * See the files "COPYING.lgplv3" and "COPYING.gplv3"
  21. *
  22. * The Modified Berkeley Software Distribution License
  23. * See the file "COPYING.mbsd"
  24. *
  25. * These files have the following sha256 sums:
  26. *
  27. * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
  28. * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
  29. * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
  30. */
  31. /*=export_func ao_string_cook_escape_char
  32. * private:
  33. *
  34. * what: escape-process a string fragment
  35. * arg: + char const * + pzScan + points to character after the escape +
  36. * arg: + char * + pRes + Where to put the result byte +
  37. * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
  38. *
  39. * ret-type: unsigned int
  40. * ret-desc: The number of bytes consumed processing the escaped character.
  41. *
  42. * doc:
  43. *
  44. * This function converts "t" into "\t" and all your other favorite
  45. * escapes, including numeric ones: hex and ocatal, too.
  46. * The returned result tells the caller how far to advance the
  47. * scan pointer (passed in). The default is to just pass through the
  48. * escaped character and advance the scan by one.
  49. *
  50. * Some applications need to keep an escaped newline, others need to
  51. * suppress it. This is accomplished by supplying a '\n' replacement
  52. * character that is different from \n, if need be. For example, use
  53. * 0x7F and never emit a 0x7F.
  54. *
  55. * err: @code{NULL} is returned if the string is mal-formed.
  56. =*/
  57. unsigned int
  58. ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
  59. {
  60. unsigned int res = 1;
  61. switch (*pRes = *pzIn++) {
  62. case NUL: /* NUL - end of input string */
  63. return 0;
  64. case '\r':
  65. if (*pzIn != NL)
  66. return 1;
  67. res++;
  68. /* FALLTHROUGH */
  69. case NL: /* NL - emit newline */
  70. *pRes = (char)nl;
  71. return res;
  72. case 'a': *pRes = '\a'; break;
  73. case 'b': *pRes = '\b'; break;
  74. case 'f': *pRes = '\f'; break;
  75. case 'n': *pRes = NL; break;
  76. case 'r': *pRes = '\r'; break;
  77. case 't': *pRes = '\t'; break;
  78. case 'v': *pRes = '\v'; break;
  79. case 'x':
  80. case 'X': /* HEX Escape */
  81. if (IS_HEX_DIGIT_CHAR(*pzIn)) {
  82. char z[4];
  83. unsigned int ct = 0;
  84. do {
  85. z[ct] = pzIn[ct];
  86. if (++ct >= 2)
  87. break;
  88. } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
  89. z[ct] = NUL;
  90. *pRes = (char)strtoul(z, NULL, 16);
  91. return ct + 1;
  92. }
  93. break;
  94. case '0': case '1': case '2': case '3':
  95. case '4': case '5': case '6': case '7':
  96. {
  97. /*
  98. * IF the character copied was an octal digit,
  99. * THEN set the output character to an octal value.
  100. * The 3 octal digit result might exceed 0xFF, so check it.
  101. */
  102. char z[4];
  103. unsigned long val;
  104. unsigned int ct = 0;
  105. z[ct++] = *--pzIn;
  106. while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
  107. z[ct] = pzIn[ct];
  108. if (++ct >= 3)
  109. break;
  110. }
  111. z[ct] = NUL;
  112. val = strtoul(z, NULL, 8);
  113. if (val > 0xFF)
  114. val = 0xFF;
  115. *pRes = (char)val;
  116. return ct;
  117. }
  118. default: /* quoted character is result character */;
  119. }
  120. return res;
  121. }
  122. /**
  123. * count newlines between start and end
  124. */
  125. static char *
  126. nl_count(char * start, char * end, int * lnct_p)
  127. {
  128. while (start < end) {
  129. if (*(start++) == NL)
  130. (*lnct_p)++;
  131. }
  132. return end;
  133. }
  134. /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  135. *
  136. * A quoted string has been found.
  137. * Find the end of it and compress any escape sequences.
  138. */
  139. static bool
  140. contiguous_quote(char ** pps, char * pq, int * lnct_p)
  141. {
  142. char * ps = *pps + 1;
  143. for (;;) {
  144. while (IS_WHITESPACE_CHAR(*ps))
  145. if (*(ps++) == NL)
  146. (*lnct_p)++;
  147. /*
  148. * IF the next character is a quote character,
  149. * THEN we will concatenate the strings.
  150. */
  151. switch (*ps) {
  152. case '"':
  153. case '\'':
  154. *pq = *(ps++); /* assign new quote character and return */
  155. *pps = ps;
  156. return true;
  157. case '/':
  158. /*
  159. * Allow for a comment embedded in the concatenated string.
  160. */
  161. switch (ps[1]) {
  162. default:
  163. goto fail_return;
  164. case '/':
  165. /*
  166. * Skip to end of line
  167. */
  168. ps = strchr(ps, NL);
  169. if (ps == NULL)
  170. goto fail_return;
  171. break;
  172. case '*':
  173. ps = nl_count(ps + 2, strstr(ps + 2, "*/"), lnct_p);
  174. if (ps == NULL)
  175. goto fail_return;
  176. ps += 2;
  177. }
  178. continue;
  179. default:
  180. /*
  181. * The next non-whitespace character is not a quote.
  182. * The series of quoted strings has come to an end.
  183. */
  184. *pps = ps;
  185. return false;
  186. }
  187. }
  188. fail_return:
  189. *pps = NULL;
  190. return false;
  191. }
  192. /*=export_func ao_string_cook
  193. * private:
  194. *
  195. * what: concatenate and escape-process strings
  196. * arg: + char * + pzScan + The *MODIFIABLE* input buffer +
  197. * arg: + int * + lnct_p + The (possibly NULL) pointer to a line count +
  198. *
  199. * ret-type: char *
  200. * ret-desc: The address of the text following the processed strings.
  201. * The return value is NULL if the strings are ill-formed.
  202. *
  203. * doc:
  204. *
  205. * A series of one or more quoted strings are concatenated together.
  206. * If they are quoted with double quotes (@code{"}), then backslash
  207. * escapes are processed per the C programming language. If they are
  208. * single quote strings, then the backslashes are honored only when they
  209. * precede another backslash or a single quote character.
  210. *
  211. * err: @code{NULL} is returned if the string(s) is/are mal-formed.
  212. =*/
  213. char *
  214. ao_string_cook(char * pzScan, int * lnct_p)
  215. {
  216. int l = 0;
  217. char q = *pzScan;
  218. /*
  219. * It is a quoted string. Process the escape sequence characters
  220. * (in the set "abfnrtv") and make sure we find a closing quote.
  221. */
  222. char * pzD = pzScan++;
  223. char * pzS = pzScan;
  224. if (lnct_p == NULL)
  225. lnct_p = &l;
  226. for (;;) {
  227. /*
  228. * IF the next character is the quote character, THEN we may end the
  229. * string. We end it unless the next non-blank character *after* the
  230. * string happens to also be a quote. If it is, then we will change
  231. * our quote character to the new quote character and continue
  232. * condensing text.
  233. */
  234. while (*pzS == q) {
  235. *pzD = NUL; /* This is probably the end of the line */
  236. if (! contiguous_quote(&pzS, &q, lnct_p))
  237. return pzS;
  238. }
  239. /*
  240. * We are inside a quoted string. Copy text.
  241. */
  242. switch (*(pzD++) = *(pzS++)) {
  243. case NUL:
  244. return NULL;
  245. case NL:
  246. (*lnct_p)++;
  247. break;
  248. case '\\':
  249. /*
  250. * IF we are escaping a new line,
  251. * THEN drop both the escape and the newline from
  252. * the result string.
  253. */
  254. if (*pzS == NL) {
  255. pzS++;
  256. pzD--;
  257. (*lnct_p)++;
  258. }
  259. /*
  260. * ELSE IF the quote character is '"' or '`',
  261. * THEN we do the full escape character processing
  262. */
  263. else if (q != '\'') {
  264. unsigned int ct;
  265. ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
  266. if (ct == 0)
  267. return NULL;
  268. pzS += ct;
  269. } /* if (q != '\'') */
  270. /*
  271. * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
  272. * The latter only to easily hide preprocessing directives.
  273. */
  274. else switch (*pzS) {
  275. case '\\':
  276. case '\'':
  277. case '#':
  278. pzD[-1] = *pzS++;
  279. }
  280. } /* switch (*(pzD++) = *(pzS++)) */
  281. } /* for (;;) */
  282. }
  283. /** @}
  284. *
  285. * Local Variables:
  286. * mode: C
  287. * c-file-style: "stroustrup"
  288. * indent-tabs-mode: nil
  289. * End:
  290. * end of autoopts/cook.c */