cook.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. /**
  2. * \file cook.c
  3. *
  4. * This file contains the routines that deal with processing quoted strings
  5. * into an internal format.
  6. *
  7. * @addtogroup autoopts
  8. * @{
  9. */
  10. /*
  11. * This file is part of AutoOpts, a companion to AutoGen.
  12. * AutoOpts is free software.
  13. * AutoOpts is Copyright (C) 1992-2014 by Bruce Korb - all rights reserved
  14. *
  15. * AutoOpts is available under any one of two licenses. The license
  16. * in use must be one of these two and the choice is under the control
  17. * of the user of the license.
  18. *
  19. * The GNU Lesser General Public License, version 3 or later
  20. * See the files "COPYING.lgplv3" and "COPYING.gplv3"
  21. *
  22. * The Modified Berkeley Software Distribution License
  23. * See the file "COPYING.mbsd"
  24. *
  25. * These files have the following sha256 sums:
  26. *
  27. * 8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95 COPYING.gplv3
  28. * 4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b COPYING.lgplv3
  29. * 13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239 COPYING.mbsd
  30. */
  31. /* = = = START-STATIC-FORWARD = = = */
  32. static bool
  33. contiguous_quote(char ** pps, char * pq, int * lnct_p);
  34. /* = = = END-STATIC-FORWARD = = = */
  35. /*=export_func ao_string_cook_escape_char
  36. * private:
  37. *
  38. * what: escape-process a string fragment
  39. * arg: + char const* + pzScan + points to character after the escape +
  40. * arg: + char* + pRes + Where to put the result byte +
  41. * arg: + unsigned int + nl_ch + replacement char if scanned char is \n +
  42. *
  43. * ret-type: unsigned int
  44. * ret-desc: The number of bytes consumed processing the escaped character.
  45. *
  46. * doc:
  47. *
  48. * This function converts "t" into "\t" and all your other favorite
  49. * escapes, including numeric ones: hex and octal, too.
  50. * The returned result tells the caller how far to advance the
  51. * scan pointer (passed in). The default is to just pass through the
  52. * escaped character and advance the scan by one.
  53. *
  54. * Some applications need to keep an escaped newline, others need to
  55. * suppress it. This is accomplished by supplying a '\n' replacement
  56. * character that is different from \n, if need be. For example, use
  57. * 0x7F and never emit a 0x7F.
  58. *
  59. * err: Zero is returned when the escaped character is the NUL that ends the input string.
  60. =*/
  61. unsigned int
  62. ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
  63. {
  64. unsigned int res = 1;
  65. switch (*pRes = *pzIn++) {
  66. case NUL: /* NUL - end of input string */
  67. return 0;
  68. case '\r':
  69. if (*pzIn != NL)
  70. return 1;
  71. res++;
  72. /* FALLTHROUGH */
  73. case NL: /* NL - emit newline */
  74. *pRes = (char)nl;
  75. return res;
  76. case 'a': *pRes = '\a'; break;
  77. case 'b': *pRes = '\b'; break;
  78. case 'f': *pRes = '\f'; break;
  79. case 'n': *pRes = NL; break;
  80. case 'r': *pRes = '\r'; break;
  81. case 't': *pRes = '\t'; break;
  82. case 'v': *pRes = '\v'; break;
  83. case 'x':
  84. case 'X': /* HEX Escape */
  85. if (IS_HEX_DIGIT_CHAR(*pzIn)) {
  86. char z[4];
  87. unsigned int ct = 0;
  88. do {
  89. z[ct] = pzIn[ct];
  90. if (++ct >= 2)
  91. break;
  92. } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
  93. z[ct] = NUL;
  94. *pRes = (char)strtoul(z, NULL, 16);
  95. return ct + 1;
  96. }
  97. break;
  98. case '0': case '1': case '2': case '3':
  99. case '4': case '5': case '6': case '7':
  100. {
  101. /*
  102. * IF the character copied was an octal digit,
  103. * THEN set the output character to an octal value.
  104. * The 3 octal digit result might exceed 0xFF, so check it.
  105. */
  106. char z[4];
  107. unsigned long val;
  108. unsigned int ct = 0;
  109. z[ct++] = *--pzIn;
  110. while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
  111. z[ct] = pzIn[ct];
  112. if (++ct >= 3)
  113. break;
  114. }
  115. z[ct] = NUL;
  116. val = strtoul(z, NULL, 8);
  117. if (val > 0xFF)
  118. val = 0xFF;
  119. *pRes = (char)val;
  120. return ct;
  121. }
  122. default: /* quoted character is result character */;
  123. }
  124. return res;
  125. }
  126. /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  127. *
  128. * The closing quote of a quoted string has been found.
  129. * Skip white space and comments to see whether another quoted string follows.
  130. */
  131. static bool
  132. contiguous_quote(char ** pps, char * pq, int * lnct_p)
  133. {
  134. char * ps = *pps + 1;
  135. for (;;) {
  136. while (IS_WHITESPACE_CHAR(*ps))
  137. if (*(ps++) == NL)
  138. (*lnct_p)++;
  139. /*
  140. * IF the next character is a quote character,
  141. * THEN we will concatenate the strings.
  142. */
  143. switch (*ps) {
  144. case '"':
  145. case '\'':
  146. *pq = *(ps++); /* assign new quote character and return */
  147. *pps = ps;
  148. return true;
  149. case '/':
  150. /*
  151. * Allow for a comment embedded in the concatenated string.
  152. */
  153. switch (ps[1]) {
  154. default:
  155. *pps = NULL;
  156. return false;
  157. case '/':
  158. /*
  159. * Skip to end of line
  160. */
  161. ps = strchr(ps, NL);
  162. if (ps == NULL) {
  163. *pps = NULL;
  164. return false;
  165. }
  166. break;
  167. case '*':
  168. {
  169. char* p = strstr( ps+2, "*/" );
  170. /*
  171. * Skip to terminating star slash
  172. */
  173. if (p == NULL) {
  174. *pps = NULL;
  175. return false;
  176. }
  177. while (ps < p) {
  178. if (*(ps++) == NL)
  179. (*lnct_p)++;
  180. }
  181. ps = p + 2;
  182. }
  183. }
  184. continue;
  185. default:
  186. /*
  187. * The next non-whitespace character is not a quote.
  188. * The series of quoted strings has come to an end.
  189. */
  190. *pps = ps;
  191. return false;
  192. }
  193. }
  194. }
  195. /*=export_func ao_string_cook
  196. * private:
  197. *
  198. * what: concatenate and escape-process strings
  199. * arg: + char* + pzScan + The *MODIFIABLE* input buffer +
  200. * arg: + int* + lnct_p + The (possibly NULL) pointer to a line count +
  201. *
  202. * ret-type: char*
  203. * ret-desc: The address of the text following the processed strings.
  204. * The return value is NULL if the strings are ill-formed.
  205. *
  206. * doc:
  207. *
  208. * A series of one or more quoted strings are concatenated together.
  209. * If they are quoted with double quotes (@code{"}), then backslash
  210. * escapes are processed per the C programming language. If they are
  211. * single quote strings, then the backslashes are honored only when they
  212. * precede another backslash or a single quote character.
  213. *
  214. * err: @code{NULL} is returned if the string(s) is/are mal-formed.
  215. =*/
  216. char *
  217. ao_string_cook(char * pzScan, int * lnct_p)
  218. {
  219. int l = 0;
  220. char q = *pzScan;
  221. /*
  222. * It is a quoted string. Process the escape sequence characters
  223. * (in the set "abfnrtv") and make sure we find a closing quote.
  224. */
  225. char* pzD = pzScan++;
  226. char* pzS = pzScan;
  227. if (lnct_p == NULL)
  228. lnct_p = &l;
  229. for (;;) {
  230. /*
  231. * IF the next character is the quote character, THEN we may end the
  232. * string. We end it unless the next non-blank character *after* the
  233. * string happens to also be a quote. If it is, then we will change
  234. * our quote character to the new quote character and continue
  235. * condensing text.
  236. */
  237. while (*pzS == q) {
  238. *pzD = NUL; /* This is probably the end of the line */
  239. if (! contiguous_quote(&pzS, &q, lnct_p))
  240. return pzS;
  241. }
  242. /*
  243. * We are inside a quoted string. Copy text.
  244. */
  245. switch (*(pzD++) = *(pzS++)) {
  246. case NUL:
  247. return NULL;
  248. case NL:
  249. (*lnct_p)++;
  250. break;
  251. case '\\':
  252. /*
  253. * IF we are escaping a new line,
  254. * THEN drop both the escape and the newline from
  255. * the result string.
  256. */
  257. if (*pzS == NL) {
  258. pzS++;
  259. pzD--;
  260. (*lnct_p)++;
  261. }
  262. /*
  263. * ELSE IF the quote character is '"' or '`',
  264. * THEN we do the full escape character processing
  265. */
  266. else if (q != '\'') {
  267. unsigned int ct;
  268. ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
  269. if (ct == 0)
  270. return NULL;
  271. pzS += ct;
  272. } /* if (q != '\'') */
  273. /*
  274. * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
  275. * The latter only to easily hide preprocessing directives.
  276. */
  277. else switch (*pzS) {
  278. case '\\':
  279. case '\'':
  280. case '#':
  281. pzD[-1] = *pzS++;
  282. }
  283. } /* switch (*(pzD++) = *(pzS++)) */
  284. } /* for (;;) */
  285. }
  286. /** @}
  287. *
  288. * Local Variables:
  289. * mode: C
  290. * c-file-style: "stroustrup"
  291. * indent-tabs-mode: nil
  292. * End:
  293. * end of autoopts/cook.c */