cook.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. /*
  2. * $Id: cook.c,v 4.3 2006/03/25 19:24:56 bkorb Exp $
  3. * Time-stamp: "2005-05-20 13:58:56 bkorb"
  4. *
  5. * This file contains the routines that deal with processing quoted strings
  6. * into an internal format.
  7. */
  8. /*
  9. * Automated Options copyright 1992-2006 Bruce Korb
  10. *
  11. * Automated Options is free software.
  12. * You may redistribute it and/or modify it under the terms of the
  13. * GNU General Public License, as published by the Free Software
  14. * Foundation; either version 2, or (at your option) any later version.
  15. *
  16. * Automated Options is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with Automated Options. See the file "COPYING". If not,
  23. * write to: The Free Software Foundation, Inc.,
  24. * 51 Franklin Street, Fifth Floor,
  25. * Boston, MA 02110-1301, USA.
  26. *
  27. * As a special exception, Bruce Korb gives permission for additional
  28. * uses of the text contained in his release of AutoOpts.
  29. *
  30. * The exception is that, if you link the AutoOpts library with other
  31. * files to produce an executable, this does not by itself cause the
  32. * resulting executable to be covered by the GNU General Public License.
  33. * Your use of that executable is in no way restricted on account of
  34. * linking the AutoOpts library code into it.
  35. *
  36. * This exception does not however invalidate any other reasons why
  37. * the executable file might be covered by the GNU General Public License.
  38. *
  39. * This exception applies only to the code released by Bruce Korb under
  40. * the name AutoOpts. If you copy code from other sources under the
  41. * General Public License into a copy of AutoOpts, as the General Public
  42. * License permits, the exception does not apply to the code that you add
  43. * in this way. To avoid misleading anyone as to the status of such
  44. * modified files, you must delete this exception notice from them.
  45. *
  46. * If you write modifications of your own for AutoOpts, it is your choice
  47. * whether to permit this exception to apply to your modifications.
  48. * If you do not wish that, delete this exception notice.
  49. */
  50. /* = = = START-STATIC-FORWARD = = = */
  51. /* static forward declarations maintained by :mkfwd */
  52. /* = = = END-STATIC-FORWARD = = = */
  53. /*=export_func ao_string_cook_escape_char
  54. * private:
  55. *
  56. * what: escape-process a string fragment
  57. * arg: + const char* + pzScan + points to character after the escape +
  58. * arg: + char* + pRes + Where to put the result byte +
  59. * arg: + char + nl_ch + replacement char if scanned char is \n +
  60. *
  61. * ret-type: unsigned int
  62. * ret-desc: The number of bytes consumed processing the escaped character.
  63. *
  64. * doc:
  65. *
  66. * This function converts "t" into "\t" and all your other favorite
  67. * escapes, including numeric ones: hex and octal, too.
  68. * The returned result tells the caller how far to advance the
  69. * scan pointer (passed in). The default is to just pass through the
  70. * escaped character and advance the scan by one.
  71. *
  72. * Some applications need to keep an escaped newline, others need to
  73. * suppress it. This is accomplished by supplying a '\n' replacement
  74. * character that is different from \n, if need be. For example, use
  75. * 0x7F and never emit a 0x7F.
  76. *
  77. * err: @code{0} is returned if the input string ends at the escape (nothing to consume).
  78. =*/
  unsigned int
  ao_string_cook_escape_char( const char* pzIn, char* pRes, char nl )
  {
      /*
       *  Decode one backslash escape.
       *
       *  pzIn  points at the character that follows the backslash.
       *  pRes  receives the single decoded byte.
       *  nl    is stored in place of an escaped newline (LF or CR LF).
       *
       *  The return value is the number of input bytes consumed, counted
       *  from pzIn.  It is zero only when pzIn points at the terminating
       *  NUL byte, i.e. there is nothing to decode.
       *
       *  Unless a case below says otherwise, the scanned character is
       *  passed through unchanged and one byte is consumed.
       */
      unsigned int res = 1;

      switch (*pRes = *pzIn++) {
      case '\0':                  /* end of input string */
          return 0;

      case '\r':
          /*
           *  A lone carriage return is passed through as is.
           *  CR LF is treated as one newline that consumes two bytes.
           */
          if (*pzIn != '\n')
              return 1;
          res++;
          /* FALLTHROUGH */

      case '\n':                  /* emit the caller's newline replacement */
          *pRes = nl;
          return res;

      case 'a': *pRes = '\a'; break;
      case 'b': *pRes = '\b'; break;
      case 'f': *pRes = '\f'; break;
      case 'n': *pRes = '\n'; break;
      case 'r': *pRes = '\r'; break;
      case 't': *pRes = '\t'; break;
      case 'v': *pRes = '\v'; break;

      case 'x':                   /* hex escape: one or two hex digits */
      {
          unsigned int val    = 0;
          unsigned int digits = 0;

          /*
           *  The <ctype.h> classifiers require a value representable as
           *  unsigned char (or EOF); a plain char may be negative, so
           *  convert before the call.
           */
          while ((digits < 2) && isxdigit( (unsigned char)*pzIn )) {
              unsigned char ch = (unsigned char)*pzIn++;
              unsigned int  nibble;

              if ((ch >= 'A') && (ch <= 'F'))
                  nibble = 10 + (unsigned int)(ch - 'A');
              else if ((ch >= 'a') && (ch <= 'f'))
                  nibble = 10 + (unsigned int)(ch - 'a');
              else
                  nibble = (unsigned int)(ch - '0');

              val = (val << 4) + nibble;
              digits++;
          }

          /*
           *  With no hex digit at all, the 'x' already stored in *pRes
           *  stands and only that one byte is consumed.
           */
          if (digits > 0) {
              *pRes = (char)val;
              res   = 1 + digits;
          }
          break;
      }

      default:
          /*
           *  Octal escape: one to three octal digits.  A plain range test
           *  is used so that no <ctype.h> call sees a negative char.
           */
          if ((*pRes >= '0') && (*pRes <= '7')) {
              unsigned int val = (unsigned int)(*pRes - '0');

              if ((pzIn[0] >= '0') && (pzIn[0] <= '7')) {
                  val = (val << 3) + (unsigned int)(pzIn[0] - '0');
                  res = 2;

                  /*
                   *  Take the third digit only if the result still fits
                   *  in one byte (two-digit value of at most 037).
                   */
                  if (  (pzIn[1] >= '0') && (pzIn[1] <= '7')
                     && (val <= 037)) {
                      val = (val << 3) + (unsigned int)(pzIn[1] - '0');
                      res = 3;
                  }
              }
              *pRes = (char)val;
          }
          break;
      }

      return res;
  }
  167. /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
  168. *
  169. * A quoted string has been found.
  170. * Find the end of it and compress any escape sequences.
  171. */
  172. /*=export_func ao_string_cook
  173. * private:
  174. *
  175. * what: concatenate and escape-process strings
  176. * arg: + char* + pzScan + The *MODIFIABLE* input buffer +
  177. * arg: + int* + pLineCt + The (possibly NULL) pointer to a line count +
  178. *
  179. * ret-type: char*
  180. * ret-desc: The address of the text following the processed strings.
  181. * The return value is NULL if the strings are ill-formed.
  182. *
  183. * doc:
  184. *
  185. * A series of one or more quoted strings are concatenated together.
  186. * If they are quoted with double quotes (@code{"}), then backslash
  187. * escapes are processed per the C programming language. If they are
  188. * single quote strings, then the backslashes are honored only when they
  189. * precede another backslash or a single quote character.
  190. *
  191. * err: @code{NULL} is returned if the string(s) is/are mal-formed.
  192. =*/
  193. char*
  194. ao_string_cook( char* pzScan, int* pLineCt )
  195. {
  196. int l = 0;
  197. char q = *pzScan;
  198. /*
  199. * It is a quoted string. Process the escape sequence characters
  200. * (in the set "abfnrtv") and make sure we find a closing quote.
  201. */
  202. char* pzD = pzScan++;
  203. char* pzS = pzScan;
  204. if (pLineCt == NULL)
  205. pLineCt = &l;
  206. for (;;) {
  207. /*
  208. * IF the next character is the quote character, THEN we may end the
  209. * string. We end it unless the next non-blank character *after* the
  210. * string happens to also be a quote. If it is, then we will change
  211. * our quote character to the new quote character and continue
  212. * condensing text.
  213. */
  214. while (*pzS == q) {
  215. *pzD = NUL; /* This is probably the end of the line */
  216. pzS++;
  217. scan_for_quote:
  218. while (isspace(*pzS))
  219. if (*(pzS++) == '\n')
  220. (*pLineCt)++;
  221. /*
  222. * IF the next character is a quote character,
  223. * THEN we will concatenate the strings.
  224. */
  225. switch (*pzS) {
  226. case '"':
  227. case '\'':
  228. break;
  229. case '/':
  230. /*
  231. * Allow for a comment embedded in the concatenated string.
  232. */
  233. switch (pzS[1]) {
  234. default: return NULL;
  235. case '/':
  236. /*
  237. * Skip to end of line
  238. */
  239. pzS = strchr( pzS, '\n' );
  240. if (pzS == NULL)
  241. return NULL;
  242. (*pLineCt)++;
  243. break;
  244. case '*':
  245. {
  246. char* p = strstr( pzS+2, "*/" );
  247. /*
  248. * Skip to terminating star slash
  249. */
  250. if (p == NULL)
  251. return NULL;
  252. while (pzS < p) {
  253. if (*(pzS++) == '\n')
  254. (*pLineCt)++;
  255. }
  256. pzS = p + 2;
  257. }
  258. }
  259. goto scan_for_quote;
  260. default:
  261. /*
  262. * The next non-whitespace character is not a quote.
  263. * The series of quoted strings has come to an end.
  264. */
  265. return pzS;
  266. }
  267. q = *(pzS++); /* assign new quote character and advance scan */
  268. }
  269. /*
  270. * We are inside a quoted string. Copy text.
  271. */
  272. switch (*(pzD++) = *(pzS++)) {
  273. case NUL:
  274. return NULL;
  275. case '\n':
  276. (*pLineCt)++;
  277. break;
  278. case '\\':
  279. /*
  280. * IF we are escaping a new line,
  281. * THEN drop both the escape and the newline from
  282. * the result string.
  283. */
  284. if (*pzS == '\n') {
  285. pzS++;
  286. pzD--;
  287. (*pLineCt)++;
  288. }
  289. /*
  290. * ELSE IF the quote character is '"' or '`',
  291. * THEN we do the full escape character processing
  292. */
  293. else if (q != '\'') {
  294. int ct = ao_string_cook_escape_char( pzS, pzD-1, '\n' );
  295. if (ct == 0)
  296. return NULL;
  297. pzS += ct;
  298. } /* if (q != '\'') */
  299. /*
  300. * OTHERWISE, we only process "\\", "\'" and "\#" sequences.
  301. * The latter only to easily hide preprocessing directives.
  302. */
  303. else switch (*pzS) {
  304. case '\\':
  305. case '\'':
  306. case '#':
  307. pzD[-1] = *pzS++;
  308. }
  309. } /* switch (*(pzD++) = *(pzS++)) */
  310. } /* for (;;) */
  311. }
  312. /*
  313. * Local Variables:
  314. * mode: C
  315. * c-file-style: "stroustrup"
  316. * tab-width: 4
  317. * indent-tabs-mode: nil
  318. * End:
  319. * end of autoopts/cook.c */