is_json.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. /*-
  2. * Copyright (c) 2018 Christos Zoulas
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  15. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  16. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  18. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  19. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  20. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  21. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  22. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  23. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  24. * POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. /*
  27. * Parse JSON object serialization format (RFC-7159)
  28. */
  29. #ifndef TEST
  30. #include "file.h"
  31. #ifndef lint
  32. FILE_RCSID("@(#)$File: is_json.c,v 1.15 2020/06/07 19:05:47 christos Exp $")
  33. #endif
  34. #include <string.h>
  35. #include "magic.h"
  36. #endif
  /*
   * Debug tracing hook.  With DEBUG defined, DPRINTF(a, b, c) prints the
   * label `a', the byte that `b' points at (as two hex digits and as a
   * character) and at most 20 characters of the text starting at `c'.
   * Without DEBUG it expands to an empty statement.
   */
  #if defined(DEBUG)
  #include <stdio.h>
  #define DPRINTF(a, b, c) \
      printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
  #else /* !DEBUG */
  #define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
  #endif /* DEBUG */
  /*
   * Slot numbers in the statistics array that the parser updates;
   * JSON_MAX is the number of slots that array must provide.
   */
  enum {
      JSON_ARRAY = 0,
      JSON_CONSTANT = 1,
      JSON_NUMBER = 2,
      JSON_OBJECT = 3,
      JSON_STRING = 4,
      JSON_ARRAYN = 5,
      JSON_MAX = 6
  };
  /*
   * JSON_COUNT selects the detection mode:
   *   non-zero: count all the objects; the whole data file is required
   *   zero:     stop once an object or an array has been found
   * A value supplied before this point (e.g. on the compiler command line)
   * is kept; otherwise it defaults to 0.
   */
  #if !defined(JSON_COUNT)
  #define JSON_COUNT 0
  #endif /* !JSON_COUNT */
  /*
   * Forward declaration: the array and object parsers call json_parse()
   * for their elements before its definition appears.
   */
  static int json_parse(const unsigned char **ucp, const unsigned char *ue,
      size_t *st, size_t lvl);
  /*
   * Return 1 when uc is one of the four whitespace bytes recognised here
   * (space, line feed, carriage return, horizontal tab), 0 otherwise.
   */
  static int
  json_isspace(const unsigned char uc)
  {
      return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
  }
  /*
   * Return 1 when uc is a decimal digit '0'..'9', 0 otherwise.
   */
  static int
  json_isdigit(unsigned char uc)
  {
      return uc >= '0' && uc <= '9';
  }
  /*
   * Return 1 when uc is a hexadecimal digit (0-9, a-f or A-F), 0 otherwise.
   */
  static int
  json_isxdigit(unsigned char uc)
  {
      if (uc >= '0' && uc <= '9')
          return 1;
      if (uc >= 'a' && uc <= 'f')
          return 1;
      return uc >= 'A' && uc <= 'F';
  }
  /*
   * Advance uc over consecutive whitespace bytes, never going beyond ue.
   * Returns the first position that is either ue or not whitespace.
   */
  static const unsigned char *
  json_skip_space(const unsigned char *uc, const unsigned char *ue)
  {
      for (; uc < ue; uc++) {
          if (!json_isspace(*uc))
              break;
      }
      return uc;
  }
  /*
   * Scan the body of a string.  The callers in this file consume the
   * opening quote first, so *ucp points at the first byte after it; ue is
   * the end of the buffer.
   *
   * Returns 1 and leaves *ucp just past the closing quote on success.
   * Returns 0 with *ucp at the position where scanning stopped when a NUL
   * byte, an unrecognised escape, a \u escape without four hex digits, or
   * the end of the buffer is met first.
   */
  static int
  json_parse_string(const unsigned char **ucp, const unsigned char *ue)
  {
      const unsigned char *p = *ucp;
      unsigned char ch;
      size_t n;

      DPRINTF("Parse string: ", p, *ucp);
      while (p < ue) {
          ch = *p++;
          if (ch == '"') {
              *ucp = p;
              DPRINTF("Good string: ", p, *ucp);
              return 1;
          }
          if (ch == '\0')
              break;
          if (ch != '\\')
              continue;

          /* Backslash: the byte after it selects the escape. */
          if (p == ue)
              break;
          ch = *p++;
          switch (ch) {
          case '"': case '\\': case '/':
          case 'b': case 'f': case 'n': case 'r': case 't':
              continue;
          case 'u':
              /* \u needs exactly four hex digits inside the buffer. */
              if (ue - p < 4) {
                  p = ue;
                  goto bad;
              }
              for (n = 0; n < 4; n++) {
                  if (!json_isxdigit(*p++))
                      goto bad;
              }
              continue;
          default:
              /* NUL or any other byte is not a valid escape. */
              goto bad;
          }
      }
  bad:
      DPRINTF("Bad string: ", p, *ucp);
      *ucp = p;
      return 0;
  }
  156. static int
  157. json_parse_array(const unsigned char **ucp, const unsigned char *ue,
  158. size_t *st, size_t lvl)
  159. {
  160. const unsigned char *uc = *ucp;
  161. DPRINTF("Parse array: ", uc, *ucp);
  162. while (uc < ue) {
  163. if (*uc == ']')
  164. goto done;
  165. if (!json_parse(&uc, ue, st, lvl + 1))
  166. goto out;
  167. if (uc == ue)
  168. goto out;
  169. switch (*uc) {
  170. case ',':
  171. uc++;
  172. continue;
  173. case ']':
  174. done:
  175. st[JSON_ARRAYN]++;
  176. *ucp = uc + 1;
  177. DPRINTF("Good array: ", uc, *ucp);
  178. return 1;
  179. default:
  180. goto out;
  181. }
  182. }
  183. out:
  184. DPRINTF("Bad array: ", uc, *ucp);
  185. *ucp = uc;
  186. return 0;
  187. }
  /*
   * Parse the remainder of an object.  json_parse() consumes the opening
   * '{' before calling, so *ucp points just after it; ue is the end of the
   * buffer, st the statistics array and lvl the current nesting level.
   *
   * Each member is a quoted name, a ':' and a value parsed by json_parse()
   * at lvl + 1; members are separated by ','.  A '}' is accepted wherever
   * a member name could start.
   *
   * Returns 1 with *ucp just past the closing '}' on success, 0 with *ucp
   * at the position where parsing stopped otherwise.
   */
  static int
  json_parse_object(const unsigned char **ucp, const unsigned char *ue,
      size_t *st, size_t lvl)
  {
      const unsigned char *cur = *ucp;
      unsigned char sep;
      int closed = 0;

      DPRINTF("Parse object: ", cur, *ucp);
      while (cur < ue) {
          cur = json_skip_space(cur, ue);
          if (cur == ue)
              break;
          if (*cur == '}') {
              cur++;
              closed = 1;
              break;
          }

          /* Member name: a string introduced by a double quote. */
          if (*cur++ != '"') {
              DPRINTF("not string", cur, *ucp);
              break;
          }
          DPRINTF("next field", cur, *ucp);
          if (!json_parse_string(&cur, ue)) {
              DPRINTF("not string", cur, *ucp);
              break;
          }

          /* Name/value separator. */
          cur = json_skip_space(cur, ue);
          if (cur == ue)
              break;
          if (*cur++ != ':') {
              DPRINTF("not colon", cur, *ucp);
              break;
          }

          /* Member value. */
          if (!json_parse(&cur, ue, st, lvl + 1)) {
              DPRINTF("not json", cur, *ucp);
              break;
          }
          if (cur == ue)
              break;

          /* Either another member follows or the object ends. */
          sep = *cur++;
          if (sep == ',')
              continue;
          if (sep == '}') {       /* { */
              closed = 1;
              break;
          }
          *ucp = cur - 1;
          DPRINTF("not more", cur, *ucp);
          break;
      }

      if (closed) {
          *ucp = cur;
          DPRINTF("Good object: ", cur, *ucp);
          return 1;
      }
      DPRINTF("Bad object: ", cur, *ucp);
      *ucp = cur;
      return 0;
  }
  /*
   * Scan a number starting at *ucp: an optional '-', a run of digits, an
   * optional '.', another run of digits and, once at least one digit has
   * been seen, an optional exponent ('e' or 'E', optional sign, digits).
   *
   * Returns 1 when the scan ended with at least one digit in the last
   * mandatory part (mantissa, or exponent when one was started), else 0.
   * *ucp is set to the position where scanning stopped in both cases,
   * except for an empty buffer, which returns 0 without touching *ucp.
   */
  static int
  json_parse_number(const unsigned char **ucp, const unsigned char *ue)
  {
      const unsigned char *p = *ucp;
      int seen = 0;

      DPRINTF("Parse number: ", p, *ucp);
      if (p == ue)
          return 0;

      /* Optional minus sign followed by the integer digits. */
      if (*p == '-')
          p++;
      while (p < ue && json_isdigit(*p)) {
          seen = 1;
          p++;
      }
      if (p == ue)
          goto finish;

      /* Optional decimal point followed by the fraction digits. */
      if (*p == '.')
          p++;
      while (p < ue && json_isdigit(*p)) {
          seen = 1;
          p++;
      }
      if (p == ue)
          goto finish;

      /* Optional exponent; it must contribute digits of its own. */
      if (seen && (*p == 'e' || *p == 'E')) {
          p++;
          seen = 0;
          if (p == ue)
              goto finish;
          if (*p == '+' || *p == '-')
              p++;
          while (p < ue && json_isdigit(*p)) {
              seen = 1;
              p++;
          }
      }
  finish:
      if (seen)
          DPRINTF("Good number: ", p, *ucp);
      else
          DPRINTF("Bad number: ", p, *ucp);
      *ucp = p;
      return seen;
  }
  /*
   * Match the tail of a literal constant (true, false, null).
   *
   * json_parse() consumes the first character before calling, so *ucp
   * points at the second character of the candidate.  str is the complete
   * literal including that first character, and len is its sizeof, i.e.
   * it also counts the terminating NUL; len - 2 bytes are compared.
   *
   * Returns 1 with *ucp just past the literal when every remaining byte
   * matches.  Returns 0 with *ucp at the first mismatching byte, or at ue
   * when the buffer ends inside the literal.
   *
   * The previous loop evaluated the comparison but ignored its result, so
   * any three bytes after 't' were accepted as "true"; it also rejected a
   * literal that ended exactly at the end of the buffer.
   */
  static int
  json_parse_const(const unsigned char **ucp, const unsigned char *ue,
      const char *str, size_t len)
  {
      const unsigned char *uc = *ucp;
      /* Bytes still to compare: everything but the first char and NUL. */
      size_t left = len >= 2 ? len - 2 : 0;

      DPRINTF("Parse const: ", uc, *ucp);
      for (str++; left != 0; left--, uc++, str++) {
          if (uc == ue || *uc != (unsigned char)*str) {
              DPRINTF("Bad const: ", uc, *ucp);
              *ucp = uc;
              return 0;
          }
      }
      *ucp = uc;
      return 1;
  }
  305. static int
  306. json_parse(const unsigned char **ucp, const unsigned char *ue,
  307. size_t *st, size_t lvl)
  308. {
  309. const unsigned char *uc;
  310. int rv = 0;
  311. int t;
  312. uc = json_skip_space(*ucp, ue);
  313. if (uc == ue)
  314. goto out;
  315. // Avoid recursion
  316. if (lvl > 20)
  317. return 0;
  318. #if JSON_COUNT
  319. /* bail quickly if not counting */
  320. if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
  321. return 1;
  322. #endif
  323. DPRINTF("Parse general: ", uc, *ucp);
  324. switch (*uc++) {
  325. case '"':
  326. rv = json_parse_string(&uc, ue);
  327. t = JSON_STRING;
  328. break;
  329. case '[':
  330. rv = json_parse_array(&uc, ue, st, lvl + 1);
  331. t = JSON_ARRAY;
  332. break;
  333. case '{': /* '}' */
  334. rv = json_parse_object(&uc, ue, st, lvl + 1);
  335. t = JSON_OBJECT;
  336. break;
  337. case 't':
  338. rv = json_parse_const(&uc, ue, "true", sizeof("true"));
  339. t = JSON_CONSTANT;
  340. break;
  341. case 'f':
  342. rv = json_parse_const(&uc, ue, "false", sizeof("false"));
  343. t = JSON_CONSTANT;
  344. break;
  345. case 'n':
  346. rv = json_parse_const(&uc, ue, "null", sizeof("null"));
  347. t = JSON_CONSTANT;
  348. break;
  349. default:
  350. --uc;
  351. rv = json_parse_number(&uc, ue);
  352. t = JSON_NUMBER;
  353. break;
  354. }
  355. if (rv)
  356. st[t]++;
  357. uc = json_skip_space(uc, ue);
  358. out:
  359. *ucp = uc;
  360. DPRINTF("End general: ", uc, *ucp);
  361. if (lvl == 0)
  362. return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
  363. return rv;
  364. }
  365. #ifndef TEST
  366. int
  367. file_is_json(struct magic_set *ms, const struct buffer *b)
  368. {
  369. const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
  370. const unsigned char *ue = uc + b->flen;
  371. size_t st[JSON_MAX];
  372. int mime = ms->flags & MAGIC_MIME;
  373. if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
  374. return 0;
  375. memset(st, 0, sizeof(st));
  376. if (!json_parse(&uc, ue, st, 0))
  377. return 0;
  378. if (mime == MAGIC_MIME_ENCODING)
  379. return 1;
  380. if (mime) {
  381. if (file_printf(ms, "application/json") == -1)
  382. return -1;
  383. return 1;
  384. }
  385. if (file_printf(ms, "JSON data") == -1)
  386. return -1;
  387. #if JSON_COUNT
  388. #define P(n) st[n], st[n] > 1 ? "s" : ""
  389. if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
  390. "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
  391. "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
  392. "u >1array%s)",
  393. P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
  394. P(JSON_NUMBER), P(JSON_ARRAYN))
  395. == -1)
  396. return -1;
  397. #endif
  398. return 1;
  399. }
  400. #else
  401. #include <sys/types.h>
  402. #include <sys/stat.h>
  403. #include <stdio.h>
  404. #include <fcntl.h>
  405. #include <unistd.h>
  406. #include <stdlib.h>
  407. #include <stdint.h>
  408. #include <err.h>
  409. int
  410. main(int argc, char *argv[])
  411. {
  412. int fd, rv;
  413. struct stat st;
  414. unsigned char *p;
  415. size_t stats[JSON_MAX];
  416. if ((fd = open(argv[1], O_RDONLY)) == -1)
  417. err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
  418. if (fstat(fd, &st) == -1)
  419. err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
  420. if ((p = malloc(st.st_size)) == NULL)
  421. err(EXIT_FAILURE, "Can't allocate %jd bytes",
  422. (intmax_t)st.st_size);
  423. if (read(fd, p, st.st_size) != st.st_size)
  424. err(EXIT_FAILURE, "Can't read %jd bytes",
  425. (intmax_t)st.st_size);
  426. memset(stats, 0, sizeof(stats));
  427. printf("is json %d\n", json_parse((const unsigned char **)&p,
  428. p + st.st_size, stats, 0));
  429. return 0;
  430. }
  431. #endif