is_json.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. /*-
  2. * Copyright (c) 2018 Christos Zoulas
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  15. * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  16. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  18. * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  19. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  20. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  21. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  22. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  23. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  24. * POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. /*
  27. * Parse JSON object serialization format (RFC-7159)
  28. */
  29. #ifndef TEST
  30. #include "file.h"
  31. #ifndef lint
  32. FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
  33. #endif
  34. #include <string.h>
  35. #include "magic.h"
  36. #endif
#ifdef DEBUG
#include <stdio.h>
/*
 * Debug trace: prints the label (a), the byte at (b) both in hex and as a
 * character, and up to 20 bytes of context starting at (c).
 * NOTE(review): (b) is dereferenced unconditionally; some call sites appear
 * able to pass a pointer equal to the end of the buffer -- confirm before
 * relying on DEBUG builds with untrusted input.
 */
#define DPRINTF(a, b, c) \
printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
/* Tracing disabled: expands to an empty statement. */
#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
#endif
/*
 * Indices into the size_t statistics array handed to the parser.
 * json_parse() increments the slot matching the kind of value it has just
 * parsed successfully.
 */
#define JSON_ARRAY 0	/* arrays */
#define JSON_CONSTANT 1	/* true / false / null */
#define JSON_NUMBER 2	/* numbers */
#define JSON_OBJECT 3	/* objects */
#define JSON_STRING 4	/* strings */
#define JSON_ARRAYN 5	/* arrays in which at least one ',' was seen */
#define JSON_MAX 6	/* number of slots; used to size the array */
/*
 * if JSON_COUNT != 0:
 * count all the objects, require that we have the whole data file
 * otherwise:
 * stop if we find an object or an array
 *
 * Defaults to 0 unless the build defines it.  When non-zero,
 * file_is_json() also prints the per-kind counters.
 * NOTE(review): the only early exit in json_parse() is compiled under
 * "#if JSON_COUNT", i.e. when counting is enabled, which looks inverted
 * relative to the description above -- confirm the intended polarity.
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
/*
 * Forward declaration: json_parse() is called by json_parse_array() and
 * json_parse_object(), which are defined before it and which it calls in
 * turn.
 */
static int json_parse(const unsigned char **, const unsigned char *, size_t *,
size_t);
  62. static int
  63. json_isspace(const unsigned char uc)
  64. {
  65. switch (uc) {
  66. case ' ':
  67. case '\n':
  68. case '\r':
  69. case '\t':
  70. return 1;
  71. default:
  72. return 0;
  73. }
  74. }
  75. static int
  76. json_isdigit(unsigned char uc)
  77. {
  78. switch (uc) {
  79. case '0': case '1': case '2': case '3': case '4':
  80. case '5': case '6': case '7': case '8': case '9':
  81. return 1;
  82. default:
  83. return 0;
  84. }
  85. }
  86. static int
  87. json_isxdigit(unsigned char uc)
  88. {
  89. if (json_isdigit(uc))
  90. return 1;
  91. switch (uc) {
  92. case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
  93. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
  94. return 1;
  95. default:
  96. return 0;
  97. }
  98. }
  99. static const unsigned char *
  100. json_skip_space(const unsigned char *uc, const unsigned char *ue)
  101. {
  102. while (uc < ue && json_isspace(*uc))
  103. uc++;
  104. return uc;
  105. }
  106. static int
  107. json_parse_string(const unsigned char **ucp, const unsigned char *ue)
  108. {
  109. const unsigned char *uc = *ucp;
  110. size_t i;
  111. DPRINTF("Parse string: ", uc, *ucp);
  112. while (uc < ue) {
  113. switch (*uc++) {
  114. case '\0':
  115. goto out;
  116. case '\\':
  117. if (uc == ue)
  118. goto out;
  119. switch (*uc++) {
  120. case '\0':
  121. goto out;
  122. case '"':
  123. case '\\':
  124. case '/':
  125. case 'b':
  126. case 'f':
  127. case 'n':
  128. case 'r':
  129. case 't':
  130. continue;
  131. case 'u':
  132. if (ue - uc < 4) {
  133. uc = ue;
  134. goto out;
  135. }
  136. for (i = 0; i < 4; i++)
  137. if (!json_isxdigit(*uc++))
  138. goto out;
  139. continue;
  140. default:
  141. goto out;
  142. }
  143. case '"':
  144. *ucp = uc;
  145. return 1;
  146. default:
  147. continue;
  148. }
  149. }
  150. out:
  151. DPRINTF("Bad string: ", uc, *ucp);
  152. *ucp = uc;
  153. return 0;
  154. }
  155. static int
  156. json_parse_array(const unsigned char **ucp, const unsigned char *ue,
  157. size_t *st, size_t lvl)
  158. {
  159. const unsigned char *uc = *ucp;
  160. int more = 0; /* Array has more than 1 element */
  161. DPRINTF("Parse array: ", uc, *ucp);
  162. while (uc < ue) {
  163. if (!json_parse(&uc, ue, st, lvl + 1))
  164. goto out;
  165. if (uc == ue)
  166. goto out;
  167. switch (*uc) {
  168. case ',':
  169. more++;
  170. uc++;
  171. continue;
  172. case ']':
  173. if (more)
  174. st[JSON_ARRAYN]++;
  175. *ucp = uc + 1;
  176. return 1;
  177. default:
  178. goto out;
  179. }
  180. }
  181. out:
  182. DPRINTF("Bad array: ", uc, *ucp);
  183. *ucp = uc;
  184. return 0;
  185. }
  186. static int
  187. json_parse_object(const unsigned char **ucp, const unsigned char *ue,
  188. size_t *st, size_t lvl)
  189. {
  190. const unsigned char *uc = *ucp;
  191. DPRINTF("Parse object: ", uc, *ucp);
  192. while (uc < ue) {
  193. uc = json_skip_space(uc, ue);
  194. if (uc == ue)
  195. goto out;
  196. if (*uc++ != '"') {
  197. DPRINTF("not string", uc, *ucp);
  198. goto out;
  199. }
  200. DPRINTF("next field", uc, *ucp);
  201. if (!json_parse_string(&uc, ue)) {
  202. DPRINTF("not string", uc, *ucp);
  203. goto out;
  204. }
  205. uc = json_skip_space(uc, ue);
  206. if (uc == ue)
  207. goto out;
  208. if (*uc++ != ':') {
  209. DPRINTF("not colon", uc, *ucp);
  210. goto out;
  211. }
  212. if (!json_parse(&uc, ue, st, lvl + 1)) {
  213. DPRINTF("not json", uc, *ucp);
  214. goto out;
  215. }
  216. if (uc == ue)
  217. goto out;
  218. switch (*uc++) {
  219. case ',':
  220. continue;
  221. case '}': /* { */
  222. *ucp = uc;
  223. DPRINTF("Good object: ", uc, *ucp);
  224. return 1;
  225. default:
  226. *ucp = uc - 1;
  227. DPRINTF("not more", uc, *ucp);
  228. goto out;
  229. }
  230. }
  231. out:
  232. DPRINTF("Bad object: ", uc, *ucp);
  233. *ucp = uc;
  234. return 0;
  235. }
  236. static int
  237. json_parse_number(const unsigned char **ucp, const unsigned char *ue)
  238. {
  239. const unsigned char *uc = *ucp;
  240. int got = 0;
  241. DPRINTF("Parse number: ", uc, *ucp);
  242. if (uc == ue)
  243. return 0;
  244. if (*uc == '-')
  245. uc++;
  246. for (; uc < ue; uc++) {
  247. if (!json_isdigit(*uc))
  248. break;
  249. got = 1;
  250. }
  251. if (uc == ue)
  252. goto out;
  253. if (*uc == '.')
  254. uc++;
  255. for (; uc < ue; uc++) {
  256. if (!json_isdigit(*uc))
  257. break;
  258. got = 1;
  259. }
  260. if (uc == ue)
  261. goto out;
  262. if (got && (*uc == 'e' || *uc == 'E')) {
  263. uc++;
  264. got = 0;
  265. if (uc == ue)
  266. goto out;
  267. if (*uc == '+' || *uc == '-')
  268. uc++;
  269. for (; uc < ue; uc++) {
  270. if (!json_isdigit(*uc))
  271. break;
  272. got = 1;
  273. }
  274. }
  275. out:
  276. if (!got)
  277. DPRINTF("Bad number: ", uc, *ucp);
  278. else
  279. DPRINTF("Good number: ", uc, *ucp);
  280. *ucp = uc;
  281. return got;
  282. }
  283. static int
  284. json_parse_const(const unsigned char **ucp, const unsigned char *ue,
  285. const char *str, size_t len)
  286. {
  287. const unsigned char *uc = *ucp;
  288. DPRINTF("Parse const: ", uc, *ucp);
  289. for (len--; uc < ue && --len;) {
  290. if (*uc++ == *++str)
  291. continue;
  292. }
  293. if (len)
  294. DPRINTF("Bad const: ", uc, *ucp);
  295. *ucp = uc;
  296. return len == 0;
  297. }
  298. static int
  299. json_parse(const unsigned char **ucp, const unsigned char *ue,
  300. size_t *st, size_t lvl)
  301. {
  302. const unsigned char *uc;
  303. int rv = 0;
  304. int t;
  305. uc = json_skip_space(*ucp, ue);
  306. if (uc == ue)
  307. goto out;
  308. // Avoid recursion
  309. if (lvl > 20)
  310. return 0;
  311. #if JSON_COUNT
  312. /* bail quickly if not counting */
  313. if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
  314. return 1;
  315. #endif
  316. DPRINTF("Parse general: ", uc, *ucp);
  317. switch (*uc++) {
  318. case '"':
  319. rv = json_parse_string(&uc, ue);
  320. t = JSON_STRING;
  321. break;
  322. case '[':
  323. rv = json_parse_array(&uc, ue, st, lvl + 1);
  324. t = JSON_ARRAY;
  325. break;
  326. case '{': /* '}' */
  327. rv = json_parse_object(&uc, ue, st, lvl + 1);
  328. t = JSON_OBJECT;
  329. break;
  330. case 't':
  331. rv = json_parse_const(&uc, ue, "true", sizeof("true"));
  332. t = JSON_CONSTANT;
  333. break;
  334. case 'f':
  335. rv = json_parse_const(&uc, ue, "false", sizeof("false"));
  336. t = JSON_CONSTANT;
  337. break;
  338. case 'n':
  339. rv = json_parse_const(&uc, ue, "null", sizeof("null"));
  340. t = JSON_CONSTANT;
  341. break;
  342. default:
  343. --uc;
  344. rv = json_parse_number(&uc, ue);
  345. t = JSON_NUMBER;
  346. break;
  347. }
  348. if (rv)
  349. st[t]++;
  350. uc = json_skip_space(uc, ue);
  351. out:
  352. *ucp = uc;
  353. DPRINTF("End general: ", uc, *ucp);
  354. if (lvl == 0)
  355. return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
  356. return rv;
  357. }
  358. #ifndef TEST
  359. int
  360. file_is_json(struct magic_set *ms, const struct buffer *b)
  361. {
  362. const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
  363. const unsigned char *ue = uc + b->flen;
  364. size_t st[JSON_MAX];
  365. int mime = ms->flags & MAGIC_MIME;
  366. if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
  367. return 0;
  368. memset(st, 0, sizeof(st));
  369. if (!json_parse(&uc, ue, st, 0))
  370. return 0;
  371. if (mime == MAGIC_MIME_ENCODING)
  372. return 1;
  373. if (mime) {
  374. if (file_printf(ms, "application/json") == -1)
  375. return -1;
  376. return 1;
  377. }
  378. if (file_printf(ms, "JSON data") == -1)
  379. return -1;
  380. #if JSON_COUNT
  381. #define P(n) st[n], st[n] > 1 ? "s" : ""
  382. if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
  383. "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
  384. "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
  385. "u >1array%s)",
  386. P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
  387. P(JSON_NUMBER), P(JSON_ARRAYN))
  388. == -1)
  389. return -1;
  390. #endif
  391. return 1;
  392. }
  393. #else
  394. #include <sys/types.h>
  395. #include <sys/stat.h>
  396. #include <stdio.h>
  397. #include <fcntl.h>
  398. #include <unistd.h>
  399. #include <stdlib.h>
  400. #include <stdint.h>
  401. #include <err.h>
  402. int
  403. main(int argc, char *argv[])
  404. {
  405. int fd, rv;
  406. struct stat st;
  407. unsigned char *p;
  408. size_t stats[JSON_MAX];
  409. if ((fd = open(argv[1], O_RDONLY)) == -1)
  410. err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
  411. if (fstat(fd, &st) == -1)
  412. err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
  413. if ((p = malloc(st.st_size)) == NULL)
  414. err(EXIT_FAILURE, "Can't allocate %jd bytes",
  415. (intmax_t)st.st_size);
  416. if (read(fd, p, st.st_size) != st.st_size)
  417. err(EXIT_FAILURE, "Can't read %jd bytes",
  418. (intmax_t)st.st_size);
  419. memset(stats, 0, sizeof(stats));
  420. printf("is json %d\n", json_parse((const unsigned char **)&p,
  421. p + st.st_size, stats, 0));
  422. return 0;
  423. }
  424. #endif