123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500 |
- /*-
- * Copyright (c) 2018 Christos Zoulas
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
- /*
- * Parse JSON object serialization format (RFC-7159)
- */
- #ifndef TEST
- #include "file.h"
- #ifndef lint
- FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
- #endif
- #include "magic.h"
- #else
- #include <stdio.h>
- #include <stddef.h>
- #endif
- #include <string.h>
/*
 * Debug tracing.
 *
 * With DEBUG defined, DPRINTF(msg, cur, start) prints `msg' indented by the
 * caller's local variable `lvl', followed by the byte at `cur' (in hex and
 * as a character) and the (cur - start) bytes beginning at `start'.
 * Without DEBUG it expands to an empty statement.
 *
 * The DEBUG form reads a variable named `lvl' from the calling scope,
 * evaluates `cur' and `start' more than once and dereferences `cur', so the
 * arguments must be free of side effects.
 *
 * __file_debugused is attached to parameters that are only read through
 * DPRINTF; in non-DEBUG builds it expands to the "unused" attribute.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
	printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	    (int)((b) - (c)), (const char *)(c))
#define __file_debugused
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#define __file_debugused	__attribute__((__unused__))
#endif
/*
 * Indices into the per-type counter array (size_t st[JSON_MAX]) that is
 * threaded through the parser.  JSON_MAX is the number of counters, not a
 * valid index.
 */
enum {
	JSON_ARRAY = 0,		/* array values accepted by json_parse() */
	JSON_CONSTANT = 1,	/* true, false, null */
	JSON_NUMBER = 2,
	JSON_OBJECT = 3,
	JSON_STRING = 4,
	JSON_ARRAYN = 5,	/* arrays closed by json_parse_array() */
	JSON_MAX = 6
};
/*
 * JSON_COUNT is a compile-time switch; it defaults to 0 unless the build
 * defines it first.
 *
 * When it is non-zero:
 *   - file_is_json() appends the per-type counters to the description;
 *   - json_parse() compiles in an early "return 1" for lvl > 1 once an
 *     object or an array has already been counted.
 * When it is zero neither piece of code is compiled.
 *
 * NOTE(review): this comment used to read "if JSON_COUNT != 0: count all
 * the objects, require that we have the whole data file; otherwise: stop
 * if we find an object or an array".  The early return in json_parse()
 * sits under "#if JSON_COUNT", which looks inverted relative to that
 * description -- confirm the intent before enabling JSON_COUNT.
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
- static int json_parse(const unsigned char **, const unsigned char *, size_t *,
- size_t);
/*
 * Return 1 if uc is one of the four whitespace bytes the parser skips
 * (space, line feed, carriage return, horizontal tab), otherwise 0.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
/*
 * Return 1 if uc is a decimal digit '0'..'9', otherwise 0.
 * Locale independent, unlike isdigit(3).
 */
static int
json_isdigit(unsigned char uc)
{
	/* C guarantees that '0'..'9' are consecutive. */
	return uc >= '0' && uc <= '9';
}
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f, A-F), otherwise 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	if (json_isdigit(uc))
		return 1;
	/* 'a'..'f' and 'A'..'F' are consecutive in ASCII (and EBCDIC). */
	if (uc >= 'a' && uc <= 'f')
		return 1;
	return uc >= 'A' && uc <= 'F';
}
/*
 * Return the first position in [uc, ue) that does not hold a whitespace
 * byte (as defined by json_isspace()), or ue if there is none.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
- /*ARGSUSED*/
- static int
- json_parse_string(const unsigned char **ucp, const unsigned char *ue,
- size_t lvl __file_debugused)
- {
- const unsigned char *uc = *ucp;
- size_t i;
- DPRINTF("Parse string: ", uc, *ucp);
- while (uc < ue) {
- switch (*uc++) {
- case '\0':
- goto out;
- case '\\':
- if (uc == ue)
- goto out;
- switch (*uc++) {
- case '\0':
- goto out;
- case '"':
- case '\\':
- case '/':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- continue;
- case 'u':
- if (ue - uc < 4) {
- uc = ue;
- goto out;
- }
- for (i = 0; i < 4; i++)
- if (!json_isxdigit(*uc++))
- goto out;
- continue;
- default:
- goto out;
- }
- case '"':
- DPRINTF("Good string: ", uc, *ucp);
- *ucp = uc;
- return 1;
- default:
- continue;
- }
- }
- out:
- DPRINTF("Bad string: ", uc, *ucp);
- *ucp = uc;
- return 0;
- }
- static int
- json_parse_array(const unsigned char **ucp, const unsigned char *ue,
- size_t *st, size_t lvl)
- {
- const unsigned char *uc = *ucp;
- DPRINTF("Parse array: ", uc, *ucp);
- while (uc < ue) {
- uc = json_skip_space(uc, ue);
- if (uc == ue)
- goto out;
- if (*uc == ']')
- goto done;
- if (!json_parse(&uc, ue, st, lvl + 1))
- goto out;
- if (uc == ue)
- goto out;
- switch (*uc) {
- case ',':
- uc++;
- continue;
- case ']':
- done:
- st[JSON_ARRAYN]++;
- DPRINTF("Good array: ", uc, *ucp);
- *ucp = uc + 1;
- return 1;
- default:
- goto out;
- }
- }
- out:
- DPRINTF("Bad array: ", uc, *ucp);
- *ucp = uc;
- return 0;
- }
/*
 * Parse the remainder of an object; *ucp points just past the '{'.
 *
 * Each member is a string key, optional whitespace, ':' and a value parsed
 * by json_parse(); members are separated by ','.  A '}' is accepted
 * wherever a key could start, so both "{}" and a trailing comma before
 * '}' are accepted.
 *
 * Returns 1 with *ucp just past the '}', or 0 with *ucp at the position
 * where parsing stopped.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;
	unsigned char c;

	DPRINTF("Parse object: ", cur, *ucp);
	while (cur < ue) {
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;

		/* Either the end of the object or the start of a key */
		c = *cur++;
		if (c == '}')
			goto good;
		if (c != '"') {
			DPRINTF("not string", cur, *ucp);
			break;
		}
		DPRINTF("next field", cur, *ucp);
		if (!json_parse_string(&cur, ue, lvl)) {
			DPRINTF("not string", cur, *ucp);
			break;
		}

		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur++ != ':') {
			DPRINTF("not colon", cur, *ucp);
			break;
		}

		if (!json_parse(&cur, ue, st, lvl + 1)) {
			DPRINTF("not json", cur, *ucp);
			break;
		}
		if (cur == ue)
			break;

		/* After a value only ',' or '}' may follow */
		c = *cur++;
		if (c == ',')
			continue;
		if (c == '}')
			goto good;
		DPRINTF("not more", cur, *ucp);
		/* Only affects the debug trace below; overwritten afterwards */
		*ucp = cur - 1;
		break;
	}
	DPRINTF("Bad object: ", cur, *ucp);
	*ucp = cur;
	return 0;
good:
	DPRINTF("Good object: ", cur, *ucp);
	*ucp = cur;
	return 1;
}
- /*ARGSUSED*/
- static int
- json_parse_number(const unsigned char **ucp, const unsigned char *ue,
- size_t lvl __file_debugused)
- {
- const unsigned char *uc = *ucp;
- int got = 0;
- DPRINTF("Parse number: ", uc, *ucp);
- if (uc == ue)
- return 0;
- if (*uc == '-')
- uc++;
- for (; uc < ue; uc++) {
- if (!json_isdigit(*uc))
- break;
- got = 1;
- }
- if (uc == ue)
- goto out;
- if (*uc == '.')
- uc++;
- for (; uc < ue; uc++) {
- if (!json_isdigit(*uc))
- break;
- got = 1;
- }
- if (uc == ue)
- goto out;
- if (got && (*uc == 'e' || *uc == 'E')) {
- uc++;
- got = 0;
- if (uc == ue)
- goto out;
- if (*uc == '+' || *uc == '-')
- uc++;
- for (; uc < ue; uc++) {
- if (!json_isdigit(*uc))
- break;
- got = 1;
- }
- }
- out:
- if (!got)
- DPRINTF("Bad number: ", uc, *ucp);
- else
- DPRINTF("Good number: ", uc, *ucp);
- *ucp = uc;
- return got;
- }
- /*ARGSUSED*/
- static int
- json_parse_const(const unsigned char **ucp, const unsigned char *ue,
- const char *str, size_t len, size_t lvl __file_debugused)
- {
- const unsigned char *uc = *ucp;
- DPRINTF("Parse const: ", uc, *ucp);
- *ucp += --len - 1;
- if (*ucp > ue)
- *ucp = ue;
- for (; uc < ue && --len;) {
- if (*uc++ != *++str) {
- DPRINTF("Bad const: ", uc, *ucp);
- return 0;
- }
- }
- DPRINTF("Good const: ", uc, *ucp);
- return 1;
- }
- static int
- json_parse(const unsigned char **ucp, const unsigned char *ue,
- size_t *st, size_t lvl)
- {
- const unsigned char *uc, *ouc;
- int rv = 0;
- int t;
- ouc = uc = json_skip_space(*ucp, ue);
- if (uc == ue)
- goto out;
- // Avoid recursion
- if (lvl > 500) {
- DPRINTF("Too many levels", uc, *ucp);
- return 0;
- }
- #if JSON_COUNT
- /* bail quickly if not counting */
- if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
- return 1;
- #endif
- DPRINTF("Parse general: ", uc, *ucp);
- switch (*uc++) {
- case '"':
- rv = json_parse_string(&uc, ue, lvl + 1);
- t = JSON_STRING;
- break;
- case '[':
- rv = json_parse_array(&uc, ue, st, lvl + 1);
- t = JSON_ARRAY;
- break;
- case '{': /* '}' */
- rv = json_parse_object(&uc, ue, st, lvl + 1);
- t = JSON_OBJECT;
- break;
- case 't':
- rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
- t = JSON_CONSTANT;
- break;
- case 'f':
- rv = json_parse_const(&uc, ue, "false", sizeof("false"),
- lvl + 1);
- t = JSON_CONSTANT;
- break;
- case 'n':
- rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
- t = JSON_CONSTANT;
- break;
- default:
- --uc;
- rv = json_parse_number(&uc, ue, lvl + 1);
- t = JSON_NUMBER;
- break;
- }
- if (rv)
- st[t]++;
- uc = json_skip_space(uc, ue);
- out:
- DPRINTF("End general: ", uc, *ucp);
- *ucp = uc;
- if (lvl == 0) {
- if (!rv)
- return 0;
- if (uc == ue)
- return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
- if (*ouc == *uc && json_parse(&uc, ue, st, 1))
- return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
- else
- return 0;
- }
- return rv;
- }
- #ifndef TEST
- int
- file_is_json(struct magic_set *ms, const struct buffer *b)
- {
- const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
- const unsigned char *ue = uc + b->flen;
- size_t st[JSON_MAX];
- int mime = ms->flags & MAGIC_MIME;
- int jt;
- if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
- return 0;
- memset(st, 0, sizeof(st));
- if ((jt = json_parse(&uc, ue, st, 0)) == 0)
- return 0;
- if (mime == MAGIC_MIME_ENCODING)
- return 1;
- if (mime) {
- if (file_printf(ms, "application/%s",
- jt == 1 ? "json" : "x-ndjson") == -1)
- return -1;
- return 1;
- }
- if (file_printf(ms, "%sJSON text data",
- jt == 1 ? "" : "New Line Delimited ") == -1)
- return -1;
- #if JSON_COUNT
- #define P(n) st[n], st[n] > 1 ? "s" : ""
- if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
- "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
- "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
- "u >1array%s)",
- P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
- P(JSON_NUMBER), P(JSON_ARRAYN))
- == -1)
- return -1;
- #endif
- return 1;
- }
- #else
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <stdio.h>
- #include <fcntl.h>
- #include <unistd.h>
- #include <stdlib.h>
- #include <stdint.h>
- #include <err.h>
- int
- main(int argc, char *argv[])
- {
- int fd;
- struct stat st;
- unsigned char *p;
- size_t stats[JSON_MAX];
- if ((fd = open(argv[1], O_RDONLY)) == -1)
- err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
- if (fstat(fd, &st) == -1)
- err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
- if ((p = CAST(char *, malloc(st.st_size))) == NULL)
- err(EXIT_FAILURE, "Can't allocate %jd bytes",
- (intmax_t)st.st_size);
- if (read(fd, p, st.st_size) != st.st_size)
- err(EXIT_FAILURE, "Can't read %jd bytes",
- (intmax_t)st.st_size);
- memset(stats, 0, sizeof(stats));
- printf("is json %d\n", json_parse((const unsigned char **)&p,
- p + st.st_size, stats, 0));
- return 0;
- }
- #endif
|