You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

json_tokener.h 10 kB

json_tokener_parse_ex: handle out of memory errors. Do not silently truncate values or skip entries if out of memory errors occur. Proof of Concept: - Create poc.c, a program which creates an eight megabyte large json object with key "A" and a lot of "B"s as value, one of them written as a Unicode escape (\u0042): ```c #include <err.h> #include <stdio.h> #include <string.h> #include "json.h" #define STR_LEN (8 * 1024 * 1024) #define STR_PREFIX "{ \"A\": \"" #define STR_SUFFIX "\\u0042\" }" int main(void) { char *str; struct json_tokener *tok; struct json_object *obj; if ((tok = json_tokener_new()) == NULL) errx(1, "json_tokener_new"); if ((str = malloc(STR_LEN)) == NULL) err(1, "malloc"); memset(str, 'B', STR_LEN); memcpy(str, STR_PREFIX, sizeof(STR_PREFIX) - 1); memcpy(str + STR_LEN - sizeof(STR_SUFFIX), STR_SUFFIX, sizeof(STR_SUFFIX)); obj = json_tokener_parse(str); free(str); printf("%p\n", obj); if (obj != NULL) { printf("%.*s\n", 50, json_object_to_json_string(obj)); json_object_put(obj); } json_tokener_free(tok); return 0; } ``` - Compile and run poc, assuming you have enough free heap space: ``` gcc $(pkg-config --cflags --libs) -o poc poc.c ./poc 0x559421e15de0 { "A": "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB ``` - Reduce available heap and run again, which leads to truncation: ``` ulimit -d 10000 ./poc 0x555a5b453de0 { "A": "B" } ``` - Compile json-c with this change and run with reduced heap again: ``` ulimit -d 10000 ./poc (nil) ``` The output is limited to 50 characters, i.e. json-c parses the 8 MB string correctly but the poc does not print all of them to the screen. The truncation occurs because the parser tries to add all chars up to the Unicode-escaped 'B' at once. Since memory is limited to 10 MB there is not enough for this operation. The parser does not fail but continues normally. Another possibility is to create a json file close to 2 GB and run a program on a system with limited amount of RAM, i.e. around 3 GB. 
But ulimit restrictions are much easier for proof of concepts. Treat memory errors correctly and abort operations.
3 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. /*
  2. * $Id: json_tokener.h,v 1.10 2006/07/25 03:24:50 mclark Exp $
  3. *
  4. * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
  5. * Michael Clark <michael@metaparadigm.com>
  6. *
  7. * This library is free software; you can redistribute it and/or modify
  8. * it under the terms of the MIT license. See COPYING for details.
  9. *
  10. */
  11. /**
  12. * @file
  13. * @brief Methods to parse an input string into a tree of json_object objects.
  14. */
  15. #ifndef _json_tokener_h_
  16. #define _json_tokener_h_
  17. #include "json_object.h"
  18. #include <stddef.h>
  19. #ifdef __cplusplus
  20. extern "C" {
  21. #endif
  22. enum json_tokener_error /* Status codes reported by json_tokener_get_error() and json_tokener_parse_verbose(). */
  23. { /* Values are implicit (0, 1, 2, ...), so inserting or reordering entries changes their numeric values. */
  24. json_tokener_success, /**< no error */
  25. json_tokener_continue, /**< parsing stopped in the middle of a value; supply more input */
  26. json_tokener_error_depth, /**< NOTE(review): presumably the max nesting depth was exceeded -- confirm */
  27. json_tokener_error_memory, /**< NOTE(review): presumably an allocation failed -- confirm */
  28. json_tokener_error_parse_eof,
  29. json_tokener_error_parse_unexpected,
  30. json_tokener_error_parse_null,
  31. json_tokener_error_parse_boolean,
  32. json_tokener_error_parse_number,
  33. json_tokener_error_parse_array,
  34. json_tokener_error_parse_object_key_name,
  35. json_tokener_error_parse_object_key_sep,
  36. json_tokener_error_parse_object_value_sep,
  37. json_tokener_error_parse_string,
  38. json_tokener_error_parse_comment,
  39. json_tokener_error_parse_utf8_string, /**< input failed UTF8 validation (see JSON_TOKENER_VALIDATE_UTF8) */
  40. json_tokener_error_size /**< NOTE(review): presumably input beyond the INT32_MAX limit of json_tokener_parse_ex() -- confirm */
  41. };
  42. /** Parser state identifiers; struct json_tokener_srec stores two of them (state, saved_state).
  43. * @deprecated Don't use this outside of json_tokener.c, it will be made private in a future release.
  44. */
  45. enum json_tokener_state
  46. { /* Values are implicit, so the order of the entries determines their numeric values. */
  47. json_tokener_state_eatws,
  48. json_tokener_state_start,
  49. json_tokener_state_finish,
  50. json_tokener_state_null,
  51. json_tokener_state_comment_start,
  52. json_tokener_state_comment,
  53. json_tokener_state_comment_eol,
  54. json_tokener_state_comment_end,
  55. json_tokener_state_string,
  56. json_tokener_state_string_escape,
  57. json_tokener_state_escape_unicode,
  58. json_tokener_state_escape_unicode_need_escape,
  59. json_tokener_state_escape_unicode_need_u,
  60. json_tokener_state_boolean,
  61. json_tokener_state_number,
  62. json_tokener_state_array,
  63. json_tokener_state_array_add,
  64. json_tokener_state_array_sep,
  65. json_tokener_state_object_field_start,
  66. json_tokener_state_object_field,
  67. json_tokener_state_object_field_end,
  68. json_tokener_state_object_value,
  69. json_tokener_state_object_value_add,
  70. json_tokener_state_object_sep,
  71. json_tokener_state_array_after_sep,
  72. json_tokener_state_object_field_start_after_sep,
  73. json_tokener_state_inf
  74. };
  75. /** Parser bookkeeping record; struct json_tokener refers to these through its stack pointer.
  76. * @deprecated Don't use this outside of json_tokener.c, it will be made private in a future release.
  77. */
  78. struct json_tokener_srec
  79. {
  80. enum json_tokener_state state, saved_state;
  81. struct json_object *obj;
  82. struct json_object *current;
  83. char *obj_field_name; /**< NOTE(review): presumably the key of the object member being parsed -- confirm */
  84. };
  85. #define JSON_TOKENER_DEFAULT_DEPTH 32 /* Nesting-depth constant referenced by json_tokener_new_ex(); NOTE(review): presumably the depth json_tokener_new() uses -- confirm */
  86. /**
  87. * Internal state of the json parser.
  88. * Do not access any fields of this structure directly.
  89. * Its definition is published due to historical limitations
  90. * in the json tokener API, and will be changed to be an opaque
  91. * type in the future.
  92. */
  93. struct json_tokener
  94. {
  95. /**
  96. * @deprecated Do not access any of these fields outside of json_tokener.c
  97. */
  98. char *str;
  99. struct printbuf *pb;
  100. int max_depth, depth, is_double, st_pos; /* NOTE(review): max_depth presumably holds the limit given to json_tokener_new_ex() -- confirm */
  101. /**
  102. * @deprecated See json_tokener_get_parse_end() instead.
  103. */
  104. int char_offset;
  105. /**
  106. * @deprecated See json_tokener_get_error() instead.
  107. */
  108. enum json_tokener_error err;
  109. unsigned int ucs_char, high_surrogate; /* NOTE(review): presumably scratch values used while decoding unicode escapes -- confirm */
  110. char quote_char;
  111. struct json_tokener_srec *stack;
  112. int flags; /* NOTE(review): presumably the value passed to json_tokener_set_flags() -- confirm */
  113. };
  114. /**
  115. * Return the offset of the byte after the last byte parsed
  116. * relative to the start of the most recent string passed in
  117. * to json_tokener_parse_ex(). i.e. this is where parsing
  118. * would start again if the input contains another JSON object
  119. * after the currently parsed one.
  120. *
  121. * Note that when multiple parse calls are issued, this is *not* the
  122. * total number of characters parsed.
  123. *
  124. * In the past this would have been accessed as tok->char_offset (a field that is now deprecated in favor of this function).
  125. *
  126. * See json_tokener_parse_ex() for an example of how to use this.
  127. */
  128. JSON_EXPORT size_t json_tokener_get_parse_end(struct json_tokener *tok);
  129. /** Lets `json_tokener` be used as a type name without the struct keyword.
  130. * @deprecated Unused in json-c code
  131. */
  132. typedef struct json_tokener json_tokener;
  133. /**
  134. * Be strict when parsing JSON input. Use caution with
  135. * this flag as what is considered valid may become more
  136. * restrictive from one release to the next, causing your
  137. * code to fail on previously working input.
  138. *
  139. * Note that setting this will also effectively disable parsing
  140. * of multiple json objects in a single character stream
  141. * (e.g. {"foo":123}{"bar":234}); if you want to allow that
  142. * also set JSON_TOKENER_ALLOW_TRAILING_CHARS
  143. *
  144. * This flag is not set by default.
  145. *
  146. * @see json_tokener_set_flags()
  147. */
  148. #define JSON_TOKENER_STRICT 0x01 /* flag bit for the flags argument of json_tokener_set_flags() */
  149. /**
  150. * Use with JSON_TOKENER_STRICT to allow trailing characters after the
  151. * first parsed object.
  152. *
  153. * @see json_tokener_set_flags()
  154. */
  155. #define JSON_TOKENER_ALLOW_TRAILING_CHARS 0x02 /* flag bit distinct from JSON_TOKENER_STRICT (0x01), so both can be set together */
  156. /**
  157. * Cause json_tokener_parse_ex() to validate that input is UTF8.
  158. * If this flag is specified and validation fails, then
  159. * json_tokener_get_error(tok) will return
  160. * json_tokener_error_parse_utf8_string
  161. *
  162. * This flag is not set by default.
  163. *
  164. * @see json_tokener_set_flags()
  165. */
  166. #define JSON_TOKENER_VALIDATE_UTF8 0x10 /* flag bit for the flags argument of json_tokener_set_flags() */
  167. /**
  168. * Given an error previously returned by json_tokener_get_error(),
  169. * return a human readable description of the error.
  170. * @param jerr the error value to describe
  171. * @return a generic error message is returned if an invalid error value is provided.
  172. */
  173. JSON_EXPORT const char *json_tokener_error_desc(enum json_tokener_error jerr);
  174. /**
  175. * Retrieve the error caused by the last call to json_tokener_parse_ex(),
  176. * or json_tokener_success if there is no error.
  177. *
  178. * When parsing a JSON string in pieces, if the tokener is in the middle
  179. * of parsing this will return json_tokener_continue.
  180. * @param tok the tokener whose most recent parse result is queried
  181. * @see json_tokener_error_desc().
  182. */
  183. JSON_EXPORT enum json_tokener_error json_tokener_get_error(struct json_tokener *tok);
  184. /**
  185. * Allocate a new json_tokener.
  186. * When done using that to parse objects, free it with json_tokener_free().
  187. * See json_tokener_parse_ex() for usage details, or json_tokener_new_ex() to choose a custom max nesting depth.
  188. */
  189. JSON_EXPORT struct json_tokener *json_tokener_new(void);
  190. /**
  191. * Allocate a new json_tokener with a custom max nesting depth, given by `depth`.
  192. * @see JSON_TOKENER_DEFAULT_DEPTH
  193. */
  194. JSON_EXPORT struct json_tokener *json_tokener_new_ex(int depth);
  195. /**
  196. * Free a json_tokener previously allocated with json_tokener_new(). NOTE(review): presumably also valid for tokeners from json_tokener_new_ex() -- confirm.
  197. */
  198. JSON_EXPORT void json_tokener_free(struct json_tokener *tok);
  199. /**
  200. * Reset the state of a json_tokener, to prepare to parse a
  201. * brand new JSON object. This must be called before reusing a tokener after json_tokener_parse_ex() reported a fatal error.
  202. */
  203. JSON_EXPORT void json_tokener_reset(struct json_tokener *tok);
  204. /**
  205. * Parse a json_object out of the string `str`.
  206. *
  207. * If you need more control over how the parsing occurs,
  208. * see json_tokener_parse_ex(); to also receive the error code on failure, see json_tokener_parse_verbose().
  209. */
  210. JSON_EXPORT struct json_object *json_tokener_parse(const char *str);
  211. /**
  212. * Parse a json_object out of the string `str`, but if it fails
  213. * return the error in `*error`.
  214. * @see json_tokener_parse()
  215. * @see json_tokener_parse_ex()
  216. */
  217. JSON_EXPORT struct json_object *json_tokener_parse_verbose(const char *str,
  218. enum json_tokener_error *error);
  219. /**
  220. * Set flags that control how parsing will be done. See JSON_TOKENER_STRICT, JSON_TOKENER_ALLOW_TRAILING_CHARS and JSON_TOKENER_VALIDATE_UTF8.
  221. */
  222. JSON_EXPORT void json_tokener_set_flags(struct json_tokener *tok, int flags);
  223. /**
  224. * Parse a string and return a non-NULL json_object if a valid JSON value
  225. * is found. The string does not need to be a JSON object or array;
  226. * it can also be a string, number or boolean value.
  227. *
  228. * A partial JSON string can be parsed. If the parsing is incomplete,
  229. * NULL will be returned and json_tokener_get_error() will return
  230. * json_tokener_continue.
  231. * json_tokener_parse_ex() can then be called with additional bytes in str
  232. * to continue the parsing.
  233. *
  234. * If json_tokener_parse_ex() returns NULL and the error is anything other than
  235. * json_tokener_continue, a fatal error has occurred and parsing must be
  236. * halted. Then, the tok object must not be reused until json_tokener_reset()
  237. * is called.
  238. *
  239. * When a valid JSON value is parsed, a non-NULL json_object will be
  240. * returned, with a reference count of one which belongs to the caller. Also,
  241. * json_tokener_get_error() will return json_tokener_success. Be sure to check
  242. * the type with json_object_is_type() or json_object_get_type() before using
  243. * the object.
  244. *
  245. * Trailing characters after the parsed value do not automatically cause an
  246. * error. It is up to the caller to decide whether to treat this as an
  247. * error or to handle the additional characters, perhaps by parsing another
  248. * json value starting from that point.
  249. *
  250. * If the caller knows that they are at the end of their input, the length
  251. * passed MUST include the final '\0' character, so values with no inherent
  252. * end (i.e. numbers) can be properly parsed, rather than just returning
  253. * json_tokener_continue.
  254. *
  255. * Extra characters can be detected by comparing the value returned by
  256. * json_tokener_get_parse_end() against
  257. * the length of the last len parameter passed in.
  258. *
  259. * The tokener does \b not maintain an internal buffer so the caller is
  260. * responsible for a subsequent call to json_tokener_parse_ex with an
  261. * appropriate str parameter starting with the extra characters.
  262. *
  263. * This interface is presently not 64-bit clean due to the int len argument
  264. * so the function limits the maximum string size to INT32_MAX (2GB).
  265. * If the function is called with len == -1 then strlen is called to check
  266. * the string length is less than INT32_MAX (2GB)
  267. *
  268. * Example:
  269. * @code
  270. json_object *jobj = NULL;
  271. const char *mystring = NULL;
  272. int stringlen = 0;
  273. enum json_tokener_error jerr;
  274. do {
  275. mystring = ... // get JSON string, e.g. read from file, etc...
  276. stringlen = strlen(mystring);
  277. if (end_of_input)
  278. stringlen++; // Include the '\0' if we know we're at the end of input
  279. jobj = json_tokener_parse_ex(tok, mystring, stringlen);
  280. } while ((jerr = json_tokener_get_error(tok)) == json_tokener_continue);
  281. if (jerr != json_tokener_success)
  282. {
  283. fprintf(stderr, "Error: %s\n", json_tokener_error_desc(jerr));
  284. // Handle errors, as appropriate for your application.
  285. }
  286. if (json_tokener_get_parse_end(tok) < (size_t)stringlen)
  287. {
  288. // Handle extra characters after parsed object as desired.
  289. // e.g. issue an error, parse another object from that point, etc...
  290. }
  291. // Success, use jobj here.
  292. @endcode
  293. *
  294. * @param tok a json_tokener previously allocated with json_tokener_new()
  295. * @param str a string with any valid JSON expression, or a portion of one. This does not need to be null terminated.
  296. * @param len the length of str, or -1 to have strlen() called on str
  297. */
  298. JSON_EXPORT struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str,
  299. int len);
  300. #ifdef __cplusplus
  301. }
  302. #endif
  303. #endif