You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

json_tokener.c 17 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. /*
  2. * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
  3. *
  4. * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
  5. * Michael Clark <michael@metaparadigm.com>
  6. *
  7. * This library is free software; you can redistribute it and/or modify
  8. * it under the terms of the MIT license. See COPYING for details.
  9. *
  10. *
  11. * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
  12. * The copyrights to the contents of this file are licensed under the MIT License
  13. * (http://www.opensource.org/licenses/mit-license.php)
  14. */
  15. #include "config.h"
  16. #include <stdio.h>
  17. #include <stdlib.h>
  18. #include <stddef.h>
  19. #include <ctype.h>
  20. #include <string.h>
  21. #include "bits.h"
  22. #include "debug.h"
  23. #include "printbuf.h"
  24. #include "arraylist.h"
  25. #include "json_object.h"
  26. #include "json_tokener.h"
  27. #if !HAVE_STRNCASECMP && defined(_MSC_VER)
  28. /* MSC has the version as _strnicmp */
  29. # define strncasecmp _strnicmp
  30. #elif !HAVE_STRNCASECMP
  31. # error You do not have strncasecmp on your system.
  32. #endif /* HAVE_STRNCASECMP */
  /* Keyword literals; the tokener compares input against them with
   * strncasecmp(), i.e. without regard to case. */
  static const char *json_null_str = "null";
  static const char *json_true_str = "true";
  static const char *json_false_str = "false";

  /*
   * Human-readable text for each tokener error code.
   * The table is indexed directly with tok->err, so the order of the
   * entries must stay in step with the error enumeration.
   */
  const char *json_tokener_errors[] = {
    "success",
    "continue",
    "nesting too deep",   /* was misspelled "nesting to deep" */
    "unexpected end of data",
    "unexpected character",
    "null expected",
    "boolean expected",
    "number expected",
    "array value separator ',' expected",
    "quoted object property name expected",
    "object property name separator ':' expected",
    "object value separator ',' expected",
    "invalid string sequence",
    "expected comment",
  };
  52. struct json_tokener* json_tokener_new(void)
  53. {
  54. struct json_tokener *tok;
  55. tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
  56. if (!tok) return NULL;
  57. tok->pb = printbuf_new();
  58. json_tokener_reset(tok);
  59. return tok;
  60. }
  61. void json_tokener_free(struct json_tokener *tok)
  62. {
  63. json_tokener_reset(tok);
  64. if(tok) printbuf_free(tok->pb);
  65. free(tok);
  66. }
  67. static void json_tokener_reset_level(struct json_tokener *tok, int depth)
  68. {
  69. tok->stack[depth].state = json_tokener_state_eatws;
  70. tok->stack[depth].saved_state = json_tokener_state_start;
  71. json_object_put(tok->stack[depth].current);
  72. tok->stack[depth].current = NULL;
  73. free(tok->stack[depth].obj_field_name);
  74. tok->stack[depth].obj_field_name = NULL;
  75. }
  76. void json_tokener_reset(struct json_tokener *tok)
  77. {
  78. int i;
  79. if (!tok)
  80. return;
  81. for(i = tok->depth; i >= 0; i--)
  82. json_tokener_reset_level(tok, i);
  83. tok->depth = 0;
  84. tok->err = json_tokener_success;
  85. }
  86. struct json_object* json_tokener_parse(char *str)
  87. {
  88. struct json_tokener* tok;
  89. struct json_object* obj;
  90. tok = json_tokener_new();
  91. obj = json_tokener_parse_ex(tok, str, -1);
  92. if(tok->err != json_tokener_success)
  93. obj = (struct json_object*)error_ptr(-tok->err);
  94. json_tokener_free(tok);
  95. return obj;
  96. }
  #if !HAVE_STRNDUP
  /*
   * Fallback strndup() for platforms that lack one.
   *
   * str - source bytes; may be NULL.  Need not be NUL-terminated as long
   *       as at least n bytes are readable.
   * n   - maximum number of bytes to copy.
   *
   * Returns a freshly malloc()ed, NUL-terminated copy of at most n bytes
   * of str (stopping early at the first NUL), or NULL when str is NULL or
   * the allocation fails.  The caller frees the result.
   */
  char* strndup(const char* str, size_t n)
  {
    const char *terminator;
    size_t copy_len;
    char *dup;

    if (!str)
      return NULL;

    /* Bound the scan by n: strlen() would run past the first n bytes and
     * is undefined on a buffer that has no terminator. */
    terminator = (const char *)memchr(str, '\0', n);
    copy_len = terminator ? (size_t)(terminator - str) : n;

    dup = (char *)malloc(copy_len + 1);
    if (dup) {
      memcpy(dup, str, copy_len);
      dup[copy_len] = '\0';
    }
    return dup;
  }
  #endif
  /* Shorthands for the fields of the stack entry at the current depth.
   * They expand against a variable named `tok`, which must be in scope
   * wherever they are used. */
  #define state          tok->stack[tok->depth].state
  #define saved_state    tok->stack[tok->depth].saved_state
  #define current        tok->stack[tok->depth].current
  #define obj_field_name tok->stack[tok->depth].obj_field_name

  /* Optimization:
   * json_tokener_parse_ex() used to spend a lot of CPU in its main loop,
   * iterating character by character.  A large performance boost is
   * achieved by using tighter loops that handle units such as comments
   * and strings locally.  Loops that handle an entire token also gather
   * the whole run of characters and hand it to printbuf_memappend() in a
   * single call instead of appending one char at a time.
   *
   * The POP_CHAR() and ADVANCE_CHAR() macros hold the code shared by the
   * main loop and the tighter loops.
   */

  /* POP_CHAR(dest, tok):
   * Not really a pop: it peeks at the current char and stores it in dest.
   * Evaluates to 1 on success.  When char_offset has reached len it
   * evaluates to 0 and sets tok->err: json_tokener_success if a complete
   * top-level value has been read (depth 0, skipping whitespace before
   * the finish state), json_tokener_continue otherwise.
   * Implicit inputs: the `str` and `len` variables.
   */
  #define POP_CHAR(dest, tok)                                         \
    (((tok)->char_offset != len)                                      \
       ? (((dest) = *str), 1)                                         \
       : (((tok)->err = (((tok)->depth == 0 &&                        \
                          state == json_tokener_state_eatws &&        \
                          saved_state == json_tokener_state_finish)   \
                           ? json_tokener_success                     \
                           : json_tokener_continue)), 0))

  /* ADVANCE_CHAR(str, tok):
   * Increments str and tok->char_offset.
   * For the convenience of the existing conditionals it evaluates to the
   * value of c, i.e. the char that was just consumed (0 on eof).
   * Implicit input: the `c` variable.
   */
  #define ADVANCE_CHAR(str, tok) \
    ((str)++, ++((tok)->char_offset), c)

  /* End optimization macro defs */
  152. struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
  153. char *str, int len)
  154. {
  155. struct json_object *obj = NULL;
  156. char c = '\1';
  157. tok->char_offset = 0;
  158. tok->err = json_tokener_success;
  159. while (POP_CHAR(c, tok)) {
  160. redo_char:
  161. switch(state) {
  162. case json_tokener_state_eatws:
  163. /* Advance until we change state */
  164. while (isspace(c)) {
  165. if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
  166. goto out;
  167. }
  168. if(c == '/') {
  169. printbuf_reset(tok->pb);
  170. printbuf_memappend_fast(tok->pb, &c, 1);
  171. state = json_tokener_state_comment_start;
  172. } else {
  173. state = saved_state;
  174. goto redo_char;
  175. }
  176. break;
  177. case json_tokener_state_start:
  178. switch(c) {
  179. case '{':
  180. state = json_tokener_state_eatws;
  181. saved_state = json_tokener_state_object_field_start;
  182. current = json_object_new_object();
  183. break;
  184. case '[':
  185. state = json_tokener_state_eatws;
  186. saved_state = json_tokener_state_array;
  187. current = json_object_new_array();
  188. break;
  189. case 'N':
  190. case 'n':
  191. state = json_tokener_state_null;
  192. printbuf_reset(tok->pb);
  193. tok->st_pos = 0;
  194. goto redo_char;
  195. case '"':
  196. case '\'':
  197. state = json_tokener_state_string;
  198. printbuf_reset(tok->pb);
  199. tok->quote_char = c;
  200. break;
  201. case 'T':
  202. case 't':
  203. case 'F':
  204. case 'f':
  205. state = json_tokener_state_boolean;
  206. printbuf_reset(tok->pb);
  207. tok->st_pos = 0;
  208. goto redo_char;
  209. #if defined(__GNUC__)
  210. case '0' ... '9':
  211. #else
  212. case '0':
  213. case '1':
  214. case '2':
  215. case '3':
  216. case '4':
  217. case '5':
  218. case '6':
  219. case '7':
  220. case '8':
  221. case '9':
  222. #endif
  223. case '-':
  224. state = json_tokener_state_number;
  225. printbuf_reset(tok->pb);
  226. tok->is_double = 0;
  227. goto redo_char;
  228. default:
  229. tok->err = json_tokener_error_parse_unexpected;
  230. goto out;
  231. }
  232. break;
  233. case json_tokener_state_finish:
  234. if(tok->depth == 0) goto out;
  235. obj = json_object_get(current);
  236. json_tokener_reset_level(tok, tok->depth);
  237. tok->depth--;
  238. goto redo_char;
  239. case json_tokener_state_null:
  240. printbuf_memappend_fast(tok->pb, &c, 1);
  241. if(strncasecmp(json_null_str, tok->pb->buf,
  242. min(tok->st_pos+1, strlen(json_null_str))) == 0) {
  243. if(tok->st_pos == strlen(json_null_str)) {
  244. current = NULL;
  245. saved_state = json_tokener_state_finish;
  246. state = json_tokener_state_eatws;
  247. goto redo_char;
  248. }
  249. } else {
  250. tok->err = json_tokener_error_parse_null;
  251. goto out;
  252. }
  253. tok->st_pos++;
  254. break;
  255. case json_tokener_state_comment_start:
  256. if(c == '*') {
  257. state = json_tokener_state_comment;
  258. } else if(c == '/') {
  259. state = json_tokener_state_comment_eol;
  260. } else {
  261. tok->err = json_tokener_error_parse_comment;
  262. goto out;
  263. }
  264. printbuf_memappend_fast(tok->pb, &c, 1);
  265. break;
  266. case json_tokener_state_comment:
  267. {
  268. /* Advance until we change state */
  269. char *case_start = str;
  270. while(c != '*') {
  271. if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
  272. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  273. goto out;
  274. }
  275. }
  276. printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
  277. state = json_tokener_state_comment_end;
  278. }
  279. break;
  280. case json_tokener_state_comment_eol:
  281. {
  282. /* Advance until we change state */
  283. char *case_start = str;
  284. while(c != '\n') {
  285. if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
  286. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  287. goto out;
  288. }
  289. }
  290. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  291. MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
  292. state = json_tokener_state_eatws;
  293. }
  294. break;
  295. case json_tokener_state_comment_end:
  296. printbuf_memappend_fast(tok->pb, &c, 1);
  297. if(c == '/') {
  298. MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
  299. state = json_tokener_state_eatws;
  300. } else {
  301. state = json_tokener_state_comment;
  302. }
  303. break;
  304. case json_tokener_state_string:
  305. {
  306. /* Advance until we change state */
  307. char *case_start = str;
  308. while(1) {
  309. if(c == tok->quote_char) {
  310. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  311. current = json_object_new_string(tok->pb->buf);
  312. saved_state = json_tokener_state_finish;
  313. state = json_tokener_state_eatws;
  314. break;
  315. } else if(c == '\\') {
  316. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  317. saved_state = json_tokener_state_string;
  318. state = json_tokener_state_string_escape;
  319. break;
  320. }
  321. if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
  322. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  323. goto out;
  324. }
  325. }
  326. }
  327. break;
  328. case json_tokener_state_string_escape:
  329. switch(c) {
  330. case '"':
  331. case '\\':
  332. case '/':
  333. printbuf_memappend_fast(tok->pb, &c, 1);
  334. state = saved_state;
  335. break;
  336. case 'b':
  337. case 'n':
  338. case 'r':
  339. case 't':
  340. if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
  341. else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
  342. else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
  343. else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
  344. state = saved_state;
  345. break;
  346. case 'u':
  347. tok->ucs_char = 0;
  348. tok->st_pos = 0;
  349. state = json_tokener_state_escape_unicode;
  350. break;
  351. default:
  352. tok->err = json_tokener_error_parse_string;
  353. goto out;
  354. }
  355. break;
  356. case json_tokener_state_escape_unicode:
  357. /* Note that the following code is inefficient for handling large
  358. * chunks of extended chars, calling printbuf_memappend() once
  359. * for each multi-byte character of input.
  360. * This is a good area for future optimization.
  361. */
  362. {
  363. /* Advance until we change state */
  364. while(1) {
  365. if(strchr(json_hex_chars, c)) {
  366. tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
  367. if(tok->st_pos == 4) {
  368. unsigned char utf_out[3];
  369. if (tok->ucs_char < 0x80) {
  370. utf_out[0] = tok->ucs_char;
  371. printbuf_memappend_fast(tok->pb, (char*)utf_out, 1);
  372. } else if (tok->ucs_char < 0x800) {
  373. utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
  374. utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
  375. printbuf_memappend_fast(tok->pb, (char*)utf_out, 2);
  376. } else {
  377. utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
  378. utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
  379. utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
  380. printbuf_memappend_fast(tok->pb, (char*)utf_out, 3);
  381. }
  382. state = saved_state;
  383. break;
  384. }
  385. } else {
  386. tok->err = json_tokener_error_parse_string;
  387. goto out;
  388. }
  389. if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok))
  390. goto out;
  391. }
  392. }
  393. break;
  394. case json_tokener_state_boolean:
  395. printbuf_memappend_fast(tok->pb, &c, 1);
  396. if(strncasecmp(json_true_str, tok->pb->buf,
  397. min(tok->st_pos+1, strlen(json_true_str))) == 0) {
  398. if(tok->st_pos == strlen(json_true_str)) {
  399. current = json_object_new_boolean(1);
  400. saved_state = json_tokener_state_finish;
  401. state = json_tokener_state_eatws;
  402. goto redo_char;
  403. }
  404. } else if(strncasecmp(json_false_str, tok->pb->buf,
  405. min(tok->st_pos+1, strlen(json_false_str))) == 0) {
  406. if(tok->st_pos == strlen(json_false_str)) {
  407. current = json_object_new_boolean(0);
  408. saved_state = json_tokener_state_finish;
  409. state = json_tokener_state_eatws;
  410. goto redo_char;
  411. }
  412. } else {
  413. tok->err = json_tokener_error_parse_boolean;
  414. goto out;
  415. }
  416. tok->st_pos++;
  417. break;
  418. case json_tokener_state_number:
  419. {
  420. /* Advance until we change state */
  421. char *case_start = str;
  422. int case_len=0;
  423. while(c && strchr(json_number_chars, c)) {
  424. ++case_len;
  425. if(c == '.' || c == 'e') tok->is_double = 1;
  426. if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
  427. printbuf_memappend_fast(tok->pb, case_start, case_len);
  428. goto out;
  429. }
  430. }
  431. if (case_len>0)
  432. printbuf_memappend_fast(tok->pb, case_start, case_len);
  433. }
  434. {
  435. int numi;
  436. double numd;
  437. if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
  438. current = json_object_new_int(numi);
  439. } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
  440. current = json_object_new_double(numd);
  441. } else {
  442. tok->err = json_tokener_error_parse_number;
  443. goto out;
  444. }
  445. saved_state = json_tokener_state_finish;
  446. state = json_tokener_state_eatws;
  447. goto redo_char;
  448. }
  449. break;
  450. case json_tokener_state_array:
  451. if(c == ']') {
  452. saved_state = json_tokener_state_finish;
  453. state = json_tokener_state_eatws;
  454. } else {
  455. if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
  456. tok->err = json_tokener_error_depth;
  457. goto out;
  458. }
  459. state = json_tokener_state_array_add;
  460. tok->depth++;
  461. json_tokener_reset_level(tok, tok->depth);
  462. goto redo_char;
  463. }
  464. break;
  465. case json_tokener_state_array_add:
  466. json_object_array_add(current, obj);
  467. saved_state = json_tokener_state_array_sep;
  468. state = json_tokener_state_eatws;
  469. goto redo_char;
  470. case json_tokener_state_array_sep:
  471. if(c == ']') {
  472. saved_state = json_tokener_state_finish;
  473. state = json_tokener_state_eatws;
  474. } else if(c == ',') {
  475. saved_state = json_tokener_state_array;
  476. state = json_tokener_state_eatws;
  477. } else {
  478. tok->err = json_tokener_error_parse_array;
  479. goto out;
  480. }
  481. break;
  482. case json_tokener_state_object_field_start:
  483. if(c == '}') {
  484. saved_state = json_tokener_state_finish;
  485. state = json_tokener_state_eatws;
  486. } else if (c == '"' || c == '\'') {
  487. tok->quote_char = c;
  488. printbuf_reset(tok->pb);
  489. state = json_tokener_state_object_field;
  490. } else {
  491. tok->err = json_tokener_error_parse_object_key_name;
  492. goto out;
  493. }
  494. break;
  495. case json_tokener_state_object_field:
  496. {
  497. /* Advance until we change state */
  498. char *case_start = str;
  499. while(1) {
  500. if(c == tok->quote_char) {
  501. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  502. obj_field_name = strdup(tok->pb->buf);
  503. saved_state = json_tokener_state_object_field_end;
  504. state = json_tokener_state_eatws;
  505. break;
  506. } else if(c == '\\') {
  507. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  508. saved_state = json_tokener_state_object_field;
  509. state = json_tokener_state_string_escape;
  510. break;
  511. }
  512. if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
  513. printbuf_memappend_fast(tok->pb, case_start, str-case_start);
  514. goto out;
  515. }
  516. }
  517. }
  518. break;
  519. case json_tokener_state_object_field_end:
  520. if(c == ':') {
  521. saved_state = json_tokener_state_object_value;
  522. state = json_tokener_state_eatws;
  523. } else {
  524. tok->err = json_tokener_error_parse_object_key_sep;
  525. goto out;
  526. }
  527. break;
  528. case json_tokener_state_object_value:
  529. if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
  530. tok->err = json_tokener_error_depth;
  531. goto out;
  532. }
  533. state = json_tokener_state_object_value_add;
  534. tok->depth++;
  535. json_tokener_reset_level(tok, tok->depth);
  536. goto redo_char;
  537. case json_tokener_state_object_value_add:
  538. json_object_object_add(current, obj_field_name, obj);
  539. free(obj_field_name);
  540. obj_field_name = NULL;
  541. saved_state = json_tokener_state_object_sep;
  542. state = json_tokener_state_eatws;
  543. goto redo_char;
  544. case json_tokener_state_object_sep:
  545. if(c == '}') {
  546. saved_state = json_tokener_state_finish;
  547. state = json_tokener_state_eatws;
  548. } else if(c == ',') {
  549. saved_state = json_tokener_state_object_field_start;
  550. state = json_tokener_state_eatws;
  551. } else {
  552. tok->err = json_tokener_error_parse_object_value_sep;
  553. goto out;
  554. }
  555. break;
  556. }
  557. if (!ADVANCE_CHAR(str, tok))
  558. goto out;
  559. } /* while(POP_CHAR) */
  560. out:
  561. if (!c) { /* We hit an eof char (0) */
  562. if(state != json_tokener_state_finish &&
  563. saved_state != json_tokener_state_finish)
  564. tok->err = json_tokener_error_parse_eof;
  565. }
  566. if(tok->err == json_tokener_success) return json_object_get(current);
  567. MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
  568. json_tokener_errors[tok->err], tok->char_offset);
  569. return NULL;
  570. }