You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

json_tokener.c 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. /*
  2. * $Id: json_tokener.c,v 1.10 2004/07/27 00:42:31 mclark Exp $
  3. *
  4. * Copyright Metaparadigm Pte. Ltd. 2004.
  5. * Michael Clark <michael@metaparadigm.com>
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public (LGPL)
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details: http://www.gnu.org/
  16. *
  17. */
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <ctype.h>
  21. #include <string.h>
  22. #include "bits.h"
  23. #include "debug.h"
  24. #include "printbuf.h"
  25. #include "arraylist.h"
  26. #include "json_object.h"
  27. #include "json_tokener.h"
  28. static struct json_object* json_tokener_do_parse(struct json_tokener *this);
  29. struct json_object* json_tokener_parse(char * s)
  30. {
  31. struct json_tokener tok;
  32. struct json_object* obj;
  33. tok.source = s;
  34. tok.pos = 0;
  35. tok.pb = printbuf_new();
  36. obj = json_tokener_do_parse(&tok);
  37. printbuf_free(tok.pb);
  38. return obj;
  39. }
  40. static struct json_object* json_tokener_do_parse(struct json_tokener *this)
  41. {
  42. enum json_tokener_state state, saved_state;
  43. enum json_tokener_error err = json_tokener_success;
  44. struct json_object *current = NULL, *obj;
  45. char *obj_field_name = NULL;
  46. char quote_char;
  47. int deemed_double, start_offset;
  48. state = json_tokener_state_eatws;
  49. saved_state = json_tokener_state_start;
  50. char c;
  51. do {
  52. c = this->source[this->pos];
  53. switch(state) {
  54. case json_tokener_state_eatws:
  55. if(isspace(c)) {
  56. this->pos++;
  57. } else if(c == '/') {
  58. state = json_tokener_state_comment_start;
  59. start_offset = this->pos++;
  60. } else {
  61. state = saved_state;
  62. }
  63. break;
  64. case json_tokener_state_start:
  65. switch(c) {
  66. case '{':
  67. state = json_tokener_state_eatws;
  68. saved_state = json_tokener_state_object;
  69. current = json_object_new_object();
  70. this->pos++;
  71. break;
  72. case '[':
  73. state = json_tokener_state_eatws;
  74. saved_state = json_tokener_state_array;
  75. current = json_object_new_array();
  76. this->pos++;
  77. break;
  78. case 'N':
  79. case 'n':
  80. state = json_tokener_state_null;
  81. start_offset = this->pos++;
  82. break;
  83. case '"':
  84. case '\'':
  85. quote_char = c;
  86. printbuf_reset(this->pb);
  87. state = json_tokener_state_string;
  88. start_offset = ++this->pos;
  89. break;
  90. case 'T':
  91. case 't':
  92. case 'F':
  93. case 'f':
  94. state = json_tokener_state_boolean;
  95. start_offset = this->pos++;
  96. break;
  97. case '0' ... '9':
  98. case '-':
  99. deemed_double = 0;
  100. state = json_tokener_state_number;
  101. start_offset = this->pos++;
  102. break;
  103. default:
  104. err = json_tokener_error_parse_unexpected;
  105. goto out;
  106. }
  107. break;
  108. case json_tokener_state_finish:
  109. goto out;
  110. case json_tokener_state_null:
  111. if(strncasecmp("null", this->source + start_offset,
  112. this->pos - start_offset))
  113. return error_ptr(-json_tokener_error_parse_null);
  114. if(this->pos - start_offset == 4) {
  115. current = NULL;
  116. saved_state = json_tokener_state_finish;
  117. state = json_tokener_state_eatws;
  118. } else {
  119. this->pos++;
  120. }
  121. break;
  122. case json_tokener_state_comment_start:
  123. if(c == '*') {
  124. state = json_tokener_state_comment;
  125. } else if(c == '/') {
  126. state = json_tokener_state_comment_eol;
  127. } else {
  128. err = json_tokener_error_parse_comment;
  129. goto out;
  130. }
  131. this->pos++;
  132. break;
  133. case json_tokener_state_comment:
  134. if(c == '*') state = json_tokener_state_comment_end;
  135. this->pos++;
  136. break;
  137. case json_tokener_state_comment_eol:
  138. if(c == '\n') {
  139. if(mc_get_debug()) {
  140. char *tmp = strndup(this->source + start_offset,
  141. this->pos - start_offset);
  142. mc_debug("json_tokener_comment: %s\n", tmp);
  143. free(tmp);
  144. }
  145. state = json_tokener_state_eatws;
  146. }
  147. this->pos++;
  148. break;
  149. case json_tokener_state_comment_end:
  150. if(c == '/') {
  151. if(mc_get_debug()) {
  152. char *tmp = strndup(this->source + start_offset,
  153. this->pos - start_offset + 1);
  154. mc_debug("json_tokener_comment: %s\n", tmp);
  155. free(tmp);
  156. }
  157. state = json_tokener_state_eatws;
  158. } else {
  159. state = json_tokener_state_comment;
  160. }
  161. this->pos++;
  162. break;
  163. case json_tokener_state_string:
  164. if(c == quote_char) {
  165. printbuf_memappend(this->pb, this->source + start_offset,
  166. this->pos - start_offset);
  167. current = json_object_new_string(this->pb->buf);
  168. saved_state = json_tokener_state_finish;
  169. state = json_tokener_state_eatws;
  170. } else if(c == '\\') {
  171. saved_state = json_tokener_state_string;
  172. state = json_tokener_state_string_escape;
  173. }
  174. this->pos++;
  175. break;
  176. case json_tokener_state_string_escape:
  177. switch(c) {
  178. case '"':
  179. case '\\':
  180. printbuf_memappend(this->pb, this->source + start_offset,
  181. this->pos - start_offset - 1);
  182. start_offset = this->pos++;
  183. state = saved_state;
  184. break;
  185. case 'b':
  186. case 'n':
  187. case 'r':
  188. case 't':
  189. printbuf_memappend(this->pb, this->source + start_offset,
  190. this->pos - start_offset - 1);
  191. if(c == 'b') printbuf_memappend(this->pb, "\b", 1);
  192. else if(c == 'n') printbuf_memappend(this->pb, "\n", 1);
  193. else if(c == 'r') printbuf_memappend(this->pb, "\r", 1);
  194. else if(c == 't') printbuf_memappend(this->pb, "\t", 1);
  195. start_offset = ++this->pos;
  196. state = saved_state;
  197. break;
  198. case 'u':
  199. printbuf_memappend(this->pb, this->source + start_offset,
  200. this->pos - start_offset - 1);
  201. start_offset = ++this->pos;
  202. state = json_tokener_state_escape_unicode;
  203. break;
  204. default:
  205. err = json_tokener_error_parse_string;
  206. goto out;
  207. }
  208. break;
  209. case json_tokener_state_escape_unicode:
  210. if(strchr(json_hex_chars, c)) {
  211. this->pos++;
  212. if(this->pos - start_offset == 4) {
  213. unsigned char utf_out[3];
  214. unsigned int ucs_char =
  215. (hexdigit(*(this->source + start_offset)) << 12) +
  216. (hexdigit(*(this->source + start_offset + 1)) << 8) +
  217. (hexdigit(*(this->source + start_offset + 2)) << 4) +
  218. hexdigit(*(this->source + start_offset + 3));
  219. if (ucs_char < 0x80) {
  220. utf_out[0] = ucs_char;
  221. printbuf_memappend(this->pb, utf_out, 1);
  222. } else if (ucs_char < 0x800) {
  223. utf_out[0] = 0xc0 | (ucs_char >> 6);
  224. utf_out[1] = 0x80 | (ucs_char & 0x3f);
  225. printbuf_memappend(this->pb, utf_out, 2);
  226. } else {
  227. utf_out[0] = 0xe0 | (ucs_char >> 12);
  228. utf_out[1] = 0x80 | ((ucs_char >> 6) & 0x3f);
  229. utf_out[2] = 0x80 | (ucs_char & 0x3f);
  230. printbuf_memappend(this->pb, utf_out, 3);
  231. }
  232. start_offset = this->pos;
  233. state = saved_state;
  234. }
  235. } else {
  236. err = json_tokener_error_parse_string;
  237. goto out;
  238. }
  239. break;
  240. case json_tokener_state_boolean:
  241. if(strncasecmp("true", this->source + start_offset,
  242. this->pos - start_offset) == 0) {
  243. if(this->pos - start_offset == 4) {
  244. current = json_object_new_boolean(1);
  245. saved_state = json_tokener_state_finish;
  246. state = json_tokener_state_eatws;
  247. } else {
  248. this->pos++;
  249. }
  250. } else if(strncasecmp("false", this->source + start_offset,
  251. this->pos - start_offset) == 0) {
  252. if(this->pos - start_offset == 5) {
  253. current = json_object_new_boolean(0);
  254. saved_state = json_tokener_state_finish;
  255. state = json_tokener_state_eatws;
  256. } else {
  257. this->pos++;
  258. }
  259. } else {
  260. err = json_tokener_error_parse_boolean;
  261. goto out;
  262. }
  263. break;
  264. case json_tokener_state_number:
  265. if(!c || !strchr(json_number_chars, c)) {
  266. int numi;
  267. double numd;
  268. char *tmp = strndup(this->source + start_offset,
  269. this->pos - start_offset);
  270. if(!deemed_double && sscanf(tmp, "%d", &numi) == 1) {
  271. current = json_object_new_int(numi);
  272. } else if(deemed_double && sscanf(tmp, "%lf", &numd) == 1) {
  273. current = json_object_new_double(numd);
  274. } else {
  275. free(tmp);
  276. err = json_tokener_error_parse_number;
  277. goto out;
  278. }
  279. free(tmp);
  280. saved_state = json_tokener_state_finish;
  281. state = json_tokener_state_eatws;
  282. } else {
  283. if(c == '.' || c == 'e') deemed_double = 1;
  284. this->pos++;
  285. }
  286. break;
  287. case json_tokener_state_array:
  288. if(c == ']') {
  289. this->pos++;
  290. saved_state = json_tokener_state_finish;
  291. state = json_tokener_state_eatws;
  292. } else {
  293. obj = json_tokener_do_parse(this);
  294. if(is_error(obj)) {
  295. err = (enum json_tokener_error)obj;
  296. goto out;
  297. }
  298. json_object_array_add(current, obj);
  299. saved_state = json_tokener_state_array_sep;
  300. state = json_tokener_state_eatws;
  301. }
  302. break;
  303. case json_tokener_state_array_sep:
  304. if(c == ']') {
  305. this->pos++;
  306. saved_state = json_tokener_state_finish;
  307. state = json_tokener_state_eatws;
  308. } else if(c == ',') {
  309. this->pos++;
  310. saved_state = json_tokener_state_array;
  311. state = json_tokener_state_eatws;
  312. } else {
  313. json_object_put(current);
  314. return error_ptr(-json_tokener_error_parse_array);
  315. }
  316. break;
  317. case json_tokener_state_object:
  318. state = json_tokener_state_object_field_start;
  319. start_offset = this->pos;
  320. break;
  321. case json_tokener_state_object_field_start:
  322. if(c == '}') {
  323. this->pos++;
  324. saved_state = json_tokener_state_finish;
  325. state = json_tokener_state_eatws;
  326. } else if (c == '"' || c == '\'') {
  327. quote_char = c;
  328. printbuf_reset(this->pb);
  329. state = json_tokener_state_object_field;
  330. start_offset = ++this->pos;
  331. }
  332. break;
  333. case json_tokener_state_object_field:
  334. if(c == quote_char) {
  335. printbuf_memappend(this->pb, this->source + start_offset,
  336. this->pos - start_offset);
  337. obj_field_name = strdup(this->pb->buf);
  338. saved_state = json_tokener_state_object_field_end;
  339. state = json_tokener_state_eatws;
  340. } else if(c == '\\') {
  341. saved_state = json_tokener_state_object_field;
  342. state = json_tokener_state_string_escape;
  343. }
  344. this->pos++;
  345. break;
  346. case json_tokener_state_object_field_end:
  347. if(c == ':') {
  348. this->pos++;
  349. saved_state = json_tokener_state_object_value;
  350. state = json_tokener_state_eatws;
  351. } else {
  352. return error_ptr(-json_tokener_error_parse_object);
  353. }
  354. break;
  355. case json_tokener_state_object_value:
  356. obj = json_tokener_do_parse(this);
  357. if(is_error(obj)) {
  358. err = (enum json_tokener_error)obj;
  359. goto out;
  360. }
  361. json_object_object_add(current, obj_field_name, obj);
  362. free(obj_field_name);
  363. obj_field_name = NULL;
  364. saved_state = json_tokener_state_object_sep;
  365. state = json_tokener_state_eatws;
  366. break;
  367. case json_tokener_state_object_sep:
  368. if(c == '}') {
  369. this->pos++;
  370. saved_state = json_tokener_state_finish;
  371. state = json_tokener_state_eatws;
  372. } else if(c == ',') {
  373. this->pos++;
  374. saved_state = json_tokener_state_object;
  375. state = json_tokener_state_eatws;
  376. } else {
  377. err = json_tokener_error_parse_object;
  378. goto out;
  379. }
  380. break;
  381. }
  382. } while(c);
  383. if(state != json_tokener_state_finish &&
  384. saved_state != json_tokener_state_finish)
  385. err = json_tokener_error_parse_eof;
  386. out:
  387. free(obj_field_name);
  388. if(err == json_tokener_success) return current;
  389. mc_debug("json_tokener_do_parse: error=%d state=%d char=%c\n",
  390. err, state, c);
  391. json_object_put(current);
  392. return error_ptr(-err);
  393. }

No Description

Contributors (1)