|
|
@@ -7,6 +7,10 @@ |
|
|
|
* This library is free software; you can redistribute it and/or modify |
|
|
|
* it under the terms of the MIT license. See COPYING for details. |
|
|
|
* |
|
|
|
* |
|
|
|
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved. |
|
|
|
* The copyrights to the contents of this file are licensed under the MIT License |
|
|
|
* (http://www.opensource.org/licenses/mit-license.php) |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "config.h" |
|
|
@@ -135,35 +139,68 @@ char* strndup(const char* str, size_t n) |
|
|
|
#define current tok->stack[tok->depth].current |
|
|
|
#define obj_field_name tok->stack[tok->depth].obj_field_name |
|
|
|
|
|
|
|
/* Optimization: |
|
|
|
* json_tokener_parse_ex() consumed a lot of CPU in its main loop, |
|
|
|
* iterating character-by character. A large performance boost is |
|
|
|
* achieved by using tighter loops to locally handle units such as |
|
|
|
* comments and strings. Loops that handle an entire token within |
|
|
|
* their scope also gather entire strings and pass them to |
|
|
|
* printbuf_memappend() in a single call, rather than calling |
|
|
|
* printbuf_memappend() one char at a time. |
|
|
|
* |
|
|
|
* POP_CHAR() and ADVANCE_CHAR() macros are used for code that is |
|
|
|
* common to both the main loop and the tighter loops. |
|
|
|
*/ |
|
|
|
|
|
|
|
/* POP_CHAR(dest, tok) macro: |
|
|
|
* Not really a pop()...peeks at the current char and stores it in dest. |
|
|
|
* Returns 1 on success, sets tok->err and returns 0 if no more chars. |
|
|
|
* Implicit inputs: str, len vars |
|
|
|
*/ |
|
|
|
#define POP_CHAR(dest, tok) \ |
|
|
|
(((tok)->char_offset == len) ? \ |
|
|
|
(((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \ |
|
|
|
(((tok)->err = json_tokener_success), 0) \ |
|
|
|
: \ |
|
|
|
(((tok)->err = json_tokener_continue), 0) \ |
|
|
|
) : \ |
|
|
|
(((dest) = *str), 1) \ |
|
|
|
) |
|
|
|
|
|
|
|
/* ADVANCE_CHAR() macro: |
|
|
|
* Incrementes str & tok->char_offset. |
|
|
|
* For convenience of existing conditionals, returns the old value of c (0 on eof) |
|
|
|
* Implicit inputs: c var |
|
|
|
*/ |
|
|
|
#define ADVANCE_CHAR(str, tok) \ |
|
|
|
( ++(str), ((tok)->char_offset)++, c) |
|
|
|
|
|
|
|
/* End optimization macro defs */ |
|
|
|
|
|
|
|
|
|
|
|
struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
char *str, int len) |
|
|
|
{ |
|
|
|
struct json_object *obj = NULL; |
|
|
|
char c; |
|
|
|
char c = '\1'; |
|
|
|
|
|
|
|
tok->char_offset = 0; |
|
|
|
tok->err = json_tokener_success; |
|
|
|
|
|
|
|
do { |
|
|
|
if(tok->char_offset == len) { |
|
|
|
if(tok->depth == 0 && state == json_tokener_state_eatws && |
|
|
|
saved_state == json_tokener_state_finish) |
|
|
|
tok->err = json_tokener_success; |
|
|
|
else |
|
|
|
tok->err = json_tokener_continue; |
|
|
|
goto out; |
|
|
|
} |
|
|
|
while (POP_CHAR(c, tok)) { |
|
|
|
|
|
|
|
c = *str; |
|
|
|
redo_char: |
|
|
|
switch(state) { |
|
|
|
|
|
|
|
case json_tokener_state_eatws: |
|
|
|
if(isspace(c)) { |
|
|
|
/* okay */ |
|
|
|
} else if(c == '/') { |
|
|
|
/* Advance until we change state */ |
|
|
|
while (isspace(c)) { |
|
|
|
if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok))) |
|
|
|
goto out; |
|
|
|
} |
|
|
|
if(c == '/') { |
|
|
|
printbuf_reset(tok->pb); |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
printbuf_memappend_fast(tok->pb, &c, 1); |
|
|
|
state = json_tokener_state_comment_start; |
|
|
|
} else { |
|
|
|
state = saved_state; |
|
|
@@ -236,7 +273,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
goto redo_char; |
|
|
|
|
|
|
|
case json_tokener_state_null: |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
printbuf_memappend_fast(tok->pb, &c, 1); |
|
|
|
if(strncasecmp(json_null_str, tok->pb->buf, |
|
|
|
min(tok->st_pos+1, strlen(json_null_str))) == 0) { |
|
|
|
if(tok->st_pos == strlen(json_null_str)) { |
|
|
@@ -261,25 +298,42 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
tok->err = json_tokener_error_parse_comment; |
|
|
|
goto out; |
|
|
|
} |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
printbuf_memappend_fast(tok->pb, &c, 1); |
|
|
|
break; |
|
|
|
|
|
|
|
case json_tokener_state_comment: |
|
|
|
if(c == '*') state = json_tokener_state_comment_end; |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
break; |
|
|
|
{ |
|
|
|
/* Advance until we change state */ |
|
|
|
char *case_start = str; |
|
|
|
while(c != '*') { |
|
|
|
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
goto out; |
|
|
|
} |
|
|
|
} |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start); |
|
|
|
state = json_tokener_state_comment_end; |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case json_tokener_state_comment_eol: |
|
|
|
if(c == '\n') { |
|
|
|
{ |
|
|
|
/* Advance until we change state */ |
|
|
|
char *case_start = str; |
|
|
|
while(c != '\n') { |
|
|
|
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
goto out; |
|
|
|
} |
|
|
|
} |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
|
|
|
state = json_tokener_state_eatws; |
|
|
|
} else { |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case json_tokener_state_comment_end: |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
printbuf_memappend_fast(tok->pb, &c, 1); |
|
|
|
if(c == '/') { |
|
|
|
MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); |
|
|
|
state = json_tokener_state_eatws; |
|
|
@@ -289,15 +343,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
break; |
|
|
|
|
|
|
|
case json_tokener_state_string: |
|
|
|
if(c == tok->quote_char) { |
|
|
|
current = json_object_new_string(tok->pb->buf); |
|
|
|
saved_state = json_tokener_state_finish; |
|
|
|
state = json_tokener_state_eatws; |
|
|
|
} else if(c == '\\') { |
|
|
|
saved_state = json_tokener_state_string; |
|
|
|
state = json_tokener_state_string_escape; |
|
|
|
} else { |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
{ |
|
|
|
/* Advance until we change state */ |
|
|
|
char *case_start = str; |
|
|
|
while(1) { |
|
|
|
if(c == tok->quote_char) { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
current = json_object_new_string(tok->pb->buf); |
|
|
|
saved_state = json_tokener_state_finish; |
|
|
|
state = json_tokener_state_eatws; |
|
|
|
break; |
|
|
|
} else if(c == '\\') { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
saved_state = json_tokener_state_string; |
|
|
|
state = json_tokener_state_string_escape; |
|
|
|
break; |
|
|
|
} |
|
|
|
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
goto out; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
@@ -306,17 +372,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
case '"': |
|
|
|
case '\\': |
|
|
|
case '/': |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
printbuf_memappend_fast(tok->pb, &c, 1); |
|
|
|
state = saved_state; |
|
|
|
break; |
|
|
|
case 'b': |
|
|
|
case 'n': |
|
|
|
case 'r': |
|
|
|
case 't': |
|
|
|
if(c == 'b') printbuf_memappend(tok->pb, "\b", 1); |
|
|
|
else if(c == 'n') printbuf_memappend(tok->pb, "\n", 1); |
|
|
|
else if(c == 'r') printbuf_memappend(tok->pb, "\r", 1); |
|
|
|
else if(c == 't') printbuf_memappend(tok->pb, "\t", 1); |
|
|
|
if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1); |
|
|
|
else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1); |
|
|
|
else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1); |
|
|
|
else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1); |
|
|
|
state = saved_state; |
|
|
|
break; |
|
|
|
case 'u': |
|
|
@@ -331,33 +397,46 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
break; |
|
|
|
|
|
|
|
case json_tokener_state_escape_unicode: |
|
|
|
if(strchr(json_hex_chars, c)) { |
|
|
|
tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4)); |
|
|
|
if(tok->st_pos == 4) { |
|
|
|
unsigned char utf_out[3]; |
|
|
|
if (tok->ucs_char < 0x80) { |
|
|
|
utf_out[0] = tok->ucs_char; |
|
|
|
printbuf_memappend(tok->pb, (char*)utf_out, 1); |
|
|
|
} else if (tok->ucs_char < 0x800) { |
|
|
|
utf_out[0] = 0xc0 | (tok->ucs_char >> 6); |
|
|
|
utf_out[1] = 0x80 | (tok->ucs_char & 0x3f); |
|
|
|
printbuf_memappend(tok->pb, (char*)utf_out, 2); |
|
|
|
} else { |
|
|
|
utf_out[0] = 0xe0 | (tok->ucs_char >> 12); |
|
|
|
utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
|
|
|
utf_out[2] = 0x80 | (tok->ucs_char & 0x3f); |
|
|
|
printbuf_memappend(tok->pb, (char*)utf_out, 3); |
|
|
|
} |
|
|
|
state = saved_state; |
|
|
|
/* Note that the following code is inefficient for handling large |
|
|
|
* chunks of extended chars, calling printbuf_memappend() once |
|
|
|
* for each multi-byte character of input. |
|
|
|
* This is a good area for future optimization. |
|
|
|
*/ |
|
|
|
{ |
|
|
|
/* Advance until we change state */ |
|
|
|
while(1) { |
|
|
|
if(strchr(json_hex_chars, c)) { |
|
|
|
tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4)); |
|
|
|
if(tok->st_pos == 4) { |
|
|
|
unsigned char utf_out[3]; |
|
|
|
if (tok->ucs_char < 0x80) { |
|
|
|
utf_out[0] = tok->ucs_char; |
|
|
|
printbuf_memappend_fast(tok->pb, (char*)utf_out, 1); |
|
|
|
} else if (tok->ucs_char < 0x800) { |
|
|
|
utf_out[0] = 0xc0 | (tok->ucs_char >> 6); |
|
|
|
utf_out[1] = 0x80 | (tok->ucs_char & 0x3f); |
|
|
|
printbuf_memappend_fast(tok->pb, (char*)utf_out, 2); |
|
|
|
} else { |
|
|
|
utf_out[0] = 0xe0 | (tok->ucs_char >> 12); |
|
|
|
utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); |
|
|
|
utf_out[2] = 0x80 | (tok->ucs_char & 0x3f); |
|
|
|
printbuf_memappend_fast(tok->pb, (char*)utf_out, 3); |
|
|
|
} |
|
|
|
state = saved_state; |
|
|
|
break; |
|
|
|
} |
|
|
|
} else { |
|
|
|
tok->err = json_tokener_error_parse_string; |
|
|
|
goto out; |
|
|
|
} |
|
|
|
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) |
|
|
|
goto out; |
|
|
|
} |
|
|
|
} else { |
|
|
|
tok->err = json_tokener_error_parse_string; |
|
|
|
goto out; |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case json_tokener_state_boolean: |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
printbuf_memappend_fast(tok->pb, &c, 1); |
|
|
|
if(strncasecmp(json_true_str, tok->pb->buf, |
|
|
|
min(tok->st_pos+1, strlen(json_true_str))) == 0) { |
|
|
|
if(tok->st_pos == strlen(json_true_str)) { |
|
|
@@ -382,23 +461,35 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
break; |
|
|
|
|
|
|
|
case json_tokener_state_number: |
|
|
|
if(c && strchr(json_number_chars, c)) { |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
if(c == '.' || c == 'e' || c == 'E') tok->is_double = 1; |
|
|
|
} else { |
|
|
|
int numi; |
|
|
|
double numd; |
|
|
|
if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) { |
|
|
|
current = json_object_new_int(numi); |
|
|
|
} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) { |
|
|
|
current = json_object_new_double(numd); |
|
|
|
} else { |
|
|
|
tok->err = json_tokener_error_parse_number; |
|
|
|
goto out; |
|
|
|
{ |
|
|
|
/* Advance until we change state */ |
|
|
|
char *case_start = str; |
|
|
|
int case_len=0; |
|
|
|
while(c && strchr(json_number_chars, c)) { |
|
|
|
++case_len; |
|
|
|
if(c == '.' || c == 'e') tok->is_double = 1; |
|
|
|
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, case_len); |
|
|
|
goto out; |
|
|
|
} |
|
|
|
} |
|
|
|
saved_state = json_tokener_state_finish; |
|
|
|
state = json_tokener_state_eatws; |
|
|
|
goto redo_char; |
|
|
|
if (case_len>0) |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, case_len); |
|
|
|
} |
|
|
|
{ |
|
|
|
int numi; |
|
|
|
double numd; |
|
|
|
if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) { |
|
|
|
current = json_object_new_int(numi); |
|
|
|
} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) { |
|
|
|
current = json_object_new_double(numd); |
|
|
|
} else { |
|
|
|
tok->err = json_tokener_error_parse_number; |
|
|
|
goto out; |
|
|
|
} |
|
|
|
saved_state = json_tokener_state_finish; |
|
|
|
state = json_tokener_state_eatws; |
|
|
|
goto redo_char; |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
@@ -452,15 +543,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
break; |
|
|
|
|
|
|
|
case json_tokener_state_object_field: |
|
|
|
if(c == tok->quote_char) { |
|
|
|
obj_field_name = strdup(tok->pb->buf); |
|
|
|
saved_state = json_tokener_state_object_field_end; |
|
|
|
state = json_tokener_state_eatws; |
|
|
|
} else if(c == '\\') { |
|
|
|
saved_state = json_tokener_state_object_field; |
|
|
|
state = json_tokener_state_string_escape; |
|
|
|
} else { |
|
|
|
printbuf_memappend(tok->pb, &c, 1); |
|
|
|
{ |
|
|
|
/* Advance until we change state */ |
|
|
|
char *case_start = str; |
|
|
|
while(1) { |
|
|
|
if(c == tok->quote_char) { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
obj_field_name = strdup(tok->pb->buf); |
|
|
|
saved_state = json_tokener_state_object_field_end; |
|
|
|
state = json_tokener_state_eatws; |
|
|
|
break; |
|
|
|
} else if(c == '\\') { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
saved_state = json_tokener_state_object_field; |
|
|
|
state = json_tokener_state_string_escape; |
|
|
|
break; |
|
|
|
} |
|
|
|
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) { |
|
|
|
printbuf_memappend_fast(tok->pb, case_start, str-case_start); |
|
|
|
goto out; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
@@ -506,15 +609,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, |
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
str++; |
|
|
|
tok->char_offset++; |
|
|
|
} while(c); |
|
|
|
|
|
|
|
if(state != json_tokener_state_finish && |
|
|
|
saved_state != json_tokener_state_finish) |
|
|
|
tok->err = json_tokener_error_parse_eof; |
|
|
|
if (!ADVANCE_CHAR(str, tok)) |
|
|
|
goto out; |
|
|
|
} /* while(POP_CHAR) */ |
|
|
|
|
|
|
|
out: |
|
|
|
if (!c) { /* We hit an eof char (0) */ |
|
|
|
if(state != json_tokener_state_finish && |
|
|
|
saved_state != json_tokener_state_finish) |
|
|
|
tok->err = json_tokener_error_parse_eof; |
|
|
|
} |
|
|
|
|
|
|
|
if(tok->err == json_tokener_success) return json_object_get(current); |
|
|
|
MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", |
|
|
|
json_tokener_errors[tok->err], tok->char_offset); |
|
|
|