This causes previously failing strings like "123-456" to return a valid json_object with the appropriate value. If you care about the trailing content, call json_tokener_parse_ex() and check the parse end point with json_tokener_get_parse_end().

tags/json-c-0.15-20200726
| @@ -30,6 +30,9 @@ Other changes | |||||
| * Fix incremental parsing of numbers, especially those with exponents, e.g. | * Fix incremental parsing of numbers, especially those with exponents, e.g. | ||||
| so parsing "[0", "e+", "-]" now properly returns an error. | so parsing "[0", "e+", "-]" now properly returns an error. | ||||
| Strict mode now rejects missing exponents ("0e"). | Strict mode now rejects missing exponents ("0e"). | ||||
| * Successfully return number objects at the top level even when they are | |||||
| followed by a "-", "." or "e". This makes parsing things like "123-45" | |||||
| behave consistently with things like "123xyz". | |||||
| *** | *** | ||||
| @@ -54,7 +54,6 @@ | |||||
| // Don't define this. It's not thread-safe. | // Don't define this. It's not thread-safe. | ||||
| /* #define REFCOUNT_DEBUG 1 */ | /* #define REFCOUNT_DEBUG 1 */ | ||||
| const char *json_number_chars = "0123456789.+-eE"; | |||||
| const char *json_hex_chars = "0123456789abcdefABCDEF"; | const char *json_hex_chars = "0123456789abcdefABCDEF"; | ||||
| static void json_object_generic_delete(struct json_object *jso); | static void json_object_generic_delete(struct json_object *jso); | ||||
| @@ -98,7 +98,6 @@ struct json_object_string | |||||
| void _json_c_set_last_err(const char *err_fmt, ...); | void _json_c_set_last_err(const char *err_fmt, ...); | ||||
| extern const char *json_number_chars; | |||||
| extern const char *json_hex_chars; | extern const char *json_hex_chars; | ||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| @@ -193,7 +193,17 @@ struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokene | |||||
| return NULL; | return NULL; | ||||
| obj = json_tokener_parse_ex(tok, str, -1); | obj = json_tokener_parse_ex(tok, str, -1); | ||||
| *error = tok->err; | *error = tok->err; | ||||
| if (tok->err != json_tokener_success) | |||||
| if (tok->err != json_tokener_success | |||||
| #if 0 | |||||
| /* This would be a more sensible default, and cause parsing | |||||
| * things like "null123" to fail when the caller can't know | |||||
| * where the parsing left off, but starting to fail would | |||||
| * be a notable behaviour change. Save for a 1.0 release. | |||||
| */ | |||||
| || json_tokener_get_parse_end(tok) != strlen(str) | |||||
| #endif | |||||
| ) | |||||
| { | { | ||||
| if (obj != NULL) | if (obj != NULL) | ||||
| json_object_put(obj); | json_object_put(obj); | ||||
| @@ -838,7 +848,8 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * | |||||
| const char *case_start = str; | const char *case_start = str; | ||||
| int case_len = 0; | int case_len = 0; | ||||
| int is_exponent = 0; | int is_exponent = 0; | ||||
| int negativesign_next_possible_location = 1; | |||||
| int neg_sign_ok = 1; | |||||
| int pos_sign_ok = 0; | |||||
| if (printbuf_length(tok->pb) > 0) | if (printbuf_length(tok->pb) > 0) | ||||
| { | { | ||||
| /* We don't save all state from the previous incremental parse | /* We don't save all state from the previous incremental parse | ||||
| @@ -852,14 +863,26 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * | |||||
| char *last_saved_char = | char *last_saved_char = | ||||
| &tok->pb->buf[printbuf_length(tok->pb) - 1]; | &tok->pb->buf[printbuf_length(tok->pb) - 1]; | ||||
| is_exponent = 1; | is_exponent = 1; | ||||
| pos_sign_ok = neg_sign_ok = 1; | |||||
| /* If the "e" isn't at the end, we can't start with a '-' */ | /* If the "e" isn't at the end, we can't start with a '-' */ | ||||
| if (e_loc != last_saved_char) | if (e_loc != last_saved_char) | ||||
| negativesign_next_possible_location = -1; | |||||
| { | |||||
| neg_sign_ok = 0; | |||||
| pos_sign_ok = 0; | |||||
| } | |||||
| // else leave it set to 1, i.e. start of the new input | // else leave it set to 1, i.e. start of the new input | ||||
| } | } | ||||
| } | } | ||||
| while (c && strchr(json_number_chars, c)) | |||||
| while (c && | |||||
| ((c >= '0' && c <= '9') || | |||||
| (!is_exponent && (c=='e' || c=='E')) || | |||||
| (neg_sign_ok && c=='-') || | |||||
| (pos_sign_ok && c=='+') || | |||||
| (!tok->is_double && c=='.') | |||||
| )) | |||||
| { | { | ||||
| pos_sign_ok = neg_sign_ok = 0; | |||||
| ++case_len; | ++case_len; | ||||
| /* non-digit characters checks */ | /* non-digit characters checks */ | ||||
| @@ -871,40 +894,16 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * | |||||
| switch (c) | switch (c) | ||||
| { | { | ||||
| case '.': | case '.': | ||||
| if (tok->is_double != 0) | |||||
| { | |||||
| /* '.' can only be found once, and out of the exponent part. | |||||
| * Thus, if the input is already flagged as double, it | |||||
| * is invalid. | |||||
| */ | |||||
| tok->err = json_tokener_error_parse_number; | |||||
| goto out; | |||||
| } | |||||
| tok->is_double = 1; | tok->is_double = 1; | ||||
| pos_sign_ok = 1; | |||||
| neg_sign_ok = 1; | |||||
| break; | break; | ||||
| case 'e': /* FALLTHRU */ | case 'e': /* FALLTHRU */ | ||||
| case 'E': | case 'E': | ||||
| if (is_exponent != 0) | |||||
| { | |||||
| /* only one exponent possible */ | |||||
| tok->err = json_tokener_error_parse_number; | |||||
| goto out; | |||||
| } | |||||
| is_exponent = 1; | is_exponent = 1; | ||||
| tok->is_double = 1; | tok->is_double = 1; | ||||
| /* the exponent part can begin with a negative sign */ | /* the exponent part can begin with a negative sign */ | ||||
| negativesign_next_possible_location = case_len + 1; | |||||
| break; | |||||
| case '-': | |||||
| if (case_len != negativesign_next_possible_location) | |||||
| { | |||||
| /* If the negative sign is not where expected (ie | |||||
| * start of input or start of exponent part), the | |||||
| * input is invalid. | |||||
| */ | |||||
| tok->err = json_tokener_error_parse_number; | |||||
| goto out; | |||||
| } | |||||
| pos_sign_ok = neg_sign_ok = 1; | |||||
| break; | break; | ||||
| default: break; | default: break; | ||||
| } | } | ||||
| @@ -915,6 +914,22 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * | |||||
| goto out; | goto out; | ||||
| } | } | ||||
| } | } | ||||
| /* | |||||
| Now we know c isn't a valid number char, but check whether | |||||
| it might have been intended to be, and return a potentially | |||||
| more understandable error right away. | |||||
| However, if we're at the top-level, use the number as-is | |||||
| because c can be part of a new object to parse on the | |||||
| next call to json_tokener_parse(). | |||||
| */ | |||||
| if (tok->depth > 0 && | |||||
| c != ',' && c != ']' && c != '}' && c != '/' && | |||||
| c != 'I' && c != 'i' && | |||||
| !isspace((unsigned char)c)) | |||||
| { | |||||
| tok->err = json_tokener_error_parse_number; | |||||
| goto out; | |||||
| } | |||||
| if (case_len > 0) | if (case_len > 0) | ||||
| printbuf_memappend_fast(tok->pb, case_start, case_len); | printbuf_memappend_fast(tok->pb, case_start, case_len); | ||||
| @@ -141,16 +141,18 @@ static void test_basic_parse() | |||||
| single_basic_parse("12", 0); | single_basic_parse("12", 0); | ||||
| single_basic_parse("12.3", 0); | single_basic_parse("12.3", 0); | ||||
| single_basic_parse("12.3.4", 0); /* non-sensical, returns null */ | |||||
| /* was returning (int)2015 before patch, should return null */ | |||||
| single_basic_parse("2015-01-15", 0); | |||||
| /* ...but this works. It's rather inconsistent, and a future major release | |||||
| * should change the behavior so it either always returns null when extra | |||||
| * bytes are present (preferred), or always return object created from as much | |||||
| * as was able to be parsed. | |||||
| /* Even though, when using json_tokener_parse() there's no way to | |||||
| * know when there is more data after the parsed object, | |||||
| * an object is successfully returned anyway (in some cases) | |||||
| */ | */ | ||||
| single_basic_parse("12.3.4", 0); | |||||
| single_basic_parse("2015-01-15", 0); | |||||
| single_basic_parse("12.3xxx", 0); | single_basic_parse("12.3xxx", 0); | ||||
| single_basic_parse("12.3{\"a\":123}", 0); | |||||
| single_basic_parse("12.3\n", 0); | |||||
| single_basic_parse("12.3 ", 0); | |||||
| single_basic_parse("{\"FoO\" : -12.3E512}", 0); | single_basic_parse("{\"FoO\" : -12.3E512}", 0); | ||||
| single_basic_parse("{\"FoO\" : -12.3e512}", 0); | single_basic_parse("{\"FoO\" : -12.3e512}", 0); | ||||
| @@ -368,7 +370,10 @@ struct incremental_step | |||||
| {"[0e-]", -1, -1, json_tokener_success, 1}, | {"[0e-]", -1, -1, json_tokener_success, 1}, | ||||
| {"[0e-]", -1, 4, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT}, | {"[0e-]", -1, 4, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT}, | ||||
| {"0e+-", 5, 3, json_tokener_error_parse_number, 1}, | |||||
| /* You might expect this to fail, but it won't because | |||||
| it's a valid partial parse; note the char_offset: */ | |||||
| {"0e+-", 5, 3, json_tokener_success, 1}, | |||||
| {"0e+-", 5, 3, json_tokener_error_parse_number, 1, JSON_TOKENER_STRICT}, | |||||
| {"[0e+-]", -1, 4, json_tokener_error_parse_number, 1}, | {"[0e+-]", -1, 4, json_tokener_error_parse_number, 1}, | ||||
| /* Similar tests for other kinds of objects: */ | /* Similar tests for other kinds of objects: */ | ||||
| @@ -447,11 +452,22 @@ struct incremental_step | |||||
| {"{\"a\":1}{\"b\":2}", 15, 7, json_tokener_success, 0}, | {"{\"a\":1}{\"b\":2}", 15, 7, json_tokener_success, 0}, | ||||
| {&"{\"a\":1}{\"b\":2}"[7], 8, 7, json_tokener_success, 1}, | {&"{\"a\":1}{\"b\":2}"[7], 8, 7, json_tokener_success, 1}, | ||||
| /* Some bad formatting. Check we get the correct error status | |||||
| * XXX this means we can't have two numbers in the incremental parse | |||||
| * XXX stream with the second one being a negative number! | |||||
| */ | |||||
| {"2015-01-15", 10, 4, json_tokener_error_parse_number, 1}, | |||||
| /* | |||||
| * Though this may seem invalid at first glance, it | |||||
| * parses as three separate numbers, 2015, -1 and -15 | |||||
| * Of course, simply pasting together a stream of arbitrary | |||||
| * positive numbers won't work, since there'll be no way to | |||||
| * tell where in e.g. "2015015" the next number starts, so | |||||
| * a reliably parsable stream must not include json_type_int | |||||
| * or json_type_double objects without some other delimiter. | |||||
| * e.g. whitespace | |||||
| */ | |||||
| {&"2015-01-15"[0], 11, 4, json_tokener_success, 1}, | |||||
| {&"2015-01-15"[4], 7, 3, json_tokener_success, 1}, | |||||
| {&"2015-01-15"[7], 4, 3, json_tokener_success, 1}, | |||||
| {&"2015 01 15"[0], 11, 5, json_tokener_success, 1}, | |||||
| {&"2015 01 15"[4], 7, 4, json_tokener_success, 1}, | |||||
| {&"2015 01 15"[7], 4, 3, json_tokener_success, 1}, | |||||
| /* Strings have a well defined end point, so we can stop at the quote */ | /* Strings have a well defined end point, so we can stop at the quote */ | ||||
| {"\"blue\"", -1, -1, json_tokener_success, 0}, | {"\"blue\"", -1, -1, json_tokener_success, 0}, | ||||
| @@ -40,9 +40,13 @@ new_obj.to_string(nAn)=NaN | |||||
| new_obj.to_string(iNfinity)=Infinity | new_obj.to_string(iNfinity)=Infinity | ||||
| new_obj.to_string(12)=12 | new_obj.to_string(12)=12 | ||||
| new_obj.to_string(12.3)=12.3 | new_obj.to_string(12.3)=12.3 | ||||
| new_obj.to_string(12.3.4)=null | |||||
| new_obj.to_string(2015-01-15)=null | |||||
| new_obj.to_string(12.3.4)=12.3 | |||||
| new_obj.to_string(2015-01-15)=2015 | |||||
| new_obj.to_string(12.3xxx)=12.3 | new_obj.to_string(12.3xxx)=12.3 | ||||
| new_obj.to_string(12.3{"a":123})=12.3 | |||||
| new_obj.to_string(12.3 | |||||
| )=12.3 | |||||
| new_obj.to_string(12.3 )=12.3 | |||||
| new_obj.to_string({"FoO" : -12.3E512})={ "FoO": -12.3E512 } | new_obj.to_string({"FoO" : -12.3E512})={ "FoO": -12.3E512 } | ||||
| new_obj.to_string({"FoO" : -12.3e512})={ "FoO": -12.3e512 } | new_obj.to_string({"FoO" : -12.3e512})={ "FoO": -12.3e512 } | ||||
| new_obj.to_string({"FoO" : -12.3E51.2})=null | new_obj.to_string({"FoO" : -12.3E51.2})=null | ||||
| @@ -162,6 +166,7 @@ json_tokener_parse_ex(tok, 0e- , 4) ... OK: got object of type [double | |||||
| json_tokener_parse_ex(tok, 0e- , 4) ... OK: got correct error: unexpected end of data | json_tokener_parse_ex(tok, 0e- , 4) ... OK: got correct error: unexpected end of data | ||||
| json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got object of type [array]: [ 0 ] | json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got object of type [array]: [ 0 ] | ||||
| json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got correct error: number expected | json_tokener_parse_ex(tok, [0e-] , 5) ... OK: got correct error: number expected | ||||
| json_tokener_parse_ex(tok, 0e+- , 5) ... OK: got object of type [double]: 0 | |||||
| json_tokener_parse_ex(tok, 0e+- , 5) ... OK: got correct error: number expected | json_tokener_parse_ex(tok, 0e+- , 5) ... OK: got correct error: number expected | ||||
| json_tokener_parse_ex(tok, [0e+-] , 6) ... OK: got correct error: number expected | json_tokener_parse_ex(tok, [0e+-] , 6) ... OK: got correct error: number expected | ||||
| json_tokener_parse_ex(tok, false , 5) ... OK: got correct error: continue | json_tokener_parse_ex(tok, false , 5) ... OK: got correct error: continue | ||||
| @@ -215,7 +220,12 @@ json_tokener_parse_ex(tok, nullx , 6) ... OK: got object of type [null]: | |||||
| json_tokener_parse_ex(tok, x , 2) ... OK: got correct error: unexpected character | json_tokener_parse_ex(tok, x , 2) ... OK: got correct error: unexpected character | ||||
| json_tokener_parse_ex(tok, {"a":1}{"b":2}, 15) ... OK: got object of type [object]: { "a": 1 } | json_tokener_parse_ex(tok, {"a":1}{"b":2}, 15) ... OK: got object of type [object]: { "a": 1 } | ||||
| json_tokener_parse_ex(tok, {"b":2} , 8) ... OK: got object of type [object]: { "b": 2 } | json_tokener_parse_ex(tok, {"b":2} , 8) ... OK: got object of type [object]: { "b": 2 } | ||||
| json_tokener_parse_ex(tok, 2015-01-15 , 10) ... OK: got correct error: number expected | |||||
| json_tokener_parse_ex(tok, 2015-01-15 , 11) ... OK: got object of type [int]: 2015 | |||||
| json_tokener_parse_ex(tok, -01-15 , 7) ... OK: got object of type [int]: -1 | |||||
| json_tokener_parse_ex(tok, -15 , 4) ... OK: got object of type [int]: -15 | |||||
| json_tokener_parse_ex(tok, 2015 01 15 , 11) ... OK: got object of type [int]: 2015 | |||||
| json_tokener_parse_ex(tok, 01 15 , 7) ... OK: got object of type [int]: 1 | |||||
| json_tokener_parse_ex(tok, 15 , 4) ... OK: got object of type [int]: 15 | |||||
| json_tokener_parse_ex(tok, "blue" , 6) ... OK: got object of type [string]: "blue" | json_tokener_parse_ex(tok, "blue" , 6) ... OK: got object of type [string]: "blue" | ||||
| json_tokener_parse_ex(tok, "\"" , 4) ... OK: got object of type [string]: "\"" | json_tokener_parse_ex(tok, "\"" , 4) ... OK: got object of type [string]: "\"" | ||||
| json_tokener_parse_ex(tok, "\\" , 4) ... OK: got object of type [string]: "\\" | json_tokener_parse_ex(tok, "\\" , 4) ... OK: got object of type [string]: "\\" | ||||
| @@ -265,5 +275,5 @@ json_tokener_parse_ex(tok, "\ud855 | |||||
| json_tokener_parse_ex(tok, "\ud0031À" , 10) ... OK: got correct error: invalid utf-8 string | json_tokener_parse_ex(tok, "\ud0031À" , 10) ... OK: got correct error: invalid utf-8 string | ||||
| json_tokener_parse_ex(tok, 11�11 , 5) ... OK: got correct error: invalid utf-8 string | json_tokener_parse_ex(tok, 11�11 , 5) ... OK: got correct error: invalid utf-8 string | ||||
| json_tokener_parse_ex(tok, {"1�":1} , 8) ... OK: got correct error: invalid utf-8 string | json_tokener_parse_ex(tok, {"1�":1} , 8) ... OK: got correct error: invalid utf-8 string | ||||
| End Incremental Tests OK=179 ERROR=0 | |||||
| End Incremental Tests OK=185 ERROR=0 | |||||
| ================================== | ================================== | ||||