| @@ -83,6 +83,7 @@ static const char* json_tokener_errors[] = { | |||
| "object value separator ',' expected", | |||
| "invalid string sequence", | |||
| "expected comment", | |||
| "invalid utf-8 string", | |||
| "buffer size overflow" | |||
| }; | |||
| @@ -222,8 +223,12 @@ struct json_object* json_tokener_parse_verbose(const char *str, | |||
| : \ | |||
| (((tok)->err = json_tokener_continue), 0) \ | |||
| ) : \ | |||
| (((dest) = *str), 1) \ | |||
| ) | |||
| (((tok->flags & JSON_TOKENER_STRICT) && \ | |||
| (!json_tokener_validate_utf8(*str, nBytesp)))? \ | |||
| ((tok->err = json_tokener_error_parse_utf8_string), 0) \ | |||
| : \ | |||
| (((dest) = *str), 1) \ | |||
| )) | |||
| /* ADVANCE_CHAR() macro: | |||
| * Increments str & tok->char_offset. | |||
| @@ -242,6 +247,9 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, | |||
| { | |||
| struct json_object *obj = NULL; | |||
| char c = '\1'; | |||
| unsigned int nBytes = 0; | |||
| unsigned int *nBytesp = &nBytes; | |||
| #ifdef HAVE_USELOCALE | |||
| locale_t oldlocale = uselocale(NULL); | |||
| locale_t newloc; | |||
| @@ -948,6 +956,10 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, | |||
| } /* while(PEEK_CHAR) */ | |||
| out: | |||
| if ((tok->flags & JSON_TOKENER_STRICT) && (nBytes != 0)) | |||
| { | |||
| tok->err = json_tokener_error_parse_utf8_string; | |||
| } | |||
| if (c && | |||
| (state == json_tokener_state_finish) && | |||
| (tok->depth == 0) && | |||
| @@ -985,6 +997,37 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, | |||
| return NULL; | |||
| } | |||
| json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes) | |||
| { | |||
| unsigned char chr = c; | |||
| if (*nBytes == 0) | |||
| { | |||
| if (chr >= 0x80) | |||
| { | |||
| if(chr >= 0xFC && chr <= 0xFd) | |||
| *nBytes = 6; | |||
| else if (chr >= 0xF8) | |||
| *nBytes = 5; | |||
| else if (chr >= 0xF0) | |||
| *nBytes = 4; | |||
| else if (chr >= 0xE0) | |||
| *nBytes = 3; | |||
| else if (chr >= 0xC0) | |||
| *nBytes = 2; | |||
| else | |||
| return 0; | |||
| (*nBytes)--; | |||
| } | |||
| } | |||
| else | |||
| { | |||
| if ((chr & 0xC0) != 0x80) | |||
| return 0; | |||
| (*nBytes)--; | |||
| } | |||
| return 1; | |||
| } | |||
| void json_tokener_set_flags(struct json_tokener *tok, int flags) | |||
| { | |||
| tok->flags = flags; | |||
| @@ -38,6 +38,7 @@ enum json_tokener_error { | |||
| json_tokener_error_parse_object_value_sep, | |||
| json_tokener_error_parse_string, | |||
| json_tokener_error_parse_comment, | |||
| json_tokener_error_parse_utf8_string, | |||
| json_tokener_error_size | |||
| }; | |||
| @@ -162,6 +163,11 @@ JSON_EXPORT void json_tokener_reset(struct json_tokener *tok); | |||
| JSON_EXPORT struct json_object* json_tokener_parse(const char *str); | |||
| JSON_EXPORT struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error); | |||
| /** | |||
| * Validate that the input is UTF-8 encoded; used in strict mode. | |||
| * Returns 0 if the byte is not valid UTF-8 at this position, 1 otherwise. | |||
| */ | |||
| json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes); | |||
| /** | |||
| * Set flags that control how parsing will be done. | |||
| */ | |||
| @@ -355,6 +355,41 @@ struct incremental_step { | |||
| { "[1,2,3,]", -1, 7, json_tokener_error_parse_unexpected, 3 }, | |||
| { "{\"a\":1,}", -1, 7, json_tokener_error_parse_unexpected, 3 }, | |||
| // utf-8 test | |||
| // ASCII encoding | |||
| { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 3 }, | |||
| { "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22",-1, -1, json_tokener_success, 1 }, | |||
| // utf-8 encoding | |||
| { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 3 }, | |||
| { "\x22\xe4\xb8",-1, -1, json_tokener_error_parse_utf8_string, 2 }, | |||
| { "\x96\xe7\x95\x8c\x22",-1, 0, json_tokener_error_parse_utf8_string, 3 }, | |||
| { "\x22\xe4\xb8\x96\xe7\x95\x8c\x22",-1, -1, json_tokener_success, 1 }, | |||
| { "\x22\xcf\x80\xcf\x86\x22",-1, -1, json_tokener_success, 3 }, | |||
| { "\x22\xf0\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 }, | |||
| { "\x22\xf8\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 }, | |||
| { "\x22\xfd\xa5\xa5\xa5\x91\x95\x22",-1, -1, json_tokener_success, 3 }, | |||
| // wrong utf-8 encoding | |||
| { "\x22\xe6\x9d\x4e\x22",-1, 3, json_tokener_error_parse_utf8_string, 3 }, | |||
| { "\x22\xe6\x9d\x4e\x22",-1, 5, json_tokener_success, 1 }, | |||
| // GBK encoding | |||
| { "\x22\xc0\xee\xc5\xf4\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 }, | |||
| { "\x22\xc0\xee\xc5\xf4\x22",-1, 6, json_tokener_success, 1 }, | |||
| // char after space | |||
| { "\x20\x20\x22\xe4\xb8\x96\x22",-1, -1, json_tokener_success, 3 }, | |||
| { "\x20\x20\x81\x22\xe4\xb8\x96\x22",-1, 2, json_tokener_error_parse_utf8_string, 3 }, | |||
| { "\x5b\x20\x81\x31\x5d",-1, 2, json_tokener_error_parse_utf8_string, 3 }, | |||
| // char in state inf | |||
| { "\x49\x6e\x66\x69\x6e\x69\x74\x79",9, 8, json_tokener_success, 1 }, | |||
| { "\x49\x6e\x66\x81\x6e\x69\x74\x79",-1, 3, json_tokener_error_parse_utf8_string, 3 }, | |||
| // char in escape unicode | |||
| { "\x22\x5c\x75\x64\x38\x35\x35\x5c\x75\x64\x63\x35\x35\x22",15, 14, json_tokener_success, 3 }, | |||
| { "\x22\x5c\x75\x64\x38\x35\x35\xc0\x75\x64\x63\x35\x35\x22",-1, 8, json_tokener_error_parse_utf8_string, 3 }, | |||
| { "\x22\x5c\x75\x64\x30\x30\x33\x31\xc0\x22",-1, 9, json_tokener_error_parse_utf8_string, 3 }, | |||
| // char in number | |||
| { "\x31\x31\x81\x31\x31",-1, 2, json_tokener_error_parse_utf8_string, 3 }, | |||
| // char in object | |||
| { "\x7b\x22\x31\x81\x22\x3a\x31\x7d",-1, 3, json_tokener_error_parse_utf8_string, 3 }, | |||
| { NULL, -1, -1, json_tokener_success, 0 }, | |||
| }; | |||
| @@ -183,5 +183,29 @@ json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got object of type [array] | |||
| json_tokener_parse_ex(tok, [1,2,,3,] , 9) ... OK: got correct error: unexpected character | |||
| json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got correct error: unexpected character | |||
| json_tokener_parse_ex(tok, {"a":1,} , 8) ... OK: got correct error: unexpected character | |||
| End Incremental Tests OK=105 ERROR=0 | |||
| json_tokener_parse_ex(tok, "123asc$%&" , 11) ... OK: got object of type [string]: "123asc$%&" | |||
| json_tokener_parse_ex(tok, "123asc$%&" , 11) ... OK: got object of type [string]: "123asc$%&" | |||
| json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界" | |||
| json_tokener_parse_ex(tok, "ä¸ , 3) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, –界" , 5) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界" | |||
| json_tokener_parse_ex(tok, "πφ" , 6) ... OK: got object of type [string]: "πφ" | |||
| json_tokener_parse_ex(tok, "𥑕" , 6) ... OK: got object of type [string]: "𥑕" | |||
| json_tokener_parse_ex(tok, "ø¥¥‘•" , 7) ... OK: got object of type [string]: "ø¥¥‘•" | |||
| json_tokener_parse_ex(tok, "ý¥¥¥‘•" , 8) ... OK: got object of type [string]: "ý¥¥¥‘•" | |||
| json_tokener_parse_ex(tok, "æ�N" , 5) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, "æ�N" , 5) ... OK: got object of type [string]: "æ�N" | |||
| json_tokener_parse_ex(tok, "ÀîÅô" , 6) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, "ÀîÅô" , 6) ... OK: got object of type [string]: "ÀîÅô" | |||
| json_tokener_parse_ex(tok, "世" , 7) ... OK: got object of type [string]: "世" | |||
| json_tokener_parse_ex(tok, �"世" , 8) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, [ �1] , 5) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, Infinity , 9) ... OK: got object of type [double]: Infinity | |||
| json_tokener_parse_ex(tok, Inf�nity , 8) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, "\ud855\udc55", 15) ... OK: got object of type [string]: "𥑕" | |||
| json_tokener_parse_ex(tok, "\ud855Àudc55", 14) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, "\ud0031À" , 10) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, 11�11 , 5) ... OK: got correct error: invalid utf-8 string | |||
| json_tokener_parse_ex(tok, {"1�":1} , 8) ... OK: got correct error: invalid utf-8 string | |||
| End Incremental Tests OK=129 ERROR=0 | |||
| ================================== | |||