@@ -83,6 +83,7 @@ static const char* json_tokener_errors[] = { | |||||
"object value separator ',' expected", | "object value separator ',' expected", | ||||
"invalid string sequence", | "invalid string sequence", | ||||
"expected comment", | "expected comment", | ||||
"invalid utf-8 string", | |||||
"buffer size overflow" | "buffer size overflow" | ||||
}; | }; | ||||
@@ -282,6 +283,13 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, | |||||
} | } | ||||
#endif | #endif | ||||
if ((tok->flags & JSON_TOKENER_STRICT) && | |||||
(!json_tokener_validate_utf8(str))) | |||||
{ | |||||
tok->err = json_tokener_error_parse_utf8_string; | |||||
goto out; | |||||
} | |||||
while (PEEK_CHAR(c, tok)) { | while (PEEK_CHAR(c, tok)) { | ||||
redo_char: | redo_char: | ||||
@@ -985,6 +993,50 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, | |||||
return NULL; | return NULL; | ||||
} | } | ||||
/**
 * Check that a NUL-terminated string is structurally valid UTF-8.
 *
 * A lead byte announces how long its sequence is, and every following byte
 * of that sequence must have the form 10xxxxxx.  The legacy 5- and 6-byte
 * forms (lead bytes 0xF8..0xFD) are still accepted.  Overlong encodings,
 * surrogates and the decoded code point value are not checked.
 *
 * @param str NUL-terminated byte string to check; it is dereferenced
 *            unconditionally, so it must not be NULL.
 * @return 1 if every multibyte sequence is complete and well formed,
 *         0 otherwise.
 */
json_bool json_tokener_validate_utf8(const char *str)
{
	const unsigned char *p;
	unsigned int nBytes = 0; /* continuation bytes still expected */

	for (p = (const unsigned char *)str; *p != '\0'; ++p)
	{
		const unsigned char chr = *p;

		if (nBytes != 0)
		{
			/* Inside a sequence: the byte must be 10xxxxxx. */
			if ((chr & 0xC0) != 0x80)
				return 0;
			nBytes--;
			continue;
		}

		/* Plain ASCII needs no further checking. */
		if (chr < 0x80)
			continue;

		/*
		 * Lead byte: derive the number of continuation bytes to expect.
		 * 0xFE and 0xFF never occur in UTF-8, and 0x80..0xBF is a
		 * continuation byte with no lead byte in front of it.
		 */
		if (chr >= 0xFE)
			return 0;
		else if (chr >= 0xFC)
			nBytes = 5;
		else if (chr >= 0xF8)
			nBytes = 4;
		else if (chr >= 0xF0)
			nBytes = 3;
		else if (chr >= 0xE0)
			nBytes = 2;
		else if (chr >= 0xC0)
			nBytes = 1;
		else
			return 0;
	}

	/* The string ended in the middle of a multibyte sequence. */
	if (nBytes != 0)
		return 0;

	return 1;
}
void json_tokener_set_flags(struct json_tokener *tok, int flags) | void json_tokener_set_flags(struct json_tokener *tok, int flags) | ||||
{ | { | ||||
tok->flags = flags; | tok->flags = flags; | ||||
@@ -38,6 +38,7 @@ enum json_tokener_error { | |||||
json_tokener_error_parse_object_value_sep, | json_tokener_error_parse_object_value_sep, | ||||
json_tokener_error_parse_string, | json_tokener_error_parse_string, | ||||
json_tokener_error_parse_comment, | json_tokener_error_parse_comment, | ||||
json_tokener_error_parse_utf8_string, | |||||
json_tokener_error_size | json_tokener_error_size | ||||
}; | }; | ||||
@@ -162,6 +163,12 @@ JSON_EXPORT void json_tokener_reset(struct json_tokener *tok); | |||||
JSON_EXPORT struct json_object* json_tokener_parse(const char *str); | JSON_EXPORT struct json_object* json_tokener_parse(const char *str); | ||||
JSON_EXPORT struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error); | JSON_EXPORT struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error); | ||||
/** | |||||
* Validate the UTF-8 encoding of a string; called before parsing in strict mode. | |||||
* Returns 1 if the string is valid UTF-8, 0 otherwise. | |||||
*/ | |||||
json_bool json_tokener_validate_utf8(const char *str); | |||||
/** | /** | ||||
* Set flags that control how parsing will be done. | * Set flags that control how parsing will be done. | ||||
*/ | */ | ||||
@@ -355,6 +355,28 @@ struct incremental_step { | |||||
{ "[1,2,3,]", -1, 7, json_tokener_error_parse_unexpected, 3 }, | { "[1,2,3,]", -1, 7, json_tokener_error_parse_unexpected, 3 }, | ||||
{ "{\"a\":1,}", -1, 7, json_tokener_error_parse_unexpected, 3 }, | { "{\"a\":1,}", -1, 7, json_tokener_error_parse_unexpected, 3 }, | ||||
// ASCII encoding "123asc$%&" | |||||
{ "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22", -1, -1, json_tokener_success, 3 }, | |||||
{ "\x22\x31\x32\x33\x61\x73\x63\x24\x25\x26\x22", -1, -1, json_tokener_success, 1 }, | |||||
// utf-8 encoding "世界" "πφ" "𥑕" | |||||
{ "\x22\xe4\xb8\x96\xe7\x95\x8c\x22", -1, -1, json_tokener_success, 3 }, | |||||
{ "\x22\xe4\xb8\x96\xe7\x95\x8c\x22", -1, -1, json_tokener_success, 1 }, | |||||
{ "\x22\xcf\x80\xcf\x86\x22", -1, -1, json_tokener_success, 3 }, | |||||
{ "\x22\xf0\xa5\x91\x95\x22", -1, -1, json_tokener_success, 3 }, | |||||
{ "\x22\xf8\xa5\xa5\x91\x95\x22", -1, -1, json_tokener_success, 3 }, | |||||
{ "\x22\xfd\xa5\xa5\xa5\x91\x95\x22", -1, -1, json_tokener_success, 3 }, | |||||
// wrong utf-8 encoding | |||||
{ "\x22\xe6\x9d\x4e\x22", -1, 0, json_tokener_error_parse_utf8_string, 3 }, | |||||
{ "\x22\xe6\x9d\x4e\x22", -1, 5, json_tokener_success, 1 }, | |||||
// GBK encoding | |||||
{ "\x22\xc0\xee\xc5\xf4\x22", -1, 0, json_tokener_error_parse_utf8_string, 3 }, | |||||
{ "\x22\xc0\xee\xc5\xf4\x22", -1, 6, json_tokener_success, 1 }, | |||||
// ucs-2/utf-16 encoding | |||||
{ "\x22\x11\xd2\x22", -1, 0, json_tokener_error_parse_utf8_string, 3 }, | |||||
{ "\x22\x11\xd2\x22", -1, 4, json_tokener_success, 1 }, | |||||
{ "\x22\x55\xd8\55\xdc\x22", -1, 0, json_tokener_error_parse_utf8_string, 3 }, | |||||
{ "\x22\x16\x4e\x4c\x75\x22", -1, 6, json_tokener_success, 1 }, | |||||
{ NULL, -1, -1, json_tokener_success, 0 }, | { NULL, -1, -1, json_tokener_success, 0 }, | ||||
}; | }; | ||||
@@ -183,5 +183,21 @@ json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got object of type [array] | |||||
json_tokener_parse_ex(tok, [1,2,,3,] , 9) ... OK: got correct error: unexpected character | json_tokener_parse_ex(tok, [1,2,,3,] , 9) ... OK: got correct error: unexpected character | ||||
json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got correct error: unexpected character | json_tokener_parse_ex(tok, [1,2,3,] , 8) ... OK: got correct error: unexpected character | ||||
json_tokener_parse_ex(tok, {"a":1,} , 8) ... OK: got correct error: unexpected character | json_tokener_parse_ex(tok, {"a":1,} , 8) ... OK: got correct error: unexpected character | ||||
End Incremental Tests OK=105 ERROR=0 | |||||
json_tokener_parse_ex(tok, "123asc$%&" , 11) ... OK: got object of type [string]: "123asc$%&" | |||||
json_tokener_parse_ex(tok, "123asc$%&" , 11) ... OK: got object of type [string]: "123asc$%&" | |||||
json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界" | |||||
json_tokener_parse_ex(tok, "世界" , 8) ... OK: got object of type [string]: "世界" | |||||
json_tokener_parse_ex(tok, "πφ" , 6) ... OK: got object of type [string]: "πφ" | |||||
json_tokener_parse_ex(tok, "𥑕" , 6) ... OK: got object of type [string]: "𥑕" | |||||
json_tokener_parse_ex(tok, "ø¥¥‘•" , 7) ... OK: got object of type [string]: "ø¥¥‘•" | |||||
json_tokener_parse_ex(tok, "ý¥¥¥‘•" , 8) ... OK: got object of type [string]: "ý¥¥¥‘•" | |||||
json_tokener_parse_ex(tok, "æ�N" , 5) ... OK: got correct error: invalid utf-8 string | |||||
json_tokener_parse_ex(tok, "æ�N" , 5) ... OK: got object of type [string]: "æ�N" | |||||
json_tokener_parse_ex(tok, "ÀîÅô" , 6) ... OK: got correct error: invalid utf-8 string | |||||
json_tokener_parse_ex(tok, "ÀîÅô" , 6) ... OK: got object of type [string]: "ÀîÅô" | |||||
json_tokener_parse_ex(tok, "Ò" , 4) ... OK: got correct error: invalid utf-8 string | |||||
json_tokener_parse_ex(tok, "Ò" , 4) ... OK: got object of type [string]: "\u0011Ò" | |||||
json_tokener_parse_ex(tok, "UØ-Ü" , 6) ... OK: got correct error: invalid utf-8 string | |||||
json_tokener_parse_ex(tok, "NLu" , 6) ... OK: got object of type [string]: "\u0016NLu" | |||||
End Incremental Tests OK=121 ERROR=0 | |||||
================================== | ================================== |