diff --git a/json_object.c b/json_object.c index 344af51..05f0d95 100644 --- a/json_object.c +++ b/json_object.c @@ -107,6 +107,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl { int pos = 0, start_offset = 0; unsigned char c; + /* The previous byte. It is used to identify rarely used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */ + unsigned char old_c = '\1'; while (len--) { c = str[pos]; @@ -135,7 +137,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl else if(c == '\t') printbuf_memappend(pb, "\\t", 2); else if(c == '\f') printbuf_memappend(pb, "\\f", 2); else if(c == '"') printbuf_memappend(pb, "\\\"", 2); - else if(c == '\\') printbuf_memappend(pb, "\\\\", 2); + else if(c == '\\' && old_c < 0x80) printbuf_memappend(pb, "\\\\", 2); /* If the previous byte is an ASCII character, this byte is an escape character. */ + else if(c == '\\' && old_c >= 0x80) printbuf_memappend(pb, "\\", 1); /* Otherwise these two bytes may form a double-byte GBK character. */ else if(c == '/') printbuf_memappend(pb, "\\/", 2); start_offset = ++pos; @@ -157,6 +160,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl } else pos++; } + + old_c = c; } if (pos - start_offset > 0) printbuf_memappend(pb, str + start_offset, pos - start_offset); diff --git a/json_tokener.c b/json_tokener.c index 561f730..061a845 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -533,6 +533,8 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, { /* Advance until we change state */ const char *case_start = str; + /* the previous byte. 
It is used to identify rarely used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */ + unsigned char old_c = '\1'; while(1) { if(c == tok->quote_char) { printbuf_memappend_fast(tok->pb, case_start, str-case_start); @@ -542,12 +544,14 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, saved_state = json_tokener_state_finish; state = json_tokener_state_eatws; break; - } else if(c == '\\') { + } else if(c == '\\' && old_c < 0x80) { /* If the previous byte is not an ASCII character, this may be the second byte of a double-byte GBK character, so we can skip it. */ printbuf_memappend_fast(tok->pb, case_start, str-case_start); saved_state = json_tokener_state_string; state = json_tokener_state_string_escape; break; } + + old_c = c; if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { printbuf_memappend_fast(tok->pb, case_start, str-case_start); goto out; @@ -885,6 +889,8 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, { /* Advance until we change state */ const char *case_start = str; + /* The previous byte. It is used to identify rarely used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */ + unsigned char old_c = '\1'; while(1) { if(c == tok->quote_char) { printbuf_memappend_fast(tok->pb, case_start, str-case_start); @@ -892,12 +898,14 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, saved_state = json_tokener_state_object_field_end; state = json_tokener_state_eatws; break; - } else if(c == '\\') { + } else if(c == '\\' && old_c < 0x80) { /* If the previous byte is not an ASCII character, this may be the second byte of a double-byte GBK character, so we can skip it. 
*/ printbuf_memappend_fast(tok->pb, case_start, str-case_start); saved_state = json_tokener_state_object_field; state = json_tokener_state_string_escape; break; } + + old_c =c; if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { printbuf_memappend_fast(tok->pb, case_start, str-case_start); goto out;