Browse Source

Add compatibility with rarely used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C)

pull/483/head
pj81102 6 years ago
parent
commit
cedb65df43
2 changed files with 16 additions and 3 deletions
  1. +6
    -1
      json_object.c
  2. +10
    -2
      json_tokener.c

+ 6
- 1
json_object.c View File

@@ -107,6 +107,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl
{ {
int pos = 0, start_offset = 0; int pos = 0, start_offset = 0;
unsigned char c; unsigned char c;
/* The previous byte. It is used to identify rarely used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */
unsigned char old_c = '\1';
while (len--) while (len--)
{ {
c = str[pos]; c = str[pos];
@@ -135,7 +137,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl
else if(c == '\t') printbuf_memappend(pb, "\\t", 2); else if(c == '\t') printbuf_memappend(pb, "\\t", 2);
else if(c == '\f') printbuf_memappend(pb, "\\f", 2); else if(c == '\f') printbuf_memappend(pb, "\\f", 2);
else if(c == '"') printbuf_memappend(pb, "\\\"", 2); else if(c == '"') printbuf_memappend(pb, "\\\"", 2);
else if(c == '\\') printbuf_memappend(pb, "\\\\", 2);
else if(c == '\\' && old_c < 0x80) printbuf_memappend(pb, "\\\\", 2); /* If the previous byte is ASCII character, this byte is escape character. */
else if(c == '\\' && old_c >= 0x80) printbuf_memappend(pb, "\\", 1); /* Else these two bytes are maybe a double-byte GBK character. */
else if(c == '/') printbuf_memappend(pb, "\\/", 2); else if(c == '/') printbuf_memappend(pb, "\\/", 2);


start_offset = ++pos; start_offset = ++pos;
@@ -157,6 +160,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl
} else } else
pos++; pos++;
} }

old_c = c;
} }
if (pos - start_offset > 0) if (pos - start_offset > 0)
printbuf_memappend(pb, str + start_offset, pos - start_offset); printbuf_memappend(pb, str + start_offset, pos - start_offset);


+ 10
- 2
json_tokener.c View File

@@ -533,6 +533,8 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
{ {
/* Advance until we change state */ /* Advance until we change state */
const char *case_start = str; const char *case_start = str;
/* The previous byte. It is used to identify rarely used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */
unsigned char old_c = '\1';
while(1) { while(1) {
if(c == tok->quote_char) { if(c == tok->quote_char) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start); printbuf_memappend_fast(tok->pb, case_start, str-case_start);
@@ -542,12 +544,14 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
saved_state = json_tokener_state_finish; saved_state = json_tokener_state_finish;
state = json_tokener_state_eatws; state = json_tokener_state_eatws;
break; break;
} else if(c == '\\') {
} else if(c == '\\' && old_c < 0x80) { /* If the previous byte is not ASCII character, maybe this is a double-byte GBK character, we can skip it. */
printbuf_memappend_fast(tok->pb, case_start, str-case_start); printbuf_memappend_fast(tok->pb, case_start, str-case_start);
saved_state = json_tokener_state_string; saved_state = json_tokener_state_string;
state = json_tokener_state_string_escape; state = json_tokener_state_string_escape;
break; break;
} }

old_c = c;
if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start); printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out; goto out;
@@ -885,6 +889,8 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
{ {
/* Advance until we change state */ /* Advance until we change state */
const char *case_start = str; const char *case_start = str;
/* The previous byte. It is used to identify rarely used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */
unsigned char old_c = '\1';
while(1) { while(1) {
if(c == tok->quote_char) { if(c == tok->quote_char) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start); printbuf_memappend_fast(tok->pb, case_start, str-case_start);
@@ -892,12 +898,14 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
saved_state = json_tokener_state_object_field_end; saved_state = json_tokener_state_object_field_end;
state = json_tokener_state_eatws; state = json_tokener_state_eatws;
break; break;
} else if(c == '\\') {
} else if(c == '\\' && old_c < 0x80) { /* If the previous byte is not ASCII character, maybe this is a double-byte GBK character, we can skip it. */
printbuf_memappend_fast(tok->pb, case_start, str-case_start); printbuf_memappend_fast(tok->pb, case_start, str-case_start);
saved_state = json_tokener_state_object_field; saved_state = json_tokener_state_object_field;
state = json_tokener_state_string_escape; state = json_tokener_state_string_escape;
break; break;
} }

old_c =c;
if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start); printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out; goto out;


Loading…
Cancel
Save