Browse Source

Add compatibility with rarely-used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C)

pull/483/head
pj81102 6 years ago
parent
commit
cedb65df43
2 changed files with 16 additions and 3 deletions
  1. +6
    -1
      json_object.c
  2. +10
    -2
      json_tokener.c

+ 6
- 1
json_object.c View File

@@ -107,6 +107,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl
{
int pos = 0, start_offset = 0;
unsigned char c;
/* The previous byte. It is used to identify rarely-used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */
unsigned char old_c = '\1';
while (len--)
{
c = str[pos];
@@ -135,7 +137,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl
else if(c == '\t') printbuf_memappend(pb, "\\t", 2);
else if(c == '\f') printbuf_memappend(pb, "\\f", 2);
else if(c == '"') printbuf_memappend(pb, "\\\"", 2);
else if(c == '\\') printbuf_memappend(pb, "\\\\", 2);
else if(c == '\\' && old_c < 0x80) printbuf_memappend(pb, "\\\\", 2); /* If the previous byte is ASCII character, this byte is escape character. */
else if(c == '\\' && old_c >= 0x80) printbuf_memappend(pb, "\\", 1); /* Else these two bytes are maybe a double-byte GBK character. */
else if(c == '/') printbuf_memappend(pb, "\\/", 2);

start_offset = ++pos;
@@ -157,6 +160,8 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl
} else
pos++;
}

old_c = c;
}
if (pos - start_offset > 0)
printbuf_memappend(pb, str + start_offset, pos - start_offset);


+ 10
- 2
json_tokener.c View File

@@ -533,6 +533,8 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
{
/* Advance until we change state */
const char *case_start = str;
/* The previous byte. It is used to identify rarely-used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */
unsigned char old_c = '\1';
while(1) {
if(c == tok->quote_char) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
@@ -542,12 +544,14 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
saved_state = json_tokener_state_finish;
state = json_tokener_state_eatws;
break;
} else if(c == '\\') {
} else if(c == '\\' && old_c < 0x80) { /* If the previous byte is not ASCII character, maybe this is a double-byte GBK character, we can skip it. */
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
saved_state = json_tokener_state_string;
state = json_tokener_state_string_escape;
break;
}

old_c = c;
if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
@@ -885,6 +889,8 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
{
/* Advance until we change state */
const char *case_start = str;
/* The previous byte. It is used to identify rarely-used Chinese characters in the GBK charset whose second byte is the backslash '\' (0x5C). */
unsigned char old_c = '\1';
while(1) {
if(c == tok->quote_char) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
@@ -892,12 +898,14 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
saved_state = json_tokener_state_object_field_end;
state = json_tokener_state_eatws;
break;
} else if(c == '\\') {
} else if(c == '\\' && old_c < 0x80) { /* If the previous byte is not ASCII character, maybe this is a double-byte GBK character, we can skip it. */
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
saved_state = json_tokener_state_object_field;
state = json_tokener_state_string_escape;
break;
}

old_c =c;
if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;


Loading…
Cancel
Save