Browse Source

Merge pull request #893 from sffc/supplemental-code-point-bug

Fix bug involving supplemental code points that look like high surrogates
master
Eric Hawicz GitHub 1 month ago
parent
commit
2372e9518e
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
3 changed files with 16 additions and 3 deletions
  1. +2
    -2
      json_tokener.c
  2. +8
    -0
      tests/test_parse.c
  3. +6
    -1
      tests/test_parse.expected

+ 2
- 2
json_tokener.c View File

@@ -145,8 +145,8 @@ enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
}

/* Stuff for decoding unicode sequences */
-#define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800)
-#define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00)
+#define IS_HIGH_SURROGATE(uc) (((uc)&0xFFFFFC00) == 0xD800)
+#define IS_LOW_SURROGATE(uc) (((uc)&0xFFFFFC00) == 0xDC00)
#define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000)
static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};



+ 8
- 0
tests/test_parse.c View File

@@ -113,6 +113,9 @@ static void test_basic_parse(void)
single_basic_parse("\"\\udd27\"", 0);
// Test with a "short" high surrogate
single_basic_parse("[9,'\\uDAD", 0);
+single_basic_parse("\"[9,'\\uDAD\"", 0);
+// Test with a supplemental character that looks like a high surrogate
+single_basic_parse("\"\\uD836\\uDE87\"", 0);
single_basic_parse("null", 0);
single_basic_parse("NaN", 0);
single_basic_parse("-NaN", 0); /* non-sensical, returns null */
@@ -332,6 +335,11 @@ struct incremental_step
{"{ \"foo", -1, -1, json_tokener_continue, 1, 0},
{": \"bar\"}", -1, 0, json_tokener_error_parse_unexpected, 1, 0},

+/* Check a supplemental code point that looks like a high surrogate */
+{"\"\\uD836", -1, -1, json_tokener_continue, 0, 0},
+{"\\uDE87", -1, -1, json_tokener_continue, 0, 0},
+{"\"", -1, -1, json_tokener_success, 1, 0},
+
/* Check incremental parsing with trailing characters */
{"{ \"foo", -1, -1, json_tokener_continue, 0, 0},
{"\": {\"bar", -1, -1, json_tokener_continue, 0, 0},


+ 6
- 1
tests/test_parse.expected View File

@@ -13,6 +13,8 @@ new_obj.to_string("\ud840\u4e16")="�世"
new_obj.to_string("\ud840")="�"
new_obj.to_string("\udd27")="�"
new_obj.to_string([9,'\uDAD)=null
+new_obj.to_string("[9,'\uDAD")=null
+new_obj.to_string("\uD836\uDE87")="𝪇"
new_obj.to_string(null)=null
new_obj.to_string(NaN)=NaN
new_obj.to_string(-NaN)=null
@@ -138,6 +140,9 @@ json_tokener_parse_ex(tok, "ä" , 4) ... OK: got object of type [string
json_tokener_parse_ex(tok, "ä" , 4) ... OK: got object of type [string]: "ä"
json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue
json_tokener_parse_ex(tok, : "bar"} , 8) ... OK: got correct error: unexpected character
+json_tokener_parse_ex(tok, "\uD836 , 7) ... OK: got correct error: continue
+json_tokener_parse_ex(tok, \uDE87 , 6) ... OK: got correct error: continue
+json_tokener_parse_ex(tok, " , 1) ... OK: got object of type [string]: "𝪇"
json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue
json_tokener_parse_ex(tok, ": {"bar , 8) ... OK: got correct error: continue
json_tokener_parse_ex(tok, ":13}}XXXX , 10) ... OK: got object of type [object]: { "foo": { "bar": 13 } }
@@ -363,5 +368,5 @@ json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence
json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence
-End Incremental Tests OK=269 ERROR=0
+End Incremental Tests OK=272 ERROR=0
==================================

Loading…
Cancel
Save