From dbefb48948566227e1fc534c8c80e0965e5d6e48 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 30 Jul 2025 17:39:47 -0700 Subject: [PATCH 1/2] Add tests with current behavior --- tests/test_parse.c | 8 ++++++++ tests/test_parse.expected | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_parse.c b/tests/test_parse.c index 71b881c..9bcf278 100644 --- a/tests/test_parse.c +++ b/tests/test_parse.c @@ -113,6 +113,9 @@ static void test_basic_parse(void) single_basic_parse("\"\\udd27\"", 0); // Test with a "short" high surrogate single_basic_parse("[9,'\\uDAD", 0); + single_basic_parse("\"[9,'\\uDAD\"", 0); + // Test with a supplemental character that looks like a high surrogate + single_basic_parse("\"\\uD836\\uDE87\"", 0); single_basic_parse("null", 0); single_basic_parse("NaN", 0); single_basic_parse("-NaN", 0); /* non-sensical, returns null */ @@ -332,6 +335,11 @@ struct incremental_step {"{ \"foo", -1, -1, json_tokener_continue, 1, 0}, {": \"bar\"}", -1, 0, json_tokener_error_parse_unexpected, 1, 0}, + /* Check a supplemental code point that looks like a high surrogate */ + {"\"\\uD836", -1, -1, json_tokener_continue, 0, 0}, + {"\\uDE87", -1, -1, json_tokener_continue, 0, 0}, + {"\"", -1, -1, json_tokener_success, 1, 0}, + /* Check incremental parsing with trailing characters */ {"{ \"foo", -1, -1, json_tokener_continue, 0, 0}, {"\": {\"bar", -1, -1, json_tokener_continue, 0, 0}, diff --git a/tests/test_parse.expected b/tests/test_parse.expected index c82cfd1..6f7b8aa 100644 --- a/tests/test_parse.expected +++ b/tests/test_parse.expected @@ -13,6 +13,8 @@ new_obj.to_string("\ud840\u4e16")="�世" new_obj.to_string("\ud840")="�" new_obj.to_string("\udd27")="�" new_obj.to_string([9,'\uDAD)=null +new_obj.to_string("[9,'\uDAD")=null +new_obj.to_string("\uD836\uDE87")="�" new_obj.to_string(null)=null new_obj.to_string(NaN)=NaN new_obj.to_string(-NaN)=null @@ -138,6 +140,9 @@ json_tokener_parse_ex(tok, "ä" , 4) ... OK: got object of type [string json_tokener_parse_ex(tok, "ä" , 4) ... OK: got object of type [string]: "ä" json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue json_tokener_parse_ex(tok, : "bar"} , 8) ... OK: got correct error: unexpected character +json_tokener_parse_ex(tok, "\uD836 , 7) ... OK: got correct error: continue +json_tokener_parse_ex(tok, \uDE87 , 6) ... OK: got correct error: continue +json_tokener_parse_ex(tok, " , 1) ... OK: got object of type [string]: "�" json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue json_tokener_parse_ex(tok, ": {"bar , 8) ... OK: got correct error: continue json_tokener_parse_ex(tok, ":13}}XXXX , 10) ... OK: got object of type [object]: { "foo": { "bar": 13 } } @@ -363,5 +368,5 @@ json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence json_tokener_parse_ex(tok, {"":1} , 7) ... OK: got correct error: invalid string sequence -End Incremental Tests OK=269 ERROR=0 +End Incremental Tests OK=272 ERROR=0 ================================== From 7974657c5699416b1195f77f9571d9f46bf608b8 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 30 Jul 2025 17:40:56 -0700 Subject: [PATCH 2/2] Fix code and update tests --- json_tokener.c | 4 ++-- tests/test_parse.expected | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/json_tokener.c b/json_tokener.c index a6bcbbb..8412eae 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -145,8 +145,8 @@ enum json_tokener_error json_tokener_get_error(struct json_tokener *tok) } /* Stuff for decoding unicode sequences */ -#define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800) -#define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00) +#define IS_HIGH_SURROGATE(uc) (((uc)&0xFFFFFC00) == 0xD800) +#define IS_LOW_SURROGATE(uc) (((uc)&0xFFFFFC00) == 0xDC00) #define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000) static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD}; diff --git a/tests/test_parse.expected b/tests/test_parse.expected index 6f7b8aa..8d3961f 100644 --- a/tests/test_parse.expected +++ b/tests/test_parse.expected @@ -14,7 +14,7 @@ new_obj.to_string("\ud840")="�" new_obj.to_string("\udd27")="�" new_obj.to_string([9,'\uDAD)=null new_obj.to_string("[9,'\uDAD")=null -new_obj.to_string("\uD836\uDE87")="�" +new_obj.to_string("\uD836\uDE87")="𝪇" new_obj.to_string(null)=null new_obj.to_string(NaN)=NaN new_obj.to_string(-NaN)=null @@ -142,7 +142,7 @@ json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continu json_tokener_parse_ex(tok, : "bar"} , 8) ... OK: got correct error: unexpected character json_tokener_parse_ex(tok, "\uD836 , 7) ... OK: got correct error: continue json_tokener_parse_ex(tok, \uDE87 , 6) ... OK: got correct error: continue -json_tokener_parse_ex(tok, " , 1) ... OK: got object of type [string]: "�" +json_tokener_parse_ex(tok, " , 1) ... OK: got object of type [string]: "𝪇" json_tokener_parse_ex(tok, { "foo , 6) ... OK: got correct error: continue json_tokener_parse_ex(tok, ": {"bar , 8) ... OK: got correct error: continue json_tokener_parse_ex(tok, ":13}}XXXX , 10) ... OK: got object of type [object]: { "foo": { "bar": 13 } }