@@ -26,6 +26,7 @@ | |||
/tests/test_charcase | |||
/tests/test_compare | |||
/tests/test_double_serializer | |||
/tests/test_float | |||
/tests/test_locale | |||
/tests/test_null | |||
/tests/test_parse | |||
@@ -36,6 +37,7 @@ | |||
/tests/test_util_file | |||
/tests/test_visit | |||
/tests/test_json_pointer | |||
/tests/test_utf8 | |||
/tests/*.vg.out | |||
/tests/*.log | |||
/tests/*.trs | |||
@@ -105,13 +105,30 @@ get_string_component(const struct json_object *jso) | |||
static int json_escape_str(struct printbuf *pb, const char *str, int len, int flags) | |||
{ | |||
int pos = 0, start_offset = 0; | |||
int pos = 0, start_offset = 0, utf8_start = 0, utf8_end = 0; | |||
unsigned char c; | |||
while (len--) | |||
{ | |||
c = str[pos]; | |||
switch(c) | |||
{ | |||
if (utf8_end > pos) { | |||
// Expecting a continuation byte. | |||
if (c >= 0x80 && c <= 0xBf) { | |||
// Found the continuation byte. | |||
goto utf8_loop_end; | |||
} else { | |||
// Invalid byte. | |||
if(utf8_start - start_offset > 0) | |||
printbuf_memappend(pb, str + start_offset, utf8_start - start_offset); | |||
printbuf_memappend(pb, "\xEF\xBF\xBD", 3); | |||
start_offset = pos; | |||
utf8_end = pos; // get out of the UTF-8 state | |||
goto utf8_reset; | |||
} | |||
} | |||
utf8_reset: | |||
switch(c) { | |||
case '\b': | |||
case '\n': | |||
case '\r': | |||
@@ -122,7 +139,6 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl | |||
case '/': | |||
if((flags & JSON_C_TO_STRING_NOSLASHESCAPE) && c == '/') | |||
{ | |||
pos++; | |||
break; | |||
} | |||
@@ -138,7 +154,7 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl | |||
else if(c == '\\') printbuf_memappend(pb, "\\\\", 2); | |||
else if(c == '/') printbuf_memappend(pb, "\\/", 2); | |||
start_offset = ++pos; | |||
start_offset = pos + 1; | |||
break; | |||
default: | |||
if(c < ' ') | |||
@@ -150,12 +166,45 @@ static int json_escape_str(struct printbuf *pb, const char *str, int len, int fl | |||
sprintbuf(pb, "\\u00%c%c", | |||
json_hex_chars[c >> 4], | |||
json_hex_chars[c & 0xf]); | |||
start_offset = ++pos; | |||
} else | |||
pos++; | |||
start_offset = pos + 1; | |||
} else if (c >= 0x80) { | |||
// Expecting a start byte. | |||
if (c >= 0xC2 && c <= 0xDF) { | |||
// 2-byte start byte. | |||
utf8_start = pos; | |||
utf8_end = pos + 2; | |||
} else if (c >= 0xE0 && c <= 0xEF) { | |||
// 3-byte start byte. | |||
utf8_start = pos; | |||
utf8_end = pos + 3; | |||
} else if (c >= 0xF0 && c <= 0xF4) { | |||
// 4-byte start byte. | |||
utf8_start = pos; | |||
utf8_end = pos + 4; | |||
} else { | |||
// Invalid byte. | |||
if(pos - start_offset > 0) | |||
printbuf_memappend(pb, | |||
str + start_offset, | |||
pos - start_offset); | |||
printbuf_memappend(pb, "\xEF\xBF\xBD", 3); | |||
start_offset = pos + 1; | |||
} | |||
} else { | |||
// Some other valid ASCII character. | |||
} | |||
break; | |||
} | |||
utf8_loop_end: | |||
pos++; | |||
} | |||
if (utf8_end > pos) { | |||
if(utf8_start - start_offset > 0) | |||
printbuf_memappend(pb, str + start_offset, utf8_start - start_offset); | |||
printbuf_memappend(pb, "\xEF\xBF\xBD", 3); | |||
} | |||
if (pos - start_offset > 0) | |||
else if (pos - start_offset > 0) | |||
printbuf_memappend(pb, str + start_offset, pos - start_offset); | |||
return 0; | |||
} | |||
@@ -25,6 +25,7 @@ TESTS+= test_compare.test | |||
TESTS+= test_set_value.test | |||
TESTS+= test_visit.test | |||
TESTS+= test_json_pointer.test | |||
TESTS+= test_utf8.test | |||
check_PROGRAMS= | |||
check_PROGRAMS += $(TESTS:.test=) | |||
@@ -0,0 +1,73 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include "json.h" | |||
int main() { | |||
const char inputs[][20] = { | |||
"\0", // empty string | |||
"AbC;dE\0", // ASCII string | |||
"\xE2\x82\xAC\0", // A single valid UTF-8 | |||
"Ab\xE2\x82\xAC;dE\0", // Valid UTF-8 in context | |||
"Ab\xFF;dE\0", // One illegal byte | |||
"Ab\xE2;dE\0", // One invalid start byte | |||
"Ab\xE2\xE2;dE\0", // Two invalid start bytes | |||
"Ab\xE2\xE2\xE2;dE\0", // Three invalid start bytes | |||
"Ab\xE2\xE2...\xE2;dE\0", // Two disjoint invalid sequences | |||
"Ab\xE2\x82\xFF;dE\0", // First two bytes are OK but not the third | |||
"Ab\xE2\x82\xFF\xE2;dE\0", // Like above but with another start byte | |||
"\xE2\0", // A start byte that "overhangs" the end | |||
"A\xFD\0", // Normal ASCII character with invalid byte at end | |||
}; | |||
const char outputs[][30] = { | |||
"\"\"\0", | |||
"\"AbC;dE\"\0", | |||
"\"\xE2\x82\xAC\"\0", | |||
"\"Ab\xE2\x82\xAC;dE\"\0", | |||
"\"Ab\xEF\xBF\xBD;dE\"\0", | |||
"\"Ab\xEF\xBF\xBD;dE\"\0", | |||
"\"Ab\xEF\xBF\xBD\xEF\xBF\xBD;dE\"\0", | |||
"\"Ab\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD;dE\"\0", | |||
"\"Ab\xEF\xBF\xBD\xEF\xBF\xBD...\xEF\xBF\xBD;dE\"\0", | |||
"\"Ab\xEF\xBF\xBD\xEF\xBF\xBD;dE\"\0", | |||
"\"Ab\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD;dE\"\0", | |||
"\"\xEF\xBF\xBD\"", | |||
"\"A\xEF\xBF\xBD\"", | |||
}; | |||
const size_t num_cases = 13; | |||
int errcode = 0; | |||
for (size_t i=0; i<num_cases; i++) { | |||
const char* in = inputs[i]; | |||
const char* expected = outputs[i]; | |||
const size_t expected_len = strlen(expected); | |||
json_object* strobj = json_object_new_string(in); | |||
const char* actual = json_object_to_json_string(strobj); | |||
size_t actual_len = strlen(actual); | |||
if (expected_len != actual_len) { | |||
printf("FAIL ON CASE %d: expected length %d but got %d\n", | |||
(int)i, (int)expected_len, (int)actual_len); | |||
printf("%s\n", actual); | |||
errcode = 1; | |||
goto cleanup; | |||
} | |||
if (memcmp(expected, actual, actual_len) != 0) { | |||
printf("FAIL ON CASE %d: expected '%s' but got '%s'\n", | |||
(int)i, expected, actual); | |||
errcode = 2; | |||
goto cleanup; | |||
} | |||
printf("PASS CASE %d\n", (int)i); | |||
cleanup: | |||
json_object_put(strobj); | |||
} | |||
return errcode; | |||
} |
@@ -0,0 +1,13 @@ | |||
PASS CASE 0 | |||
PASS CASE 1 | |||
PASS CASE 2 | |||
PASS CASE 3 | |||
PASS CASE 4 | |||
PASS CASE 5 | |||
PASS CASE 6 | |||
PASS CASE 7 | |||
PASS CASE 8 | |||
PASS CASE 9 | |||
PASS CASE 10 | |||
PASS CASE 11 | |||
PASS CASE 12 |
@@ -0,0 +1,12 @@ | |||
#!/bin/sh

# Driver for the test_utf8 output test.

# If the caller did not provide $srcdir, derive it from the path this script
# was invoked with.
if [ -z "$srcdir" ]; then
	srcdir="${0%/*}"
	# $0 contained no slash, so the expansion left it unchanged.
	if [ "$srcdir" = "$0" ]; then
		srcdir=.
	fi
	# Stripping the last path component left nothing.
	if [ -z "$srcdir" ]; then
		srcdir=.
	fi
fi

# Common definitions (run_output_test is presumably defined there).
. "$srcdir/test-defs.sh"

run_output_test test_utf8
status=$?
exit "$status"