From 1f46d2f40ff46095203359ab9e1f0fd2ed66fe8f Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Sat, 1 Dec 2018 18:57:22 +0100 Subject: [PATCH 1/8] json_object_private: remove _delete field This field is set based on o_type when the object is created and it is not changed during the lifetime of the object. Therefore we can check o_type to choose the proper delete function in json_object_put(), and save sizeof(void *) bytes in struct json_object_private. --- json_object.c | 23 ++++++++++++++++------- json_object_private.h | 3 --- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/json_object.c b/json_object.c index 344af51..1d9a53c 100644 --- a/json_object.c +++ b/json_object.c @@ -44,7 +44,6 @@ const char *json_number_chars = "0123456789.+-eE"; const char *json_hex_chars = "0123456789abcdefABCDEF"; -static void json_object_generic_delete(struct json_object* jso); static struct json_object* json_object_new(enum json_type o_type); static json_object_to_json_string_fn json_object_object_to_json_string; @@ -54,6 +53,12 @@ static json_object_to_json_string_fn json_object_int_to_json_string; static json_object_to_json_string_fn json_object_string_to_json_string; static json_object_to_json_string_fn json_object_array_to_json_string; +typedef void (json_object_private_delete_fn)(struct json_object *o); + +static json_object_private_delete_fn json_object_string_delete; +static json_object_private_delete_fn json_object_array_delete; +static json_object_private_delete_fn json_object_object_delete; +static json_object_private_delete_fn json_object_generic_delete; /* ref count debugging */ @@ -205,7 +210,16 @@ int json_object_put(struct json_object *jso) if (jso->_user_delete) jso->_user_delete(jso, jso->_userdata); - jso->_delete(jso); + + if (jso->o_type == json_type_string) + json_object_string_delete(jso); + else if (jso->o_type == json_type_array) + json_object_array_delete(jso); + else if (jso->o_type == json_type_object) + json_object_object_delete(jso); + 
else + json_object_generic_delete(jso); + return 1; } @@ -232,7 +246,6 @@ static struct json_object* json_object_new(enum json_type o_type) return NULL; jso->o_type = o_type; jso->_ref_count = 1; - jso->_delete = &json_object_generic_delete; #ifdef REFCOUNT_DEBUG lh_table_insert(json_object_table, jso, jso); MC_DEBUG("json_object_new_%s: %p\n", json_type_to_name(jso->o_type), jso); @@ -441,7 +454,6 @@ struct json_object* json_object_new_object(void) struct json_object *jso = json_object_new(json_type_object); if (!jso) return NULL; - jso->_delete = &json_object_object_delete; jso->_to_json_string = &json_object_object_to_json_string; jso->o.c_object = lh_kchar_table_new(JSON_OBJECT_DEF_HASH_ENTRIES, &json_object_lh_entry_free); @@ -1020,7 +1032,6 @@ struct json_object* json_object_new_string(const char *s) struct json_object *jso = json_object_new(json_type_string); if (!jso) return NULL; - jso->_delete = &json_object_string_delete; jso->_to_json_string = &json_object_string_to_json_string; jso->o.c_string.len = strlen(s); if(jso->o.c_string.len < LEN_DIRECT_STRING_DATA) { @@ -1043,7 +1054,6 @@ struct json_object* json_object_new_string_len(const char *s, const int len) struct json_object *jso = json_object_new(json_type_string); if (!jso) return NULL; - jso->_delete = &json_object_string_delete; jso->_to_json_string = &json_object_string_to_json_string; if(len < LEN_DIRECT_STRING_DATA) { dstbuf = jso->o.c_string.str.data; @@ -1172,7 +1182,6 @@ struct json_object* json_object_new_array(void) struct json_object *jso = json_object_new(json_type_array); if (!jso) return NULL; - jso->_delete = &json_object_array_delete; jso->_to_json_string = &json_object_array_to_json_string; jso->o.c_array = array_list_new(&json_object_array_entry_free); if(jso->o.c_array == NULL) diff --git a/json_object_private.h b/json_object_private.h index 4c6681a..d964b16 100644 --- a/json_object_private.h +++ b/json_object_private.h @@ -22,13 +22,10 @@ extern "C" { #define 
LEN_DIRECT_STRING_DATA 32 /**< how many bytes are directly stored in json_object for strings? */ -typedef void (json_object_private_delete_fn)(struct json_object *o); - struct json_object { enum json_type o_type; uint32_t _ref_count; - json_object_private_delete_fn *_delete; json_object_to_json_string_fn *_to_json_string; struct printbuf *_pb; union data { From ab3e40b37c482f049801fa47f153289111801cf8 Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Sun, 2 Dec 2018 00:23:30 +0100 Subject: [PATCH 2/8] json_object_deep_copy: fix deep copy of strings containing '\0' --- json_object.c | 2 +- tests/test_deep_copy.c | 1 + tests/test_deep_copy.expected | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/json_object.c b/json_object.c index 1d9a53c..e3c5744 100644 --- a/json_object.c +++ b/json_object.c @@ -1388,7 +1388,7 @@ int json_c_shallow_copy_default(json_object *src, json_object *parent, const cha break; case json_type_string: - *dst = json_object_new_string(get_string_component(src)); + *dst = json_object_new_string_len(get_string_component(src), src->o.c_string.len); break; case json_type_object: diff --git a/tests/test_deep_copy.c b/tests/test_deep_copy.c index 7a6e63f..53b8a84 100644 --- a/tests/test_deep_copy.c +++ b/tests/test_deep_copy.c @@ -18,6 +18,7 @@ static const char *json_str1 = " \"GlossDiv\": {" " \"title\": \"S\"," " \"null_obj\": null, " +" \"null_str\": \" \\u0000 \"," " \"GlossList\": {" " \"GlossEntry\": {" " \"ID\": \"SGML\"," diff --git a/tests/test_deep_copy.expected b/tests/test_deep_copy.expected index d009e94..ef65b64 100644 --- a/tests/test_deep_copy.expected +++ b/tests/test_deep_copy.expected @@ -13,6 +13,7 @@ Printing JSON objects for visual inspection "GlossDiv":{ "title":"S", "null_obj":null, + "null_str":" \u0000 ", "GlossList":{ "GlossEntry":{ "ID":"SGML", From 158c248d5cdee5ffacc3f35e567682021b78c31c Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Sat, 8 Dec 2018 19:07:49 +0100 Subject: [PATCH 3/8] json_tokener: 
optimize check for whitespace characters speedup for 32-bit: ~15% speedup for 64-bit: ~ 2% --- json_tokener.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/json_tokener.c b/json_tokener.c index 561f730..d168e3f 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -50,6 +50,22 @@ # error You do not have strncasecmp on your system. #endif /* HAVE_STRNCASECMP */ +/* The following helper functions are used to speed up parsing. They + * are faster than their ctype counterparts because they assume that + * the input is in ASCII and that the locale is set to "C". The + * compiler will also inline these functions, providing an additional + * speedup by saving on function calls. + */ +static int is_ws_char(char c) +{ + return c == ' ' + || c == '\t' + || c == '\n' + || c == '\v' + || c == '\f' + || c == '\r'; +} + /* Use C99 NAN by default; if not available, nan("") should work too. */ #ifndef NAN #define NAN nan("") @@ -295,7 +311,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, case json_tokener_state_eatws: /* Advance until we change state */ - while (isspace((unsigned char)c)) { + while (is_ws_char(c)) { if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok))) goto out; } From 45c601bfa488c35cdd953c6fbdb3ffa1a7c90abf Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Sat, 8 Dec 2018 19:13:25 +0100 Subject: [PATCH 4/8] json_tokener: optimize check for hex characters speedup for 32-bit: ~1% speedup for 64-bit: ~1% --- json_tokener.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/json_tokener.c b/json_tokener.c index d168e3f..b3b28b6 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -66,6 +66,13 @@ static int is_ws_char(char c) || c == '\r'; } +static int is_hex_char(char c) +{ + return (c >= '0' && c <= '9') + || (c >= 'A' && c <= 'F') + || (c >= 'a' && c <= 'f'); +} + /* Use C99 NAN by default; if not available, nan("") should work too. 
*/ #ifndef NAN #define NAN nan("") @@ -609,7 +616,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, /* Handle a 4-byte sequence, or two sequences if a surrogate pair */ while(1) { - if (c && strchr(json_hex_chars, c)) { + if (c && is_hex_char(c)) { tok->ucs_char += ((unsigned int)jt_hexdigit(c) << ((3-tok->st_pos++)*4)); if(tok->st_pos == 4) { unsigned char unescaped_utf[4]; @@ -641,7 +648,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, got_hi_surrogate = tok->ucs_char; /* Not at end, and the next two chars should be "\u" */ if ((len == -1 || len > (tok->char_offset + 2)) && - // str[0] != '0' && // implied by json_hex_chars, above. + // str[0] != '0' && // implied by is_hex_char, above. (str[1] == '\\') && (str[2] == 'u')) { From d98fc501fb69df7612d22c19ea1bc2315fbc8151 Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Sat, 8 Dec 2018 19:17:25 +0100 Subject: [PATCH 5/8] json_tokener: optimize check for number characters speedup for 32-bit: ~5% speedup for 64-bit: ~3% --- json_object.c | 1 - json_object_private.h | 1 - json_tokener.c | 12 +++++++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/json_object.c b/json_object.c index e3c5744..cea5245 100644 --- a/json_object.c +++ b/json_object.c @@ -41,7 +41,6 @@ // Don't define this. It's not thread-safe. 
/* #define REFCOUNT_DEBUG 1 */ -const char *json_number_chars = "0123456789.+-eE"; const char *json_hex_chars = "0123456789abcdefABCDEF"; static struct json_object* json_object_new(enum json_type o_type); diff --git a/json_object_private.h b/json_object_private.h index d964b16..a023a85 100644 --- a/json_object_private.h +++ b/json_object_private.h @@ -51,7 +51,6 @@ struct json_object void _json_c_set_last_err(const char *err_fmt, ...); -extern const char *json_number_chars; extern const char *json_hex_chars; #ifdef __cplusplus diff --git a/json_tokener.c b/json_tokener.c index b3b28b6..b5fb210 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -73,6 +73,16 @@ static int is_hex_char(char c) || (c >= 'a' && c <= 'f'); } +static int is_number_char(char c) +{ + return (c >= '0' && c <= '9') + || c == '.' + || c == '+' + || c == '-' + || c == 'e' + || c == 'E'; +} + /* Use C99 NAN by default; if not available, nan("") should work too. */ #ifndef NAN #define NAN nan("") @@ -757,7 +767,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, int case_len=0; int is_exponent=0; int negativesign_next_possible_location=1; - while(c && strchr(json_number_chars, c)) { + while(c && is_number_char(c)) { ++case_len; /* non-digit characters checks */ From c9a0ac5886ccdb79457d89e5d197fa041195df04 Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Sat, 8 Dec 2018 19:28:46 +0100 Subject: [PATCH 6/8] json_tokener: optimize parsing of integer values speedup for 32-bit: ~8% speedup for 64-bit: ~9% --- json_tokener.c | 2 +- json_util.c | 52 ++++++++++++++++++++++++++++++++++++++------------ json_util.h | 1 + 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/json_tokener.c b/json_tokener.c index b5fb210..6fc4937 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -824,7 +824,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok, { int64_t num64; double numd; - if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) { + if 
(!tok->is_double && json_parse_sanitized_int64(tok->pb->buf, tok->pb->bpos, &num64) == 0) { if (num64 && tok->pb->buf[0]=='0' && (tok->flags & JSON_TOKENER_STRICT)) { /* in strict mode, number must not start with 0 */ diff --git a/json_util.c b/json_util.c index ad7704a..d36ef6f 100644 --- a/json_util.c +++ b/json_util.c @@ -39,10 +39,6 @@ #endif /* HAVE_UNISTD_H */ #ifdef WIN32 -# if MSC_VER < 1800 -/* strtoll is available only since Visual Studio 2013 */ -# define strtoll _strtoi64 -# endif # define WIN32_LEAN_AND_MEAN # include <windows.h> # include <io.h> @@ -195,16 +191,48 @@ int json_parse_double(const char *buf, double *retval) return end == buf ? 1 : 0; } +// The input buffer 'buf' must contain only digits (0 to 9), except +// for the first character, which may be a negative sign '-'. +int json_parse_sanitized_int64(const char *buf, size_t len, int64_t *retval) +{ + uint64_t uval = 0; + int is_negative = (*buf == '-'); + size_t ii = is_negative ? 1 : 0; + + if (ii == len || buf[ii] == '\0') + return 1; + + while (ii < len) + { + uint64_t tmp = (uval * 10) + buf[ii++] - '0'; + // Check for overflow. + if ((int64_t) uval > (int64_t) tmp) + { + *retval = is_negative ? INT64_MIN : INT64_MAX; + return 0; + } + uval = tmp; + } + + *retval = is_negative ? -uval : uval; + + return 0; +} + int json_parse_int64(const char *buf, int64_t *retval) { - char *end = NULL; - int64_t val; - - errno = 0; - val = strtoll(buf, &end, 10); - if (end != buf) - *retval = val; - return ((val == 0 && errno != 0) || (end == buf)) ? 1 : 0; + size_t len = 0; + // Skip leading white spaces. + while (isspace(*buf)) + buf++; + // Calculate length of valid input. 
+ if (buf[len] == '-') + len++; + while (buf[len] >= '0' && buf[len] <= '9') + len++; + if (len == 0) + return 1; + return json_parse_sanitized_int64(buf, len, retval); } #ifndef HAVE_REALLOC diff --git a/json_util.h b/json_util.h index 3e1b294..bd7f970 100644 --- a/json_util.h +++ b/json_util.h @@ -90,6 +90,7 @@ extern int json_object_to_fd(int fd, struct json_object *obj, int flags); const char *json_util_get_last_err(void); +extern int json_parse_sanitized_int64(const char *buf, size_t len, int64_t *retval); extern int json_parse_int64(const char *buf, int64_t *retval); extern int json_parse_double(const char *buf, double *retval); From 906188e1cfb4a8b64928dc1d872e5aa05ceea87f Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Sat, 8 Dec 2018 22:14:41 +0100 Subject: [PATCH 7/8] json_object: speed up creation of objects Instead of using calloc(), call malloc() and initialize the relevant fields individually. speedup for 32-bit: ~15% speedup for 64-bit: ~ 5% --- json_object.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/json_object.c b/json_object.c index cea5245..9dd9c08 100644 --- a/json_object.c +++ b/json_object.c @@ -240,11 +240,14 @@ static struct json_object* json_object_new(enum json_type o_type) { struct json_object *jso; - jso = (struct json_object*)calloc(sizeof(struct json_object), 1); + jso = (struct json_object*)malloc(sizeof(struct json_object)); if (!jso) return NULL; jso->o_type = o_type; jso->_ref_count = 1; + jso->_pb = NULL; + jso->_user_delete = NULL; + jso->_userdata = NULL; #ifdef REFCOUNT_DEBUG lh_table_insert(json_object_table, jso, jso); MC_DEBUG("json_object_new_%s: %p\n", json_type_to_name(jso->o_type), jso); @@ -1034,7 +1037,7 @@ struct json_object* json_object_new_string(const char *s) jso->_to_json_string = &json_object_string_to_json_string; jso->o.c_string.len = strlen(s); if(jso->o.c_string.len < LEN_DIRECT_STRING_DATA) { - memcpy(jso->o.c_string.str.data, s, jso->o.c_string.len); + 
memcpy(jso->o.c_string.str.data, s, jso->o.c_string.len + 1); } else { jso->o.c_string.str.ptr = strdup(s); if (!jso->o.c_string.str.ptr) From 38a112380b9ff49a34335664cef8ffdb52be7d85 Mon Sep 17 00:00:00 2001 From: Ramiro Polla Date: Sat, 8 Dec 2018 23:30:19 +0100 Subject: [PATCH 8/8] json_object: cleanup of *set_string* functions This commit also has the side-effect that errno is set on failed calls to json_object_set_string(_len). --- json_object.c | 87 +++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 47 deletions(-) diff --git a/json_object.c b/json_object.c index 9dd9c08..70a0f26 100644 --- a/json_object.c +++ b/json_object.c @@ -1029,49 +1029,45 @@ static void json_object_string_delete(struct json_object* jso) json_object_generic_delete(jso); } -struct json_object* json_object_new_string(const char *s) +static int set_string_len(struct json_object *jso, const char *s, int len) { - struct json_object *jso = json_object_new(json_type_string); - if (!jso) - return NULL; - jso->_to_json_string = &json_object_string_to_json_string; - jso->o.c_string.len = strlen(s); - if(jso->o.c_string.len < LEN_DIRECT_STRING_DATA) { - memcpy(jso->o.c_string.str.data, s, jso->o.c_string.len + 1); - } else { - jso->o.c_string.str.ptr = strdup(s); - if (!jso->o.c_string.str.ptr) + char *dstbuf = NULL; + if (len < LEN_DIRECT_STRING_DATA) + { + dstbuf = jso->o.c_string.str.data; + } + else + { + dstbuf = (char *) malloc(len + 1); + if (dstbuf == NULL) { - json_object_generic_delete(jso); errno = ENOMEM; - return NULL; + return 0; } + jso->o.c_string.str.ptr = dstbuf; } - return jso; + memcpy(dstbuf, (const void *)s, len); + dstbuf[len] = '\0'; + jso->o.c_string.len = len; + return 1; +} + +struct json_object *json_object_new_string(const char *s) +{ + return json_object_new_string_len(s, (int)strlen(s)); } -struct json_object* json_object_new_string_len(const char *s, const int len) +struct json_object *json_object_new_string_len(const char *s, 
const int len) { - char *dstbuf; struct json_object *jso = json_object_new(json_type_string); if (!jso) return NULL; jso->_to_json_string = &json_object_string_to_json_string; - if(len < LEN_DIRECT_STRING_DATA) { - dstbuf = jso->o.c_string.str.data; - } else { - jso->o.c_string.str.ptr = (char*)malloc(len + 1); - if (!jso->o.c_string.str.ptr) - { - json_object_generic_delete(jso); - errno = ENOMEM; - return NULL; - } - dstbuf = jso->o.c_string.str.ptr; + if (set_string_len(jso, s, len) == 0) + { + json_object_generic_delete(jso); + return NULL; } - memcpy(dstbuf, (const void *)s, len); - dstbuf[len] = '\0'; - jso->o.c_string.len = len; return jso; } @@ -1101,26 +1097,23 @@ int json_object_get_string_len(const struct json_object *jso) } } -int json_object_set_string(json_object* jso, const char* s) { +int json_object_set_string(json_object *jso, const char *s) +{ return json_object_set_string_len(jso, s, (int)(strlen(s))); } -int json_object_set_string_len(json_object* jso, const char* s, int len){ - char *dstbuf; - if (jso==NULL || jso->o_type!=json_type_string) return 0; - if (len<LEN_DIRECT_STRING_DATA) { - dstbuf=jso->o.c_string.str.data; - if (jso->o.c_string.len>=LEN_DIRECT_STRING_DATA) free(jso->o.c_string.str.ptr); - } else { - dstbuf=(char *)malloc(len+1); - if (dstbuf==NULL) return 0; - if (jso->o.c_string.len>=LEN_DIRECT_STRING_DATA) free(jso->o.c_string.str.ptr); - jso->o.c_string.str.ptr=dstbuf; - } - jso->o.c_string.len=len; - memcpy(dstbuf, (const void *)s, len); - dstbuf[len] = '\0'; - return 1; +int json_object_set_string_len(json_object *jso, const char *s, int len) +{ + char *old_ptr = NULL; + int ret; + if (jso == NULL || jso->o_type != json_type_string) + return 0; + if (jso->o.c_string.len >= LEN_DIRECT_STRING_DATA) + old_ptr = jso->o.c_string.str.ptr; + ret = set_string_len(jso, s, len); + if (ret != 0 && old_ptr != NULL) + free(old_ptr); + return ret; } /* json_object_array */