Browse Source

optimizations to json_tokener_parse_ex(), printbuf_memappend()

-- Brent Miller, bdmiller at yahoo dash inc dot com


git-svn-id: http://svn.metaparadigm.com/svn/json-c/trunk@34 327403b1-1117-474d-bef2-5cb71233fd97
tags/json-c-0.10-20120530
Michael Clark 16 years ago
parent
commit
95f55a761c
4 changed files with 226 additions and 98 deletions
  1. +2
    -0
      ChangeLog
  2. +197
    -92
      json_tokener.c
  3. +9
    -6
      printbuf.c
  4. +18
    -0
      printbuf.h

+ 2
- 0
ChangeLog View File

@@ -1,4 +1,6 @@
0.9
* optimizations to json_tokener_parse_ex(), printbuf_memappend()
Brent Miller, bdmiller at yahoo dash inc dot com
* Don't use this as a variable, so we can compile with a C++ compiler
* Add casts from void* to type of assignment when using malloc
* Add #ifdef __cplusplus guards to all of the headers


+ 197
- 92
json_tokener.c View File

@@ -7,6 +7,10 @@
* This library is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See COPYING for details.
*
*
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
* The copyrights to the contents of this file are licensed under the MIT License
* (http://www.opensource.org/licenses/mit-license.php)
*/

#include "config.h"
@@ -135,35 +139,68 @@ char* strndup(const char* str, size_t n)
#define current tok->stack[tok->depth].current
#define obj_field_name tok->stack[tok->depth].obj_field_name

/* Optimization:
* json_tokener_parse_ex() consumed a lot of CPU in its main loop,
* iterating character by character. A large performance boost is
* achieved by using tighter loops to locally handle units such as
* comments and strings. Loops that handle an entire token within
* their scope also gather entire strings and pass them to
* printbuf_memappend() in a single call, rather than calling
* printbuf_memappend() one char at a time.
*
* POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
* common to both the main loop and the tighter loops.
*/

/* POP_CHAR(dest, tok):
 * Peeks at the current input character; despite the name nothing is
 * consumed (neither str nor tok->char_offset is changed).
 *
 * While tok->char_offset != len: stores *str in dest and evaluates to 1.
 * When tok->char_offset == len: leaves dest untouched, sets tok->err and
 * evaluates to 0.  tok->err becomes json_tokener_success only if
 * tok->depth is 0, state is json_tokener_state_eatws and saved_state is
 * json_tokener_state_finish; in every other case it becomes
 * json_tokener_continue.
 *
 * Implicit inputs (must be visible where the macro is expanded):
 * str, len, state, saved_state.
 */
#define POP_CHAR(dest, tok) \
  (((tok)->char_offset != len) ? \
    (((dest) = *str), 1) \
  : \
    (((tok)->err = \
        ((tok)->depth == 0 && \
         state == json_tokener_state_eatws && \
         saved_state == json_tokener_state_finish) \
          ? json_tokener_success \
          : json_tokener_continue), 0) \
  )
/* ADVANCE_CHAR(str, tok):
 * Steps to the next input character by incrementing both str and
 * tok->char_offset by one.
 * Evaluates to c, which this macro does not modify, so existing
 * conditionals can keep testing for the terminating 0 character.
 * Implicit input: a variable named c visible where the macro is expanded.
 */
#define ADVANCE_CHAR(str, tok) \
  (((str) += 1), ((tok)->char_offset += 1), c)

/* End optimization macro defs */


struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
char *str, int len)
{
struct json_object *obj = NULL;
char c;
char c = '\1';

tok->char_offset = 0;
tok->err = json_tokener_success;

do {
if(tok->char_offset == len) {
if(tok->depth == 0 && state == json_tokener_state_eatws &&
saved_state == json_tokener_state_finish)
tok->err = json_tokener_success;
else
tok->err = json_tokener_continue;
goto out;
}
while (POP_CHAR(c, tok)) {

c = *str;
redo_char:
switch(state) {

case json_tokener_state_eatws:
if(isspace(c)) {
/* okay */
} else if(c == '/') {
/* Advance until we change state */
while (isspace(c)) {
if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
goto out;
}
if(c == '/') {
printbuf_reset(tok->pb);
printbuf_memappend(tok->pb, &c, 1);
printbuf_memappend_fast(tok->pb, &c, 1);
state = json_tokener_state_comment_start;
} else {
state = saved_state;
@@ -236,7 +273,7 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
goto redo_char;

case json_tokener_state_null:
printbuf_memappend(tok->pb, &c, 1);
printbuf_memappend_fast(tok->pb, &c, 1);
if(strncasecmp(json_null_str, tok->pb->buf,
min(tok->st_pos+1, strlen(json_null_str))) == 0) {
if(tok->st_pos == strlen(json_null_str)) {
@@ -261,25 +298,42 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
tok->err = json_tokener_error_parse_comment;
goto out;
}
printbuf_memappend(tok->pb, &c, 1);
printbuf_memappend_fast(tok->pb, &c, 1);
break;

case json_tokener_state_comment:
if(c == '*') state = json_tokener_state_comment_end;
printbuf_memappend(tok->pb, &c, 1);
break;
{
/* Advance until we change state */
char *case_start = str;
while(c != '*') {
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
}
}
printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
state = json_tokener_state_comment_end;
}
break;

case json_tokener_state_comment_eol:
if(c == '\n') {
{
/* Advance until we change state */
char *case_start = str;
while(c != '\n') {
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
}
}
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
state = json_tokener_state_eatws;
} else {
printbuf_memappend(tok->pb, &c, 1);
}
break;

case json_tokener_state_comment_end:
printbuf_memappend(tok->pb, &c, 1);
printbuf_memappend_fast(tok->pb, &c, 1);
if(c == '/') {
MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
state = json_tokener_state_eatws;
@@ -289,15 +343,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break;

case json_tokener_state_string:
if(c == tok->quote_char) {
current = json_object_new_string(tok->pb->buf);
saved_state = json_tokener_state_finish;
state = json_tokener_state_eatws;
} else if(c == '\\') {
saved_state = json_tokener_state_string;
state = json_tokener_state_string_escape;
} else {
printbuf_memappend(tok->pb, &c, 1);
{
/* Advance until we change state */
char *case_start = str;
while(1) {
if(c == tok->quote_char) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
current = json_object_new_string(tok->pb->buf);
saved_state = json_tokener_state_finish;
state = json_tokener_state_eatws;
break;
} else if(c == '\\') {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
saved_state = json_tokener_state_string;
state = json_tokener_state_string_escape;
break;
}
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
}
}
}
break;

@@ -306,17 +372,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
case '"':
case '\\':
case '/':
printbuf_memappend(tok->pb, &c, 1);
printbuf_memappend_fast(tok->pb, &c, 1);
state = saved_state;
break;
case 'b':
case 'n':
case 'r':
case 't':
if(c == 'b') printbuf_memappend(tok->pb, "\b", 1);
else if(c == 'n') printbuf_memappend(tok->pb, "\n", 1);
else if(c == 'r') printbuf_memappend(tok->pb, "\r", 1);
else if(c == 't') printbuf_memappend(tok->pb, "\t", 1);
if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
state = saved_state;
break;
case 'u':
@@ -331,33 +397,46 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break;

case json_tokener_state_escape_unicode:
if(strchr(json_hex_chars, c)) {
tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
if(tok->st_pos == 4) {
unsigned char utf_out[3];
if (tok->ucs_char < 0x80) {
utf_out[0] = tok->ucs_char;
printbuf_memappend(tok->pb, (char*)utf_out, 1);
} else if (tok->ucs_char < 0x800) {
utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
printbuf_memappend(tok->pb, (char*)utf_out, 2);
} else {
utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
printbuf_memappend(tok->pb, (char*)utf_out, 3);
}
state = saved_state;
/* Note that the following code is inefficient for handling large
* chunks of extended chars, calling printbuf_memappend() once
* for each multi-byte character of input.
* This is a good area for future optimization.
*/
{
/* Advance until we change state */
while(1) {
if(strchr(json_hex_chars, c)) {
tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
if(tok->st_pos == 4) {
unsigned char utf_out[3];
if (tok->ucs_char < 0x80) {
utf_out[0] = tok->ucs_char;
printbuf_memappend_fast(tok->pb, (char*)utf_out, 1);
} else if (tok->ucs_char < 0x800) {
utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
printbuf_memappend_fast(tok->pb, (char*)utf_out, 2);
} else {
utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
printbuf_memappend_fast(tok->pb, (char*)utf_out, 3);
}
state = saved_state;
break;
}
} else {
tok->err = json_tokener_error_parse_string;
goto out;
}
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok))
goto out;
}
} else {
tok->err = json_tokener_error_parse_string;
goto out;
}
break;

case json_tokener_state_boolean:
printbuf_memappend(tok->pb, &c, 1);
printbuf_memappend_fast(tok->pb, &c, 1);
if(strncasecmp(json_true_str, tok->pb->buf,
min(tok->st_pos+1, strlen(json_true_str))) == 0) {
if(tok->st_pos == strlen(json_true_str)) {
@@ -382,23 +461,35 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break;

case json_tokener_state_number:
if(c && strchr(json_number_chars, c)) {
printbuf_memappend(tok->pb, &c, 1);
if(c == '.' || c == 'e' || c == 'E') tok->is_double = 1;
} else {
int numi;
double numd;
if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
current = json_object_new_int(numi);
} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
current = json_object_new_double(numd);
} else {
tok->err = json_tokener_error_parse_number;
goto out;
{
/* Advance until we change state */
char *case_start = str;
int case_len=0;
while(c && strchr(json_number_chars, c)) {
++case_len;
if(c == '.' || c == 'e') tok->is_double = 1;
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, case_len);
goto out;
}
}
saved_state = json_tokener_state_finish;
state = json_tokener_state_eatws;
goto redo_char;
if (case_len>0)
printbuf_memappend_fast(tok->pb, case_start, case_len);
}
{
int numi;
double numd;
if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
current = json_object_new_int(numi);
} else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
current = json_object_new_double(numd);
} else {
tok->err = json_tokener_error_parse_number;
goto out;
}
saved_state = json_tokener_state_finish;
state = json_tokener_state_eatws;
goto redo_char;
}
break;

@@ -452,15 +543,27 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break;

case json_tokener_state_object_field:
if(c == tok->quote_char) {
obj_field_name = strdup(tok->pb->buf);
saved_state = json_tokener_state_object_field_end;
state = json_tokener_state_eatws;
} else if(c == '\\') {
saved_state = json_tokener_state_object_field;
state = json_tokener_state_string_escape;
} else {
printbuf_memappend(tok->pb, &c, 1);
{
/* Advance until we change state */
char *case_start = str;
while(1) {
if(c == tok->quote_char) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
obj_field_name = strdup(tok->pb->buf);
saved_state = json_tokener_state_object_field_end;
state = json_tokener_state_eatws;
break;
} else if(c == '\\') {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
saved_state = json_tokener_state_object_field;
state = json_tokener_state_string_escape;
break;
}
if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
printbuf_memappend_fast(tok->pb, case_start, str-case_start);
goto out;
}
}
}
break;

@@ -506,15 +609,17 @@ struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
break;

}
str++;
tok->char_offset++;
} while(c);

if(state != json_tokener_state_finish &&
saved_state != json_tokener_state_finish)
tok->err = json_tokener_error_parse_eof;
if (!ADVANCE_CHAR(str, tok))
goto out;
} /* while(POP_CHAR) */

out:
if (!c) { /* We hit an eof char (0) */
if(state != json_tokener_state_finish &&
saved_state != json_tokener_state_finish)
tok->err = json_tokener_error_parse_eof;
}

if(tok->err == json_tokener_success) return json_object_get(current);
MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
json_tokener_errors[tok->err], tok->char_offset);


+ 9
- 6
printbuf.c View File

@@ -7,6 +7,10 @@
* This library is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See COPYING for details.
*
*
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
* The copyrights to the contents of this file are licensed under the MIT License
* (http://www.opensource.org/licenses/mit-license.php)
*/

#include "config.h"
@@ -118,16 +122,15 @@ int sprintbuf(struct printbuf *p, const char *msg, ...)
if output is truncated whereas some return the number of bytes that
would have been written - this code handles both cases. */
if(size == -1 || size > 127) {
int ret;
va_start(ap, msg);
size = vasprintf(&t, msg, ap);
if((size = vasprintf(&t, msg, ap)) == -1) return -1;
va_end(ap);
if(size == -1) return -1;
ret = printbuf_memappend(p, t, size);
printbuf_memappend(p, t, size);
free(t);
return ret;
return size;
} else {
return printbuf_memappend(p, buf, size);
printbuf_memappend(p, buf, size);
return size;
}
}



+ 18
- 0
printbuf.h View File

@@ -7,6 +7,10 @@
* This library is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See COPYING for details.
*
*
* Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
* The copyrights to the contents of this file are licensed under the MIT License
* (http://www.opensource.org/licenses/mit-license.php)
*/

#ifndef _printbuf_h_
@@ -27,9 +31,23 @@ struct printbuf {
extern struct printbuf*
printbuf_new(void);

/* As an optimization, printbuf_memappend_fast() is defined as a macro that
* handles copying data if the buffer is large enough; otherwise it
* invokes printbuf_memappend() which performs the heavy lifting
* of realloc()ing the buffer and copying data.
*/
extern int
printbuf_memappend(struct printbuf *p, const char *buf, int size);

/* printbuf_memappend_fast(p, bufptr, bufsize):
 * Inline fast path for appending bufsize bytes from bufptr to printbuf p.
 *
 * When (p)->size - (p)->bpos is strictly greater than bufsize, the bytes
 * are copied to (p)->buf + (p)->bpos, bpos is advanced by bufsize and a
 * '\0' is stored at the new bpos.  Otherwise the call is forwarded to
 * printbuf_memappend().
 *
 * This is a statement, not an expression: unlike printbuf_memappend() it
 * yields no value.
 *
 * Every parameter is parenthesized so that arguments such as &pb or
 * end - start expand with the intended precedence.  p and bufsize are
 * still evaluated more than once, so arguments must not have side effects.
 * Uses memcpy(), so <string.h> must be included where the macro is used.
 */
#define printbuf_memappend_fast(p, bufptr, bufsize) \
do { \
  if (((p)->size - (p)->bpos) > (bufsize)) { \
    memcpy((p)->buf + (p)->bpos, (bufptr), (bufsize)); \
    (p)->bpos += (bufsize); \
    (p)->buf[(p)->bpos] = '\0'; \
  } else { \
    printbuf_memappend((p), (bufptr), (bufsize)); \
  } \
} while (0)

extern int
sprintbuf(struct printbuf *p, const char *msg, ...);



Loading…
Cancel
Save