Skip to content

Commit

Permalink
Add optional C extension wrapper for Python JSON parsing
Browse files Browse the repository at this point in the history
The pure Python in-tree JSON parser is *much* slower than the
in-tree C JSON parser. A local test parsing a 100 MB JSON file
showed the Python version taking 270 seconds. With the C wrapper,
it took under 4 seconds.

The C extension will be used automatically if it can be built. If
the extension fails to build, a warning is displayed and the build
is restarted without the extension.

The Serializer class is replaced with Python's built-in
JSON library since the ability to process chunked data is not
needed in that case.

The extension should work with both Python 2.7 and Python 3.3+.

Signed-off-by: Terry Wilson <[email protected]>
Signed-off-by: Ben Pfaff <[email protected]>
  • Loading branch information
otherwiseguy authored and blp committed Jun 8, 2016
1 parent 2c362f1 commit c63b04d
Show file tree
Hide file tree
Showing 5 changed files with 332 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ config-h-check:
@cd $(srcdir); \
if test -e .git && (git --version) >/dev/null 2>&1 && \
git --no-pager grep -L '#include <config\.h>' `git ls-files | grep '\.c$$' | \
grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows'`; \
grep -vE '^datapath|^lib/sflow|^third-party|^datapath-windows|^python'`; \
then \
echo "See above for list of violations of the rule that"; \
echo "every C source file must #include <config.h>."; \
Expand Down
3 changes: 3 additions & 0 deletions python/automake.mk
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ EXTRA_DIST += \
python/README.rst \
python/setup.py

# C extension support.
EXTRA_DIST += python/ovs/_json.c

PYFILES = $(ovs_pyfiles) python/ovs/dirs.py $(ovstest_pyfiles)
EXTRA_DIST += $(PYFILES)
PYCOV_CLEAN_FILES += $(PYFILES:.py=.py,cover)
Expand Down
268 changes: 268 additions & 0 deletions python/ovs/_json.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
#include "Python.h"
#include <openvswitch/lib/json.h>
#include "structmember.h"

#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
#endif

/* Instance layout of the Python-visible Parser type.
 *
 * '_parser' holds the handle obtained from json_parser_create() in
 * Parser_new().  Parser_finish() resets it to NULL after passing it to
 * json_parser_finish(), so the methods treat a NULL '_parser' as "nothing
 * left to operate on". */
typedef struct {
PyObject_HEAD
struct json_parser *_parser;
} json_ParserObject;

/* tp_dealloc slot: hands the wrapped parser to json_parser_abort() and then
 * releases the object's own storage through its type's tp_free.
 *
 * NOTE(review): '_parser' is NULL here when finish() was called earlier; this
 * relies on json_parser_abort() accepting NULL -- confirm against lib/json.c. */
static void
Parser_dealloc(json_ParserObject *self)
{
    struct json_parser *parser = self->_parser;

    json_parser_abort(parser);
    Py_TYPE(self)->tp_free(self);
}

/* tp_new slot for ovs._json.Parser.
 *
 * Accepts one optional argument, 'check_trailer', positionally or by keyword.
 * When it is truthy the underlying parser is created with JSPF_TRAILER,
 * otherwise with flags 0.
 *
 * Returns a new Parser object, or NULL (with the exception raised by the
 * failing call) if argument parsing, truth testing or allocation fails. */
static PyObject *
Parser_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    static char *kwlist[] = { "check_trailer", NULL };
    PyObject *check_trailer = NULL;
    json_ParserObject *self;
    int flags = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist,
                                     &check_trailer)) {
        return NULL;
    }

    if (check_trailer != NULL) {
        /* PyObject_IsTrue() yields -1 on error, 0 for false, 1 for true. */
        int truth = PyObject_IsTrue(check_trailer);

        if (truth < 0) {
            return NULL;
        }
        if (truth) {
            flags = JSPF_TRAILER;
        }
    }

    self = (json_ParserObject *) type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }

    self->_parser = json_parser_create(flags);
    return (PyObject *) self;
}

/* Parser.feed(input): passes the text in 'input' to json_parser_feed().
 *
 * 'input' must be a str on Python 3 (it is encoded as UTF-8) or a byte string
 * on Python 2.
 *
 * Returns the value produced by json_parser_feed() as a Python integer
 * (presumably the number of bytes consumed -- confirm against lib/json.h).
 *
 * Raises ValueError if the parser handle is NULL, which is the state
 * Parser_finish() leaves behind; argument and conversion errors propagate
 * from the Python C API calls. */
static PyObject *
Parser_feed(json_ParserObject *self, PyObject *args)
{
    Py_ssize_t input_sz;
    PyObject *input;
    size_t rd;
#ifdef IS_PY3K
    /* PyUnicode_AsUTF8AndSize() returns 'const char *' on newer Python 3
     * releases; a const pointer accepts both the old and the new return
     * type without discarding qualifiers. */
    const char *input_str;
#else
    char *input_str;
#endif

    if (self->_parser == NULL) {
        /* A NULL return must always be accompanied by an exception,
         * otherwise the interpreter reports a SystemError (or aborts in
         * debug builds). */
        PyErr_SetString(PyExc_ValueError,
                        "parser has already been finished");
        return NULL;
    }

    if (!PyArg_UnpackTuple(args, "input", 1, 1, &input)) {
        return NULL;
    }
#ifdef IS_PY3K
    if ((input_str = PyUnicode_AsUTF8AndSize(input, &input_sz)) == NULL) {
#else
    if (PyString_AsStringAndSize(input, &input_str, &input_sz) < 0) {
#endif
        return NULL;
    }

    rd = json_parser_feed(self->_parser, input_str, (size_t) input_sz);

#ifdef IS_PY3K
    return PyLong_FromSize_t(rd);
#else
    return PyInt_FromSize_t(rd);
#endif
}

/* Parser.is_done(): returns True or False according to
 * json_parser_is_done() on the wrapped parser.
 *
 * Raises ValueError if the parser handle is NULL, which is the state
 * Parser_finish() leaves behind.
 *
 * NOTE(review): registered as METH_NOARGS; the interpreter also passes a
 * second (NULL) argument that this signature does not declare -- consider
 * adding an unused 'PyObject *' parameter together with the method table. */
static PyObject *
Parser_is_done(json_ParserObject *self)
{
    if (self->_parser == NULL) {
        /* Never return NULL without an exception set. */
        PyErr_SetString(PyExc_ValueError,
                        "parser has already been finished");
        return NULL;
    }
    return PyBool_FromLong(json_parser_is_done(self->_parser));
}

static PyObject *
json_to_python(struct json *json)
{
switch (json->type) {
case JSON_NULL:
Py_RETURN_NONE;
case JSON_FALSE:
Py_RETURN_FALSE;
case JSON_TRUE:
Py_RETURN_TRUE;
case JSON_OBJECT:{
struct shash_node *node;
PyObject *dict = PyDict_New();

if (dict == NULL) {
return PyErr_NoMemory();
}
SHASH_FOR_EACH(node, json->u.object) {
PyObject *key = PyUnicode_FromString(node->name);
PyObject *val = json_to_python(node->data);

if (!(key && val) || PyDict_SetItem(dict, key, val)) {
Py_XDECREF(key);
Py_XDECREF(val);
Py_XDECREF(dict);
return NULL;
}

Py_XDECREF(key);
Py_XDECREF(val);
}
return dict;
}
case JSON_ARRAY:{
int i;
PyObject *arr = PyList_New(json->u.array.n);

if (arr == NULL) {
return PyErr_NoMemory();
}
for (i = 0; i < json->u.array.n; i++) {
PyObject *item = json_to_python(json->u.array.elems[i]);

if (!item || PyList_SetItem(arr, i, item)) {
Py_XDECREF(arr);
return NULL;
}
}
return arr;
}
case JSON_REAL:
if (json->u.real != 0) {
return PyFloat_FromDouble(json->u.real);
} /* fall through to treat 0 as int */
case JSON_INTEGER:
#ifdef IS_PY3K
return PyLong_FromLong((long) json->u.integer);
#else
return PyInt_FromLong((long) json->u.integer);
#endif

case JSON_STRING:
return PyUnicode_FromString(json->u.string);
default:
return NULL;
}
}

/* Parser.finish(): completes parsing and returns the parsed value converted
 * to Python objects by json_to_python().
 *
 * The parser handle is passed to json_parser_finish() and then cleared, so
 * the object cannot be fed or finished again; later calls raise ValueError.
 *
 * Returns a new reference, or NULL with an exception set if the parser was
 * already finished or the conversion fails. */
static PyObject *
Parser_finish(json_ParserObject *self)
{
    struct json *json;
    PyObject *obj;

    if (self->_parser == NULL) {
        /* Never return NULL without an exception set. */
        PyErr_SetString(PyExc_ValueError,
                        "parser has already been finished");
        return NULL;
    }

    json = json_parser_finish(self->_parser);
    self->_parser = NULL;

    obj = json_to_python(json);

    /* json_to_python() builds independent Python objects (strings are
     * copied), so the intermediate C tree must be released here; without
     * this every finished parse leaks the whole tree. */
    json_destroy(json);
    return obj;
}

/* Method table for the Parser type: maps the Python-visible method names to
 * their C implementations, calling conventions and docstrings.  The list is
 * terminated by the {NULL} sentinel entry.
 *
 * NOTE(review): is_done and finish are METH_NOARGS but their C functions
 * declare only the 'self' parameter and are cast to PyCFunction -- confirm
 * this is acceptable for the supported compilers. */
static PyMethodDef Parser_methods[] = {
{"feed", (PyCFunction) Parser_feed, METH_VARARGS,
"Feed data to the parser and return the index of the last object."},
{"is_done", (PyCFunction) Parser_is_done, METH_NOARGS,
"Whether the parser has finished decoding an object."},
{"finish", (PyCFunction) Parser_finish, METH_NOARGS,
"Finish parsing and return Python object parsed."},
{NULL},
};

/* Type object for ovs._json.Parser.
 *
 * The initializer is positional, so every slot up to tp_new is listed in
 * order and labelled with its name.  Only the name, instance size,
 * deallocator, flags, docstring, method table and constructor are set; all
 * other slots are 0.  Py_TPFLAGS_BASETYPE allows the type to be subclassed
 * from Python. */
static PyTypeObject json_ParserType = {
PyVarObject_HEAD_INIT(NULL, 0)
"ovs._json.Parser", /* tp_name */
sizeof (json_ParserObject), /* tp_basicsize */
0, /* tp_itemsize */
(destructor) Parser_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
"Parser objects", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
Parser_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
Parser_new, /* tp_new */
};

/* Python 3 only: static description of the ovs._json module (name and
 * docstring; no module-level methods or per-module state).
 *
 * INITERROR is the statement the module init function uses to bail out:
 * Python 3 init functions return the module object, so failure is
 * "return NULL"; the Python 2 init function returns void. */
#ifdef IS_PY3K
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"ovs._json", /* m_name */
"OVS JSON Parser module", /* m_doc */
0, /* m_size */
0, /* m_methods */
0, /* m_slots */
0, /* m_traverse */
0, /* m_clear */
0, /* m_free */
};

#define INITERROR return NULL
#else /* !IS_PY3K */
#define INITERROR return
#endif

/* Module initialization entry point (PyInit__json on Python 3, init_json on
 * Python 2).
 *
 * Finalizes the Parser type, creates the ovs._json module and publishes the
 * type in it as "Parser".  On any failure the function bails out through
 * INITERROR with the exception set by the failing C API call. */
PyMODINIT_FUNC
#ifdef IS_PY3K
PyInit__json(void)
#else
init_json(void)
#endif
{
    PyObject *m;

    if (PyType_Ready(&json_ParserType) < 0) {
        INITERROR;
    }
#ifdef IS_PY3K
    m = PyModule_Create(&moduledef);
#else
    m = Py_InitModule3("ovs._json", NULL, "OVS JSON Parser module");
#endif
    if (m == NULL) {
        /* Module creation failed; using 'm' below would dereference NULL. */
        INITERROR;
    }

    /* PyModule_AddObject() steals a reference, but only on success. */
    Py_INCREF(&json_ParserType);
    if (PyModule_AddObject(m, "Parser", (PyObject *) &json_ParserType) < 0) {
        Py_DECREF(&json_ParserType);
#ifdef IS_PY3K
        /* PyModule_Create() returned a new reference; Py_InitModule3()
         * returns a borrowed one, so Python 2 must not release it. */
        Py_DECREF(m);
#endif
        INITERROR;
    }
#ifdef IS_PY3K
    return m;
#endif
}
11 changes: 11 additions & 0 deletions python/ovs/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
import six
from six.moves import range

try:
import ovs._json
except ImportError:
pass

__pychecker__ = 'no-stringiter'

escapes = {ord('"'): u"\\\"",
Expand Down Expand Up @@ -165,6 +170,12 @@ class Parser(object):
# Maximum height of parsing stack. #
MAX_HEIGHT = 1000

# Construct a parser, preferring the C extension's implementation.
# Every constructor argument is forwarded to ovs._json.Parser.  If looking up
# that name raises NameError (presumably because the guarded
# "import ovs._json" at the top of the module failed -- confirm), fall back
# to creating an ordinary instance of this pure-Python class.
def __new__(cls, *args, **kwargs):
try:
return ovs._json.Parser(*args, **kwargs)
except NameError:
return super(Parser, cls).__new__(cls)

def __init__(self, check_trailer=False):
self.check_trailer = check_trailer

Expand Down
51 changes: 49 additions & 2 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
from __future__ import print_function
import sys

from distutils.command.build_ext import build_ext
from distutils.errors import CCompilerError, DistutilsExecError, \
DistutilsPlatformError

import setuptools

VERSION = "unknown"
Expand All @@ -25,8 +29,33 @@
file=sys.stderr)
sys.exit(-1)

# Exception types that try_build_ext.build_extension() treats as "the C
# extension could not be built".  On win32, IOError and ValueError are added
# to the set.
ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError)
if sys.platform == 'win32':
ext_errors += (IOError, ValueError)


# Raised by try_build_ext when building the C extension fails; caught around
# setuptools.setup() to retry the build without the extension.
class BuildFailed(Exception):
pass


# build_ext command that converts extension build failures into BuildFailed
# so the caller can decide how to proceed.
class try_build_ext(build_ext):
# This class allows C extension building to fail
# NOTE: build_ext is not a new-style class

# Run the whole build_ext command; a DistutilsPlatformError becomes
# BuildFailed.
def run(self):
try:
build_ext.run(self)
except DistutilsPlatformError:
raise BuildFailed()

# NOTE(review): the next line looks like residue of the diff rendering (the
# old "setuptools.setup(" call that this change replaces) -- confirm it is
# not part of the class.
setuptools.setup(
# Build a single extension; any exception listed in ext_errors becomes
# BuildFailed.
def build_extension(self, ext):
try:
build_ext.build_extension(self, ext)
except ext_errors:
raise BuildFailed()


setup_args = dict(
name='ovs',
description='Open vSwitch library',
version=VERSION,
Expand All @@ -46,5 +75,23 @@
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
]
],
ext_modules=[setuptools.Extension("ovs._json", sources=["ovs/_json.c"],
libraries=['openvswitch'])],
cmdclass={'build_ext': try_build_ext},
)

# First attempt: run setup with the C extension enabled.  If the extension
# build raises BuildFailed, print a warning banner, drop the extension and
# the custom build_ext command from the arguments, and run setup again so
# the pure-Python package is still built.
try:
setuptools.setup(**setup_args)
except BuildFailed:
BUILD_EXT_WARNING = ("WARNING: The C extension could not be compiled, "
"speedups are not enabled.")
print("*" * 75)
print(BUILD_EXT_WARNING)
print("Failure information, if any, is above.")
print("Retrying the build without the C extension.")
print("*" * 75)

# Remove everything that refers to the extension before retrying.
del(setup_args['cmdclass'])
del(setup_args['ext_modules'])
setuptools.setup(**setup_args)

0 comments on commit c63b04d

Please sign in to comment.