Skip to content

Commit

Permalink
bpo-26219: per opcode cache for LOAD_GLOBAL (pythonGH-12884)
Browse files Browse the repository at this point in the history
This patch implements per opcode cache mechanism, and use it in
only LOAD_GLOBAL opcode.

Based on Yury's opcache3.patch in bpo-26219.
  • Loading branch information
methane authored Jun 3, 2019
1 parent 29ec422 commit 91234a1
Show file tree
Hide file tree
Showing 13 changed files with 285 additions and 16 deletions.
4 changes: 4 additions & 0 deletions Doc/whatsnew/3.8.rst
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,10 @@ Optimizations
methods up to 20--50%. (Contributed by Serhiy Storchaka in :issue:`23867`,
:issue:`35582` and :issue:`36127`.)

* ``LOAD_GLOBAL`` instruction now uses new "per opcode cache" mechanism.
It is about 40% faster now. (Contributed by Yury Selivanov and Inada Naoki in
:issue:`26219`.)


Build and C API Changes
=======================
Expand Down
17 changes: 17 additions & 0 deletions Include/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ typedef uint16_t _Py_CODEUNIT;
# define _Py_OPARG(word) ((word) >> 8)
#endif

typedef struct _PyOpcache _PyOpcache;

/* Bytecode object */
typedef struct {
PyObject_HEAD
Expand Down Expand Up @@ -49,6 +51,21 @@ typedef struct {
Type is a void* to keep the format private in codeobject.c to force
people to go through the proper APIs. */
void *co_extra;

/* Per opcodes just-in-time cache
*
* To reduce cache size, we use indirect mapping from opcode index to
* cache object:
* cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1]
*/

// co_opcache_map is indexed by (next_instr - first_instr).
// * 0 means there is no cache for this opcode.
// * n > 0 means there is cache in co_opcache[n-1].
unsigned char *co_opcache_map;
_PyOpcache *co_opcache;
int co_opcache_flag; // used to determine when create a cache.
unsigned char co_opcache_size; // length of co_opcache.
} PyCodeObject;

/* Masks for co_flags above */
Expand Down
3 changes: 3 additions & 0 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ PyAPI_FUNC(void) _PyEval_SignalAsyncExc(
PyAPI_FUNC(void) _PyEval_ReInitThreads(
_PyRuntimeState *runtime);

/* Private function */
void _PyEval_Fini(void);

#ifdef __cplusplus
}
#endif
Expand Down
27 changes: 27 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef Py_INTERNAL_CODE_H
#define Py_INTERNAL_CODE_H
#ifdef __cplusplus
extern "C" {
#endif

typedef struct {
PyObject *ptr; /* Cached pointer (borrowed reference) */
uint64_t globals_ver; /* ma_version of global dict */
uint64_t builtins_ver; /* ma_version of builtin dict */
} _PyOpcache_LoadGlobal;

struct _PyOpcache {
union {
_PyOpcache_LoadGlobal lg;
} u;
char optimized;
};

/* Private API */
int _PyCode_InitOpcache(PyCodeObject *co);


#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_CODE_H */
6 changes: 3 additions & 3 deletions Lib/test/test_dict_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,14 @@ def test_setitem_same_value(self):

# setting a key to the same value with dict.__setitem__
# must change the version
self.check_version_changed(d, d.__setitem__, 'key', value)
self.check_version_dont_change(d, d.__setitem__, 'key', value)

# setting a key to the same value with dict.update
# must change the version
self.check_version_changed(d, d.update, key=value)
self.check_version_dont_change(d, d.update, key=value)

d2 = self.new_dict(key=value)
self.check_version_changed(d, d.update, d2)
self.check_version_dont_change(d, d.update, d2)

def test_setitem_equal(self):
class AlwaysEqual:
Expand Down
1 change: 1 addition & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_accu.h \
$(srcdir)/Include/internal/pycore_atomic.h \
$(srcdir)/Include/internal/pycore_ceval.h \
$(srcdir)/Include/internal/pycore_code.h \
$(srcdir)/Include/internal/pycore_condvar.h \
$(srcdir)/Include/internal/pycore_context.h \
$(srcdir)/Include/internal/pycore_fileutils.h \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Implemented per opcode cache mechanism and ``LOAD_GLOBAL`` instruction use
it. ``LOAD_GLOBAL`` is now about 40% faster. Contributed by Yury Selivanov,
and Inada Naoki.
65 changes: 65 additions & 0 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

#include "Python.h"
#include "code.h"
#include "opcode.h"
#include "structmember.h"
#include "pycore_code.h"
#include "pycore_pystate.h"
#include "pycore_tupleobject.h"
#include "clinic/codeobject.c.h"
Expand Down Expand Up @@ -233,9 +235,56 @@ PyCode_New(int argcount, int posonlyargcount, int kwonlyargcount,
co->co_zombieframe = NULL;
co->co_weakreflist = NULL;
co->co_extra = NULL;

co->co_opcache_map = NULL;
co->co_opcache = NULL;
co->co_opcache_flag = 0;
co->co_opcache_size = 0;
return co;
}

int
_PyCode_InitOpcache(PyCodeObject *co)
{
Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT);
co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1);
if (co->co_opcache_map == NULL) {
return -1;
}

_Py_CODEUNIT *opcodes = (_Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code);
Py_ssize_t opts = 0;

for (Py_ssize_t i = 0; i < co_size;) {
unsigned char opcode = _Py_OPCODE(opcodes[i]);
i++; // 'i' is now aligned to (next_instr - first_instr)

// TODO: LOAD_METHOD, LOAD_ATTR
if (opcode == LOAD_GLOBAL) {
co->co_opcache_map[i] = ++opts;
if (opts > 254) {
break;
}
}
}

if (opts) {
co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache));
if (co->co_opcache == NULL) {
PyMem_FREE(co->co_opcache_map);
return -1;
}
}
else {
PyMem_FREE(co->co_opcache_map);
co->co_opcache_map = NULL;
co->co_opcache = NULL;
}

co->co_opcache_size = opts;
return 0;
}

PyCodeObject *
PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
{
Expand Down Expand Up @@ -458,6 +507,15 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kw)
static void
code_dealloc(PyCodeObject *co)
{
if (co->co_opcache != NULL) {
PyMem_FREE(co->co_opcache);
}
if (co->co_opcache_map != NULL) {
PyMem_FREE(co->co_opcache_map);
}
co->co_opcache_flag = 0;
co->co_opcache_size = 0;

if (co->co_extra != NULL) {
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
_PyCodeObjectExtra *co_extra = co->co_extra;
Expand Down Expand Up @@ -504,6 +562,13 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
res += sizeof(_PyCodeObjectExtra) +
(co_extra->ce_size-1) * sizeof(co_extra->ce_extras[0]);
}
if (co->co_opcache != NULL) {
assert(co->co_opcache_map != NULL);
// co_opcache_map
res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT);
// co_opcache
res += co->co_opcache_size * sizeof(_PyOpcache);
}
return PyLong_FromSsize_t(res);
}

Expand Down
25 changes: 13 additions & 12 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1080,20 +1080,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
return 0;
}

if (_PyDict_HasSplitTable(mp)) {
mp->ma_values[ix] = value;
if (old_value == NULL) {
/* pending state */
assert(ix == mp->ma_used);
mp->ma_used++;
if (old_value != value) {
if (_PyDict_HasSplitTable(mp)) {
mp->ma_values[ix] = value;
if (old_value == NULL) {
/* pending state */
assert(ix == mp->ma_used);
mp->ma_used++;
}
}
else {
assert(old_value != NULL);
DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
}
mp->ma_version_tag = DICT_NEXT_VERSION();
}
else {
assert(old_value != NULL);
DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
}

mp->ma_version_tag = DICT_NEXT_VERSION();
Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
ASSERT_CONSISTENT(mp);
Py_DECREF(key);
Expand Down
1 change: 1 addition & 0 deletions PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@
<ClInclude Include="..\Include\import.h" />
<ClInclude Include="..\Include\internal\pycore_accu.h" />
<ClInclude Include="..\Include\internal\pycore_atomic.h" />
<ClInclude Include="..\Include\internal\pycore_code.h" />
<ClInclude Include="..\Include\internal\pycore_ceval.h" />
<ClInclude Include="..\Include\internal\pycore_condvar.h" />
<ClInclude Include="..\Include\internal\pycore_context.h" />
Expand Down
3 changes: 3 additions & 0 deletions PCbuild/pythoncore.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@
<ClInclude Include="..\Include\internal\pycore_atomic.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_code.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_ceval.h">
<Filter>Include</Filter>
</ClInclude>
Expand Down
Loading

0 comments on commit 91234a1

Please sign in to comment.