Skip to content

Commit

Permalink
add python support
Browse files Browse the repository at this point in the history
  • Loading branch information
wuxb45 committed Jul 14, 2021
1 parent a8cc3c6 commit 2d5470c
Show file tree
Hide file tree
Showing 2 changed files with 212 additions and 2 deletions.
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@ It compiles on recent Linux/FreeBSD/MacOS and supports x86\_64 and AArch64 CPUs.

This code repository is being actively maintained and contains optimizations beyond the original RemixDB implementation.

# Optimization 1: Minimizing REMIX (Re-)Building Cost
# News

* An experimental Python API is available in `xdb.py`. See the end of `xdb.py` for examples.

* RemixDB now provides `xdb_merge` for atomic read-modify-write operations.

* Two optimizations have been added to boost compaction and point query performance (see below).

# Optimizations

## Optimization 1: Minimizing REMIX (Re-)Building Cost

This implementation employs an optimization to minimize the REMIX building cost.
This optimization improves the throughput by 2x (0.96MOPS vs. 0.50MOPS) in a random-write experiment, compared to the implementation described in the REMIX paper.
Expand All @@ -35,7 +45,7 @@ You should use `remixdb_open` unless it's absolutely necessary to save a little
`remixdb_open_compact` opens a remixdb with the optimization turned off. Each newly created sstable will not contain a CKB.
A store created by one of these functions can be safely opened by the other function.

# Optimization 2: Improving Point Query with Hash Tags
## Optimization 2: Improving Point Query with Hash Tags

A point query in the original RemixDB performs binary search in a segment, which takes about five key comparisons and can cost multiple I/Os.
The current implementation provides a new option, named `tags` (the last argument of `remixdb_open`).
Expand Down
200 changes: 200 additions & 0 deletions xdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
#!/usr/bin/python3

#
# Copyright (c) 2016--2021 Wu, Xingbo <[email protected]>
#
# All rights reserved. No warranty, explicit or implicit, provided.
#

import msgpack
from ctypes import * # CDLL and c_xxx types

# libxdb {{{
# Change this path when necessary
libxdb = CDLL("./libremixdb.so")

# Marker for functions whose restype is left at the ctypes default
# (the declarations below only set restype where a value is listed).
_DEFAULT_RESTYPE = object()

# One row per exported function: (symbol name, argtypes, restype).
_XDB_PROTOTYPES = (
    # open: dir, cachesz, mtsz, tags -> xdbptr
    ("remixdb_open", [c_char_p, c_uint, c_uint, c_bool], c_void_p),
    # close (no return value)
    ("remixdb_close", [c_void_p], _DEFAULT_RESTYPE),
    # ref
    ("remixdb_ref", [c_void_p], c_void_p),
    # unref
    ("remixdb_unref", [c_void_p], c_void_p),
    # put: xdbptr, keyptr, keylen, vptr, vlen -> bool
    ("remixdb_put", [c_void_p, c_char_p, c_uint, c_char_p, c_uint], c_bool),
    # get: xdbptr, keyptr, keylen, vptr_out, vlen_out -> bool
    ("remixdb_get", [c_void_p, c_char_p, c_uint, c_char_p, c_void_p], c_bool),
    # probe
    ("remixdb_probe", [c_void_p, c_char_p, c_uint], c_bool),
    # del
    ("remixdb_del", [c_void_p, c_char_p, c_uint], c_bool),
    # sync
    ("remixdb_sync", [c_void_p], _DEFAULT_RESTYPE),
    # iter_create
    ("remixdb_iter_create", [c_void_p], c_void_p),
    # iter_seek
    ("remixdb_iter_seek", [c_void_p, c_char_p, c_uint], _DEFAULT_RESTYPE),
    # iter_valid
    ("remixdb_iter_valid", [c_void_p], c_bool),
    # iter_skip1
    ("remixdb_iter_skip1", [c_void_p], _DEFAULT_RESTYPE),
    # iter_skip
    ("remixdb_iter_skip", [c_void_p, c_uint], _DEFAULT_RESTYPE),
    # iter_peek
    ("remixdb_iter_peek", [c_void_p, c_char_p, c_void_p, c_char_p, c_void_p], c_bool),
    # iter_destroy
    ("remixdb_iter_destroy", [c_void_p], _DEFAULT_RESTYPE),
)

# Apply the table in order; each symbol is resolved on first attribute access.
for _symbol, _argtypes, _restype in _XDB_PROTOTYPES:
    _cfunc = getattr(libxdb, _symbol)
    _cfunc.argtypes = _argtypes
    if _restype is not _DEFAULT_RESTYPE:
        _cfunc.restype = _restype
del _symbol, _argtypes, _restype, _cfunc
# }}} libxdb

# class {{{
class Xdb:
    """Handle to an opened RemixDB store.

    Holds the pointer returned by ``remixdb_open`` in ``self.xdbptr``.
    Key-value operations are performed through an ``XdbRef`` obtained
    from ``ref()``. Nothing is released automatically: the user must call
    ``close()`` explicitly.
    """

    def __init__(self, dirname, cachesz=256, mtsz=256, tags=True):
        """Open the store located in directory ``dirname``.

        Args:
            dirname: store directory as a str; it is encoded as ASCII
                before being handed to the C library.
            cachesz: passed to ``remixdb_open`` as an unsigned int
                (units are defined by the C library -- presumably MB;
                TODO confirm).
            mtsz: passed to ``remixdb_open`` as an unsigned int
                (units are defined by the C library -- presumably MB;
                TODO confirm).
            tags: boolean ``tags`` option of ``remixdb_open``.

        Raises:
            OSError: if ``remixdb_open`` returns a NULL pointer.
        """
        self.xdbptr = libxdb.remixdb_open(dirname.encode('ascii'), c_uint(cachesz), c_uint(mtsz), c_bool(tags))
        # With restype c_void_p, ctypes maps a NULL return to None. Fail here
        # instead of passing NULL into later C calls.
        # NOTE(review): assumes remixdb_open signals failure with NULL -- confirm.
        if self.xdbptr is None:
            raise OSError("remixdb_open failed for directory {!r}".format(dirname))

    # user must call explicitly
    def close(self):
        """Close the store by calling ``remixdb_close`` on the handle."""
        libxdb.remixdb_close(self.xdbptr)

    def ref(self):
        """Return a new ``XdbRef`` created from this store's handle."""
        return XdbRef(self.xdbptr)

class XdbRef:
    """Reference on an open store through which key-value operations run.

    Keys are Python strings (encoded with ``str.encode()``); values are
    arbitrary msgpack-serializable Python objects. Obtain an instance with
    ``Xdb.ref()`` and release it explicitly with ``unref()``.
    """

    # use xdb.ref()
    def __init__(self, xdbptr):
        """Take a reference on the store handle ``xdbptr`` via ``remixdb_ref``."""
        self.refptr = libxdb.remixdb_ref(xdbptr)

    # user must call explicitly
    def unref(self):
        """Release this reference by calling ``remixdb_unref``."""
        libxdb.remixdb_unref(self.refptr)

    def iter(self):
        """Return a new ``XdbIter`` created from this reference."""
        return XdbIter(self.refptr)

    # key: python string; value: any (hierarchical) python object
    def put(self, key, value):
        """Store ``value`` (packed with msgpack) under ``key``.

        Returns the boolean result of ``remixdb_put``.
        """
        binkey = key.encode()
        binvalue = msgpack.packb(value)
        return libxdb.remixdb_put(self.refptr, binkey, c_uint(len(binkey)), binvalue, c_uint(len(binvalue)))

    # return the value as a python object
    def get(self, key):
        """Return the unpacked value stored under ``key``, or None.

        None is returned when ``remixdb_get`` reports failure.
        """
        binkey = key.encode()
        vbuf = create_string_buffer(1024)  # TODO: must be large enough
        vlen = c_uint()
        if not libxdb.remixdb_get(self.refptr, binkey, c_uint(len(binkey)), vbuf, byref(vlen)):
            return None
        # Slice by the reported length: `.value` stops at the first NUL byte,
        # and msgpack payloads legitimately contain NULs (e.g. the integer 0).
        return msgpack.unpackb(vbuf.raw[:vlen.value])

    def delete(self, key):
        """Delete ``key``; returns the boolean result of ``remixdb_del``."""
        binkey = key.encode()
        return libxdb.remixdb_del(self.refptr, binkey, c_uint(len(binkey)))

    def probe(self, key):
        """Return the boolean result of ``remixdb_probe`` for ``key``."""
        binkey = key.encode()
        return libxdb.remixdb_probe(self.refptr, binkey, c_uint(len(binkey)))

    def sync(self):
        """Call ``remixdb_sync`` on this reference and return its result."""
        return libxdb.remixdb_sync(self.refptr)

class XdbIter:
    """Iterator over a store, created from an ``XdbRef``.

    Obtain an instance with ``XdbRef.iter()`` and release it explicitly
    with ``destroy()``.
    """

    def __init__(self, refptr):
        """Create the underlying iterator with ``remixdb_iter_create``."""
        self.iptr = libxdb.remixdb_iter_create(refptr)

    # user must call explicitly
    def destroy(self):
        """Destroy the underlying iterator with ``remixdb_iter_destroy``."""
        libxdb.remixdb_iter_destroy(self.iptr)

    def seek(self, key):
        """Position the iterator at ``key``.

        ``key`` is a Python string, or None to seek with a NULL key of
        length 0 (presumably the start of the store -- TODO confirm).
        """
        if key is None:
            libxdb.remixdb_iter_seek(self.iptr, None, c_uint(0))
        else:
            binkey = key.encode()
            libxdb.remixdb_iter_seek(self.iptr, binkey, c_uint(len(binkey)))

    def valid(self):
        """Return the boolean result of ``remixdb_iter_valid``."""
        return libxdb.remixdb_iter_valid(self.iptr)

    def skip1(self):
        """Advance the iterator with ``remixdb_iter_skip1``."""
        libxdb.remixdb_iter_skip1(self.iptr)

    def skip(self, nr):
        """Call ``remixdb_iter_skip`` with the count ``nr``."""
        libxdb.remixdb_iter_skip(self.iptr, c_uint(nr))

    # return (key, klen, value, vlen) or None
    def peek(self):
        """Read the entry at the current position without advancing.

        Returns:
            A 4-tuple ``(key, key_length, value, value_length)`` where
            ``key`` is the decoded string, ``value`` the unpacked msgpack
            object, and the lengths are the byte counts reported by the
            library; or None when ``remixdb_iter_peek`` reports failure.
        """
        kbuf = create_string_buffer(1024)  # TODO: must be large enough
        vbuf = create_string_buffer(1024)  # TODO: must be large enough
        klen = c_uint()
        vlen = c_uint()
        if not libxdb.remixdb_iter_peek(self.iptr, kbuf, byref(klen), vbuf, byref(vlen)):
            return None
        # Slice by the reported lengths: `.value` stops at the first NUL byte,
        # which would truncate binary msgpack data (and keys containing NUL).
        key = kbuf.raw[:klen.value].decode()
        value = msgpack.unpackb(vbuf.raw[:vlen.value])
        return (key, klen.value, value, vlen.value)

# }}} class

# examples
# Guarded so that importing this module does not open or modify a store;
# run the file directly to execute the walkthrough.
if __name__ == "__main__":
    xdb1 = Xdb("/tmp/pyxdb")  # change this path when necessary
    ref1 = xdb1.ref()  # take a ref for kv operations

    ref1.put("Hello", "pyxdb")
    ref1.put("key1", "value1")
    ref1.put("key2", "value2")
    ref1.put("key3", {"xxx": "valuex", "yyy": "valuey"})
    ref1.delete("key2")

    rget = ref1.get("Hello")
    print(rget)

    # don't use ref when iterating
    iter1 = ref1.iter()
    iter1.seek(None)
    while iter1.valid():
        r = iter1.peek()
        print(r)
        iter1.skip1()

    iter1.destroy()  # must destroy all iters before unref

    ref1.sync()
    ref1.unref()  # must unref all refs before close()
    xdb1.close()

# vim:fdm=marker

0 comments on commit 2d5470c

Please sign in to comment.