Key-value storage for Python & wrapper of RocksDB and Speedb
Wheels are available; just run
pip install rocksdict
for the RocksDB backend, then `from rocksdict import Rdict`, or
pip install speedict
for the Speedb backend, then `from speedict import Rdict`.
This library has two purposes.
- As an on-disk key-value storage solution for Python.
- As a RocksDB / Speedb interface.
These two purposes operate in different modes:
- Default mode, which allows storing `int`, `float`, `bool`, `str`, `bytes`, and other Python objects (with `Pickle`).
- Raw mode (`options=Options(raw_mode=True)`), which allows storing only `bytes`.
from rocksdict import Rdict
import numpy as np
import pandas as pd

path = str("./test_dict")

# create a Rdict with default options at `path`
db = Rdict(path)
db[1.0] = 1
db["huge integer"] = 2343546543243564534233536434567543
db["good"] = True
# stored so that the batch get further down can actually look up "bad"
db["bad"] = False
db["bytes"] = b"bytes"
db["this is a list"] = [1, 2, 3]
db["store a dict"] = {0: 1}
db[b"numpy"] = np.array([1, 2, 3])
db["a table"] = pd.DataFrame({"a": [1, 2], "b": [2, 1]})

# reopen Rdict from disk
db.close()
db = Rdict(path)
assert db[1.0] == 1
assert db["huge integer"] == 2343546543243564534233536434567543
assert db["good"] == True
assert db["bad"] == False
assert db["bytes"] == b"bytes"
assert db["this is a list"] == [1, 2, 3]
assert db["store a dict"] == {0: 1}
assert np.all(db[b"numpy"] == np.array([1, 2, 3]))
assert np.all(db["a table"] == pd.DataFrame({"a": [1, 2], "b": [2, 1]}))

# iterate through all elements
for k, v in db.items():
    print(f"{k} -> {v}")

# batch get: indexing with a list of keys returns the values in the same order
print(db[["good", "bad", 1.0]])
# [True, False, 1]

# delete Rdict from disk
db.close()
Rdict.destroy(path)
This mode allows only bytes as keys and values.
from rocksdict import Rdict, Options

PATH_TO_ROCKSDB = str("path")

# open raw_mode, which allows only bytes
db = Rdict(path=PATH_TO_ROCKSDB, options=Options(raw_mode=True))
db[b'a'] = b'a'
db[b'b'] = b'b'
db[b'c'] = b'c'
db[b'd'] = b'd'

# print every stored key/value pair
for k, v in db.items():
    print(f"{k} -> {v}")

# close and delete
db.close()
Rdict.destroy(PATH_TO_ROCKSDB)
Loading Options from RocksDict Path.
from rocksdict import Options, Rdict

path = str("./rocksdict_path")

# load the options stored at `path`; `cols` is used below as the
# column-family mapping passed to Rdict
opts, cols = Options.load_latest(path)
opts.create_missing_column_families(True)
# add options for a column family named "bytes"
cols["bytes"] = Options()

# a plain variable is used here: `self` does not exist in a top-level script
db = Rdict(path, options=opts, column_families=cols)
import shutil
from rocksdict import Rdict, Options, SliceTransform, PlainTableFactoryOptions
import os
def db_options():
    """Build the tuned ``Options`` object used to create the example DB.

    Returns:
        An ``Options`` instance configured with large write buffers,
        a fixed-length prefix extractor and the plain-table format.
    """
    opt = Options()
    # create table
    opt.create_if_missing(True)
    # config to more jobs; os.cpu_count() may return None when the CPU
    # count cannot be determined, so fall back to a single job
    opt.set_max_background_jobs(os.cpu_count() or 1)
    # configure mem-table to a large value (256 MB)
    opt.set_write_buffer_size(0x10000000)
    opt.set_level_zero_file_num_compaction_trigger(4)
    # configure l0 and l1 size, let them have the same size (1 GB)
    opt.set_max_bytes_for_level_base(0x40000000)
    # 256 MB file size
    opt.set_target_file_size_base(0x10000000)
    # use a smaller compaction multiplier
    opt.set_max_bytes_for_level_multiplier(4.0)
    # use 8-byte prefix (2 ^ 64 is far enough for transaction counts)
    opt.set_prefix_extractor(SliceTransform.create_max_len_prefix(8))
    # set to plain-table
    opt.set_plain_table_factory(PlainTableFactoryOptions())
    return opt
# create the DB with the tuned options, store one entry, then close it
db_path = "./some_path"
db = Rdict(db_path, db_options())
db[0] = 1
db.close()

# reopen without passing options: they are reloaded automatically
db = Rdict(db_path)
assert db[0] == 1

# close and destroy the on-disk data
db.close()
Rdict.destroy(db_path)
Go to example folder.
Currently, merge operations and custom comparators are not supported.