Skip to content

Commit

Permalink
add tpcc
Browse files Browse the repository at this point in the history
  • Loading branch information
guowentian committed Oct 4, 2018
1 parent 5eda2e8 commit f0ed972
Show file tree
Hide file tree
Showing 62 changed files with 6,744 additions and 0 deletions.
8 changes: 8 additions & 0 deletions database/Meta.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include "Meta.h"

// Definitions of the process-wide globals that Meta.h declares as extern.
namespace Database {
// Allocator handle shared by the whole process; starts out null.
GAlloc* default_gallocator = nullptr;
// Array of allocator handles; starts out null and is allocated elsewhere.
GAlloc** gallocators = nullptr;
// Number of worker threads; zero until it is configured.
size_t gThreadCount = 0;
// Batch size used by the parameter source; defaults to 1000.
size_t gParamBatchSize = 1000;
}
47 changes: 47 additions & 0 deletions database/Meta.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#ifndef __DATABASE_META_H__
#define __DATABASE_META_H__

#include <cstring>
#include <cstdint>

#include "gallocator.h"

namespace Database {
// 32-bit unsigned type used for hash codes.
using HashcodeType = uint32_t;
// 64-bit unsigned type used for index keys.
using IndexKey = uint64_t;

// Lock kinds. Unscoped enum stored as size_t; values are 0, 1, 2 in
// declaration order.
enum LockType : size_t {
  NO_LOCK = 0,
  READ_LOCK = 1,
  WRITE_LOCK = 2
};
// Access kinds. Unscoped enum stored as size_t; values are 0..3 in
// declaration order.
enum AccessType : size_t {
  READ_ONLY = 0,
  READ_WRITE = 1,
  INSERT_ONLY = 2,
  DELETE_ONLY = 3
};
// Source kinds. Unscoped enum stored as size_t; values are 0 and 1 in
// declaration order.
enum SourceType : size_t {
  RANDOM_SOURCE = 0,
  PARTITION_SOURCE = 1
};

// ---- Storage limits ----
// NOTE(review): the meanings below are read off the identifiers only; confirm
// against the storage code that consumes them.
constexpr size_t kMaxTableNum = 16;           // presumably: max tables
constexpr size_t kMaxColumnNum = 32;          // presumably: max columns per table
constexpr size_t kMaxSecondaryIndexNum = 5;   // presumably: max secondary indexes
constexpr uint64_t kHashIndexBucketHeaderNum = 1000007;  // presumably: hash-index bucket headers

// ---- Transaction limits ----
constexpr size_t kTryLockLimit = 1;      // presumably: lock attempts before giving up
constexpr size_t kMaxAccessLimit = 256;  // presumably: max accesses tracked per transaction

// Process-wide globals. These are declarations only; the definitions and
// their initial values live in Meta.cpp.

// Single allocator handle shared by the process.
extern GAlloc* default_gallocator;
// Array of allocator handles.
// NOTE(review): presumably one entry per worker thread (gThreadCount) - confirm.
extern GAlloc** gallocators;
// Number of worker threads.
extern size_t gThreadCount;

// Batch size used by the parameter source.
extern size_t gParamBatchSize;

}

#endif
65 changes: 65 additions & 0 deletions database/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
A transactional database engine built on top of Global addressable memory.
This engine is benchmarked with TPCC.

# Feature #
Since GAM maintains cache coherence, i.e., every record is cached locally before it is accessed, two-phase commit is not needed and transactions are executed much as they would be on a single machine.
Therefore, we use the two-phase locking protocol for concurrency control.

# Compile #
It is required that the GAM library is built (in 'code/src').

To build the tpcc benchmark, just
```
cd tpcc; make clean; make -j
```
To build the test for tpcc, simply
```
cd test; make clean; make -j
```
The script 'scripts/compile.sh' automates this compilation process.
After compilation, the binary files are generated in the folders 'tpcc' and 'test' respectively.

# Flag #
There are two flags used for compilation to enforce different concurrency control options.
* LOCK: two phase locking
* ST: no concurrency control

# Run #
1. Specify the cluster setup in a configuration file, say 'config.txt'.
This file should contain one line per node, each in the format "HOST_NAME PORT_NUMBER".

Note that the master node should be written in the first line.
The file 'tpcc/config.txt' provides an example for such a configuration file.

2. Start the master node with the specified arguments.

3. Start the remaining nodes with the specified arguments.


The arguments for 'tpcc' are described as follows.
* -p: the port number for this node (required).
* -c: Number of cores (threads) used (required).
* -sf: the scale factors for populating the tpcc benchmark.

There are two arguments for '-sf'.
The first argument for '-sf' indicates the number of warehouses, while the second argument for '-sf' is to scale the size of the database.

* -t: Number of transactions run for each thread
* -d: The distributed ratios.
* -f: The configuration file for the cluster setup
* -r: The read ratio, used to modify the workload
* -l: The time locality, used to modify the workload

You can also simply run 'tpcc' without any arguments to show the description for each argument.

The scripts in the folder 'scripts' automate the running of the tpcc benchmark.

Under this folder, 'experiment.sh' can run the benchmark under varying distributed ratios, read ratios, and time locality;
'run_test.sh' can automate the running of our tests.

Note: To have a larger global memory space, you may need to modify the size defined in 'include/structure.h'.


# Acknowledgement #
This implementation is adapted from an open-source transactional database prototype on single machines:
https://github.com/Cavalia/Cavalia
117 changes: 117 additions & 0 deletions database/bench/BenchmarkArguments.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#ifndef __DATABASE_BENCHMARK_ARGUMENTS_H__
#define __DATABASE_BENCHMARK_ARGUMENTS_H__

#include <iostream>
#include <cassert>
#include "Meta.h"

namespace Database {
// Command-line state filled in by ArgumentsParser(). A value of -1 (or a
// factor_count of 0) means "not supplied"; ArgumentsChecker() relies on that
// for port, num_core, num_txn and factor_count.
// NOTE: these are `static` in a header, so every translation unit that
// includes this file gets its own private copy of each variable.

// Not read or written anywhere in this header.
static int app_type = -1;
// Values of the -sf flags, stored in the order they appear on the command line.
static double scale_factors[2] = { -1, -1 };
// Number of -sf flags seen so far; also the next free slot in scale_factors.
static int factor_count = 0;
// Value of -d; validated to lie in [0,100].
static int dist_ratio = 1;
// Value of -t.
static int num_txn = -1;
// Value of -c; the parser also copies it into gThreadCount.
static int num_core = -1; // number of cores utilized in a single numa node.
// Value of -p.
static int port = -1;
// Value of -f.
static std::string config_filename = "config.txt";
// To modify tpcc workload
// Value of -r; validated to be at most 100.
static size_t gReadRatio = 0;
// Value of -l; validated to be at most 100.
static size_t gTimeLocality = 0;
// Never modified in this header.
static bool gForceRandomAccess = false; // fixed
// Cleared by the parser as soon as -r or -l is given.
static bool gStandard = true; // true if follow standard specification


// Writes the command-line help text (the flag list followed by one example
// invocation) to standard output. Each line is terminated with std::endl,
// so the stream is flushed after every line.
static void PrintUsage() {
  // The help entry for -z is kept disabled:
  //   "\t-zINT: BATCH_SIZE(optional)"
  const char* const help_lines[] = {
    "==========[USAGE]==========",
    "\t-pINT: PORT(required)",
    "\t-cINT: CORE_COUNT(required)",
    "\t-sfINT: SCALE_FACTOR(required)",
    "\t-sfDOUBLE: SCALE_FACTOR(required)",
    "\t-tINT: TXN_COUNT(required)",
    "\t-dINT: DIST_TXN_RATIO(optional,default=1)",
    "\t-fSTRING: CONFIG_FILENAME(optional,default=config.txt)",
    "\t-rINT: READ_RATIO(optional, [0,100])",
    "\t-lINT: TIME_LOCALITY(optional, [0,100])",
    "===========================",
    "==========[EXAMPLES]==========",
    "Benchmark -p11111 -c4 -sf10 -sf100 -t100000",
    "==============================",
  };
  for (const char* text : help_lines) {
    std::cout << text << std::endl;
  }
}

// Validates the values collected by the argument parser.
//
// Checks run in a fixed order: the required flags (-p, -sf, -c, -t) must have
// been supplied, then the percentage-style options must lie in [0,100]. On the
// first violation the matching message is printed to standard output and the
// process terminates.
//
// The process exits with a non-zero status on a validation failure so that
// calling scripts can tell a rejected command line from a successful run.
static void ArgumentsChecker() {
  const char* problem = nullptr;
  if (port == -1) {
    problem = "PORT (-p) should be set";
  } else if (factor_count == 0) {
    problem = "SCALE_FACTOR (-sf) should be set.";
  } else if (num_core == -1) {
    problem = "CORE_COUNT (-c) should be set.";
  } else if (num_txn == -1) {
    problem = "TXN_COUNT (-t) should be set.";
  } else if (dist_ratio < 0 || dist_ratio > 100) {
    problem = "DIST_TXN_RATIO should be [0,100].";
  } else if (gReadRatio > 100) {
    // gReadRatio is unsigned, so only the upper bound can be violated; a
    // negative command-line value wraps to a huge number and is caught here.
    problem = "READ_RATIO should be [0,100].";
  } else if (gTimeLocality > 100) {
    // Same reasoning as for gReadRatio.
    problem = "TIME_LOCALITY should be [0,100].";
  }

  if (problem != nullptr) {
    std::cout << problem << std::endl;
    exit(1);
  }
}

static void ArgumentsParser(int argc, char *argv[]) {
if (argc <= 4) {
PrintUsage();
exit(0);
}
for (int i = 1; i < argc; ++i) {
if (argv[i][0] != '-') {
PrintUsage();
exit(0);
}
if (argv[i][1] == 'p') {
port = atoi(&argv[i][2]);
} else if (argv[i][1] == 's' && argv[i][2] == 'f') {
scale_factors[factor_count] = atof(&argv[i][3]);
++factor_count;
} else if (argv[i][1] == 't') {
num_txn = atoi(&argv[i][2]);
} else if (argv[i][1] == 'd') {
dist_ratio = atoi(&argv[i][2]);
} else if (argv[i][1] == 'c') {
num_core = atoi(&argv[i][2]);
gThreadCount = num_core;
} else if (argv[i][1] == 'f') {
config_filename = std::string(&argv[i][2]);
} else if (argv[i][1] == 'z') {
gParamBatchSize = atoi(&argv[i][2]);
} else if (argv[i][1] == 'r') {
gReadRatio = atoi(&argv[i][2]);
gStandard = false;
} else if (argv[i][1] == 'l') {
gTimeLocality = atoi(&argv[i][2]);
gStandard = false;
} else if (argv[i][1] == 'h') {
PrintUsage();
exit(0);
} else {
PrintUsage();
exit(0);
}
}
ArgumentsChecker();
}
}

#endif
73 changes: 73 additions & 0 deletions database/bench/BenchmarkInitiator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#ifndef __DATABASE_BENCHMARK_INITIATOR_H__
#define __DATABASE_BENCHMARK_INITIATOR_H__

#include "gallocator.h"
#include "ClusterHelper.h"
#include "ClusterConfig.h"
#include "StorageManager.h"
#include "Profiler.h"
#include "PerfStatistics.h"

namespace Database {
// Base class for benchmark start-up: creates the global allocators and, on
// the master node, allocates the storage-manager region and registers the
// tables. Concrete benchmarks override RegisterSchemas() and RegisterTables().
class BenchmarkInitiator {
 public:
  // thread_count: number of per-thread allocators to create.
  // config: cluster description; stored as a raw pointer and not deleted by
  //         this class.
  BenchmarkInitiator(const size_t& thread_count,
                     ClusterConfig* config)
      : thread_count_(thread_count),
        config_(config) {
  }

  // The class has virtual hooks and is meant to be subclassed, so destruction
  // through a base pointer must be well-defined.
  virtual ~BenchmarkInitiator() {
  }

  // Builds the allocator configuration from the cluster config, then creates
  // default_gallocator and one entry of gallocators per thread.
  void InitGAllocator() {
    ServerInfo master = config_->GetMasterHostInfo();
    ServerInfo myhost = config_->GetMyHostInfo();

    // NOTE(review): conf is never deleted here; presumably the allocator
    // factory keeps the pointer - confirm before adding a delete.
    Conf* conf = new Conf();
    conf->loglevel = LOG_WARNING;
    conf->is_master = config_->IsMaster();
    conf->master_ip = ClusterHelper::GetIpByHostName(master.addr_);
    conf->worker_ip = ClusterHelper::GetIpByHostName(myhost.addr_);
    int partition_id = config_->GetMyPartitionId();
    // to avoid port conflicts on the same node
    conf->worker_port += partition_id;

    std::cout << "GAllocator config info: is_master=" << conf->is_master
              << ",master_ip=" << conf->master_ip << ",master_port="
              << conf->master_port << ",worker_ip=" << conf->worker_ip
              << ",worker_port=" << conf->worker_port << std::endl;

    default_gallocator = GAllocFactory::CreateAllocator(conf);
    std::cout << "create default gallocator" << std::endl;
    gallocators = new GAlloc*[thread_count_];
    for (size_t i = 0; i < thread_count_; ++i) {
      gallocators[i] = GAllocFactory::CreateAllocator(conf);
    }
  }

  // On the master: registers the schemas, allocates the storage-manager
  // region and registers the tables into it, then returns the region's
  // address. On any other node: returns Gnullptr without allocating anything.
  GAddr InitStorage() {
    GAddr storage_addr = Gnullptr;
    if (config_->IsMaster()) {
      // RecordSchema
      std::vector<RecordSchema*> schemas;
      this->RegisterSchemas(schemas);
      // StorageManager
      storage_addr = default_gallocator->AlignedMalloc(
          StorageManager::GetSerializeSize());
      this->RegisterTables(storage_addr, schemas);
    }
    return storage_addr;
  }

 protected:
  // Hook: create the tables inside the region at storage_addr using the
  // schemas produced by RegisterSchemas(). The default does nothing.
  virtual void RegisterTables(const GAddr& storage_addr,
                              const std::vector<RecordSchema*>& schemas) {}

  // Hook: append the benchmark's record schemas to `schemas`. The default
  // does nothing.
  virtual void RegisterSchemas(std::vector<RecordSchema*>& schemas) {}

  const size_t thread_count_;
  ClusterConfig* config_;
};
}

#endif
41 changes: 41 additions & 0 deletions database/bench/BenchmarkPopulator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef __DATABASE_BENCHMARK_POPULATOR_H__
#define __DATABASE_BENCHMARK_POPULATOR_H__

#include <iostream>

#include "StorageManager.h"
#include "Meta.h"
#include "TimeMeasurer.h"

namespace Database {
class BenchmarkPopulator {
public:
BenchmarkPopulator(StorageManager *storage_manager)
: storage_manager_(storage_manager) {
}

virtual ~BenchmarkPopulator() {
}

void Start() {
std::cout << "start population" << std::endl;
TimeMeasurer timer;
timer.StartTimer();
StartPopulate();
timer.EndTimer();
std::cout << "populate elapsed time=" << timer.GetElapsedMilliSeconds()
<< "ms" << std::endl;
}

virtual void StartPopulate() = 0;

private:
BenchmarkPopulator(const BenchmarkPopulator &);
BenchmarkPopulator& operator=(const BenchmarkPopulator &);

protected:
StorageManager *storage_manager_;
};
}

#endif
Loading

0 comments on commit f0ed972

Please sign in to comment.