Skip to content

Commit

Permalink
Phase 1 multithreading and fast sort (#65)
Browse files Browse the repository at this point in the history
Combination of stripeswjb and ms.fastSort++.

>70% decreased writes to disk
>25-50% increased speed 

* Enables multithreading for phase 1
* Adds new arguments: u: number of buckets, h: number of threads, s: stripe size for forward prop
* Adds a new sort on disk algorithm: sort_manager.hpp. Before, we were writing out all entries naively, and then sorting. Now, we write out entries into 16 different files, depending on the bucket, or first 4 bits of the entry. Then we sort each one, as we read them in the next table. This saves total two write passes and two read passes over each table.
* Recursive sort has been removed
* Dynamically sets the number of buckets so that an entire bucket fits in memory. Num buckets can vary from 16 to 128. The small one is preferred, since it reduces seeks on disk. For HDDs, more memory is important, so that we can keep the bucket size low.
* Due to these limitations, there is a now a minimum buffer size.
* Fixes the memory spike issue
* Fixes a bug in compression which dropped some entries, this changes the final file slightly (everything is still compatible)
* Use quicksort if we don't have enough memory for SortInMemory (which now is called UniformSort).
* Divided all the sort and disk objects into separate files (disk.hpp, uniformsort.hpp, quicksort.hpp.hpp, fast_sort_on_disk.hpp).
* Splits phases into their own files
* reduce seeks in WriteParkToFile (credit to xorinox for finding the issue)
* remove logging of memo which contains plot private key
* Does not use more memory than what is passed in with -b

Co-authored-by: Bill Blanke <[email protected]>
  • Loading branch information
mariano54 and wjblanke authored Sep 29, 2020
1 parent 952ed5b commit 094f3be
Show file tree
Hide file tree
Showing 23 changed files with 3,215 additions and 2,811 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ elseif (WIN32)
target_link_libraries(ProofOfSpace fse)
target_link_libraries(RunTests fse)
else()
target_link_libraries(chiapos PRIVATE fse stdc++fs)
target_link_libraries(ProofOfSpace fse stdc++fs)
target_link_libraries(RunTests fse stdc++fs)
target_link_libraries(chiapos PRIVATE rt pthread fse stdc++fs)
target_link_libraries(ProofOfSpace rt pthread fse stdc++fs)
target_link_libraries(RunTests rt pthread fse stdc++fs)
endif()
10 changes: 8 additions & 2 deletions python-bindings/chiapos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ PYBIND11_MODULE(chiapos, m)
uint8_t k,
const py::bytes &memo,
const py::bytes &id,
uint32_t buffmegabytes) {
uint32_t buffmegabytes,
uint32_t num_buckets,
uint32_t stripe_size,
uint8_t num_threads) {
std::string memo_str(memo);
const uint8_t *memo_ptr = reinterpret_cast<const uint8_t *>(memo_str.data());
std::string id_str(id);
Expand All @@ -60,7 +63,10 @@ PYBIND11_MODULE(chiapos, m)
len(memo),
id_ptr,
len(id),
buffmegabytes);
buffmegabytes,
num_buckets,
stripe_size,
num_threads);
});

py::class_<DiskProver>(m, "DiskProver")
Expand Down
10 changes: 6 additions & 4 deletions src/bits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <vector>

#include "./util.hpp"
#include "exceptions.hpp"

using namespace std;

Expand Down Expand Up @@ -360,7 +361,7 @@ class BitsGeneric {
}
}
if (all_zero) {
throw std::string("Overflow, negative number");
throw InvalidStateException("Overflow, negative number");
}
return *this;
}
Expand Down Expand Up @@ -514,7 +515,8 @@ class BitsGeneric {
if (values_.size() > 2) {
std::cout << "Number of 64 bit values is: " << values_.size() << std::endl;
std::cout << "Size of bits is: " << GetSize() << std::endl;
throw std::string("Number doesn't fit into a 128-bit type.");
throw InvalidStateException(
"Number doesn't fit into a 128-bit type. " + std::to_string(GetSize()));
}

if (GetSize() <= 64)
Expand Down Expand Up @@ -637,7 +639,7 @@ template <class T>
bool operator<(const BitsGeneric<T>& lhs, const BitsGeneric<T>& rhs)
{
if (lhs.GetSize() != rhs.GetSize())
throw std::string("Different sizes!");
throw InvalidStateException("Different sizes!");
for (uint32_t i = 0; i < lhs.values_.size(); i++) {
if (lhs.values_[i] < rhs.values_[i])
return true;
Expand All @@ -651,7 +653,7 @@ template <class T>
bool operator>(const BitsGeneric<T>& lhs, const BitsGeneric<T>& rhs)
{
if (lhs.GetSize() != rhs.GetSize())
throw std::string("Different sizes!");
throw InvalidStateException("Different sizes!");
for (uint32_t i = 0; i < lhs.values_.size(); i++) {
if (lhs.values_[i] > rhs.values_[i])
return true;
Expand Down
17 changes: 13 additions & 4 deletions src/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,24 @@ int main(int argc, char *argv[])

// Default values
uint8_t k = 20;
uint32_t num_buckets = 0;
uint32_t num_stripes = 0;
uint8_t num_threads = 0;
string filename = "plot.dat";
string tempdir = ".";
string tempdir2 = ".";
string finaldir = ".";
string operation = "help";
string memo = "0102030405";
string id = "022fb42c08c12de3a6af053880199806532e79515f94e83461612101f9412f9e";
uint32_t buffmegabytes = 2 * 1024; // 2 gigabytes
uint32_t buffmegabytes = 0;

options.allow_unrecognised_options().add_options()(
"k, size", "Plot size", cxxopts::value<uint8_t>(k))(
"t, tempdir", "Temporary directory", cxxopts::value<string>(tempdir))(
"k, size", "Plot size", cxxopts::value<uint8_t>(k))(
"h, threads", "Number of threads", cxxopts::value<uint8_t>(num_threads))(
"u, buckets", "Number of buckets", cxxopts::value<uint32_t>(num_buckets))(
"s, stripes", "Size of stripes", cxxopts::value<uint32_t>(num_stripes))(
"t, tempdir", "Temporary directory", cxxopts::value<string>(tempdir))(
"2, tempdir2", "Second Temporary directory", cxxopts::value<string>(tempdir2))(
"d, finaldir", "Final directory", cxxopts::value<string>(finaldir))(
"f, file", "Filename", cxxopts::value<string>(filename))(
Expand Down Expand Up @@ -126,7 +132,10 @@ int main(int argc, char *argv[])
memo.size() / 2,
id_bytes,
32,
buffmegabytes);
buffmegabytes,
num_buckets,
num_stripes,
num_threads);
delete[] memo_bytes;
} else if (operation == "prove") {
if (argc < 3) {
Expand Down
168 changes: 168 additions & 0 deletions src/disk.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// Copyright 2018 Chia Network Inc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SRC_CPP_DISK_HPP_
#define SRC_CPP_DISK_HPP_

#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Gulrak filesystem brings in Windows headers that cause some issues with std
#define _HAS_STD_BYTE 0
#define NOMINMAX

#include "../lib/include/filesystem.hh"

namespace fs = ghc::filesystem;

#include "./bits.hpp"
#include "./util.hpp"

class Disk {
public:
virtual void Read(uint64_t begin, uint8_t *memcache, uint64_t length) = 0;

virtual void Write(uint64_t begin, const uint8_t *memcache, uint64_t length) = 0;

virtual void Truncate(uint64_t new_size) = 0;

virtual std::string GetFileName() = 0;

virtual ~Disk(){};
};

class FileDisk : public Disk {
public:
inline explicit FileDisk(const fs::path &filename)
{
filename_ = filename;

// Opens the file for reading and writing
f_ = fopen(filename.c_str(), "w+b");
}

FileDisk(FileDisk &&fd)
{
filename_ = fd.filename_;
f_ = fd.f_;
fd.f_ = NULL;
}

bool isOpen() { return (f_ != NULL); }

void Close()
{
if (f_ != NULL) {
fclose(f_);
f_ = NULL;
}
}

~FileDisk()
{
if (f_ != NULL) {
fclose(f_);
f_ = NULL;
}
}

inline void Read(uint64_t begin, uint8_t *memcache, uint64_t length) override
{
// Seek, read, and replace into memcache
uint64_t amtread;
do {
if ((!bReading) || (begin != readPos)) {
#ifdef WIN32
_fseeki64(f_, begin, SEEK_SET);
#else
fseek(f_, begin, SEEK_SET);
#endif
bReading = true;
}
amtread = fread(reinterpret_cast<char *>(memcache), sizeof(uint8_t), length, f_);
readPos = begin + amtread;
if (amtread != length) {
std::cout << "Only read " << amtread << " of " << length << " bytes at offset "
<< begin << " from " << filename_ << "with length " << writeMax
<< ". Error " << ferror(f_) << ". Retrying in five minutes." << std::endl;
#ifdef WIN32
Sleep(5 * 60000);
#else
sleep(5 * 60);
#endif
}
} while (amtread != length);
}

inline void Write(uint64_t begin, const uint8_t *memcache, uint64_t length) override
{
// Seek and write from memcache
uint64_t amtwritten;
do {
if ((bReading) || (begin != writePos)) {
#ifdef WIN32
_fseeki64(f_, begin, SEEK_SET);
#else
fseek(f_, begin, SEEK_SET);
#endif
bReading = false;
}
amtwritten =
fwrite(reinterpret_cast<const char *>(memcache), sizeof(uint8_t), length, f_);
writePos = begin + amtwritten;
if (writePos > writeMax)
writeMax = writePos;
if (amtwritten != length) {
std::cout << "Only wrote " << amtwritten << " of " << length << " bytes at offset "
<< begin << " to " << filename_ << "with length " << writeMax
<< ". Error " << ferror(f_) << ". Retrying in five minutes." << std::endl;
#ifdef WIN32
Sleep(5 * 60000);
#else
sleep(5 * 60);
#endif
}
} while (amtwritten != length);
}

inline std::string GetFileName() override { return filename_.string(); }

inline uint64_t GetWriteMax() const noexcept { return writeMax; }

inline void Truncate(uint64_t new_size) override
{
if (f_ != NULL)
fclose(f_);
fs::resize_file(filename_, new_size);
f_ = fopen(filename_.c_str(), "r+b");
}

private:
FileDisk(const FileDisk &);

FileDisk &operator=(const FileDisk &);

uint64_t readPos = 0;
uint64_t writePos = 0;
uint64_t writeMax = 0;
bool bReading = true;

fs::path filename_;
FILE *f_;
};

#endif // SRC_CPP_DISK_HPP_
3 changes: 2 additions & 1 deletion src/encoding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "../lib/FiniteStateEntropy/lib/fse.h"
#include "../lib/FiniteStateEntropy/lib/hist.h"
#include "bits.hpp"
#include "exceptions.hpp"
#include "util.hpp"

std::map<double, FSE_CTable *> CT_MEMO = {};
Expand Down Expand Up @@ -176,7 +177,7 @@ class Encoding {

for (uint32_t i = 0; i < deltas.size(); i++) {
if (deltas[i] == 0xff) {
throw std::string("Bad delta detected");
throw InvalidStateException("Bad delta detected");
}
}
return deltas;
Expand Down
Loading

0 comments on commit 094f3be

Please sign in to comment.