Skip to content

Commit

Permalink
[async] Compute offloaded IR hash once and cache it (taichi-dev#1608)
Browse files Browse the repository at this point in the history
  • Loading branch information
k-ye authored Jul 30, 2020
1 parent c13670d commit 00f1e88
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 25 deletions.
35 changes: 23 additions & 12 deletions taichi/program/async_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@ std::unique_ptr<IRNode> clone_offloaded_task(OffloadedStmt *from,

KernelLaunchRecord::KernelLaunchRecord(Context context,
Kernel *kernel,
std::unique_ptr<IRNode> &&stmt_)
std::unique_ptr<IRNode> &&stmt_,
uint64 h)
: context(context),
kernel(kernel),
stmt(dynamic_cast<OffloadedStmt *>(stmt_.get())),
stmt_holder(std::move(stmt_)),
h(hash(stmt)) {
h(h),
stmt_holder_(std::move(stmt_)) {
TI_ASSERT(stmt != nullptr);
TI_ASSERT(stmt->get_kernel() != nullptr);
}
Expand Down Expand Up @@ -130,25 +131,35 @@ void AsyncEngine::launch(Kernel *kernel) {
kernel->lower(/*to_executable=*/false);
auto block = dynamic_cast<Block *>(kernel->ir.get());
TI_ASSERT(block);

auto &offloads = block->statements;
auto &dummy_root = kernel_to_dummy_roots_[kernel];
if (dummy_root == nullptr) {
dummy_root = std::make_unique<Block>();
dummy_root->kernel = kernel;
auto &kmeta = kernel_metas_[kernel];
const bool kmeta_inited = kmeta.initialized();
if (!kmeta_inited) {
kmeta.dummy_root = std::make_unique<Block>();
kmeta.dummy_root->kernel = kernel;
}
for (std::size_t i = 0; i < offloads.size(); i++) {
auto offload = offloads[i]->as<OffloadedStmt>();
KernelLaunchRecord rec(
kernel->program.get_context(), kernel,
clone_offloaded_task(offload, kernel, dummy_root.get()));
auto cloned = clone_offloaded_task(offload, kernel, kmeta.dummy_root.get());
uint64 h;
if (kmeta_inited) {
h = kmeta.offloaded_hashes[i];
} else {
h = hash(cloned.get());
TI_ASSERT(kmeta.offloaded_hashes.size() == i);
kmeta.offloaded_hashes.push_back(h);
}
KernelLaunchRecord rec(kernel->program.get_context(), kernel,
std::move(cloned), h);
enqueue(std::move(rec));
}
}

void AsyncEngine::enqueue(KernelLaunchRecord &&t) {
using namespace irpass::analysis;

auto &meta = metas[t.h];
auto &meta = offloaded_metas_[t.h];
// TODO: this is an abuse since it gathers nothing...
auto root_stmt = t.stmt;
gather_statements(root_stmt, [&](Stmt *stmt) {
Expand Down Expand Up @@ -213,7 +224,7 @@ bool AsyncEngine::optimize_listgen() {
for (int i = 0; i < task_queue.size(); i++) {
// Try to eliminate unused listgens
auto &t = task_queue[i];
auto meta = metas[t.h];
auto meta = offloaded_metas_[t.h];
auto offload = t.stmt;
bool keep = true;
if (offload->task_type == OffloadedStmt::TaskType::listgen) {
Expand Down
37 changes: 24 additions & 13 deletions taichi/program/async_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,15 @@ class KernelLaunchRecord {
Context context;
Kernel *kernel; // TODO: remove this
OffloadedStmt *stmt;
std::unique_ptr<IRNode> stmt_holder;
uint64 h;
uint64 h; // hash of |stmt|

KernelLaunchRecord(Context contxet,
KernelLaunchRecord(Context context,
Kernel *kernel,
std::unique_ptr<IRNode> &&stmt);
std::unique_ptr<IRNode> &&stmt,
uint64 h);

private:
std::unique_ptr<IRNode> stmt_holder_;
};

// In charge of (parallel) compilation to binary and (serial) kernel launching
Expand Down Expand Up @@ -154,13 +157,6 @@ class AsyncEngine {
public:
// TODO: state machine

// SNode sets kept for one offloaded task; instances are looked up by the
// task's hash.
struct TaskMeta {
  std::unordered_set<SNode *> input_snodes;
  std::unordered_set<SNode *> output_snodes;
  std::unordered_set<SNode *> activation_snodes;
};

std::unordered_map<std::uint64_t, TaskMeta> metas;

ExecutionQueue queue;

std::deque<KernelLaunchRecord> task_queue;
Expand All @@ -183,11 +179,26 @@ class AsyncEngine {
void synchronize();

private:
// Per-kernel bookkeeping: the dummy root Block handed to the cloned offloaded
// tasks, plus the hash recorded for each of those tasks.
struct KernelMeta {
  // Stays null until the kernel is set up on its first launch.
  std::unique_ptr<Block> dummy_root;
  // offloaded_hashes[i] is the hash of the kernel's i-th offloaded task.
  std::vector<uint64> offloaded_hashes;

  // True once |dummy_root| has been created.
  bool initialized() const {
    return static_cast<bool>(dummy_root);
  }
};

// SNode sets kept for one offloaded task; instances are looked up by the
// task's hash.
struct TaskMeta {
  std::unordered_set<SNode *> input_snodes;
  std::unordered_set<SNode *> output_snodes;
  std::unordered_set<SNode *> activation_snodes;
};

// In async mode, the root of an AST is an OffloadedStmt instead of a Block.
// This map provides a dummy Block root for each such OffloadedStmt, so that
// get_kernel() still works correctly.
std::unordered_map<const Kernel *, std::unique_ptr<Block>>
kernel_to_dummy_roots_;
std::unordered_map<const Kernel *, KernelMeta> kernel_metas_;

std::unordered_map<std::uint64_t, TaskMeta> offloaded_metas_;
};

TLANG_NAMESPACE_END

0 comments on commit 00f1e88

Please sign in to comment.