Skip to content

Commit

Permalink
[llvm] Establish a correspondence between SNodes and DevicePtr (taichi-dev#3120)
Browse files Browse the repository at this point in the history

* correspondence

* fix mac build?

* fix mac build, take 2

* resolve convo

* fix

* Update taichi/ui/common/field_info.cpp

Co-authored-by: Ye Kuang <[email protected]>

* format

* get_ptr(uint64_t offset = 0)

* fix requirements?

* fix requirements, second try

* ok wth is up with this setuptools-rust thingy

Co-authored-by: Ye Kuang <[email protected]>
  • Loading branch information
AmesingFlank and k-ye authored Oct 9, 2021
1 parent bac269e commit 434607f
Show file tree
Hide file tree
Showing 11 changed files with 139 additions and 5 deletions.
13 changes: 13 additions & 0 deletions taichi/backends/cpu/cpu_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,19 @@ void CpuDevice::dealloc_memory(DeviceAllocation handle) {
info.ptr = nullptr;
}

DeviceAllocation CpuDevice::import_memory(void *ptr, size_t size) {
  // Record an externally managed buffer so it can be referenced through the
  // Device abstraction. NOTE(review): the pointer is only registered, never
  // freed here — presumably the caller retains ownership; confirm.
  AllocInfo imported;
  imported.ptr = ptr;
  imported.size = size;

  DeviceAllocation handle;
  handle.device = this;
  // The new entry will be appended at the current end of the table, so its
  // index is the table's size before the push.
  handle.alloc_id = allocations_.size();

  allocations_.push_back(imported);
  return handle;
}

} // namespace cpu
} // namespace lang

Expand Down
2 changes: 2 additions & 0 deletions taichi/backends/cpu/cpu_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class CpuDevice : public Device {
void unmap(DevicePtr ptr) override{TI_NOT_IMPLEMENTED};
void unmap(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED};

DeviceAllocation import_memory(void *ptr, size_t size);

void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override{
TI_NOT_IMPLEMENTED};

Expand Down
13 changes: 13 additions & 0 deletions taichi/backends/cuda/cuda_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,19 @@ void CudaDevice::dealloc_memory(DeviceAllocation handle) {
info.ptr = nullptr;
}

DeviceAllocation CudaDevice::import_memory(void *ptr, size_t size) {
  // Register an externally allocated device buffer with this CudaDevice and
  // hand back an allocation handle for it. NOTE(review): ownership appears to
  // stay with the caller — the pointer is only recorded here; confirm.
  AllocInfo imported;
  imported.ptr = ptr;
  imported.size = size;

  DeviceAllocation handle;
  handle.device = this;
  // Index of the soon-to-be-appended entry equals the pre-push table size.
  handle.alloc_id = allocations_.size();

  allocations_.push_back(imported);
  return handle;
}

} // namespace cuda
} // namespace lang

Expand Down
2 changes: 2 additions & 0 deletions taichi/backends/cuda/cuda_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ class CudaDevice : public Device {
void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override{
TI_NOT_IMPLEMENTED};

DeviceAllocation import_memory(void *ptr, size_t size);

Stream *get_compute_stream() override{TI_NOT_IMPLEMENTED};

private:
Expand Down
2 changes: 1 addition & 1 deletion taichi/backends/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ struct DeviceAllocation {
Device *device{nullptr};
uint32_t alloc_id{0};

DevicePtr get_ptr(uint64_t offset) const;
DevicePtr get_ptr(uint64_t offset = 0) const;

bool operator==(const DeviceAllocation &other) const {
return other.device == device && other.alloc_id == alloc_id;
Expand Down
35 changes: 31 additions & 4 deletions taichi/llvm/llvm_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,29 @@ void LlvmProgramImpl::initialize_llvm_runtime_snodes(const SNodeTree *tree,
TI_TRACE("Allocating data structure of size {} bytes", scomp->root_size);
std::size_t rounded_size =
taichi::iroundup(scomp->root_size, taichi_page_size);

Ptr root_buffer = snode_tree_buffer_manager->allocate(
runtime_jit, llvm_runtime, rounded_size, taichi_page_size, tree->id(),
result_buffer);

DeviceAllocation alloc{kDeviceNullAllocation};

if (config->arch == Arch::cuda) {
#if defined(TI_WITH_CUDA)
alloc = cuda_device()->import_memory(root_buffer, rounded_size);
#else
TI_NOT_IMPLEMENTED
#endif
} else {
alloc = cpu_device()->import_memory(root_buffer, rounded_size);
}

snode_tree_allocs_[tree->id()] = alloc;

runtime_jit->call<void *, std::size_t, int, int, int, std::size_t, Ptr>(
"runtime_initialize_snodes", llvm_runtime, scomp->root_size, root_id,
(int)snodes.size(), tree->id(), rounded_size,
snode_tree_buffer_manager->allocate(runtime_jit, llvm_runtime,
rounded_size, taichi_page_size,
tree->id(), result_buffer));
(int)snodes.size(), tree->id(), rounded_size, root_buffer);

for (int i = 0; i < (int)snodes.size(); i++) {
if (is_gc_able(snodes[i]->type)) {
std::size_t node_size;
Expand Down Expand Up @@ -527,5 +544,15 @@ cuda::CudaDevice *LlvmProgramImpl::cuda_device() {
return static_cast<cuda::CudaDevice *>(device_.get());
}

cpu::CpuDevice *LlvmProgramImpl::cpu_device() {
  // Downcasting device_ to CpuDevice is only sound when the program was
  // configured for a CPU arch; fail loudly otherwise.
  TI_ERROR_IF(!arch_is_cpu(config->arch), "arch is not cpu");
  auto *dev = device_.get();
  return static_cast<cpu::CpuDevice *>(dev);
}

DevicePtr LlvmProgramImpl::get_snode_tree_device_ptr(int tree_id) {
  // Resolve the root buffer allocation registered for this SNode tree and
  // return a pointer to its start (offset 0). operator[] preserves the
  // original behavior: an unknown tree_id yields a default-constructed
  // (null) allocation rather than throwing.
  return snode_tree_allocs_[tree_id].get_ptr();
}

} // namespace lang
} // namespace taichi
9 changes: 9 additions & 0 deletions taichi/llvm/llvm_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ namespace cuda {
class CudaDevice;
}

namespace cpu {
class CpuDevice;
}

class LlvmProgramImpl : public ProgramImpl {
public:
LlvmProgramImpl(CompileConfig &config, KernelProfilerBase *profiler);
Expand Down Expand Up @@ -132,6 +136,8 @@ class LlvmProgramImpl : public ProgramImpl {
return device_.get();
}

DevicePtr get_snode_tree_device_ptr(int tree_id) override;

private:
std::unique_ptr<TaichiLLVMContext> llvm_context_host{nullptr};
std::unique_ptr<TaichiLLVMContext> llvm_context_device{nullptr};
Expand All @@ -143,8 +149,11 @@ class LlvmProgramImpl : public ProgramImpl {

DeviceAllocation preallocated_device_buffer_alloc{kDeviceNullAllocation};

std::unordered_map<int, DeviceAllocation> snode_tree_allocs_;

std::unique_ptr<Device> device_;
cuda::CudaDevice *cuda_device();
cpu::CpuDevice *cpu_device();
};
} // namespace lang
} // namespace taichi
12 changes: 12 additions & 0 deletions taichi/program/program.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,18 @@ class Program {

LlvmProgramImpl *get_llvm_program_impl();

// Returns a DevicePtr to the start of the root buffer backing the SNode
// tree identified by tree_id. Thin delegation to the backend ProgramImpl.
DevicePtr get_snode_tree_device_ptr(int tree_id) {
return program_impl_->get_snode_tree_device_ptr(tree_id);
}

// Returns the backend's compute Device (non-owning pointer); delegates to
// the ProgramImpl.
Device *get_compute_device() {
return program_impl_->get_compute_device();
}

// Returns the backend's graphics Device (non-owning pointer); delegates to
// the ProgramImpl.
Device *get_graphics_device() {
return program_impl_->get_graphics_device();
}

private:
/**
* Materializes a new SNodeTree.
Expand Down
4 changes: 4 additions & 0 deletions taichi/program/program_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ class ProgramImpl {
return nullptr;
}

// Default for backends that do not track per-SNode-tree device allocations:
// report a null device pointer. Overridden by LlvmProgramImpl.
virtual DevicePtr get_snode_tree_device_ptr(int tree_id) {
return kDeviceNullPtr;
}

virtual ~ProgramImpl() {
}

Expand Down
46 changes: 46 additions & 0 deletions taichi/ui/common/field_info.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include "taichi/ui/common/field_info.h"
#include "taichi/program/program.h"

namespace taichi {

namespace ui {

using namespace taichi::lang;

DevicePtr get_device_ptr(taichi::lang::Program *program, SNode *snode) {
  /*
  GGUI assumes every input field was created directly via ti.field() or
  ti.Vector.field() with `shape` specified — i.e. via ti.root.dense.place(),
  so `snode`'s parent is a dense SNode and that node's parent is the tree
  root. The Python-side staging buffer that builds the VBO obeys this
  layout; the only caller that might violate it is set_image(), since the
  image is not part of the VBO. Under this assumption the field's device
  address is the root buffer's base plus the combined footprint of all
  siblings placed before it.
  */

  SNode *dense = snode->parent;
  SNode *root = dense->parent;

  const int tree_id = root->get_snode_tree_id();
  DevicePtr tree_base = program->get_snode_tree_device_ptr(tree_id);

  const int child_id = root->child_id(dense);

  TI_ASSERT_INFO(root == program->get_snode_root(tree_id),
                 "SNode roots don't match");

  // Sum the size of every sibling that precedes `dense` under the root.
  size_t byte_offset = 0;
  for (int i = 0; i < child_id; ++i) {
    SNode *sibling = root->ch[i].get();
    byte_offset += sibling->cell_size_bytes * sibling->num_cells_per_container;
  }

  return tree_base.get_ptr(byte_offset);
}

} // namespace ui

} // namespace taichi
6 changes: 6 additions & 0 deletions taichi/ui/common/field_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#include "taichi/ui/utils/utils.h"

#include "taichi/ir/type_utils.h"
#include "taichi/ir/snode.h"
#include "taichi/backends/device.h"
#include "taichi/program/program.h"

TI_UI_NAMESPACE_BEGIN

Expand Down Expand Up @@ -30,4 +33,7 @@ struct FieldInfo {
}
};

taichi::lang::DevicePtr get_device_ptr(taichi::lang::Program *program,
taichi::lang::SNode *snode);

TI_UI_NAMESPACE_END

0 comments on commit 434607f

Please sign in to comment.