// Review summary of the patch below. The patch wraps memory that was allocated
// elsewhere in a DeviceAllocation handle, and uses that to expose the SNode
// tree root buffers as DevicePtr values.
//
// What the patch changes, per file:
//   * cpu_device.{h,cpp}, cuda_device.{h,cpp}: new import_memory(ptr, size).
//     It records ptr/size in an AllocInfo, appends it to allocations_, and
//     returns a DeviceAllocation whose alloc_id is allocations_.size() read
//     before the push_back (i.e. the index of the new entry) and whose device
//     is `this`. The CPU and CUDA bodies are identical.
//   * device.h: DeviceAllocation::get_ptr gains a default argument, offset = 0.
//   * llvm_program.{h,cpp}: initialize_llvm_runtime_snodes now obtains the root
//     buffer from snode_tree_buffer_manager->allocate() before calling
//     "runtime_initialize_snodes", imports it into the CUDA device when
//     config->arch == Arch::cuda (TI_NOT_IMPLEMENTED without TI_WITH_CUDA) and
//     into the CPU device otherwise, and stores the resulting handle in
//     snode_tree_allocs_[tree->id()]. Adds cpu_device() and
//     get_snode_tree_device_ptr(tree_id).
//   * program_impl.h: virtual get_snode_tree_device_ptr() that returns
//     kDeviceNullPtr by default.
//   * program.h: Program forwards get_snode_tree_device_ptr(),
//     get_compute_device() and get_graphics_device() to program_impl_.
//   * ui/common/field_info.{h,cpp}: new get_device_ptr(program, snode). It sums
//     cell_size_bytes * num_cells_per_container over the children of the root
//     that precede the snode's parent, and returns the tree's root pointer at
//     that offset.
//
// NOTE(review): as shown here, `static_cast(device_.get())`,
//   `std::unique_ptr llvm_context_host` and `std::unordered_map
//   snode_tree_allocs_` carry no template arguments, and the patch's line
//   breaks look collapsed. Presumably angle-bracketed text and newlines were
//   lost in extraction -- confirm against the upstream commit before applying.
// NOTE(review): get_snode_tree_device_ptr() indexes snode_tree_allocs_ with
//   operator[]. If the member is a std::unordered_map keyed by tree id
//   (arguments not visible here), an unknown tree_id silently inserts a
//   default DeviceAllocation instead of failing; a find()-based lookup would
//   avoid mutating the map on a read path.
// NOTE(review): ui::get_device_ptr() dereferences snode->parent and
//   dense_parent->parent without null checks, and its TI_ASSERT_INFO on the
//   root runs only after `root` has already been used for get_snode_tree_id()
//   and child_id(). Only the root identity is asserted; the "parent is a dense"
//   assumption described in the comment is not checked.
// NOTE(review): alloc_id is declared uint32_t in device.h but is assigned from
//   allocations_.size(); presumably a narrowing conversion -- confirm the
//   container type.
// NOTE(review): the else-branch in initialize_llvm_runtime_snodes calls
//   cpu_device(), which raises "arch is not cpu" unless
//   arch_is_cpu(config->arch). Verify no other arch reaches this path.
// NOTE(review): the visible dealloc_memory context shows only
//   `info.ptr = nullptr;`. Whether the devices free the underlying memory
//   cannot be determined from this patch -- verify imported (non-owned)
//   pointers are never released by the device.
diff --git a/taichi/backends/cpu/cpu_device.cpp b/taichi/backends/cpu/cpu_device.cpp index 20ac5bf7013d4..0b0317e0628c9 100644 --- a/taichi/backends/cpu/cpu_device.cpp +++ b/taichi/backends/cpu/cpu_device.cpp @@ -36,6 +36,19 @@ void CpuDevice::dealloc_memory(DeviceAllocation handle) { info.ptr = nullptr; } +DeviceAllocation CpuDevice::import_memory(void *ptr, size_t size) { + AllocInfo info; + info.ptr = ptr; + info.size = size; + + DeviceAllocation alloc; + alloc.alloc_id = allocations_.size(); + alloc.device = this; + + allocations_.push_back(info); + return alloc; +} + } // namespace cpu } // namespace lang diff --git a/taichi/backends/cpu/cpu_device.h b/taichi/backends/cpu/cpu_device.h index 82f88f9483c1b..3b9198adde232 100644 --- a/taichi/backends/cpu/cpu_device.h +++ b/taichi/backends/cpu/cpu_device.h @@ -97,6 +97,8 @@ class CpuDevice : public Device { void unmap(DevicePtr ptr) override{TI_NOT_IMPLEMENTED}; void unmap(DeviceAllocation alloc) override{TI_NOT_IMPLEMENTED}; + DeviceAllocation import_memory(void *ptr, size_t size); + void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override{ TI_NOT_IMPLEMENTED}; diff --git a/taichi/backends/cuda/cuda_device.cpp b/taichi/backends/cuda/cuda_device.cpp index 36040b6f1f807..65903e49aa166 100644 --- a/taichi/backends/cuda/cuda_device.cpp +++ b/taichi/backends/cuda/cuda_device.cpp @@ -40,6 +40,19 @@ void CudaDevice::dealloc_memory(DeviceAllocation handle) { info.ptr = nullptr; } +DeviceAllocation CudaDevice::import_memory(void *ptr, size_t size) { + AllocInfo info; + info.ptr = ptr; + info.size = size; + + DeviceAllocation alloc; + alloc.alloc_id = allocations_.size(); + alloc.device = this; + + allocations_.push_back(info); + return alloc; +} + } // namespace cuda } // namespace lang diff --git a/taichi/backends/cuda/cuda_device.h b/taichi/backends/cuda/cuda_device.h index 2a9c745ce50dc..c1bbe5e99b387 100644 --- a/taichi/backends/cuda/cuda_device.h +++ b/taichi/backends/cuda/cuda_device.h @@ -101,6 
+101,8 @@ class CudaDevice : public Device { void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override{ TI_NOT_IMPLEMENTED}; + DeviceAllocation import_memory(void *ptr, size_t size); + Stream *get_compute_stream() override{TI_NOT_IMPLEMENTED}; private: diff --git a/taichi/backends/device.h b/taichi/backends/device.h index a87db93d22ab4..467462fdadb4d 100644 --- a/taichi/backends/device.h +++ b/taichi/backends/device.h @@ -43,7 +43,7 @@ struct DeviceAllocation { Device *device{nullptr}; uint32_t alloc_id{0}; - DevicePtr get_ptr(uint64_t offset) const; + DevicePtr get_ptr(uint64_t offset = 0) const; bool operator==(const DeviceAllocation &other) const { return other.device == device && other.alloc_id == alloc_id; diff --git a/taichi/llvm/llvm_program.cpp b/taichi/llvm/llvm_program.cpp index 92e1e7fcebb0a..b1dd3651050c4 100644 --- a/taichi/llvm/llvm_program.cpp +++ b/taichi/llvm/llvm_program.cpp @@ -166,12 +166,29 @@ void LlvmProgramImpl::initialize_llvm_runtime_snodes(const SNodeTree *tree, TI_TRACE("Allocating data structure of size {} bytes", scomp->root_size); std::size_t rounded_size = taichi::iroundup(scomp->root_size, taichi_page_size); + + Ptr root_buffer = snode_tree_buffer_manager->allocate( + runtime_jit, llvm_runtime, rounded_size, taichi_page_size, tree->id(), + result_buffer); + + DeviceAllocation alloc{kDeviceNullAllocation}; + + if (config->arch == Arch::cuda) { +#if defined(TI_WITH_CUDA) + alloc = cuda_device()->import_memory(root_buffer, rounded_size); +#else + TI_NOT_IMPLEMENTED +#endif + } else { + alloc = cpu_device()->import_memory(root_buffer, rounded_size); + } + + snode_tree_allocs_[tree->id()] = alloc; + runtime_jit->call( "runtime_initialize_snodes", llvm_runtime, scomp->root_size, root_id, - (int)snodes.size(), tree->id(), rounded_size, - snode_tree_buffer_manager->allocate(runtime_jit, llvm_runtime, - rounded_size, taichi_page_size, - tree->id(), result_buffer)); + (int)snodes.size(), tree->id(), rounded_size, 
root_buffer); + for (int i = 0; i < (int)snodes.size(); i++) { if (is_gc_able(snodes[i]->type)) { std::size_t node_size; @@ -527,5 +544,15 @@ cuda::CudaDevice *LlvmProgramImpl::cuda_device() { return static_cast(device_.get()); } +cpu::CpuDevice *LlvmProgramImpl::cpu_device() { + TI_ERROR_IF(!arch_is_cpu(config->arch), "arch is not cpu"); + return static_cast(device_.get()); +} + +DevicePtr LlvmProgramImpl::get_snode_tree_device_ptr(int tree_id) { + DeviceAllocation tree_alloc = snode_tree_allocs_[tree_id]; + return tree_alloc.get_ptr(); +} + } // namespace lang } // namespace taichi diff --git a/taichi/llvm/llvm_program.h b/taichi/llvm/llvm_program.h index 3557e3ba094a1..3b7aefc15218e 100644 --- a/taichi/llvm/llvm_program.h +++ b/taichi/llvm/llvm_program.h @@ -26,6 +26,10 @@ namespace cuda { class CudaDevice; } +namespace cpu { +class CpuDevice; +} + class LlvmProgramImpl : public ProgramImpl { public: LlvmProgramImpl(CompileConfig &config, KernelProfilerBase *profiler); @@ -132,6 +136,8 @@ class LlvmProgramImpl : public ProgramImpl { return device_.get(); } + DevicePtr get_snode_tree_device_ptr(int tree_id) override; + private: std::unique_ptr llvm_context_host{nullptr}; std::unique_ptr llvm_context_device{nullptr}; @@ -143,8 +149,11 @@ class LlvmProgramImpl : public ProgramImpl { DeviceAllocation preallocated_device_buffer_alloc{kDeviceNullAllocation}; + std::unordered_map snode_tree_allocs_; + std::unique_ptr device_; cuda::CudaDevice *cuda_device(); + cpu::CpuDevice *cpu_device(); }; } // namespace lang } // namespace taichi diff --git a/taichi/program/program.h b/taichi/program/program.h index 2fb5c82998ee4..f590c0607dda7 100644 --- a/taichi/program/program.h +++ b/taichi/program/program.h @@ -267,6 +267,18 @@ class Program { LlvmProgramImpl *get_llvm_program_impl(); + DevicePtr get_snode_tree_device_ptr(int tree_id) { + return program_impl_->get_snode_tree_device_ptr(tree_id); + } + + Device *get_compute_device() { + return 
program_impl_->get_compute_device(); + } + + Device *get_graphics_device() { + return program_impl_->get_graphics_device(); + } + private: /** * Materializes a new SNodeTree. diff --git a/taichi/program/program_impl.h b/taichi/program/program_impl.h index 0ae9183586d1a..5fab64ef62aa0 100644 --- a/taichi/program/program_impl.h +++ b/taichi/program/program_impl.h @@ -65,6 +65,10 @@ class ProgramImpl { return nullptr; } + virtual DevicePtr get_snode_tree_device_ptr(int tree_id) { + return kDeviceNullPtr; + } + virtual ~ProgramImpl() { } diff --git a/taichi/ui/common/field_info.cpp b/taichi/ui/common/field_info.cpp new file mode 100644 index 0000000000000..db84184b3b882 --- /dev/null +++ b/taichi/ui/common/field_info.cpp @@ -0,0 +1,46 @@ +#include "taichi/ui/common/field_info.h" +#include "taichi/program/program.h" + +namespace taichi { + +namespace ui { + +using namespace taichi::lang; + +DevicePtr get_device_ptr(taichi::lang::Program *program, SNode *snode) { + /* + GGUI makes the assumption that the input fields are created directly from + ti.field() or ti.Vector field with `shape` specified. In other words, we + assume that the fields are created via ti.root.dense.place() That is, the + parent of the snode is a dense, and the parent of that node is a root. Note + that, GGUI's python-side code creates a staging buffer to construct the VBO, + which obeys this assumption. Thus, the only situation where this assumption + may be violated is for set_image(), because the image isn't part of the VBO. + Using this assumption, we will compute the offset of this field relative to + the begin of the root buffer. 
+ */ + + SNode *dense_parent = snode->parent; + SNode *root = dense_parent->parent; + + int tree_id = root->get_snode_tree_id(); + DevicePtr root_ptr = program->get_snode_tree_device_ptr(tree_id); + + size_t offset = 0; + + int child_id = root->child_id(dense_parent); + + TI_ASSERT_INFO(root == program->get_snode_root(tree_id), + "SNode roots don't match"); + + for (int i = 0; i < child_id; ++i) { + SNode *child = root->ch[i].get(); + offset += child->cell_size_bytes * child->num_cells_per_container; + } + + return root_ptr.get_ptr(offset); +} + +} // namespace ui + +} // namespace taichi diff --git a/taichi/ui/common/field_info.h b/taichi/ui/common/field_info.h index 1de960db86336..21f262492e3a2 100644 --- a/taichi/ui/common/field_info.h +++ b/taichi/ui/common/field_info.h @@ -2,6 +2,9 @@ #include "taichi/ui/utils/utils.h" #include "taichi/ir/type_utils.h" +#include "taichi/ir/snode.h" +#include "taichi/backends/device.h" +#include "taichi/program/program.h" TI_UI_NAMESPACE_BEGIN @@ -30,4 +33,7 @@ struct FieldInfo { } }; +taichi::lang::DevicePtr get_device_ptr(taichi::lang::Program *program, + taichi::lang::SNode *snode); + TI_UI_NAMESPACE_END