From 0d7168fb5be3ded16c804d801e8b5fff0a191084 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Thu, 7 Jul 2022 22:08:12 -0700 Subject: [PATCH] [llvm] Drop code for llvm 15. (#5313) * Drop code for llvm 15 Still use clang10 for COMPILE_LLVM_RUNTIME. Most changes are related to the opaque pointer change in LLVM 15, which requires an explicit type when creating Load and GEP instructions. It would be nice to generate the type from the taichi type. Known issues: There are still some uses of getPointerElementType, which is deprecated in LLVM 15. Some cases also use a hack to get the LLVM type for Load/GEP. Only tested with python tests/run_tests.py -v -t3 -a cpu -s on Windows. 2 crashes: fp16 crashes when the hardware does not support fp16 for Vulkan. element_wise crashes in an LLVM pass; more time is needed to debug it. --- CMakeLists.txt | 5 +- cmake/TaichiCore.cmake | 5 + taichi/codegen/cpu/codegen_cpu.cpp | 20 +- taichi/codegen/cuda/codegen_cuda.cpp | 55 +++- taichi/codegen/llvm/codegen_llvm.cpp | 301 ++++++++++++++++++--- taichi/codegen/llvm/codegen_llvm.h | 5 +- taichi/codegen/llvm/codegen_llvm_quant.cpp | 29 +- taichi/codegen/llvm/llvm_codegen_utils.cpp | 8 +- taichi/codegen/llvm/llvm_codegen_utils.h | 17 +- taichi/codegen/llvm/struct_llvm.cpp | 15 +- taichi/codegen/wasm/codegen_wasm.cpp | 9 +- taichi/runtime/cpu/jit_cpu.cpp | 62 ++++- taichi/runtime/cuda/jit_cuda.cpp | 10 +- taichi/runtime/cuda/jit_cuda.h | 4 + taichi/runtime/llvm/llvm_context.cpp | 32 ++- 15 files changed, 498 insertions(+), 79 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 55fcf2fe2faae..69d5ed5d6b845 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,7 +80,10 @@ if (WIN32) # # FIXME: (penguinliong) This is fixed in later releases of LLVM so maybe # someday we can distribute `Debug` libraries, if it's ever needed. 
- SET(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreadedDLL) + if (NOT TI_LLVM_15) + SET(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreadedDLL) + endif() + message("CMAKE_MSVC_RUNTIME_LIBRARY: ${CMAKE_MSVC_RUNTIME_LIBRARY}") endif() # No support of Python for Android build; or in any case taichi is integrated diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake index edc0c5394078e..50ab405060fb8 100644 --- a/cmake/TaichiCore.cmake +++ b/cmake/TaichiCore.cmake @@ -1,5 +1,6 @@ option(USE_STDCPP "Use -stdlib=libc++" OFF) option(TI_WITH_LLVM "Build with LLVM backends" ON) +option(TI_LLVM_15 "Switch to LLVM 15" OFF) option(TI_WITH_METAL "Build with the Metal backend" ON) option(TI_WITH_CUDA "Build with the CUDA backend" ON) option(TI_WITH_CUDA_TOOLKIT "Build with the CUDA toolkit" OFF) @@ -142,6 +143,10 @@ else() list(REMOVE_ITEM TAICHI_CORE_SOURCE ${TAICHI_LLVM_SOURCE}) endif() +if (TI_LLVM_15) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_LLVM_15") +endif() + if (TI_WITH_CUDA) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_WITH_CUDA") file(GLOB TAICHI_CUDA_RUNTIME_SOURCE "taichi/runtime/cuda/runtime.cpp") diff --git a/taichi/codegen/cpu/codegen_cpu.cpp b/taichi/codegen/cpu/codegen_cpu.cpp index 79a62a6b093aa..1da58dcceb13b 100644 --- a/taichi/codegen/cpu/codegen_cpu.cpp +++ b/taichi/codegen/cpu/codegen_cpu.cpp @@ -107,8 +107,14 @@ class CodeGenLLVMCPU : public CodeGenLLVM { { builder->SetInsertPoint(loop_test_bb); +#ifdef TI_LLVM_15 + auto *loop_index_load = + builder->CreateLoad(builder->getInt32Ty(), loop_index); +#else + auto *loop_index_load = builder->CreateLoad(loop_index); +#endif auto cond = builder->CreateICmp( - llvm::CmpInst::Predicate::ICMP_SLT, builder->CreateLoad(loop_index), + llvm::CmpInst::Predicate::ICMP_SLT, loop_index_load, llvm_val[stmt->owned_num_local.find(stmt->major_from_type) ->second]); builder->CreateCondBr(cond, loop_body_bb, func_exit); @@ -121,9 +127,15 @@ class CodeGenLLVMCPU : public CodeGenLLVM { auto &s = stmt->body->statements[i]; 
s->accept(this); } - builder->CreateStore(builder->CreateAdd(builder->CreateLoad(loop_index), - tlctx->get_constant(1)), - loop_index); +#ifdef TI_LLVM_15 + auto *loop_index_load = + builder->CreateLoad(builder->getInt32Ty(), loop_index); +#else + auto *loop_index_load = builder->CreateLoad(loop_index); +#endif + builder->CreateStore( + builder->CreateAdd(loop_index_load, tlctx->get_constant(1)), + loop_index); builder->CreateBr(loop_test_bb); builder->SetInsertPoint(func_exit); } diff --git a/taichi/codegen/cuda/codegen_cuda.cpp b/taichi/codegen/cuda/codegen_cuda.cpp index 063679a7068be..836bc0e243756 100644 --- a/taichi/codegen/cuda/codegen_cuda.cpp +++ b/taichi/codegen/cuda/codegen_cuda.cpp @@ -65,6 +65,9 @@ class CodeGenLLVMCUDA : public CodeGenLLVM { auto value_arr = builder->CreateAlloca(stype); for (int i = 0; i < values.size(); i++) { auto value_ptr = builder->CreateGEP( +#ifdef TI_LLVM_15 + stype, +#endif value_arr, {tlctx->get_constant(0), tlctx->get_constant(i)}); builder->CreateStore(values[i], value_ptr); } @@ -324,8 +327,11 @@ class CodeGenLLVMCUDA : public CodeGenLLVM { // Use the value from the memory that atomicCAS operates on to initialize // cas_old_output. - llvm::Value *cas_old_output = - builder->CreateLoad(atomic_memory_address, "cas_old_output"); + llvm::Value *cas_old_output = builder->CreateLoad( +#ifdef TI_LLVM_15 + atomic_type, +#endif + atomic_memory_address, "cas_old_output"); builder->CreateStore(cas_old_output, cas_old_output_address); llvm::BasicBlock *loop_body_bb = @@ -338,21 +344,35 @@ class CodeGenLLVMCUDA : public CodeGenLLVM { // loop body for one atomicCAS { // Use cas_old_output to initialize cas_new_output. 
- cas_old_output = - builder->CreateLoad(cas_old_output_address, "cas_old_output"); + cas_old_output = builder->CreateLoad( +#ifdef TI_LLVM_15 + atomic_type, +#endif + cas_old_output_address, "cas_old_output"); builder->CreateStore(cas_old_output, cas_new_output_address); - auto binop_output = op(builder->CreateLoad(binop_output_address), val); + auto binop_output = op(builder->CreateLoad( +#ifdef TI_LLVM_15 + atomic_type, +#endif + binop_output_address), + val); builder->CreateStore(binop_output, binop_output_address); - llvm::Value *cas_new_output = - builder->CreateLoad(cas_new_output_address, "cas_new_output"); + llvm::Value *cas_new_output = builder->CreateLoad( +#ifdef TI_LLVM_15 + atomic_type, +#endif + cas_new_output_address, "cas_new_output"); // Emit code to perform the atomicCAS operation // (cas_old_output, success) = atomicCAS(memory_address, cas_old_output, // cas_new_output); llvm::Value *ret_value = builder->CreateAtomicCmpXchg( atomic_memory_address, cas_old_output, cas_new_output, +#ifdef TI_LLVM_15 + llvm::MaybeAlign(0), +#endif llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering::SequentiallyConsistent); @@ -427,8 +447,8 @@ class CodeGenLLVMCUDA : public CodeGenLLVM { llvm::BasicBlock::Create(*llvm_context, "loop_body", func); auto func_exit = llvm::BasicBlock::Create(*llvm_context, "func_exit", func); - auto loop_index = - create_entry_block_alloca(llvm::Type::getInt32Ty(*llvm_context)); + auto i32_ty = llvm::Type::getInt32Ty(*llvm_context); + auto loop_index = create_entry_block_alloca(i32_ty); llvm::Value *thread_idx = builder->CreateIntrinsic(Intrinsic::nvvm_read_ptx_sreg_tid_x, {}, {}); llvm::Value *block_dim = builder->CreateIntrinsic( @@ -439,7 +459,12 @@ class CodeGenLLVMCUDA : public CodeGenLLVM { { builder->SetInsertPoint(loop_test_bb); auto cond = builder->CreateICmp( - llvm::CmpInst::Predicate::ICMP_SLT, builder->CreateLoad(loop_index), + llvm::CmpInst::Predicate::ICMP_SLT, + builder->CreateLoad( +#ifdef TI_LLVM_15 + 
i32_ty, +#endif + loop_index), llvm_val[stmt->owned_num_local.find(stmt->major_from_type) ->second]); builder->CreateCondBr(cond, loop_body_bb, func_exit); @@ -452,9 +477,13 @@ class CodeGenLLVMCUDA : public CodeGenLLVM { auto &s = stmt->body->statements[i]; s->accept(this); } - builder->CreateStore( - builder->CreateAdd(builder->CreateLoad(loop_index), block_dim), - loop_index); + builder->CreateStore(builder->CreateAdd(builder->CreateLoad( +#ifdef TI_LLVM_15 + i32_ty, +#endif + loop_index), + block_dim), + loop_index); builder->CreateBr(loop_test_bb); builder->SetInsertPoint(func_exit); } diff --git a/taichi/codegen/llvm/codegen_llvm.cpp b/taichi/codegen/llvm/codegen_llvm.cpp index 8ddc91261ee75..8afff0a2383f7 100644 --- a/taichi/codegen/llvm/codegen_llvm.cpp +++ b/taichi/codegen/llvm/codegen_llvm.cpp @@ -927,7 +927,22 @@ void CodeGenLLVM::emit_gc(OffloadedStmt *stmt) { llvm::Value *CodeGenLLVM::create_call(llvm::Value *func, llvm::ArrayRef args) { check_func_call_signature(func, args); +#ifdef TI_LLVM_15 + llvm::FunctionType *func_ty = nullptr; + if (auto *fn = llvm::dyn_cast(func)) { + func_ty = fn->getFunctionType(); + } else if (auto *fn_ptr = llvm::dyn_cast(func)) { + auto *fn_ret_ty = fn_ptr->getCalledFunction()->getReturnType(); + fn_ret_ty = fn_ret_ty->getPointerElementType(); + func_ty = llvm::cast(fn_ret_ty); + } else if (auto *fn_global = llvm::dyn_cast(func)) { + func_ty = llvm::cast(fn_global->getValueType()); + } + TI_ASSERT(func_ty); + return builder->CreateCall(func_ty, func, args); +#else return builder->CreateCall(func, args); +#endif } llvm::Value *CodeGenLLVM::create_call(std::string func_name, @@ -937,8 +952,12 @@ llvm::Value *CodeGenLLVM::create_call(std::string func_name, } void CodeGenLLVM::create_increment(llvm::Value *ptr, llvm::Value *value) { - builder->CreateStore(builder->CreateAdd(builder->CreateLoad(ptr), value), - ptr); + auto original_value = builder->CreateLoad( +#ifdef TI_LLVM_15 + value->getType(), +#endif + ptr); + 
builder->CreateStore(builder->CreateAdd(original_value, value), ptr); } void CodeGenLLVM::create_naive_range_for(RangeForStmt *for_stmt) { @@ -950,6 +969,10 @@ void CodeGenLLVM::create_naive_range_for(RangeForStmt *for_stmt) { BasicBlock *loop_test = BasicBlock::Create(*llvm_context, "for_loop_test", func); +#ifdef TI_LLVM_15 + auto loop_var_ty = llvm_type(PrimitiveType::i32); +#endif + auto loop_var = create_entry_block_alloca(PrimitiveType::i32); loop_vars_llvm[for_stmt].push_back(loop_var); @@ -968,11 +991,19 @@ void CodeGenLLVM::create_naive_range_for(RangeForStmt *for_stmt) { llvm::Value *cond; if (!for_stmt->reversed) { cond = builder->CreateICmp(llvm::CmpInst::Predicate::ICMP_SLT, - builder->CreateLoad(loop_var), + builder->CreateLoad( +#ifdef TI_LLVM_15 + loop_var_ty, +#endif + loop_var), llvm_val[for_stmt->end]); } else { cond = builder->CreateICmp(llvm::CmpInst::Predicate::ICMP_SGE, - builder->CreateLoad(loop_var), + builder->CreateLoad( +#ifdef TI_LLVM_15 + loop_var_ty, +#endif + loop_var), llvm_val[for_stmt->begin]); } builder->CreateCondBr(cond, body, after_loop); @@ -1097,7 +1128,20 @@ void CodeGenLLVM::visit(ReturnStmt *stmt) { void CodeGenLLVM::visit(LocalLoadStmt *stmt) { TI_ASSERT(stmt->width() == 1); +#ifdef TI_LLVM_15 + // FIXME: get ptr_ty from taichi instead of llvm. 
+ llvm::Type *ptr_ty = nullptr; + auto *val = llvm_val[stmt->src[0].var]; + if (auto *alloc = llvm::dyn_cast(val)) + ptr_ty = alloc->getAllocatedType(); + if (!ptr_ty && stmt->src[0].var->element_type().is_pointer()) { + ptr_ty = llvm_type(stmt->src[0].var->element_type().ptr_removed()); + } + TI_ASSERT(ptr_ty); + llvm_val[stmt] = builder->CreateLoad(ptr_ty, llvm_val[stmt->src[0].var]); +#else llvm_val[stmt] = builder->CreateLoad(llvm_val[stmt->src[0].var]); +#endif } void CodeGenLLVM::visit(LocalStoreStmt *stmt) { @@ -1133,12 +1177,19 @@ void CodeGenLLVM::visit(AssertStmt *stmt) { // Finally store the int64 value to the argument buffer: builder->CreateStore( - cast_int64, builder->CreateGEP(arguments, {tlctx->get_constant(0), - tlctx->get_constant(i)})); + cast_int64, + builder->CreateGEP( +#ifdef TI_LLVM_15 + argument_buffer_size, +#endif + arguments, {tlctx->get_constant(0), tlctx->get_constant(i)})); } args.emplace_back(tlctx->get_constant((int)stmt->args.size())); args.emplace_back(builder->CreateGEP( +#ifdef TI_LLVM_15 + argument_buffer_size, +#endif arguments, {tlctx->get_constant(0), tlctx->get_constant(0)})); llvm_val[stmt] = create_call("taichi_assert_format", args); @@ -1213,6 +1264,9 @@ llvm::Value *CodeGenLLVM::integral_type_atomic(AtomicOpStmt *stmt) { TI_ASSERT(bin_op.find(stmt->op_type) != bin_op.end()); return builder->CreateAtomicRMW(bin_op.at(stmt->op_type), llvm_val[stmt->dest], llvm_val[stmt->val], +#ifdef TI_LLVM_15 + llvm::MaybeAlign(0), +#endif llvm::AtomicOrdering::SequentiallyConsistent); } @@ -1231,7 +1285,11 @@ llvm::Value *CodeGenLLVM::atomic_op_using_cas( llvm::Value *old_val; { - old_val = builder->CreateLoad(dest); + old_val = builder->CreateLoad( +#ifdef TI_LLVM_15 + val->getType(), +#endif + dest); auto new_val = op(old_val, val); dest = builder->CreateBitCast(dest, llvm::Type::getInt16PtrTy(*llvm_context)); @@ -1239,6 +1297,9 @@ llvm::Value *CodeGenLLVM::atomic_op_using_cas( dest, builder->CreateBitCast(old_val, 
llvm::Type::getInt16Ty(*llvm_context)), builder->CreateBitCast(new_val, llvm::Type::getInt16Ty(*llvm_context)), +#ifdef TI_LLVM_15 + llvm::MaybeAlign(0), +#endif AtomicOrdering::SequentiallyConsistent, AtomicOrdering::SequentiallyConsistent); // Check whether CAS was succussful @@ -1280,6 +1341,9 @@ llvm::Value *CodeGenLLVM::real_type_atomic(AtomicOpStmt *stmt) { if (op == AtomicOpType::add) { return builder->CreateAtomicRMW( llvm::AtomicRMWInst::FAdd, llvm_val[stmt->dest], llvm_val[stmt->val], +#ifdef TI_LLVM_15 + llvm::MaybeAlign(0), +#endif llvm::AtomicOrdering::SequentiallyConsistent); } @@ -1355,7 +1419,9 @@ void CodeGenLLVM::visit(GlobalLoadStmt *stmt) { if (ptr_type->is_bit_pointer()) { auto val_type = ptr_type->get_pointee_type(); if (auto qit = val_type->cast()) { - llvm_val[stmt] = load_quant_int(llvm_val[stmt->src], qit); + llvm_val[stmt] = load_quant_int( + llvm_val[stmt->src], qit, + stmt->src->as()->input_snode->physical_type); } else { TI_ASSERT(val_type->is() || val_type->is()); @@ -1479,20 +1545,45 @@ llvm::Value *CodeGenLLVM::create_bit_ptr(llvm::Value *byte_ptr, // 3. store `byte_ptr` builder->CreateStore( byte_ptr, builder->CreateGEP( +#ifdef TI_LLVM_15 + struct_type, +#endif bit_ptr, {tlctx->get_constant(0), tlctx->get_constant(0)})); // 4. store `bit_offset - builder->CreateStore(bit_offset, - builder->CreateGEP(bit_ptr, {tlctx->get_constant(0), - tlctx->get_constant(1)})); + builder->CreateStore( + bit_offset, + builder->CreateGEP( +#ifdef TI_LLVM_15 + struct_type, +#endif + bit_ptr, {tlctx->get_constant(0), tlctx->get_constant(1)})); return bit_ptr; } std::tuple CodeGenLLVM::load_bit_ptr( llvm::Value *bit_ptr) { +#ifdef TI_LLVM_15 + // FIXME: get ptr_ty from taichi instead of llvm. 
+ llvm::Type *ptr_ty = nullptr; + if (auto *AI = llvm::dyn_cast(bit_ptr)) + ptr_ty = AI->getAllocatedType(); + TI_ASSERT(ptr_ty); + auto *struct_ty = llvm::cast(ptr_ty); + auto byte_ptr = builder->CreateLoad( + struct_ty->getElementType(0), + builder->CreateGEP(ptr_ty, bit_ptr, + {tlctx->get_constant(0), tlctx->get_constant(0)})); + auto bit_offset = builder->CreateLoad( + struct_ty->getElementType(1), + builder->CreateGEP(ptr_ty, bit_ptr, + {tlctx->get_constant(0), tlctx->get_constant(1)})); +#else auto byte_ptr = builder->CreateLoad(builder->CreateGEP( bit_ptr, {tlctx->get_constant(0), tlctx->get_constant(0)})); auto bit_offset = builder->CreateLoad(builder->CreateGEP( bit_ptr, {tlctx->get_constant(0), tlctx->get_constant(1)})); +#endif + return std::make_tuple(byte_ptr, bit_offset); } @@ -1510,7 +1601,19 @@ void CodeGenLLVM::visit(SNodeLookupStmt *stmt) { TI_ASSERT(parent); auto snode = stmt->snode; if (snode->type == SNodeType::root) { +#ifdef TI_LLVM_15 + // FIXME: get parent_type from taichi instead of llvm. + llvm::Type *parent_ty = builder->getInt8Ty(); + if (auto bit_cast = llvm::dyn_cast(parent)) { + parent_ty = bit_cast->getDestTy(); + if (auto ptr_ty = llvm::dyn_cast(parent_ty)) + parent_ty = ptr_ty->getPointerElementType(); + } + llvm_val[stmt] = + builder->CreateGEP(parent_ty, parent, llvm_val[stmt->input_index]); +#else llvm_val[stmt] = builder->CreateGEP(parent, llvm_val[stmt->input_index]); +#endif } else if (snode->type == SNodeType::dense || snode->type == SNodeType::pointer || snode->type == SNodeType::dynamic || @@ -1558,8 +1661,31 @@ void CodeGenLLVM::visit(GetChStmt *stmt) { void CodeGenLLVM::visit(PtrOffsetStmt *stmt) { if (stmt->is_local_ptr()) { +#ifdef TI_LLVM_15 + // FIXME: get ptr_ty from taichi instead of llvm. 
+ llvm::Type *ptr_ty = nullptr; + auto *val = llvm_val[stmt->origin]; + if (auto *alloc = llvm::dyn_cast(val)) + ptr_ty = alloc->getAllocatedType(); + else if (auto *gv = llvm::dyn_cast(val)) + ptr_ty = gv->getValueType(); + else if (stmt->origin->is()) { + auto *tmpo_stmt = stmt->origin->cast(); + if (tmpo_stmt->ret_type->is()) { + ptr_ty = tlctx->get_data_type( + tmpo_stmt->ret_type->cast()->get_element_type()); + } else { + ptr_ty = tlctx->get_data_type(tmpo_stmt->ret_type.ptr_removed()); + } + } + TI_ASSERT(ptr_ty); + + llvm_val[stmt] = builder->CreateGEP(ptr_ty, llvm_val[stmt->origin], + llvm_val[stmt->offset]); +#else llvm_val[stmt] = builder->CreateGEP(llvm_val[stmt->origin], llvm_val[stmt->offset]); +#endif } else { auto origin_address = builder->CreatePtrToInt( llvm_val[stmt->origin], llvm::Type::getInt64Ty(*llvm_context)); @@ -1594,9 +1720,9 @@ void CodeGenLLVM::visit(ExternalPtrStmt *stmt) { } auto dt = stmt->ret_type.ptr_removed(); - auto base = builder->CreateBitCast( - llvm_val[stmt->base_ptrs[0]], - llvm::PointerType::get(tlctx->get_data_type(dt), 0)); + auto base_ty = tlctx->get_data_type(dt); + auto base = builder->CreateBitCast(llvm_val[stmt->base_ptrs[0]], + llvm::PointerType::get(base_ty, 0)); auto linear_index = tlctx->get_constant(0); size_t size_var_index = 0; @@ -1612,7 +1738,11 @@ void CodeGenLLVM::visit(ExternalPtrStmt *stmt) { linear_index = builder->CreateAdd(linear_index, llvm_val[stmt->indices[i]]); } TI_ASSERT(size_var_index == num_indices - element_shape.size()) - llvm_val[stmt] = builder->CreateGEP(base, linear_index); + llvm_val[stmt] = builder->CreateGEP( +#ifdef TI_LLVM_15 + base_ty, +#endif + base, linear_index); } void CodeGenLLVM::visit(ExternalTensorShapeAlongAxisStmt *stmt) { @@ -1691,7 +1821,11 @@ std::tuple CodeGenLLVM::get_range_for_bounds( stmt->begin_offset, TypeFactory::create_vector_or_scalar_type(1, PrimitiveType::i32)); begin_stmt->accept(this); - begin = builder->CreateLoad(llvm_val[begin_stmt.get()]); + begin = 
builder->CreateLoad( +#ifdef TI_LLVM_15 + llvm_type(PrimitiveType::i32), +#endif + llvm_val[begin_stmt.get()]); } if (stmt->const_end) { end = tlctx->get_constant(stmt->end_value); @@ -1700,7 +1834,11 @@ std::tuple CodeGenLLVM::get_range_for_bounds( stmt->end_offset, TypeFactory::create_vector_or_scalar_type(1, PrimitiveType::i32)); end_stmt->accept(this); - end = builder->CreateLoad(llvm_val[end_stmt.get()]); + end = builder->CreateLoad( +#ifdef TI_LLVM_15 + llvm_type(PrimitiveType::i32), +#endif + llvm_val[end_stmt.get()]); } return std::tuple(begin, end); } @@ -1773,9 +1911,8 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { * tls_epilogue() * return */ - - auto loop_index = - create_entry_block_alloca(llvm::Type::getInt32Ty(*llvm_context)); + auto loop_index_ty = llvm::Type::getInt32Ty(*llvm_context); + auto loop_index = create_entry_block_alloca(loop_index_ty); RuntimeObject element("Element", this, builder.get(), get_arg(2)); @@ -1839,9 +1976,13 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { // goto func_exit builder->SetInsertPoint(loop_test_bb); - auto cond = - builder->CreateICmp(llvm::CmpInst::Predicate::ICMP_SLT, - builder->CreateLoad(loop_index), upper_bound); + auto cond = builder->CreateICmp(llvm::CmpInst::Predicate::ICMP_SLT, + builder->CreateLoad( +#ifdef TI_LLVM_15 + loop_index_ty, +#endif + loop_index), + upper_bound); builder->CreateCondBr(cond, loop_body_bb, func_exit); } @@ -1853,7 +1994,11 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { auto new_coordinates = create_entry_block_alloca(physical_coordinate_ty); create_call(refine, {parent_coordinates, new_coordinates, - builder->CreateLoad(loop_index)}); + builder->CreateLoad( +#ifdef TI_LLVM_15 + loop_index_ty, +#endif + loop_index)}); // For a bit-vectorized loop over a quant array, one more refine step is // needed to make final coordinates non-consecutive, since each thread will @@ -1894,7 
+2039,11 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { leaf_block->type == SNodeType::pointer) { // test whether the current voxel is active or not auto is_active = call(leaf_block, element.get("element"), "is_active", - {builder->CreateLoad(loop_index)}); + {builder->CreateLoad( +#ifdef TI_LLVM_15 + loop_index_ty, +#endif + loop_index)}); is_active = builder->CreateTrunc(is_active, llvm::Type::getInt1Ty(*llvm_context)); exec_cond = builder->CreateAnd(exec_cond, is_active); @@ -1960,7 +2109,13 @@ void CodeGenLLVM::create_offload_struct_for(OffloadedStmt *stmt, bool spmd) { for (auto &bb : *patched_struct_for_func) { for (llvm::Instruction &inst : bb) { auto alloca = llvm::dyn_cast(&inst); - if (!alloca || alloca->getAlignment() != 8) + if (!alloca || +#ifdef TI_LLVM_15 + alloca->getAlign().value() != 8 +#else + alloca->getAlignment() != 8 +#endif + ) continue; auto alloca_type = alloca->getAllocatedType(); auto char_type = llvm::Type::getInt8Ty(*llvm_context); @@ -1998,12 +2153,32 @@ void CodeGenLLVM::visit(LoopIndexStmt *stmt) { if (stmt->loop->is() && stmt->loop->as()->task_type == OffloadedStmt::TaskType::struct_for) { +#ifdef TI_LLVM_15 + llvm::Type *struct_ty = nullptr; + // FIXME: get struct_ty from taichi instead of llvm. 
+ if (auto *alloca = llvm::dyn_cast(current_coordinates)) { + struct_ty = alloca->getAllocatedType(); + } + TI_ASSERT(struct_ty); + auto *GEP = + builder->CreateGEP(struct_ty, current_coordinates, + {tlctx->get_constant(0), tlctx->get_constant(0), + tlctx->get_constant(stmt->index)}); + if (stmt->index == 0 && !llvm::isa(GEP)) + GEP = builder->CreateBitCast(GEP, struct_ty->getPointerTo()); + llvm_val[stmt] = + builder->CreateLoad(llvm::Type::getInt32Ty(*llvm_context), GEP); +#else llvm_val[stmt] = builder->CreateLoad(builder->CreateGEP( current_coordinates, {tlctx->get_constant(0), tlctx->get_constant(0), tlctx->get_constant(stmt->index)})); +#endif } else { - llvm_val[stmt] = - builder->CreateLoad(loop_vars_llvm[stmt->loop][stmt->index]); + llvm_val[stmt] = builder->CreateLoad( +#ifdef TI_LLVM_15 + llvm::Type::getInt32Ty(*llvm_context), +#endif + loop_vars_llvm[stmt->loop][stmt->index]); } } @@ -2024,10 +2199,30 @@ void CodeGenLLVM::visit(BlockCornerIndexStmt *stmt) { stmt->loop->as()->task_type == OffloadedStmt::TaskType::struct_for) { TI_ASSERT(block_corner_coordinates); +#ifdef TI_LLVM_15 + // Make sure physical_coordinate_ty matches + // struct PhysicalCoordinates { + // i32 val[taichi_max_num_indices]; + // }; + TI_ASSERT(physical_coordinate_ty->isStructTy()); + auto physical_coordinate_ty_as_struct = + llvm::cast(physical_coordinate_ty); + TI_ASSERT(physical_coordinate_ty_as_struct); + TI_ASSERT(physical_coordinate_ty_as_struct->getNumElements() == 1); + auto val_ty = physical_coordinate_ty_as_struct->getElementType(0); + TI_ASSERT(val_ty->isArrayTy()); + auto val_ty_as_array = llvm::cast(val_ty); + llvm_val[stmt] = builder->CreateLoad( + val_ty_as_array->getElementType(), + builder->CreateGEP(physical_coordinate_ty, block_corner_coordinates, + {tlctx->get_constant(0), tlctx->get_constant(0), + tlctx->get_constant(stmt->index)})); +#else llvm_val[stmt] = builder->CreateLoad( builder->CreateGEP(block_corner_coordinates, {tlctx->get_constant(0), 
tlctx->get_constant(0), tlctx->get_constant(stmt->index)})); +#endif } else { TI_NOT_IMPLEMENTED; } @@ -2055,7 +2250,11 @@ void CodeGenLLVM::visit(GlobalTemporaryStmt *stmt) { void CodeGenLLVM::visit(ThreadLocalPtrStmt *stmt) { auto base = get_tls_base_ptr(); TI_ASSERT(stmt->width() == 1); - auto ptr = builder->CreateGEP(base, tlctx->get_constant(stmt->offset)); + auto ptr = builder->CreateGEP( +#ifdef TI_LLVM_15 + llvm::Type::getInt8Ty(*llvm_context), +#endif + base, tlctx->get_constant(stmt->offset)); auto ptr_type = llvm::PointerType::get( tlctx->get_data_type(stmt->ret_type.ptr_removed()), 0); llvm_val[stmt] = builder->CreatePointerCast(ptr, ptr_type); @@ -2066,6 +2265,9 @@ void CodeGenLLVM::visit(BlockLocalPtrStmt *stmt) { auto base = bls_buffer; TI_ASSERT(stmt->width() == 1); auto ptr = builder->CreateGEP( +#ifdef TI_LLVM_15 + base->getValueType(), +#endif base, {tlctx->get_constant(0), llvm_val[stmt->offset]}); auto ptr_type = llvm::PointerType::get( tlctx->get_data_type(stmt->ret_type.ptr_removed()), 0); @@ -2124,29 +2326,42 @@ void CodeGenLLVM::visit(AdStackLoadTopStmt *stmt) { auto stack = stmt->stack->as(); auto primal_ptr = call("stack_top_primal", llvm_val[stack], tlctx->get_constant(stack->element_size_in_bytes())); - primal_ptr = builder->CreateBitCast( - primal_ptr, - llvm::PointerType::get(tlctx->get_data_type(stmt->ret_type), 0)); - llvm_val[stmt] = builder->CreateLoad(primal_ptr); + auto primal_ty = tlctx->get_data_type(stmt->ret_type); + primal_ptr = + builder->CreateBitCast(primal_ptr, llvm::PointerType::get(primal_ty, 0)); + llvm_val[stmt] = builder->CreateLoad( +#ifdef TI_LLVM_15 + primal_ty, +#endif + primal_ptr); } void CodeGenLLVM::visit(AdStackLoadTopAdjStmt *stmt) { auto stack = stmt->stack->as(); auto adjoint = call("stack_top_adjoint", llvm_val[stack], tlctx->get_constant(stack->element_size_in_bytes())); - adjoint = builder->CreateBitCast( - adjoint, llvm::PointerType::get(tlctx->get_data_type(stmt->ret_type), 0)); - llvm_val[stmt] = 
builder->CreateLoad(adjoint); + auto adjoint_ty = tlctx->get_data_type(stmt->ret_type); + adjoint = + builder->CreateBitCast(adjoint, llvm::PointerType::get(adjoint_ty, 0)); + llvm_val[stmt] = builder->CreateLoad( +#ifdef TI_LLVM_15 + adjoint_ty, +#endif + adjoint); } void CodeGenLLVM::visit(AdStackAccAdjointStmt *stmt) { auto stack = stmt->stack->as(); auto adjoint_ptr = call("stack_top_adjoint", llvm_val[stack], tlctx->get_constant(stack->element_size_in_bytes())); - adjoint_ptr = builder->CreateBitCast( - adjoint_ptr, - llvm::PointerType::get(tlctx->get_data_type(stack->ret_type), 0)); - auto old_val = builder->CreateLoad(adjoint_ptr); + auto adjoint_ty = tlctx->get_data_type(stack->ret_type); + adjoint_ptr = builder->CreateBitCast(adjoint_ptr, + llvm::PointerType::get(adjoint_ty, 0)); + auto old_val = builder->CreateLoad( +#ifdef TI_LLVM_15 + adjoint_ty, +#endif + adjoint_ptr); TI_ASSERT(is_real(stmt->v->ret_type)); auto new_val = builder->CreateFAdd(old_val, llvm_val[stmt->v]); builder->CreateStore(new_val, adjoint_ptr); @@ -2443,7 +2658,11 @@ void CodeGenLLVM::visit(FuncCallStmt *stmt) { result_buffer = builder->CreateAlloca(tlctx->get_data_type()); call("RuntimeContext_set_result_buffer", new_ctx, result_buffer); create_call(llvm_func, {new_ctx}); - auto *ret_val_u64 = builder->CreateLoad(result_buffer); + auto *ret_val_u64 = builder->CreateLoad( +#ifdef TI_LLVM_15 + builder->getInt64Ty(), +#endif + result_buffer); llvm_val[stmt] = bitcast_from_u64(ret_val_u64, stmt->ret_type); } else { create_call(llvm_func, {new_ctx}); diff --git a/taichi/codegen/llvm/codegen_llvm.h b/taichi/codegen/llvm/codegen_llvm.h index 15e98019da4cf..ca08fb19be8df 100644 --- a/taichi/codegen/llvm/codegen_llvm.h +++ b/taichi/codegen/llvm/codegen_llvm.h @@ -268,7 +268,9 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder { llvm::Value *extract_quant_float(llvm::Value *local_bit_struct, SNode *digits_snode); - llvm::Value *load_quant_int(llvm::Value *ptr, QuantIntType 
*qit); + llvm::Value *load_quant_int(llvm::Value *ptr, + QuantIntType *qit, + Type *physical_type); llvm::Value *extract_quant_int(llvm::Value *physical_value, llvm::Value *bit_offset, @@ -280,6 +282,7 @@ class CodeGenLLVM : public IRVisitor, public LLVMModuleBuilder { llvm::Value *load_quant_float(llvm::Value *digits_bit_ptr, llvm::Value *exponent_bit_ptr, QuantFloatType *qflt, + Type *physical_type, bool shared_exponent); llvm::Value *reconstruct_quant_float(llvm::Value *input_digits, diff --git a/taichi/codegen/llvm/codegen_llvm_quant.cpp b/taichi/codegen/llvm/codegen_llvm_quant.cpp index 1f461b2bb7549..1fd3d1ea2b1b4 100644 --- a/taichi/codegen/llvm/codegen_llvm_quant.cpp +++ b/taichi/codegen/llvm/codegen_llvm_quant.cpp @@ -295,7 +295,11 @@ void CodeGenLLVM::store_quant_floats_with_shared_exponents( auto snode = stmt->get_bit_struct_snode(); auto bit_struct_physical_type = snode->dt->as()->get_physical_type(); - auto local_bit_struct = builder->CreateLoad(llvm_val[stmt->ptr]); + auto local_bit_struct = builder->CreateLoad( +#ifdef TI_LLVM_15 + llvm_type(bit_struct_physical_type), +#endif + llvm_val[stmt->ptr]); // fuse all stores into a masked store llvm::Value *masked_val = nullptr; uint64 mask = 0; @@ -458,9 +462,15 @@ llvm::Value *CodeGenLLVM::extract_quant_float(llvm::Value *local_bit_struct, digits_snode->owns_shared_exponent); } -llvm::Value *CodeGenLLVM::load_quant_int(llvm::Value *ptr, QuantIntType *qit) { +llvm::Value *CodeGenLLVM::load_quant_int(llvm::Value *ptr, + QuantIntType *qit, + Type *physical_type) { auto [byte_ptr, bit_offset] = load_bit_ptr(ptr); - auto physical_value = builder->CreateLoad(byte_ptr); + auto physical_value = builder->CreateLoad( +#ifdef TI_LLVM_15 + llvm_type(physical_type), +#endif + byte_ptr); return extract_quant_int(physical_value, bit_offset, qit); } @@ -510,11 +520,14 @@ llvm::Value *CodeGenLLVM::reconstruct_quant_fixed(llvm::Value *digits, llvm::Value *CodeGenLLVM::load_quant_float(llvm::Value *digits_bit_ptr, 
llvm::Value *exponent_bit_ptr, QuantFloatType *qflt, + Type *physical_type, bool shared_exponent) { auto digits = load_quant_int(digits_bit_ptr, - qflt->get_digits_type()->as()); + qflt->get_digits_type()->as(), + physical_type); auto exponent_val = load_quant_int( - exponent_bit_ptr, qflt->get_exponent_type()->as()); + exponent_bit_ptr, qflt->get_exponent_type()->as(), + physical_type); return reconstruct_quant_float(digits, exponent_val, qflt, shared_exponent); } @@ -620,6 +633,7 @@ llvm::Value *CodeGenLLVM::reconstruct_quant_float( llvm::Value *CodeGenLLVM::load_quant_fixed_or_quant_float(Stmt *ptr_stmt) { auto ptr = ptr_stmt->as(); auto load_type = ptr->ret_type->as()->get_pointee_type(); + auto physical_type = ptr->input_snode->physical_type; if (auto qflt = load_type->cast()) { TI_ASSERT(ptr->width() == 1); auto digits_bit_ptr = llvm_val[ptr]; @@ -630,11 +644,12 @@ llvm::Value *CodeGenLLVM::load_quant_fixed_or_quant_float(Stmt *ptr_stmt) { auto exponent_bit_ptr = offset_bit_ptr( digits_bit_ptr, exponent_snode->bit_offset - digits_snode->bit_offset); return load_quant_float(digits_bit_ptr, exponent_bit_ptr, qflt, - digits_snode->owns_shared_exponent); + physical_type, digits_snode->owns_shared_exponent); } else { auto qfxt = load_type->as(); auto digits = load_quant_int(llvm_val[ptr], - qfxt->get_digits_type()->as()); + qfxt->get_digits_type()->as(), + physical_type); return reconstruct_quant_fixed(digits, qfxt); } } diff --git a/taichi/codegen/llvm/llvm_codegen_utils.cpp b/taichi/codegen/llvm/llvm_codegen_utils.cpp index da5797a0a0056..a7aed880b230b 100644 --- a/taichi/codegen/llvm/llvm_codegen_utils.cpp +++ b/taichi/codegen/llvm/llvm_codegen_utils.cpp @@ -12,7 +12,13 @@ std::string type_name(llvm::Type *type) { void check_func_call_signature(llvm::Value *func, std::vector arglist) { - auto func_type = func->getType()->getPointerElementType(); + llvm::FunctionType *func_type = nullptr; + if (llvm::Function *fn = llvm::dyn_cast(func)) { + func_type = 
fn->getFunctionType(); + } else if (auto *call = llvm::dyn_cast(func)) { + func_type = llvm::cast_or_null( + func->getType()->getPointerElementType()); + } int num_params = func_type->getFunctionNumParams(); if (func_type->isFunctionVarArg()) { TI_ASSERT(num_params <= arglist.size()); diff --git a/taichi/codegen/llvm/llvm_codegen_utils.h b/taichi/codegen/llvm/llvm_codegen_utils.h index 61870dc8ecf32..324f585449a1d 100644 --- a/taichi/codegen/llvm/llvm_codegen_utils.h +++ b/taichi/codegen/llvm/llvm_codegen_utils.h @@ -68,7 +68,11 @@ class LLVMModuleBuilder { builder->SetInsertPoint(entry_block); auto alloca = builder->CreateAlloca(type, (unsigned)0, array_size); if (alignment != 0) { +#ifdef TI_LLVM_15 + alloca->setAlignment(llvm::Align(alignment)); +#else alloca->setAlignment(llvm::MaybeAlign(alignment)); +#endif } return alloca; } @@ -81,7 +85,12 @@ class LLVMModuleBuilder { } llvm::Type *get_runtime_type(const std::string &name) { +#ifdef TI_LLVM_15 + auto ty = llvm::StructType::getTypeByName(module->getContext(), + ("struct." + name)); +#else auto ty = module->getTypeByName("struct." 
+ name); +#endif if (!ty) { TI_ERROR("LLVMRuntime type {} not found.", name); } @@ -93,12 +102,18 @@ class LLVMModuleBuilder { if (!f) { TI_ERROR("LLVMRuntime function {} not found.", name); } +#ifdef TI_LLVM_15 + f->removeFnAttr(llvm::Attribute::OptimizeNone); + f->removeFnAttr(llvm::Attribute::NoInline); + f->addFnAttr(llvm::Attribute::AlwaysInline); +#else f->removeAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::OptimizeNone); f->removeAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoInline); f->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); +#endif return f; } @@ -175,7 +190,7 @@ class RuntimeObject { return builder->CreateCall(func, arglist); } - llvm::Value *get_func(const std::string &func_name) const { + llvm::Function *get_func(const std::string &func_name) const { return mb->get_runtime_function(fmt::format("{}_{}", cls_name, func_name)); } }; diff --git a/taichi/codegen/llvm/struct_llvm.cpp b/taichi/codegen/llvm/struct_llvm.cpp index 5fc18c2d1a508..09b1b16c1eb5f 100644 --- a/taichi/codegen/llvm/struct_llvm.cpp +++ b/taichi/codegen/llvm/struct_llvm.cpp @@ -141,7 +141,7 @@ void StructCompilerLLVM::generate_types(SNode &snode) { // Create a dummy function in the module with the type stub as return type // so that the type is referenced in the module - auto ft = llvm::FunctionType::get(llvm::PointerType::get(stub, 0), false); + auto ft = llvm::FunctionType::get(stub, false); create_function(ft, type_stub_name(&snode) + "_func"); } @@ -240,10 +240,18 @@ void StructCompilerLLVM::generate_child_accessors(SNode &snode) { args.push_back(&arg); } llvm::Value *ret; +#ifdef TI_LLVM_15 + ret = builder.CreateGEP(get_llvm_element_type(module.get(), parent), + builder.CreateBitCast(args[0], inp_type), + {tlctx_->get_constant(0), + tlctx_->get_constant(parent->child_id(&snode))}, + "getch"); +#else ret = builder.CreateGEP(builder.CreateBitCast(args[0], inp_type), {tlctx_->get_constant(0), 
tlctx_->get_constant(parent->child_id(&snode))}, "getch"); +#endif builder.CreateRet( builder.CreateBitCast(ret, llvm::Type::getInt8PtrTy(*llvm_ctx_))); @@ -293,7 +301,12 @@ llvm::Type *StructCompilerLLVM::get_stub(llvm::Module *module, uint32 index) { TI_ASSERT(module); TI_ASSERT(snode); +#ifdef TI_LLVM_15 + auto stub = llvm::StructType::getTypeByName(module->getContext(), + type_stub_name(snode)); +#else auto stub = module->getTypeByName(type_stub_name(snode)); +#endif TI_ASSERT(stub); TI_ASSERT(stub->getStructNumElements() == 4); TI_ASSERT(0 <= index && index < 4); diff --git a/taichi/codegen/wasm/codegen_wasm.cpp b/taichi/codegen/wasm/codegen_wasm.cpp index 278cea4ebf702..46c7f29df65e7 100644 --- a/taichi/codegen/wasm/codegen_wasm.cpp +++ b/taichi/codegen/wasm/codegen_wasm.cpp @@ -64,12 +64,17 @@ class CodeGenLLVMWASM : public CodeGenLLVM { // test block builder->SetInsertPoint(loop_test); llvm::Value *cond; +#ifdef TI_LLVM_15 + auto *loop_var_load = builder->CreateLoad(begin->getType(), loop_var); +#else + auto *loop_var_load = builder->CreateLoad(loop_var); +#endif if (!stmt->reversed) { cond = builder->CreateICmp(llvm::CmpInst::Predicate::ICMP_SLT, - builder->CreateLoad(loop_var), end); + loop_var_load, end); } else { cond = builder->CreateICmp(llvm::CmpInst::Predicate::ICMP_SGE, - builder->CreateLoad(loop_var), begin); + loop_var_load, begin); } builder->CreateCondBr(cond, body, after_loop); } diff --git a/taichi/runtime/cpu/jit_cpu.cpp b/taichi/runtime/cpu/jit_cpu.cpp index 067bf393f095b..807488238ee78 100644 --- a/taichi/runtime/cpu/jit_cpu.cpp +++ b/taichi/runtime/cpu/jit_cpu.cpp @@ -13,7 +13,6 @@ #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" -#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include 
"llvm/ExecutionEngine/RuntimeDyld.h" @@ -24,7 +23,6 @@ #include "llvm/IR/Verifier.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Error.h" @@ -34,6 +32,15 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/IPO.h" + +#ifdef TI_LLVM_15 +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Host.h" +#else +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#endif + #endif #include "taichi/lang_util.h" @@ -102,6 +109,33 @@ class JITSessionCPU : public JITSession { SectionMemoryManager *memory_manager_; public: +#ifdef TI_LLVM_15 + JITSessionCPU(TaichiLLVMContext *tlctx, + std::unique_ptr EPC, + CompileConfig *config, + JITTargetMachineBuilder JTMB, + DataLayout DL) + : JITSession(tlctx, config), + es_(std::move(EPC)), + object_layer_(es_, + [&]() { + auto smgr = std::make_unique(); + memory_manager_ = smgr.get(); + return smgr; + }), + compile_layer_(es_, + object_layer_, + std::make_unique(JTMB)), + dl_(DL), + mangle_(es_, this->dl_), + module_counter_(0), + memory_manager_(nullptr) { + if (JTMB.getTargetTriple().isOSBinFormatCOFF()) { + object_layer_.setOverrideObjectFlagsWithResponsibilityFlags(true); + object_layer_.setAutoClaimResponsibilityForObjectSymbols(true); + } + } +#else JITSessionCPU(TaichiLLVMContext *tlctx, CompileConfig *config, JITTargetMachineBuilder JTMB, @@ -125,11 +159,16 @@ class JITSessionCPU : public JITSession { object_layer_.setAutoClaimResponsibilityForObjectSymbols(true); } } +#endif ~JITSessionCPU() override { std::lock_guard _(mut_); if (memory_manager_) memory_manager_->deregisterEHFrames(); +#ifdef TI_LLVM_15 + if (auto Err = es_.endSession()) + es_.reportError(std::move(Err)); +#endif } DataLayout get_data_layout() override { @@ -145,7 +184,13 @@ class 
JITSessionCPU : public JITSession { TI_ASSERT(M); global_optimize_module_cpu(M.get()); std::lock_guard _(mut_); +#ifdef TI_LLVM_15 + auto dylib_expect = es_.createJITDylib(fmt::format("{}", module_counter_)); + TI_ASSERT(dylib_expect); + auto &dylib = dylib_expect.get(); +#else auto &dylib = es_.createJITDylib(fmt::format("{}", module_counter_)); +#endif dylib.addGenerator( cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( dl_.getGlobalPrefix()))); @@ -209,7 +254,10 @@ void JITSessionCPU::global_optimize_module_cpu(llvm::Module *module) { TI_ERROR_UNLESS(target, err_str); TargetOptions options; +#ifndef TI_LLVM_15 + // PrintMachineCode is removed in https://reviews.llvm.org/D83275. options.PrintMachineCode = false; +#endif if (this->config_->fast_math) { options.AllowFPOpFusion = FPOpFusion::Fast; options.UnsafeFPMath = 1; @@ -224,7 +272,10 @@ void JITSessionCPU::global_optimize_module_cpu(llvm::Module *module) { options.HonorSignDependentRoundingFPMathOption = false; options.NoZerosInBSS = false; options.GuaranteedTailCallOpt = false; +#ifndef TI_LLVM_15 + // StackAlignmentOverride is removed in https://reviews.llvm.org/D103048. 
options.StackAlignmentOverride = 0; +#endif legacy::FunctionPassManager function_pass_manager(module); legacy::PassManager module_pass_manager; @@ -286,8 +337,15 @@ std::unique_ptr create_llvm_jit_session_cpu( Arch arch) { TI_ASSERT(arch_is_cpu(arch)); auto target_info = get_host_target_info(); +#ifdef TI_LLVM_15 + auto EPC = SelfExecutorProcessControl::Create(); + TI_ASSERT(EPC); + return std::make_unique(tlctx, std::move(*EPC), config, + target_info.first, target_info.second); +#else return std::make_unique(tlctx, config, target_info.first, target_info.second); +#endif } TLANG_NAMESPACE_END diff --git a/taichi/runtime/cuda/jit_cuda.cpp b/taichi/runtime/cuda/jit_cuda.cpp index 037ba999611a5..5d2b4e2d12e31 100644 --- a/taichi/runtime/cuda/jit_cuda.cpp +++ b/taichi/runtime/cuda/jit_cuda.cpp @@ -89,9 +89,9 @@ std::string JITSessionCUDA::compile_module_to_ptx( } for (auto &f : module->globals()) - f.setName(convert(f.getName())); + f.setName(convert(f.getName().str())); for (auto &f : *module) - f.setName(convert(f.getName())); + f.setName(convert(f.getName().str())); llvm::Triple triple(module->getTargetTriple()); @@ -103,7 +103,10 @@ std::string JITSessionCUDA::compile_module_to_ptx( TI_ERROR_UNLESS(target, err_str); TargetOptions options; +#ifndef TI_LLVM_15 + // PrintMachineCode is removed in https://reviews.llvm.org/D83275. options.PrintMachineCode = 0; +#endif if (this->config_->fast_math) { options.AllowFPOpFusion = FPOpFusion::Fast; // See NVPTXISelLowering.cpp @@ -121,7 +124,10 @@ std::string JITSessionCUDA::compile_module_to_ptx( options.HonorSignDependentRoundingFPMathOption = 0; options.NoZerosInBSS = 0; options.GuaranteedTailCallOpt = 0; +#ifndef TI_LLVM_15 + // StackAlignmentOverride is removed in https://reviews.llvm.org/D103048. 
options.StackAlignmentOverride = 0; +#endif std::unique_ptr target_machine(target->createTargetMachine( triple.str(), CUDAContext::get_instance().get_mcpu(), cuda_mattrs(), diff --git a/taichi/runtime/cuda/jit_cuda.h b/taichi/runtime/cuda/jit_cuda.h index 58db311e22b8f..fd24976b6b26c 100644 --- a/taichi/runtime/cuda/jit_cuda.h +++ b/taichi/runtime/cuda/jit_cuda.h @@ -15,7 +15,11 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Analysis/TargetTransformInfo.h" +#ifdef TI_LLVM_15 +#include "llvm/MC/TargetRegistry.h" +#else #include "llvm/Support/TargetRegistry.h" +#endif #include "llvm/Target/TargetMachine.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" diff --git a/taichi/runtime/llvm/llvm_context.cpp b/taichi/runtime/llvm/llvm_context.cpp index da72361300de9..303a30fe97f21 100644 --- a/taichi/runtime/llvm/llvm_context.cpp +++ b/taichi/runtime/llvm/llvm_context.cpp @@ -20,7 +20,11 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/TargetSelect.h" +#ifdef TI_LLVM_15 +#include "llvm/Support/FileSystem.h" +#else #include "llvm/Support/TargetRegistry.h" +#endif #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" @@ -66,9 +70,15 @@ TaichiLLVMContext::TaichiLLVMContext(CompileConfig *config, Arch arch) main_thread_data_ = get_this_thread_data(); llvm::remove_fatal_error_handler(); llvm::install_fatal_error_handler( +#ifdef TI_LLVM_15 + [](void *user_data, const char *reason, bool gen_crash_diag) { + TI_ERROR("LLVM Fatal Error: {}", reason); + }, +#else [](void *user_data, const std::string &reason, bool gen_crash_diag) { TI_ERROR("LLVM Fatal Error: {}", reason); }, +#endif nullptr); if (arch_is_cpu(arch)) { @@ -359,8 +369,14 @@ std::unique_ptr TaichiLLVMContext::clone_module( std::vector args; for (auto &arg : func->args()) args.push_back(&arg); +#ifdef TI_LLVM_15 + 
builder.CreateRet(builder.CreateAtomicRMW( + op, args[0], args[1], llvm::MaybeAlign(0), + llvm::AtomicOrdering::SequentiallyConsistent)); +#else builder.CreateRet(builder.CreateAtomicRMW( op, args[0], args[1], llvm::AtomicOrdering::SequentiallyConsistent)); +#endif TaichiLLVMContext::mark_inline(func); }; @@ -467,7 +483,7 @@ void TaichiLLVMContext::link_module_with_cuda_libdevice( std::vector libdevice_function_names; for (auto &f : *libdevice_module) { if (!f.isDeclaration()) { - libdevice_function_names.push_back(f.getName()); + libdevice_function_names.push_back(f.getName().str()); } } @@ -588,12 +604,18 @@ void TaichiLLVMContext::mark_inline(llvm::Function *f) { } } } +#ifdef TI_LLVM_15 + f->removeFnAttr(llvm::Attribute::OptimizeNone); + f->removeFnAttr(llvm::Attribute::NoInline); + f->addFnAttr(llvm::Attribute::AlwaysInline); +#else f->removeAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::OptimizeNone); f->removeAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoInline); f->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); +#endif } int TaichiLLVMContext::num_instructions(llvm::Function *func) { @@ -684,7 +706,7 @@ void TaichiLLVMContext::eliminate_unused_functions( llvm::PassBuilder pb; pb.registerModuleAnalyses(ana); manager.addPass(llvm::InternalizePass([&](const GlobalValue &val) -> bool { - return export_indicator(val.getName()); + return export_indicator(val.getName().str()); })); manager.addPass(GlobalDCEPass()); manager.run(*module, ana); @@ -755,7 +777,11 @@ auto make_slim_libdevice = [](const std::vector &args) { std::error_code ec; auto output_fn = "slim_" + args[0]; +#ifdef TI_LLVM_15 + llvm::raw_fd_ostream os(output_fn, ec, llvm::sys::fs::OF_None); +#else llvm::raw_fd_ostream os(output_fn, ec, llvm::sys::fs::F_None); +#endif llvm::WriteBitcodeToFile(*libdevice_module, os); os.flush(); TI_INFO("Slimmed libdevice written to {}", output_fn); @@ -766,7 +792,7 @@ void 
TaichiLLVMContext::update_runtime_jit_module( if (arch_ == Arch::cuda) { for (auto &f : *module) { bool is_kernel = false; - const std::string func_name = f.getName(); + const std::string func_name = f.getName().str(); if (starts_with(func_name, "runtime_")) { mark_function_as_cuda_kernel(&f); is_kernel = true;