diff --git a/Cargo.lock b/Cargo.lock index 3404980917b..1b730bc2fa6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -972,21 +972,6 @@ dependencies = [ "serde", ] -[[package]] -name = "inkwell" -version = "0.1.0-beta.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5fe0be1e47c0c0f3da4397693e08f5d78329ae095c25d529e12ade78420fb41" -dependencies = [ - "either", - "inkwell_internals", - "libc", - "llvm-sys", - "once_cell", - "parking_lot", - "regex", -] - [[package]] name = "inkwell_internals" version = "0.3.0" @@ -2585,7 +2570,6 @@ version = "1.0.2" dependencies = [ "byteorder", "cc", - "inkwell", "itertools 0.10.0", "lazy_static", "libc", @@ -2600,6 +2584,7 @@ dependencies = [ "wasmer-compiler", "wasmer-types", "wasmer-vm", + "wasmer_inkwell", ] [[package]] @@ -2872,6 +2857,21 @@ dependencies = [ "wasmer-wast", ] +[[package]] +name = "wasmer_inkwell" +version = "0.2.0-alpha.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eca826323f39b29a38cd31c8eb33de76945c6193f30a2806c6cde6f6cd42cb1" +dependencies = [ + "either", + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "parking_lot", + "regex", +] + [[package]] name = "wasmparser" version = "0.78.2" diff --git a/lib/c-api/build.rs b/lib/c-api/build.rs index 8d72e5d4777..522b3f8ae53 100644 --- a/lib/c-api/build.rs +++ b/lib/c-api/build.rs @@ -474,6 +474,7 @@ fn exclude_items_from_wasm_c_api(builder: Builder) -> Builder { .exclude_item("wasi_get_unordered_imports") .exclude_item("wasi_get_wasi_version") .exclude_item("wasi_version_t") + .exclude_item("wasm_config_canonicalize_nans") .exclude_item("wasm_config_push_middleware") .exclude_item("wasm_config_set_compiler") .exclude_item("wasm_config_set_engine") diff --git a/lib/c-api/src/wasm_c_api/engine.rs b/lib/c-api/src/wasm_c_api/engine.rs index 72c47864bbe..90b1674318c 100644 --- a/lib/c-api/src/wasm_c_api/engine.rs +++ b/lib/c-api/src/wasm_c_api/engine.rs @@ -105,6 +105,7 @@ pub struct wasm_config_t { compiler: wasmer_compiler_t, #[cfg(feature = "middlewares")] pub(super) middlewares: Vec, + pub(super) nan_canonicalization: bool, pub(super) features: Option>, pub(super) target: Option>, } @@ -482,6 +483,10 @@ pub extern "C" fn wasm_engine_new_with_config( compiler_config.push_middleware(middleware.inner); } + if config.nan_canonicalization { + compiler_config.canonicalize_nans(true); + } + let inner: Arc = match config.engine { wasmer_engine_t::UNIVERSAL => { cfg_if! { diff --git a/lib/c-api/src/wasm_c_api/unstable/engine.rs b/lib/c-api/src/wasm_c_api/unstable/engine.rs index 66c53dfe733..8fbce36a68e 100644 --- a/lib/c-api/src/wasm_c_api/unstable/engine.rs +++ b/lib/c-api/src/wasm_c_api/unstable/engine.rs @@ -95,6 +95,45 @@ pub extern "C" fn wasm_config_set_features( config.features = Some(features); } +/// Updates the configuration to enable NaN canonicalization. +/// +/// This is a Wasmer-specific function. +/// +/// # Example +/// +/// ```rust +/// # use inline_c::assert_c; +/// # fn main() { +/// # (assert_c! { +/// # #include "tests/wasmer_wasm.h" +/// # +/// int main() { +/// // Create the configuration. +/// wasm_config_t* config = wasm_config_new(); +/// +/// // Enable NaN canonicalization. +/// wasm_config_canonicalize_nans(config, true); +/// +/// // Create the engine. +/// wasm_engine_t* engine = wasm_engine_new_with_config(config); +/// +/// // Check we have an engine! +/// assert(engine); +/// +/// // Free everything. 
+/// wasm_engine_delete(engine); +/// +/// return 0; +/// } +/// # }) +/// # .success(); +/// # } +/// ``` +#[no_mangle] +pub extern "C" fn wasm_config_canonicalize_nans(config: &mut wasm_config_t, enable: bool) { + config.nan_canonicalization = enable; +} + /// Check whether the given compiler is available, i.e. part of this /// compiled library. #[no_mangle] diff --git a/lib/c-api/wasmer_wasm.h b/lib/c-api/wasmer_wasm.h index 13d01c11809..8e0a1f6621e 100644 --- a/lib/c-api/wasmer_wasm.h +++ b/lib/c-api/wasmer_wasm.h @@ -772,6 +772,8 @@ bool wasi_get_unordered_imports(const wasm_store_t *store, enum wasi_version_t wasi_get_wasi_version(const wasm_module_t *module); #endif +void wasm_config_canonicalize_nans(wasm_config_t *config, bool enable); + void wasm_config_push_middleware(wasm_config_t *config, struct wasmer_middleware_t *middleware); #if defined(WASMER_COMPILER_ENABLED) diff --git a/lib/compiler-cranelift/src/config.rs b/lib/compiler-cranelift/src/config.rs index 3f7a2f6a017..397b405931d 100644 --- a/lib/compiler-cranelift/src/config.rs +++ b/lib/compiler-cranelift/src/config.rs @@ -185,6 +185,14 @@ impl CompilerConfig for Cranelift { self.enable_verifier = true; } + fn enable_nan_canonicalization(&mut self) { + self.enable_nan_canonicalization = true; + } + + fn canonicalize_nans(&mut self, enable: bool) { + self.enable_nan_canonicalization = enable; + } + /// Transform it into the compiler fn compiler(self: Box) -> Box { Box::new(CraneliftCompiler::new(*self)) diff --git a/lib/compiler-llvm/Cargo.toml b/lib/compiler-llvm/Cargo.toml index 320c73daeb7..19902376dc6 100644 --- a/lib/compiler-llvm/Cargo.toml +++ b/lib/compiler-llvm/Cargo.toml @@ -25,7 +25,8 @@ rayon = "1.5" loupe = "0.1" [dependencies.inkwell] -version = "=0.1.0-beta.2" +package = "wasmer_inkwell" +version = "0.2.0-alpha.2" default-features = false features = ["llvm11-0", "target-x86", "target-aarch64"] diff --git a/lib/compiler-llvm/src/abi/aarch64_systemv.rs b/lib/compiler-llvm/src/abi/aarch64_systemv.rs index dc54a54127a..cd4f1c3ab23 100644 --- a/lib/compiler-llvm/src/abi/aarch64_systemv.rs +++ b/lib/compiler-llvm/src/abi/aarch64_systemv.rs @@ -4,7 +4,7 @@ use inkwell::{ attributes::{Attribute, AttributeLoc}, builder::Builder, context::Context, - types::{BasicType, FunctionType, StructType}, + types::{BasicMetadataTypeEnum, BasicType, FunctionType, StructType}, values::{BasicValue, BasicValueEnum, CallSiteValue, FunctionValue, IntValue, PointerValue}, AddressSpace, }; @@ -83,34 +83,51 @@ impl Abi for Aarch64SystemV { Ok(match sig.results() { [] => ( - intrinsics - .void_ty - .fn_type(¶m_types.collect::, _>>()?, false), + intrinsics.void_ty.fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [_] => { let single_value = sig.results()[0]; ( - type_to_llvm(intrinsics, single_value)? - .fn_type(¶m_types.collect::, _>>()?, false), + type_to_llvm(intrinsics, single_value)?.fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ) } [Type::F32, Type::F32] => { let f32_ty = intrinsics.f32_ty.as_basic_type_enum(); ( - context - .struct_type(&[f32_ty, f32_ty], false) - .fn_type(¶m_types.collect::, _>>()?, false), + context.struct_type(&[f32_ty, f32_ty], false).fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? 
+ .as_slice(), + false, + ), vmctx_attributes(0), ) } [Type::F64, Type::F64] => { let f64_ty = intrinsics.f64_ty.as_basic_type_enum(); ( - context - .struct_type(&[f64_ty, f64_ty], false) - .fn_type(¶m_types.collect::, _>>()?, false), + context.struct_type(&[f64_ty, f64_ty], false).fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ) } @@ -119,7 +136,13 @@ impl Abi for Aarch64SystemV { ( context .struct_type(&[f32_ty, f32_ty, f32_ty], false) - .fn_type(¶m_types.collect::, _>>()?, false), + .fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ) } @@ -128,7 +151,13 @@ impl Abi for Aarch64SystemV { ( context .struct_type(&[f32_ty, f32_ty, f32_ty, f32_ty], false) - .fn_type(¶m_types.collect::, _>>()?, false), + .fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ) } @@ -145,9 +174,13 @@ impl Abi for Aarch64SystemV { .collect::>(); match sig_returns_bitwidths.as_slice() { [32, 32] => ( - intrinsics - .i64_ty - .fn_type(¶m_types.collect::, _>>()?, false), + intrinsics.i64_ty.fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [32, 64] @@ -157,10 +190,13 @@ impl Abi for Aarch64SystemV { | [64, 32, 32] | [32, 32, 64] | [32, 32, 32, 32] => ( - intrinsics - .i64_ty - .array_type(2) - .fn_type(¶m_types.collect::, _>>()?, false), + intrinsics.i64_ty.array_type(2).fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), _ => { @@ -187,9 +223,13 @@ impl Abi for Aarch64SystemV { attributes.append(&mut vmctx_attributes(1)); ( - intrinsics - .void_ty - .fn_type(¶m_types.collect::, _>>()?, false), + intrinsics.void_ty.fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), attributes, ) } diff --git a/lib/compiler-llvm/src/abi/x86_64_systemv.rs b/lib/compiler-llvm/src/abi/x86_64_systemv.rs index abf48b8ce69..695cdf3bdb5 100644 --- a/lib/compiler-llvm/src/abi/x86_64_systemv.rs +++ b/lib/compiler-llvm/src/abi/x86_64_systemv.rs @@ -4,7 +4,7 @@ use inkwell::{ attributes::{Attribute, AttributeLoc}, builder::Builder, context::Context, - types::{BasicType, FunctionType, StructType}, + types::{BasicMetadataTypeEnum, BasicType, FunctionType, StructType}, values::{ BasicValue, BasicValueEnum, CallSiteValue, FloatValue, FunctionValue, IntValue, PointerValue, VectorValue, @@ -98,16 +98,25 @@ impl Abi for X86_64SystemV { Ok(match sig_returns_bitwidths.as_slice() { [] => ( - intrinsics - .void_ty - .fn_type(¶m_types.collect::, _>>()?, false), + intrinsics.void_ty.fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [_] => { let single_value = sig.results()[0]; ( - type_to_llvm(intrinsics, single_value)? - .fn_type(¶m_types.collect::, _>>()?, false), + type_to_llvm(intrinsics, single_value)?.fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ) } @@ -119,23 +128,34 @@ impl Abi for X86_64SystemV { .collect::>()?; ( - context - .struct_type(&basic_types, false) - .fn_type(¶m_types.collect::, _>>()?, false), + context.struct_type(&basic_types, false).fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? 
+ .as_slice(), + false, + ), vmctx_attributes(0), ) } [32, 32] if sig.results()[0] == Type::F32 && sig.results()[1] == Type::F32 => ( - intrinsics - .f32_ty - .vec_type(2) - .fn_type(¶m_types.collect::, _>>()?, false), + intrinsics.f32_ty.vec_type(2).fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [32, 32] => ( - intrinsics - .i64_ty - .fn_type(¶m_types.collect::, _>>()?, false), + intrinsics.i64_ty.fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [32, 32, _] if sig.results()[0] == Type::F32 && sig.results()[1] == Type::F32 => ( @@ -147,7 +167,13 @@ impl Abi for X86_64SystemV { ], false, ) - .fn_type(¶m_types.collect::, _>>()?, false), + .fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [32, 32, _] => ( @@ -159,7 +185,13 @@ impl Abi for X86_64SystemV { ], false, ) - .fn_type(¶m_types.collect::, _>>()?, false), + .fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [64, 32, 32] if sig.results()[1] == Type::F32 && sig.results()[2] == Type::F32 => ( @@ -171,7 +203,13 @@ impl Abi for X86_64SystemV { ], false, ) - .fn_type(¶m_types.collect::, _>>()?, false), + .fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [64, 32, 32] => ( @@ -183,7 +221,13 @@ impl Abi for X86_64SystemV { ], false, ) - .fn_type(¶m_types.collect::, _>>()?, false), + .fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), [32, 32, 32, 32] => ( @@ -203,7 +247,13 @@ impl Abi for X86_64SystemV { ], false, ) - .fn_type(¶m_types.collect::, _>>()?, false), + .fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), vmctx_attributes(0), ), _ => { @@ -226,9 +276,13 @@ impl Abi for X86_64SystemV { attributes.append(&mut vmctx_attributes(1)); ( - intrinsics - .void_ty - .fn_type(¶m_types.collect::, _>>()?, false), + intrinsics.void_ty.fn_type( + param_types + .map(|v| v.map(Into::into)) + .collect::, _>>()? + .as_slice(), + false, + ), attributes, ) } diff --git a/lib/compiler-llvm/src/config.rs b/lib/compiler-llvm/src/config.rs index 245a27e92f6..a67f368d023 100644 --- a/lib/compiler-llvm/src/config.rs +++ b/lib/compiler-llvm/src/config.rs @@ -65,15 +65,6 @@ impl LLVM { } } - /// Enable NaN canonicalization. - /// - /// NaN canonicalization is useful when trying to run WebAssembly - /// deterministically across different architectures. - pub fn canonicalize_nans(&mut self, enable: bool) -> &mut Self { - self.enable_nan_canonicalization = enable; - self - } - /// The optimization levels when optimizing the IR. pub fn opt_level(&mut self, opt_level: LLVMOptLevel) -> &mut Self { self.opt_level = opt_level; @@ -209,6 +200,14 @@ impl CompilerConfig for LLVM { self.enable_verifier = true; } + fn enable_nan_canonicalization(&mut self) { + self.enable_nan_canonicalization = true; + } + + fn canonicalize_nans(&mut self, enable: bool) { + self.enable_nan_canonicalization = enable; + } + /// Transform it into the compiler. 
fn compiler(self: Box) -> Box { Box::new(LLVMCompiler::new(*self)) diff --git a/lib/compiler-llvm/src/trampoline/wasm.rs b/lib/compiler-llvm/src/trampoline/wasm.rs index 20f8d5c975e..eeea64d7edc 100644 --- a/lib/compiler-llvm/src/trampoline/wasm.rs +++ b/lib/compiler-llvm/src/trampoline/wasm.rs @@ -2,6 +2,7 @@ use crate::abi::{get_abi, Abi}; use crate::config::{CompiledKind, LLVM}; use crate::object_file::{load_object_file, CompiledFunction}; use crate::translator::intrinsics::{type_to_llvm, type_to_llvm_ptr, Intrinsics}; +use inkwell::values::BasicMetadataValueEnum; use inkwell::{ attributes::{Attribute, AttributeLoc}, context::Context, @@ -9,7 +10,7 @@ use inkwell::{ passes::PassManager, targets::{FileType, TargetMachine}, types::BasicType, - values::{BasicValue, FunctionValue}, + values::FunctionValue, AddressSpace, DLLStorageClass, }; use std::cmp; @@ -55,11 +56,9 @@ impl FuncTrampoline { .func_type_to_llvm(&self.ctx, &intrinsics, None, ty)?; let trampoline_ty = intrinsics.void_ty.fn_type( &[ - intrinsics.ctx_ptr_ty.as_basic_type_enum(), // vmctx ptr - callee_ty - .ptr_type(AddressSpace::Generic) - .as_basic_type_enum(), // callee function address - intrinsics.i128_ptr_ty.as_basic_type_enum(), // in/out values ptr + intrinsics.ctx_ptr_ty.into(), // vmctx ptr + callee_ty.ptr_type(AddressSpace::Generic).into(), // callee function address + intrinsics.i128_ptr_ty.into(), // in/out values ptr ], false, ); @@ -317,7 +316,8 @@ impl FuncTrampoline { } }; - let mut args_vec = Vec::with_capacity(func_sig.params().len() + 1); + let mut args_vec: Vec = + Vec::with_capacity(func_sig.params().len() + 1); if self.abi.is_sret(func_sig)? { let basic_types: Vec<_> = func_sig @@ -330,7 +330,7 @@ impl FuncTrampoline { args_vec.push(builder.build_alloca(sret_ty, "sret").into()); } - args_vec.push(callee_vmctx_ptr); + args_vec.push(callee_vmctx_ptr.into()); for (i, param_ty) in func_sig.params().iter().enumerate() { let index = intrinsics.i32_ty.const_int(i as _, false); @@ -343,10 +343,10 @@ impl FuncTrampoline { builder.build_pointer_cast(item_pointer, casted_pointer_type, "typed_arg_pointer"); let arg = builder.build_load(typed_item_pointer, "arg"); - args_vec.push(arg); + args_vec.push(arg.into()); } - let call_site = builder.build_call(func_ptr, &args_vec, "call"); + let call_site = builder.build_call(func_ptr, args_vec.as_slice().into(), "call"); for (attr, attr_loc) in func_attrs { call_site.add_attribute(*attr_loc, *attr); } @@ -424,8 +424,8 @@ impl FuncTrampoline { .void_ty .fn_type( &[ - intrinsics.ctx_ptr_ty.as_basic_type_enum(), // vmctx ptr - intrinsics.i128_ptr_ty.as_basic_type_enum(), // in/out values ptr + intrinsics.ctx_ptr_ty.into(), // vmctx ptr + intrinsics.i128_ptr_ty.into(), // in/out values ptr ], false, ) @@ -441,14 +441,7 @@ impl FuncTrampoline { .into_pointer_value(); let values_ptr = builder.build_pointer_cast(values, intrinsics.i128_ptr_ty, ""); - builder.build_call( - callee, - &[ - vmctx.as_basic_value_enum(), - values_ptr.as_basic_value_enum(), - ], - "", - ); + builder.build_call(callee, &[vmctx.into(), values_ptr.into()], ""); if func_sig.results().is_empty() { builder.build_return(None); diff --git a/lib/compiler-llvm/src/translator/code.rs b/lib/compiler-llvm/src/translator/code.rs index b31161e5643..24503fb02ce 100644 --- a/lib/compiler-llvm/src/translator/code.rs +++ b/lib/compiler-llvm/src/translator/code.rs @@ -14,8 +14,8 @@ use inkwell::{ targets::{FileType, TargetMachine}, types::{BasicType, FloatMathType, IntType, PointerType, VectorType}, values::{ - BasicValue, 
BasicValueEnum, FloatValue, FunctionValue, InstructionOpcode, InstructionValue, - IntValue, PhiValue, PointerValue, VectorValue, + BasicMetadataValueEnum, BasicValue, BasicValueEnum, FloatValue, FunctionValue, + InstructionOpcode, InstructionValue, IntValue, PhiValue, PointerValue, VectorValue, }, AddressSpace, AtomicOrdering, AtomicRMWBinOp, DLLStorageClass, FloatPredicate, IntPredicate, }; @@ -201,6 +201,7 @@ impl FuncTranslator { wasm_module, symbol_registry, abi: &*self.abi, + config, }; fcg.ctx.add_func( func_index, @@ -651,7 +652,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { "", ); self.builder - .build_call(self.intrinsics.throw_trap, &[trap_code], "throw"); + .build_call(self.intrinsics.throw_trap, &[trap_code.into()], "throw"); self.builder.build_unreachable(); self.builder.position_at_end(continue_block); } @@ -698,8 +699,8 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { .build_call( self.intrinsics.expect_i1, &[ - should_trap.as_basic_value_enum(), - self.intrinsics.i1_ty.const_zero().as_basic_value_enum(), + should_trap.into(), + self.intrinsics.i1_ty.const_zero().into(), ], "should_trap_expect", ) @@ -724,7 +725,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { "", ); self.builder - .build_call(self.intrinsics.throw_trap, &[trap_code], "throw"); + .build_call(self.intrinsics.throw_trap, &[trap_code.into()], "throw"); self.builder.build_unreachable(); self.builder.position_at_end(shouldnt_trap_block); } @@ -743,8 +744,8 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { .build_call( self.intrinsics.expect_i1, &[ - should_trap.as_basic_value_enum(), - self.intrinsics.i1_ty.const_zero().as_basic_value_enum(), + should_trap.into(), + self.intrinsics.i1_ty.const_zero().into(), ], "should_trap_expect", ) @@ -764,7 +765,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { self.builder.position_at_end(should_trap_block); self.builder.build_call( self.intrinsics.throw_trap, - &[self.intrinsics.trap_integer_division_by_zero], + &[self.intrinsics.trap_integer_division_by_zero.into()], "throw", ); self.builder.build_unreachable(); @@ -881,6 +882,10 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { value: BasicValueEnum<'ctx>, info: ExtraInfo, ) -> BasicValueEnum<'ctx> { + if !self.config.enable_nan_canonicalization { + return value; + } + if info.has_pending_f32_nan() { if value.get_type().is_vector_type() || value.get_type() == self.intrinsics.i128_ty.as_basic_type_enum() @@ -914,6 +919,10 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { // Replaces any NaN with the canonical QNaN, otherwise leaves the value alone. 
fn canonicalize_nans(&self, value: BasicValueEnum<'ctx>) -> BasicValueEnum<'ctx> { + if !self.config.enable_nan_canonicalization { + return value; + } + let f_ty = value.get_type(); if f_ty.is_vector_type() { let value = value.into_vector_value(); @@ -944,6 +953,51 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { } } + fn quiet_nan(&self, value: BasicValueEnum<'ctx>) -> BasicValueEnum<'ctx> { + let intrinsic = if value + .get_type() + .eq(&self.intrinsics.f32_ty.as_basic_type_enum()) + { + Some(self.intrinsics.add_f32) + } else if value + .get_type() + .eq(&self.intrinsics.f64_ty.as_basic_type_enum()) + { + Some(self.intrinsics.add_f64) + } else if value + .get_type() + .eq(&self.intrinsics.f32x4_ty.as_basic_type_enum()) + { + Some(self.intrinsics.add_f32x4) + } else if value + .get_type() + .eq(&self.intrinsics.f64x2_ty.as_basic_type_enum()) + { + Some(self.intrinsics.add_f64x2) + } else { + None + }; + + match intrinsic { + Some(intrinsic) => self + .builder + .build_call( + intrinsic, + &[ + value.into(), + value.get_type().const_zero().into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(), + None => value, + } + } + // If this memory access must trap when out of bounds (i.e. it is a memory // access written in the user program as opposed to one used by our VM) // then mark that it can't be delete. @@ -1069,8 +1123,8 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { .build_call( intrinsics.expect_i1, &[ - ptr_in_bounds.as_basic_value_enum(), - intrinsics.i1_ty.const_int(1, true).as_basic_value_enum(), + ptr_in_bounds.into(), + intrinsics.i1_ty.const_int(1, true).into(), ], "ptr_in_bounds_expect", ) @@ -1091,7 +1145,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { builder.position_at_end(not_in_bounds_block); builder.build_call( intrinsics.throw_trap, - &[intrinsics.trap_memory_oob], + &[intrinsics.trap_memory_oob.into()], "throw", ); builder.build_unreachable(); @@ -1132,11 +1186,8 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { .build_call( self.intrinsics.expect_i1, &[ - aligned.as_basic_value_enum(), - self.intrinsics - .i1_ty - .const_int(1, false) - .as_basic_value_enum(), + aligned.into(), + self.intrinsics.i1_ty.const_int(1, false).into(), ], "", ) @@ -1157,7 +1208,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { self.builder.position_at_end(not_aligned_block); self.builder.build_call( self.intrinsics.throw_trap, - &[self.intrinsics.trap_unaligned_atomic], + &[self.intrinsics.trap_unaligned_atomic.into()], "throw", ); self.builder.build_unreachable(); @@ -1322,6 +1373,7 @@ pub struct LLVMFunctionCodeGenerator<'ctx, 'a> { wasm_module: &'a ModuleInfo, symbol_registry: &'a dyn SymbolRegistry, abi: &'a dyn Abi, + config: &'a LLVM, } impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { @@ -1816,7 +1868,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { self.builder.build_call( self.intrinsics.throw_trap, - &[self.intrinsics.trap_unreachable], + &[self.intrinsics.trap_unreachable.into()], "throw", ); self.builder.build_unreachable(); @@ -2173,7 +2225,16 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { } */ - let call_site = self.builder.build_call(func, ¶ms, ""); + let call_site = self.builder.build_call( + func, + params + .iter() + .copied() + .map(Into::into) + .collect::>() + .as_slice(), + "", + ); for (attr, attr_loc) in attrs { call_site.add_attribute(attr_loc, attr); } @@ -2230,11 +2291,8 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { 
.build_call( self.intrinsics.expect_i1, &[ - index_in_bounds.as_basic_value_enum(), - self.intrinsics - .i1_ty - .const_int(1, false) - .as_basic_value_enum(), + index_in_bounds.into(), + self.intrinsics.i1_ty.const_int(1, false).into(), ], "index_in_bounds_expect", ) @@ -2257,7 +2315,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { self.builder.position_at_end(not_in_bounds_block); self.builder.build_call( self.intrinsics.throw_trap, - &[self.intrinsics.trap_table_access_oob], + &[self.intrinsics.trap_table_access_oob.into()], "throw", ); self.builder.build_unreachable(); @@ -2306,7 +2364,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { self.builder.position_at_end(funcref_is_null_block); self.builder.build_call( self.intrinsics.throw_trap, - &[self.intrinsics.trap_call_indirect_null], + &[self.intrinsics.trap_call_indirect_null.into()], "throw", ); self.builder.build_unreachable(); @@ -2363,11 +2421,8 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { .build_call( self.intrinsics.expect_i1, &[ - initialized_and_sigindices_match.as_basic_value_enum(), - self.intrinsics - .i1_ty - .const_int(1, false) - .as_basic_value_enum(), + initialized_and_sigindices_match.into(), + self.intrinsics.i1_ty.const_int(1, false).into(), ], "initialized_and_sigindices_match_expect", ) @@ -2396,7 +2451,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { "", ); self.builder - .build_call(self.intrinsics.throw_trap, &[trap_code], "throw"); + .build_call(self.intrinsics.throw_trap, &[trap_code.into()], "throw"); self.builder.build_unreachable(); self.builder.position_at_end(continue_block); @@ -2462,9 +2517,16 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { } } */ - let call_site = self - .builder - .build_call(typed_func_ptr, ¶ms, "indirect_call"); + let call_site = self.builder.build_call( + typed_func_ptr, + params + .iter() + .copied() + .map(Into::into) + .collect::>() + .as_slice(), + "indirect_call", + ); for (attr, attr_loc) in llvm_func_attrs { call_site.add_attribute(attr_loc, attr); } @@ -2632,11 +2694,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v2, _) = self.v128_into_i8x16(v2, i2); let res = self .builder - .build_call( - self.intrinsics.sadd_sat_i8x16, - &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.sadd_sat_i8x16, &[v1.into(), v2.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -2649,11 +2707,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v2, _) = self.v128_into_i16x8(v2, i2); let res = self .builder - .build_call( - self.intrinsics.sadd_sat_i16x8, - &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.sadd_sat_i16x8, &[v1.into(), v2.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -2666,11 +2720,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v2, _) = self.v128_into_i8x16(v2, i2); let res = self .builder - .build_call( - self.intrinsics.uadd_sat_i8x16, - &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.uadd_sat_i8x16, &[v1.into(), v2.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -2683,11 +2733,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v2, _) = self.v128_into_i16x8(v2, i2); let res = self .builder - .build_call( - self.intrinsics.uadd_sat_i16x8, - &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.uadd_sat_i16x8, &[v1.into(), v2.into()], "") .try_as_basic_value() .left() .unwrap(); @@ 
-2740,11 +2786,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v2, _) = self.v128_into_i8x16(v2, i2); let res = self .builder - .build_call( - self.intrinsics.ssub_sat_i8x16, - &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.ssub_sat_i8x16, &[v1.into(), v2.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -2757,11 +2799,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v2, _) = self.v128_into_i16x8(v2, i2); let res = self .builder - .build_call( - self.intrinsics.ssub_sat_i16x8, - &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.ssub_sat_i16x8, &[v1.into(), v2.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -2774,11 +2812,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v2, _) = self.v128_into_i8x16(v2, i2); let res = self .builder - .build_call( - self.intrinsics.usub_sat_i8x16, - &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.usub_sat_i8x16, &[v1.into(), v2.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -2791,11 +2825,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v2, _) = self.v128_into_i16x8(v2, i2); let res = self .builder - .build_call( - self.intrinsics.usub_sat_i16x8, - &[v1.as_basic_value_enum(), v2.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.usub_sat_i16x8, &[v1.into(), v2.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -3606,10 +3636,14 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::I32Clz => { let (input, info) = self.state.pop1_extra()?; let input = self.apply_pending_canonicalization(input, info); - let is_zero_undef = self.intrinsics.i1_zero.as_basic_value_enum(); + let is_zero_undef = self.intrinsics.i1_zero; let res = self .builder - .build_call(self.intrinsics.ctlz_i32, &[input, is_zero_undef], "") + .build_call( + self.intrinsics.ctlz_i32, + &[input.into(), is_zero_undef.into()], + "", + ) .try_as_basic_value() .left() .unwrap(); @@ -3618,10 +3652,14 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::I64Clz => { let (input, info) = self.state.pop1_extra()?; let input = self.apply_pending_canonicalization(input, info); - let is_zero_undef = self.intrinsics.i1_zero.as_basic_value_enum(); + let is_zero_undef = self.intrinsics.i1_zero; let res = self .builder - .build_call(self.intrinsics.ctlz_i64, &[input, is_zero_undef], "") + .build_call( + self.intrinsics.ctlz_i64, + &[input.into(), is_zero_undef.into()], + "", + ) .try_as_basic_value() .left() .unwrap(); @@ -3630,10 +3668,14 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::I32Ctz => { let (input, info) = self.state.pop1_extra()?; let input = self.apply_pending_canonicalization(input, info); - let is_zero_undef = self.intrinsics.i1_zero.as_basic_value_enum(); + let is_zero_undef = self.intrinsics.i1_zero; let res = self .builder - .build_call(self.intrinsics.cttz_i32, &[input, is_zero_undef], "") + .build_call( + self.intrinsics.cttz_i32, + &[input.into(), is_zero_undef.into()], + "", + ) .try_as_basic_value() .left() .unwrap(); @@ -3642,10 +3684,14 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::I64Ctz => { let (input, info) = self.state.pop1_extra()?; let input = self.apply_pending_canonicalization(input, info); - let is_zero_undef = self.intrinsics.i1_zero.as_basic_value_enum(); + let is_zero_undef = self.intrinsics.i1_zero; let res = self .builder - .build_call(self.intrinsics.cttz_i64, &[input, 
is_zero_undef], "") + .build_call( + self.intrinsics.cttz_i64, + &[input.into(), is_zero_undef.into()], + "", + ) .try_as_basic_value() .left() .unwrap(); @@ -3656,7 +3702,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_i8x16(v, i); let res = self .builder - .build_call(self.intrinsics.ctpop_i8x16, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.ctpop_i8x16, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -3668,7 +3714,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let input = self.apply_pending_canonicalization(input, info); let res = self .builder - .build_call(self.intrinsics.ctpop_i32, &[input], "") + .build_call(self.intrinsics.ctpop_i32, &[input.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -3679,7 +3725,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let input = self.apply_pending_canonicalization(input, info); let res = self .builder - .build_call(self.intrinsics.ctpop_i64, &[input], "") + .build_call(self.intrinsics.ctpop_i64, &[input.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -3960,8 +4006,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { ***************************/ Operator::F32Add => { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; - let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let res = self.builder.build_float_add(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.add_f32, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra( res, (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), @@ -3970,7 +4029,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::F64Add => { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let res = self.builder.build_float_add(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.add_f64, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra( res, (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), @@ -3980,7 +4053,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, i1) = self.v128_into_f32x4(v1, i1); let (v2, i2) = self.v128_into_f32x4(v2, i2); - let res = self.builder.build_float_add(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.add_f32x4, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1_extra( res, @@ -3991,7 +4078,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, i1) = self.v128_into_f64x2(v1, i1); let (v2, i2) = self.v128_into_f64x2(v2, i2); - let res = self.builder.build_float_add(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.add_f64x2, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); 
self.state.push1_extra( res, @@ -4001,7 +4102,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::F32Sub => { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let res = self.builder.build_float_sub(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.sub_f32, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra( res, (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), @@ -4010,7 +4125,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::F64Sub => { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let res = self.builder.build_float_sub(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.sub_f64, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra( res, (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), @@ -4020,7 +4149,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, i1) = self.v128_into_f32x4(v1, i1); let (v2, i2) = self.v128_into_f32x4(v2, i2); - let res = self.builder.build_float_sub(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.sub_f32x4, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1_extra( res, @@ -4031,7 +4174,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, i1) = self.v128_into_f64x2(v1, i1); let (v2, i2) = self.v128_into_f64x2(v2, i2); - let res = self.builder.build_float_sub(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.sub_f64x2, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1_extra( res, @@ -4041,7 +4198,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::F32Mul => { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let res = self.builder.build_float_mul(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.mul_f32, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra( res, (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f32_nan(), @@ -4050,7 +4221,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::F64Mul => { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let res = self.builder.build_float_mul(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.mul_f64, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); 
self.state.push1_extra( res, (i1.strip_pending() & i2.strip_pending()) | ExtraInfo::pending_f64_nan(), @@ -4060,7 +4245,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, i1) = self.v128_into_f32x4(v1, i1); let (v2, i2) = self.v128_into_f32x4(v2, i2); - let res = self.builder.build_float_mul(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.mul_f32x4, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1_extra( res, @@ -4071,7 +4270,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, i1) = self.v128_into_f64x2(v1, i1); let (v2, i2) = self.v128_into_f64x2(v2, i2); - let res = self.builder.build_float_mul(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.mul_f64x2, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1_extra( res, @@ -4081,20 +4294,62 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::F32Div => { let (v1, v2) = self.state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let res = self.builder.build_float_div(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.div_f32, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64Div => { let (v1, v2) = self.state.pop2()?; let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let res = self.builder.build_float_div(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.div_f64, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32x4Div => { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, _) = self.v128_into_f32x4(v1, i1); let (v2, _) = self.v128_into_f32x4(v2, i2); - let res = self.builder.build_float_div(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.div_f32x4, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1_extra(res, ExtraInfo::pending_f32_nan()); } @@ -4102,7 +4357,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; let (v1, _) = self.v128_into_f64x2(v1, i1); let (v2, _) = self.v128_into_f64x2(v2, i2); - let res = self.builder.build_float_div(v1, v2, ""); + let res = self + .builder + .build_call( + self.intrinsics.div_f64x2, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1_extra(res, ExtraInfo::pending_f64_nan()); } 
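Editor's note (sketch, not part of the diff): the float arithmetic lowerings above are switched from plain build_float_add/sub/mul/div instructions to calls through LLVM's constrained floating-point intrinsics, whose last two operands are metadata rather than values; that is why call arguments widen from BasicValueEnum to BasicMetadataValueEnum. An annotated copy of one such call site follows; the exact intrinsic binding and metadata strings live in intrinsics.rs and are assumptions here.

    // Mirrors the F32Add lowering above. `add_f32` is assumed to wrap
    // `llvm.experimental.constrained.fadd.f32`, and the two metadata handles
    // are assumed to carry strings such as `!"round.tonearest"` and
    // `!"fpexcept.ignore"`.
    let res = self
        .builder
        .build_call(
            self.intrinsics.add_f32,
            &[
                v1.into(),                       // lhs float value
                v2.into(),                       // rhs float value
                self.intrinsics.fp_rounding_md,  // metadata operand: rounding mode
                self.intrinsics.fp_exception_md, // metadata operand: FP exception behaviour
            ],
            "",
        )
        .try_as_basic_value()
        .left()
        .unwrap();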
@@ -4110,7 +4379,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let input = self.state.pop1()?; let res = self .builder - .build_call(self.intrinsics.sqrt_f32, &[input], "") + .build_call(self.intrinsics.sqrt_f32, &[input.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4120,7 +4389,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let input = self.state.pop1()?; let res = self .builder - .build_call(self.intrinsics.sqrt_f64, &[input], "") + .build_call(self.intrinsics.sqrt_f64, &[input.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4131,7 +4400,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f32x4(v, i); let res = self .builder - .build_call(self.intrinsics.sqrt_f32x4, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.sqrt_f32x4, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4145,7 +4414,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f64x2(v, i); let res = self .builder - .build_call(self.intrinsics.sqrt_f64x2, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.sqrt_f64x2, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4157,230 +4426,332 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::F32Min => { // This implements the same logic as LLVM's @llvm.minimum // intrinsic would, but x86 lowering of that intrinsic - // encounters a fatal error in LLVM 8 and LLVM 9. + // encounters a fatal error in LLVM 11. let (v1, v2) = self.state.pop2()?; - - // To detect min(-0.0, 0.0), we check whether the integer - // representations are equal. There's one other case where that - // can happen: non-canonical NaNs. Here we unconditionally - // canonicalize the NaNs. let v1 = self.canonicalize_nans(v1); let v2 = self.canonicalize_nans(v2); - let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let v1_is_nan = self.builder.build_float_compare( - FloatPredicate::UNO, - v1, - self.intrinsics.f32_zero, - "nan", - ); - let v2_is_not_nan = self.builder.build_float_compare( - FloatPredicate::ORD, - v2, - self.intrinsics.f32_zero, - "notnan", - ); - let v1_repr = self - .builder - .build_bitcast(v1, self.intrinsics.i32_ty, "") - .into_int_value(); - let v2_repr = self + let v1_is_nan = self .builder - .build_bitcast(v2, self.intrinsics.i32_ty, "") + .build_call( + self.intrinsics.cmp_f32, + &[ + v1.into(), + self.intrinsics.f32_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() .into_int_value(); - let repr_ne = - self.builder - .build_int_compare(IntPredicate::NE, v1_repr, v2_repr, ""); - let float_eq = self - .builder - .build_float_compare(FloatPredicate::OEQ, v1, v2, ""); - let min_cmp = self - .builder - .build_float_compare(FloatPredicate::OLT, v1, v2, ""); - let negative_zero = self.intrinsics.f32_ty.const_float(-0.0); - let v2 = self + let v2_is_nan = self .builder - .build_select( - self.builder.build_and( - self.builder.build_and(float_eq, repr_ne, ""), - v2_is_not_nan, - "", - ), - negative_zero, - v2, + .build_call( + self.intrinsics.cmp_f32, + &[ + v2.into(), + self.intrinsics.f32_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], "", ) - .into_float_value(); - let res = self.builder.build_select( - self.builder.build_or(v1_is_nan, min_cmp, ""), - v1, - v2, + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + let v1_lt_v2 = self + .builder + .build_call( + 
self.intrinsics.cmp_f32, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_olt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + let v1_gt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f32, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_ogt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + + let res = self.builder.build_select( + v1_is_nan, + self.quiet_nan(v1), + self.builder.build_select( + v2_is_nan, + self.quiet_nan(v2), + self.builder.build_select( + v1_lt_v2, + v1, + self.builder.build_select( + v1_gt_v2, + v2, + self.builder.build_bitcast( + self.builder.build_or( + self.builder + .build_bitcast(v1, self.intrinsics.i32_ty, "") + .into_int_value(), + self.builder + .build_bitcast(v2, self.intrinsics.i32_ty, "") + .into_int_value(), + "", + ), + self.intrinsics.f32_ty, + "", + ), + "", + ), + "", + ), + "", + ), "", ); - // Because inputs were canonicalized, we always produce - // canonical NaN outputs. No pending NaN cleanup. - self.state.push1_extra(res, ExtraInfo::arithmetic_f32()); + + self.state.push1(res); } Operator::F64Min => { // This implements the same logic as LLVM's @llvm.minimum // intrinsic would, but x86 lowering of that intrinsic - // encounters a fatal error in LLVM 8 and LLVM 9. + // encounters a fatal error in LLVM 11. let (v1, v2) = self.state.pop2()?; - - // To detect min(-0.0, 0.0), we check whether the integer - // representations are equal. There's one other case where that - // can happen: non-canonical NaNs. Here we unconditionally - // canonicalize the NaNs. let v1 = self.canonicalize_nans(v1); let v2 = self.canonicalize_nans(v2); - let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let v1_is_nan = self.builder.build_float_compare( - FloatPredicate::UNO, - v1, - self.intrinsics.f64_zero, - "nan", - ); - let v2_is_not_nan = self.builder.build_float_compare( - FloatPredicate::ORD, - v2, - self.intrinsics.f64_zero, - "notnan", - ); - let v1_repr = self + let v1_is_nan = self .builder - .build_bitcast(v1, self.intrinsics.i64_ty, "") + .build_call( + self.intrinsics.cmp_f64, + &[ + v1.into(), + self.intrinsics.f64_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() .into_int_value(); - let v2_repr = self + let v2_is_nan = self .builder - .build_bitcast(v2, self.intrinsics.i64_ty, "") + .build_call( + self.intrinsics.cmp_f64, + &[ + v2.into(), + self.intrinsics.f64_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() .into_int_value(); - let repr_ne = - self.builder - .build_int_compare(IntPredicate::NE, v1_repr, v2_repr, ""); - let float_eq = self - .builder - .build_float_compare(FloatPredicate::OEQ, v1, v2, ""); - let min_cmp = self + let v1_lt_v2 = self .builder - .build_float_compare(FloatPredicate::OLT, v1, v2, ""); - let negative_zero = self.intrinsics.f64_ty.const_float(-0.0); - let v2 = self + .build_call( + self.intrinsics.cmp_f64, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_olt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + let v1_gt_v2 = self .builder - .build_select( - self.builder.build_and( - self.builder.build_and(float_eq, repr_ne, ""), - v2_is_not_nan, - "", - ), - negative_zero, - v2, + .build_call( + 
self.intrinsics.cmp_f64, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_ogt_md, + self.intrinsics.fp_exception_md, + ], "", ) - .into_float_value(); + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + let res = self.builder.build_select( - self.builder.build_or(v1_is_nan, min_cmp, ""), - v1, - v2, + v1_is_nan, + self.quiet_nan(v1), + self.builder.build_select( + v2_is_nan, + self.quiet_nan(v2), + self.builder.build_select( + v1_lt_v2, + v1, + self.builder.build_select( + v1_gt_v2, + v2, + self.builder.build_bitcast( + self.builder.build_or( + self.builder + .build_bitcast(v1, self.intrinsics.i64_ty, "") + .into_int_value(), + self.builder + .build_bitcast(v2, self.intrinsics.i64_ty, "") + .into_int_value(), + "", + ), + self.intrinsics.f64_ty, + "", + ), + "", + ), + "", + ), + "", + ), "", ); - // Because inputs were canonicalized, we always produce - // canonical NaN outputs. No pending NaN cleanup. - self.state.push1_extra(res, ExtraInfo::arithmetic_f64()); + + self.state.push1(res); } Operator::F32x4Min => { - // a) check v1 and v2 for NaN - // b) check v2 for zero - // c) check v1 for sign - // - // We pick v1 iff - // v1 is NaN or - // v2 is not NaN and either - // v1 < v2 or - // v2 is ±zero and v1 is negative. - + // This implements the same logic as LLVM's @llvm.minimum + // intrinsic would, but x86 lowering of that intrinsic + // encounters a fatal error in LLVM 11. let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; - let (v1, i1) = self.v128_into_f32x4(v1, i1); - let (v2, i2) = self.v128_into_f32x4(v2, i2); - let v1 = if !i1.is_arithmetic_f32() { - self.canonicalize_nans(v1.as_basic_value_enum()) - .into_vector_value() - } else { - v1 - }; - let v2 = if !i2.is_arithmetic_f32() { - self.canonicalize_nans(v2.as_basic_value_enum()) - .into_vector_value() - } else { - v2 - }; - - let v1_is_nan = self.builder.build_float_compare( - FloatPredicate::UNO, - v1, - self.intrinsics.f32x4_zero, - "v1nan", - ); - let v2_is_notnan = self.builder.build_float_compare( - FloatPredicate::ORD, - v2, - self.intrinsics.f32x4_zero, - "v2notnan", - ); + let (v1, _) = self.v128_into_f32x4(v1, i1); + let (v2, _) = self.v128_into_f32x4(v2, i2); - let v2_is_zero = self.builder.build_float_compare( - FloatPredicate::OEQ, - v2, - self.intrinsics.f32x4_zero, - "v2zero", - ); + let v1_is_nan = self + .builder + .build_call( + self.intrinsics.cmp_f32x4, + &[ + v1.into(), + self.intrinsics.f32x4_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v2_is_nan = self + .builder + .build_call( + self.intrinsics.cmp_f32x4, + &[ + v2.into(), + self.intrinsics.f32x4_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v1_lt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f32x4, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_olt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v1_gt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f32x4, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_ogt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); - let v1_is_negative = self.builder.build_float_compare( - FloatPredicate::OLT, + let res = self.builder.build_select( + v1_is_nan, + 
self.quiet_nan(v1.into()).into_vector_value(), self.builder - .build_call( - self.intrinsics.copysign_f32x4, - &[ - VectorType::const_vector( - &[self - .intrinsics - .f32_ty - .const_float(1.0) - .as_basic_value_enum(); - 4], + .build_select( + v2_is_nan, + self.quiet_nan(v2.into()).into_vector_value(), + self.builder + .build_select( + v1_lt_v2, + v1.into(), + self.builder.build_select( + v1_gt_v2, + v2.into(), + self.builder.build_bitcast( + self.builder.build_or( + self.builder + .build_bitcast(v1, self.intrinsics.i32x4_ty, "") + .into_vector_value(), + self.builder + .build_bitcast(v2, self.intrinsics.i32x4_ty, "") + .into_vector_value(), + "", + ), + self.intrinsics.f32x4_ty, + "", + ), + "", + ), + "", ) - .as_basic_value_enum(), - v1.as_basic_value_enum(), - ], + .into_vector_value(), "", ) - .try_as_basic_value() - .left() - .unwrap() .into_vector_value(), - self.intrinsics.f32x4_zero, - "v1neg", - ); - - let v1_lt_v2 = self - .builder - .build_float_compare(FloatPredicate::OLT, v1, v2, ""); - - let pick_v1 = self.builder.build_or( - v1_is_nan, - self.builder.build_and( - v2_is_notnan, - self.builder.build_or( - v1_lt_v2, - self.builder.build_and(v1_is_negative, v2_is_zero, ""), - "", - ), - "", - ), "", ); - let res = self.builder.build_select(pick_v1, v1, v2, ""); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1(res); } @@ -4397,98 +4768,116 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { self.state.push1(res); } Operator::F64x2Min => { - // a) check v1 and v2 for NaN - // b) check v2 for zero - // c) check v1 for sign - // - // We pick v1 iff - // v1 is NaN or - // v2 is not NaN and either - // v1 < v2 or - // v2 is ±zero and v1 is negative. - + // This implements the same logic as LLVM's @llvm.minimum + // intrinsic would, but x86 lowering of that intrinsic + // encounters a fatal error in LLVM 11. 
let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; - let (v1, i1) = self.v128_into_f64x2(v1, i1); - let (v2, i2) = self.v128_into_f64x2(v2, i2); - let v1 = if !i1.is_arithmetic_f64() { - self.canonicalize_nans(v1.as_basic_value_enum()) - .into_vector_value() - } else { - v1 - }; - let v2 = if !i2.is_arithmetic_f64() { - self.canonicalize_nans(v2.as_basic_value_enum()) - .into_vector_value() - } else { - v2 - }; - - let v1_is_nan = self.builder.build_float_compare( - FloatPredicate::UNO, - v1, - self.intrinsics.f64x2_zero, - "v1nan", - ); - let v2_is_notnan = self.builder.build_float_compare( - FloatPredicate::ORD, - v2, - self.intrinsics.f64x2_zero, - "v2notnan", - ); + let (v1, _) = self.v128_into_f64x2(v1, i1); + let (v2, _) = self.v128_into_f64x2(v2, i2); - let v2_is_zero = self.builder.build_float_compare( - FloatPredicate::OEQ, - v2, - self.intrinsics.f64x2_zero, - "v2zero", - ); + let v1_is_nan = self + .builder + .build_call( + self.intrinsics.cmp_f64x2, + &[ + v1.into(), + self.intrinsics.f64x2_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v2_is_nan = self + .builder + .build_call( + self.intrinsics.cmp_f64x2, + &[ + v2.into(), + self.intrinsics.f64x2_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v1_lt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f64x2, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_olt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v1_gt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f64x2, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_ogt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); - let v1_is_negative = self.builder.build_float_compare( - FloatPredicate::OLT, + let res = self.builder.build_select( + v1_is_nan, + self.quiet_nan(v1.into()).into_vector_value(), self.builder - .build_call( - self.intrinsics.copysign_f64x2, - &[ - VectorType::const_vector( - &[self - .intrinsics - .f64_ty - .const_float(1.0) - .as_basic_value_enum(); - 2], + .build_select( + v2_is_nan, + self.quiet_nan(v2.into()).into_vector_value(), + self.builder + .build_select( + v1_lt_v2, + v1.into(), + self.builder.build_select( + v1_gt_v2, + v2.into(), + self.builder.build_bitcast( + self.builder.build_or( + self.builder + .build_bitcast(v1, self.intrinsics.i64x2_ty, "") + .into_vector_value(), + self.builder + .build_bitcast(v2, self.intrinsics.i64x2_ty, "") + .into_vector_value(), + "", + ), + self.intrinsics.f64x2_ty, + "", + ), + "", + ), + "", ) - .as_basic_value_enum(), - v1.as_basic_value_enum(), - ], + .into_vector_value(), "", ) - .try_as_basic_value() - .left() - .unwrap() .into_vector_value(), - self.intrinsics.f64x2_zero, - "v1neg", - ); - - let v1_lt_v2 = self - .builder - .build_float_compare(FloatPredicate::OLT, v1, v2, ""); - - let pick_v1 = self.builder.build_or( - v1_is_nan, - self.builder.build_and( - v2_is_notnan, - self.builder.build_or( - v1_lt_v2, - self.builder.build_and(v1_is_negative, v2_is_zero, ""), - "", - ), - "", - ), "", ); - let res = self.builder.build_select(pick_v1, v1, v2, ""); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1(res); } @@ -4507,228 +4896,332 @@ impl<'ctx, 'a> 
LLVMFunctionCodeGenerator<'ctx, 'a> { Operator::F32Max => { // This implements the same logic as LLVM's @llvm.maximum // intrinsic would, but x86 lowering of that intrinsic - // encounters a fatal error in LLVM 8 and LLVM 9. + // encounters a fatal error in LLVM 11. let (v1, v2) = self.state.pop2()?; - - // To detect min(-0.0, 0.0), we check whether the integer - // representations are equal. There's one other case where that - // can happen: non-canonical NaNs. Here we unconditionally - // canonicalize the NaNs. let v1 = self.canonicalize_nans(v1); let v2 = self.canonicalize_nans(v2); - let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let v1_is_nan = self.builder.build_float_compare( - FloatPredicate::UNO, - v1, - self.intrinsics.f32_zero, - "nan", - ); - let v2_is_not_nan = self.builder.build_float_compare( - FloatPredicate::ORD, - v2, - self.intrinsics.f32_zero, - "notnan", - ); - let v1_repr = self + let v1_is_nan = self .builder - .build_bitcast(v1, self.intrinsics.i32_ty, "") + .build_call( + self.intrinsics.cmp_f32, + &[ + v1.into(), + self.intrinsics.f32_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() .into_int_value(); - let v2_repr = self + let v2_is_nan = self .builder - .build_bitcast(v2, self.intrinsics.i32_ty, "") + .build_call( + self.intrinsics.cmp_f32, + &[ + v2.into(), + self.intrinsics.f32_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() .into_int_value(); - let repr_ne = - self.builder - .build_int_compare(IntPredicate::NE, v1_repr, v2_repr, ""); - let float_eq = self - .builder - .build_float_compare(FloatPredicate::OEQ, v1, v2, ""); - let min_cmp = self + let v1_lt_v2 = self .builder - .build_float_compare(FloatPredicate::OGT, v1, v2, ""); - let v2 = self + .build_call( + self.intrinsics.cmp_f32, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_olt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + let v1_gt_v2 = self .builder - .build_select( - self.builder.build_and( - self.builder.build_and(float_eq, repr_ne, ""), - v2_is_not_nan, - "", - ), - self.intrinsics.f32_zero, - v2, + .build_call( + self.intrinsics.cmp_f32, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_ogt_md, + self.intrinsics.fp_exception_md, + ], "", ) - .into_float_value(); + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + let res = self.builder.build_select( - self.builder.build_or(v1_is_nan, min_cmp, ""), - v1, - v2, + v1_is_nan, + self.quiet_nan(v1), + self.builder.build_select( + v2_is_nan, + self.quiet_nan(v2), + self.builder.build_select( + v1_lt_v2, + v2, + self.builder.build_select( + v1_gt_v2, + v1, + self.builder.build_bitcast( + self.builder.build_and( + self.builder + .build_bitcast(v1, self.intrinsics.i32_ty, "") + .into_int_value(), + self.builder + .build_bitcast(v2, self.intrinsics.i32_ty, "") + .into_int_value(), + "", + ), + self.intrinsics.f32_ty, + "", + ), + "", + ), + "", + ), + "", + ), "", ); - // Because inputs were canonicalized, we always produce - // canonical NaN outputs. No pending NaN cleanup. - self.state.push1_extra(res, ExtraInfo::arithmetic_f32()); + + self.state.push1(res); } Operator::F64Max => { // This implements the same logic as LLVM's @llvm.maximum // intrinsic would, but x86 lowering of that intrinsic - // encounters a fatal error in LLVM 8 and LLVM 9. 
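The scalar F32Max lowering above (and the F64Max one that continues below) is the mirror image of the min case: NaNs are quieted, the strictly larger operand wins, and the tie case ANDs the bit patterns so that `max(-0.0, +0.0)` is `+0.0`. As a scalar sketch, again assuming the canonical quiet-NaN pattern for illustration:

```rust
// Illustrative helper, not part of the compiler: the selection rule encoded
// by the F32Max select chain.
fn wasm_f32_max(a: f32, b: f32) -> f32 {
    if a.is_nan() || b.is_nan() {
        return f32::from_bits(0x7fc0_0000); // canonical quiet NaN
    }
    if a < b {
        return b;
    }
    if a > b {
        return a;
    }
    // a == b: AND-ing the bit patterns yields +0.0 unless both operands are
    // -0.0, so max(-0.0, +0.0) comes out as +0.0 as required.
    f32::from_bits(a.to_bits() & b.to_bits())
}
```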
+ // encounters a fatal error in LLVM 11. let (v1, v2) = self.state.pop2()?; - - // To detect min(-0.0, 0.0), we check whether the integer - // representations are equal. There's one other case where that - // can happen: non-canonical NaNs. Here we unconditionally - // canonicalize the NaNs. let v1 = self.canonicalize_nans(v1); let v2 = self.canonicalize_nans(v2); - let (v1, v2) = (v1.into_float_value(), v2.into_float_value()); - let v1_is_nan = self.builder.build_float_compare( - FloatPredicate::UNO, - v1, - self.intrinsics.f64_zero, - "nan", - ); - let v2_is_not_nan = self.builder.build_float_compare( - FloatPredicate::ORD, - v2, - self.intrinsics.f64_zero, - "notnan", - ); - let v1_repr = self + let v1_is_nan = self .builder - .build_bitcast(v1, self.intrinsics.i64_ty, "") + .build_call( + self.intrinsics.cmp_f64, + &[ + v1.into(), + self.intrinsics.f64_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() .into_int_value(); - let v2_repr = self + let v2_is_nan = self .builder - .build_bitcast(v2, self.intrinsics.i64_ty, "") + .build_call( + self.intrinsics.cmp_f64, + &[ + v2.into(), + self.intrinsics.f64_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + let v1_lt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f64, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_olt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_int_value(); + let v1_gt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f64, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_ogt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() .into_int_value(); - let repr_ne = - self.builder - .build_int_compare(IntPredicate::NE, v1_repr, v2_repr, ""); - let float_eq = self - .builder - .build_float_compare(FloatPredicate::OEQ, v1, v2, ""); - let min_cmp = self - .builder - .build_float_compare(FloatPredicate::OGT, v1, v2, ""); - let v2 = self - .builder - .build_select( - self.builder.build_and( - self.builder.build_and(float_eq, repr_ne, ""), - v2_is_not_nan, + + let res = self.builder.build_select( + v1_is_nan, + self.quiet_nan(v1), + self.builder.build_select( + v2_is_nan, + self.quiet_nan(v2), + self.builder.build_select( + v1_lt_v2, + v2, + self.builder.build_select( + v1_gt_v2, + v1, + self.builder.build_bitcast( + self.builder.build_and( + self.builder + .build_bitcast(v1, self.intrinsics.i64_ty, "") + .into_int_value(), + self.builder + .build_bitcast(v2, self.intrinsics.i64_ty, "") + .into_int_value(), + "", + ), + self.intrinsics.f64_ty, + "", + ), + "", + ), "", ), - self.intrinsics.f64_zero, - v2, "", - ) - .into_float_value(); - let res = self.builder.build_select( - self.builder.build_or(v1_is_nan, min_cmp, ""), - v1, - v2, + ), "", ); - // Because inputs were canonicalized, we always produce - // canonical NaN outputs. No pending NaN cleanup. - self.state.push1_extra(res, ExtraInfo::arithmetic_f64()); + + self.state.push1(res); } Operator::F32x4Max => { - // a) check v1 and v2 for NaN - // b) check v2 for zero - // c) check v1 for sign - // - // We pick v1 iff - // v1 is NaN or - // v2 is not NaN and either - // v1 > v2 or - // v1 is ±zero and v2 is negative. 
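These hunks also lean on the existing `canonicalize_nans`/`quiet_nan` helpers. At the value level their job is simply to collapse every NaN onto one canonical quiet NaN so that NaN payloads produced on one architecture never leak into results, which is the property the new `canonicalize_nans(bool)` configuration switch is meant to guarantee; non-NaN values pass through unchanged. A one-function sketch of that mapping, with the f32 canonical pattern stated as an assumption of the illustration:

```rust
// Illustrative value-level behaviour of NaN canonicalization for f32.
fn canonicalize_f32(x: f32) -> f32 {
    if x.is_nan() {
        // Payload and sign are discarded; every NaN maps to the same bits.
        f32::from_bits(0x7fc0_0000)
    } else {
        x
    }
}
```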
- + // This implements the same logic as LLVM's @llvm.maximum + // intrinsic would, but x86 lowering of that intrinsic + // encounters a fatal error in LLVM 11. let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; - let (v1, i1) = self.v128_into_f32x4(v1, i1); - let (v2, i2) = self.v128_into_f32x4(v2, i2); - let v1 = if !i1.is_arithmetic_f32() { - self.canonicalize_nans(v1.as_basic_value_enum()) - .into_vector_value() - } else { - v1 - }; - let v2 = if !i2.is_arithmetic_f32() { - self.canonicalize_nans(v2.as_basic_value_enum()) - .into_vector_value() - } else { - v2 - }; - - let v1_is_nan = self.builder.build_float_compare( - FloatPredicate::UNO, - v1, - self.intrinsics.f32x4_zero, - "v1nan", - ); - let v2_is_notnan = self.builder.build_float_compare( - FloatPredicate::ORD, - v2, - self.intrinsics.f32x4_zero, - "v2notnan", - ); + let (v1, _) = self.v128_into_f32x4(v1, i1); + let (v2, _) = self.v128_into_f32x4(v2, i2); - let v1_is_zero = self.builder.build_float_compare( - FloatPredicate::OEQ, - v1, - self.intrinsics.f32x4_zero, - "v1zero", - ); + let v1_is_nan = self + .builder + .build_call( + self.intrinsics.cmp_f32x4, + &[ + v1.into(), + self.intrinsics.f32x4_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v2_is_nan = self + .builder + .build_call( + self.intrinsics.cmp_f32x4, + &[ + v2.into(), + self.intrinsics.f32x4_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v1_lt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f32x4, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_olt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v1_gt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f32x4, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_ogt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); - let v2_is_negative = self.builder.build_float_compare( - FloatPredicate::OLT, + let res = self.builder.build_select( + v1_is_nan, + self.quiet_nan(v1.into()).into_vector_value(), self.builder - .build_call( - self.intrinsics.copysign_f32x4, - &[ - VectorType::const_vector( - &[self - .intrinsics - .f32_ty - .const_float(1.0) - .as_basic_value_enum(); - 4], + .build_select( + v2_is_nan, + self.quiet_nan(v2.into()).into_vector_value(), + self.builder + .build_select( + v1_lt_v2, + v2.into(), + self.builder.build_select( + v1_gt_v2, + v1.into(), + self.builder.build_bitcast( + self.builder.build_and( + self.builder + .build_bitcast(v1, self.intrinsics.i32x4_ty, "") + .into_vector_value(), + self.builder + .build_bitcast(v2, self.intrinsics.i32x4_ty, "") + .into_vector_value(), + "", + ), + self.intrinsics.f32x4_ty, + "", + ), + "", + ), + "", ) - .as_basic_value_enum(), - v2.as_basic_value_enum(), - ], + .into_vector_value(), "", ) - .try_as_basic_value() - .left() - .unwrap() .into_vector_value(), - self.intrinsics.f32x4_zero, - "v2neg", - ); - - let v1_gt_v2 = self - .builder - .build_float_compare(FloatPredicate::OGT, v1, v2, ""); - - let pick_v1 = self.builder.build_or( - v1_is_nan, - self.builder.build_and( - v2_is_notnan, - self.builder.build_or( - v1_gt_v2, - self.builder.build_and(v1_is_zero, v2_is_negative, ""), - "", - ), - "", - ), "", ); - let res = 
self.builder.build_select(pick_v1, v1, v2, ""); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1(res); } @@ -4741,102 +5234,121 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { .builder .build_float_compare(FloatPredicate::OLT, v1, v2, ""); let res = self.builder.build_select(cmp, v2, v1, ""); + let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1(res); } Operator::F64x2Max => { - // a) check v1 and v2 for NaN - // b) check v2 for zero - // c) check v1 for sign - // - // We pick v1 iff - // v1 is NaN or - // v2 is not NaN and either - // v1 > v2 or - // v1 is ±zero and v2 is negative. - + // This implements the same logic as LLVM's @llvm.maximum + // intrinsic would, but x86 lowering of that intrinsic + // encounters a fatal error in LLVM 11. let ((v1, i1), (v2, i2)) = self.state.pop2_extra()?; - let (v1, i1) = self.v128_into_f64x2(v1, i1); - let (v2, i2) = self.v128_into_f64x2(v2, i2); - let v1 = if !i1.is_arithmetic_f64() { - self.canonicalize_nans(v1.as_basic_value_enum()) - .into_vector_value() - } else { - v1 - }; - let v2 = if !i2.is_arithmetic_f64() { - self.canonicalize_nans(v2.as_basic_value_enum()) - .into_vector_value() - } else { - v2 - }; - - let v1_is_nan = self.builder.build_float_compare( - FloatPredicate::UNO, - v1, - self.intrinsics.f64x2_zero, - "v1nan", - ); - let v2_is_notnan = self.builder.build_float_compare( - FloatPredicate::ORD, - v2, - self.intrinsics.f64x2_zero, - "v2notnan", - ); + let (v1, _) = self.v128_into_f64x2(v1, i1); + let (v2, _) = self.v128_into_f64x2(v2, i2); - let v1_is_zero = self.builder.build_float_compare( - FloatPredicate::OEQ, - v1, - self.intrinsics.f64x2_zero, - "v1zero", - ); + let v1_is_nan = self + .builder + .build_call( + self.intrinsics.cmp_f64x2, + &[ + v1.into(), + self.intrinsics.f64x2_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v2_is_nan = self + .builder + .build_call( + self.intrinsics.cmp_f64x2, + &[ + v2.into(), + self.intrinsics.f64x2_zero.into(), + self.intrinsics.fp_uno_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v1_lt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f64x2, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_olt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); + let v1_gt_v2 = self + .builder + .build_call( + self.intrinsics.cmp_f64x2, + &[ + v1.into(), + v2.into(), + self.intrinsics.fp_ogt_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap() + .into_vector_value(); - let v2_is_negative = self.builder.build_float_compare( - FloatPredicate::OLT, + let res = self.builder.build_select( + v1_is_nan, + self.quiet_nan(v1.into()).into_vector_value(), self.builder - .build_call( - self.intrinsics.copysign_f64x2, - &[ - VectorType::const_vector( - &[self - .intrinsics - .f64_ty - .const_float(1.0) - .as_basic_value_enum(); - 2], + .build_select( + v2_is_nan, + self.quiet_nan(v2.into()).into_vector_value(), + self.builder + .build_select( + v1_lt_v2, + v2.into(), + self.builder.build_select( + v1_gt_v2, + v1.into(), + self.builder.build_bitcast( + self.builder.build_and( + self.builder + .build_bitcast(v1, self.intrinsics.i64x2_ty, "") + .into_vector_value(), + self.builder + .build_bitcast(v2, 
self.intrinsics.i64x2_ty, "") + .into_vector_value(), + "", + ), + self.intrinsics.f64x2_ty, + "", + ), + "", + ), + "", ) - .as_basic_value_enum(), - v2.as_basic_value_enum(), - ], + .into_vector_value(), "", ) - .try_as_basic_value() - .left() - .unwrap() .into_vector_value(), - self.intrinsics.f64x2_zero, - "v2neg", - ); - - let v1_gt_v2 = self - .builder - .build_float_compare(FloatPredicate::OGT, v1, v2, ""); - - let pick_v1 = self.builder.build_or( - v1_is_nan, - self.builder.build_and( - v2_is_notnan, - self.builder.build_or( - v1_gt_v2, - self.builder.build_and(v1_is_zero, v2_is_negative, ""), - "", - ), - "", - ), "", ); - let res = self.builder.build_select(pick_v1, v1, v2, ""); let res = self.builder.build_bitcast(res, self.intrinsics.i128_ty, ""); self.state.push1(res); } @@ -4856,7 +5368,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (input, info) = self.state.pop1_extra()?; let res = self .builder - .build_call(self.intrinsics.ceil_f32, &[input], "") + .build_call(self.intrinsics.ceil_f32, &[input.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4868,7 +5380,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f32x4(v, i); let res = self .builder - .build_call(self.intrinsics.ceil_f32x4, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.ceil_f32x4, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4880,7 +5392,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (input, info) = self.state.pop1_extra()?; let res = self .builder - .build_call(self.intrinsics.ceil_f64, &[input], "") + .build_call(self.intrinsics.ceil_f64, &[input.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4892,7 +5404,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f64x2(v, i); let res = self .builder - .build_call(self.intrinsics.ceil_f64x2, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.ceil_f64x2, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4904,7 +5416,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (input, info) = self.state.pop1_extra()?; let res = self .builder - .build_call(self.intrinsics.floor_f32, &[input], "") + .build_call(self.intrinsics.floor_f32, &[input.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4916,7 +5428,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f32x4(v, i); let res = self .builder - .build_call(self.intrinsics.floor_f32x4, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.floor_f32x4, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4928,7 +5440,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (input, info) = self.state.pop1_extra()?; let res = self .builder - .build_call(self.intrinsics.floor_f64, &[input], "") + .build_call(self.intrinsics.floor_f64, &[input.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4940,7 +5452,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f64x2(v, i); let res = self .builder - .build_call(self.intrinsics.floor_f64x2, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.floor_f64x2, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4952,7 +5464,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, i) = self.state.pop1_extra()?; let res = self .builder - .build_call(self.intrinsics.trunc_f32, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.trunc_f32, &[v.into()], "") .try_as_basic_value() 
.left() .unwrap(); @@ -4964,7 +5476,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f32x4(v, i); let res = self .builder - .build_call(self.intrinsics.trunc_f32x4, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.trunc_f32x4, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4976,7 +5488,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, i) = self.state.pop1_extra()?; let res = self .builder - .build_call(self.intrinsics.trunc_f64, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.trunc_f64, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -4988,7 +5500,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f64x2(v, i); let res = self .builder - .build_call(self.intrinsics.trunc_f64x2, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.trunc_f64x2, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5000,11 +5512,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, i) = self.state.pop1_extra()?; let res = self .builder - .build_call( - self.intrinsics.nearbyint_f32, - &[v.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.nearbyint_f32, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5016,11 +5524,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f32x4(v, i); let res = self .builder - .build_call( - self.intrinsics.nearbyint_f32x4, - &[v.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.nearbyint_f32x4, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5032,11 +5536,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, i) = self.state.pop1_extra()?; let res = self .builder - .build_call( - self.intrinsics.nearbyint_f64, - &[v.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.nearbyint_f64, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5048,11 +5548,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let (v, _) = self.v128_into_f64x2(v, i); let res = self .builder - .build_call( - self.intrinsics.nearbyint_f64x2, - &[v.as_basic_value_enum()], - "", - ) + .build_call(self.intrinsics.nearbyint_f64x2, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5065,7 +5561,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let v = self.apply_pending_canonicalization(v, i); let res = self .builder - .build_call(self.intrinsics.fabs_f32, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.fabs_f32, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5078,7 +5574,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let v = self.apply_pending_canonicalization(v, i); let res = self .builder - .build_call(self.intrinsics.fabs_f64, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.fabs_f64, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5094,7 +5590,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let v = self.apply_pending_canonicalization(v, i); let res = self .builder - .build_call(self.intrinsics.fabs_f32x4, &[v.as_basic_value_enum()], "") + .build_call(self.intrinsics.fabs_f32x4, &[v.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5111,7 +5607,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let v = self.apply_pending_canonicalization(v, i); let res = self .builder - .build_call(self.intrinsics.fabs_f64x2, &[v], "") + .build_call(self.intrinsics.fabs_f64x2, &[v.into()], "") .try_as_basic_value() .left() 
.unwrap(); @@ -5162,7 +5658,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let sgn = self.apply_pending_canonicalization(sgn, sgn_info); let res = self .builder - .build_call(self.intrinsics.copysign_f32, &[mag, sgn], "") + .build_call(self.intrinsics.copysign_f32, &[mag.into(), sgn.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -5176,7 +5672,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let sgn = self.apply_pending_canonicalization(sgn, sgn_info); let res = self .builder - .build_call(self.intrinsics.copysign_f64, &[mag, sgn], "") + .build_call(self.intrinsics.copysign_f64, &[mag.into(), sgn.into()], "") .try_as_basic_value() .left() .unwrap(); @@ -6846,13 +7342,33 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let v = v.into_float_value(); let res = self .builder - .build_float_trunc(v, self.intrinsics.f32_ty, ""); + .build_call( + self.intrinsics.fptrunc_f64, + &[ + v.into(), + self.intrinsics.fp_rounding_md, + self.intrinsics.fp_exception_md, + ], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra(res, ExtraInfo::pending_f32_nan()); } Operator::F64PromoteF32 => { let v = self.state.pop1()?; let v = v.into_float_value(); - let res = self.builder.build_float_ext(v, self.intrinsics.f64_ty, ""); + let res = self + .builder + .build_call( + self.intrinsics.fpext_f32, + &[v.into(), self.intrinsics.fp_exception_md], + "", + ) + .try_as_basic_value() + .left() + .unwrap(); self.state.push1_extra(res, ExtraInfo::pending_f64_nan()); } Operator::F32ConvertI32S | Operator::F32ConvertI64S => { @@ -10397,12 +10913,9 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let grow = self.builder.build_call( grow_fn_ptr, &[ - vmctx.as_basic_value_enum(), - delta, - self.intrinsics - .i32_ty - .const_int(mem.into(), false) - .as_basic_value_enum(), + vmctx.as_basic_value_enum().into(), + delta.into(), + self.intrinsics.i32_ty.const_int(mem.into(), false).into(), ], "", ); @@ -10414,11 +10927,8 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let size = self.builder.build_call( size_fn_ptr, &[ - vmctx.as_basic_value_enum(), - self.intrinsics - .i32_ty - .const_int(mem.into(), false) - .as_basic_value_enum(), + vmctx.as_basic_value_enum().into(), + self.intrinsics.i32_ty.const_int(mem.into(), false).into(), ], "", ); @@ -10427,31 +10937,26 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { } Operator::MemoryInit { segment, mem } => { let (dest, src, len) = self.state.pop3()?; - let mem = self - .intrinsics - .i32_ty - .const_int(mem.into(), false) - .as_basic_value_enum(); - let segment = self - .intrinsics - .i32_ty - .const_int(segment.into(), false) - .as_basic_value_enum(); + let mem = self.intrinsics.i32_ty.const_int(mem.into(), false); + let segment = self.intrinsics.i32_ty.const_int(segment.into(), false); self.builder.build_call( self.intrinsics.memory_init, - &[vmctx.as_basic_value_enum(), mem, segment, dest, src, len], + &[ + vmctx.as_basic_value_enum().into(), + mem.into(), + segment.into(), + dest.into(), + src.into(), + len.into(), + ], "", ); } Operator::DataDrop { segment } => { - let segment = self - .intrinsics - .i32_ty - .const_int(segment.into(), false) - .as_basic_value_enum(); + let segment = self.intrinsics.i32_ty.const_int(segment.into(), false); self.builder.build_call( self.intrinsics.data_drop, - &[vmctx.as_basic_value_enum(), segment], + &[vmctx.as_basic_value_enum().into(), segment.into()], "", ); } @@ -10468,19 +10973,15 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { }; let (dest_pos, 
src_pos, len) = self.state.pop3()?; - let src_index = self - .intrinsics - .i32_ty - .const_int(src.into(), false) - .as_basic_value_enum(); + let src_index = self.intrinsics.i32_ty.const_int(src.into(), false); self.builder.build_call( memory_copy, &[ - vmctx.as_basic_value_enum(), - src_index, - dest_pos, - src_pos, - len, + vmctx.as_basic_value_enum().into(), + src_index.into(), + dest_pos.into(), + src_pos.into(), + len.into(), ], "", ); @@ -10496,14 +10997,16 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { }; let (dst, val, len) = self.state.pop3()?; - let mem_index = self - .intrinsics - .i32_ty - .const_int(mem.into(), false) - .as_basic_value_enum(); + let mem_index = self.intrinsics.i32_ty.const_int(mem.into(), false); self.builder.build_call( memory_fill, - &[vmctx.as_basic_value_enum(), mem_index, dst, val, len], + &[ + vmctx.as_basic_value_enum().into(), + mem_index.into(), + dst.into(), + val.into(), + len.into(), + ], "", ); } @@ -10528,22 +11031,21 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { let index = self .intrinsics .i32_ty - .const_int(function_index.into(), false) - .as_basic_value_enum(); + .const_int(function_index.into(), false); let value = self .builder - .build_call(self.intrinsics.func_ref, &[self.ctx.basic(), index], "") + .build_call( + self.intrinsics.func_ref, + &[self.ctx.basic().into(), index.into()], + "", + ) .try_as_basic_value() .left() .unwrap(); self.state.push1(value); } Operator::TableGet { table } => { - let table_index = self - .intrinsics - .i32_ty - .const_int(table.into(), false) - .as_basic_value_enum(); + let table_index = self.intrinsics.i32_ty.const_int(table.into(), false); let elem = self.state.pop1()?; let table_get = if let Some(_) = self .wasm_module @@ -10555,7 +11057,11 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { }; let value = self .builder - .build_call(table_get, &[self.ctx.basic(), table_index, elem], "") + .build_call( + table_get, + &[self.ctx.basic().into(), table_index.into(), elem.into()], + "", + ) .try_as_basic_value() .left() .unwrap(); @@ -10574,11 +11080,7 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { self.state.push1(value); } Operator::TableSet { table } => { - let table_index = self - .intrinsics - .i32_ty - .const_int(table.into(), false) - .as_basic_value_enum(); + let table_index = self.intrinsics.i32_ty.const_int(table.into(), false); let (elem, value) = self.state.pop2()?; let value = self .builder @@ -10593,7 +11095,12 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { }; self.builder.build_call( table_set, - &[self.ctx.basic(), table_index, elem, value], + &[ + self.ctx.basic().into(), + table_index.into(), + elem.into(), + value.into(), + ], "", ); } @@ -10602,65 +11109,61 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { src_table, } => { let (dst, src, len) = self.state.pop3()?; - let dst_table = self - .intrinsics - .i32_ty - .const_int(dst_table as u64, false) - .as_basic_value_enum(); - let src_table = self - .intrinsics - .i32_ty - .const_int(src_table as u64, false) - .as_basic_value_enum(); + let dst_table = self.intrinsics.i32_ty.const_int(dst_table as u64, false); + let src_table = self.intrinsics.i32_ty.const_int(src_table as u64, false); self.builder.build_call( self.intrinsics.table_copy, - &[self.ctx.basic(), dst_table, src_table, dst, src, len], + &[ + self.ctx.basic().into(), + dst_table.into(), + src_table.into(), + dst.into(), + src.into(), + len.into(), + ], "", ); } Operator::TableInit { segment, table } => { let (dst, src, len) = 
self.state.pop3()?; - let segment = self - .intrinsics - .i32_ty - .const_int(segment as u64, false) - .as_basic_value_enum(); - let table = self - .intrinsics - .i32_ty - .const_int(table as u64, false) - .as_basic_value_enum(); + let segment = self.intrinsics.i32_ty.const_int(segment as u64, false); + let table = self.intrinsics.i32_ty.const_int(table as u64, false); self.builder.build_call( self.intrinsics.table_init, - &[self.ctx.basic(), table, segment, dst, src, len], + &[ + self.ctx.basic().into(), + table.into(), + segment.into(), + dst.into(), + src.into(), + len.into(), + ], "", ); } Operator::ElemDrop { segment } => { - let segment = self - .intrinsics - .i32_ty - .const_int(segment as u64, false) - .as_basic_value_enum(); + let segment = self.intrinsics.i32_ty.const_int(segment as u64, false); self.builder.build_call( self.intrinsics.elem_drop, - &[self.ctx.basic(), segment], + &[self.ctx.basic().into(), segment.into()], "", ); } Operator::TableFill { table } => { - let table = self - .intrinsics - .i32_ty - .const_int(table as u64, false) - .as_basic_value_enum(); + let table = self.intrinsics.i32_ty.const_int(table as u64, false); let (start, elem, len) = self.state.pop3()?; let elem = self .builder .build_bitcast(elem, self.intrinsics.anyref_ty, ""); self.builder.build_call( self.intrinsics.table_fill, - &[self.ctx.basic(), table, start, elem, len], + &[ + self.ctx.basic().into(), + table.into(), + start.into(), + elem.into(), + len.into(), + ], "", ); } @@ -10677,16 +11180,17 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { } else { (self.intrinsics.imported_table_grow, table) }; - let table_index = self - .intrinsics - .i32_ty - .const_int(table_index as u64, false) - .as_basic_value_enum(); + let table_index = self.intrinsics.i32_ty.const_int(table_index as u64, false); let size = self .builder .build_call( table_grow, - &[self.ctx.basic(), elem, delta, table_index], + &[ + self.ctx.basic().into(), + elem.into(), + delta.into(), + table_index.into(), + ], "", ) .try_as_basic_value() @@ -10703,14 +11207,14 @@ impl<'ctx, 'a> LLVMFunctionCodeGenerator<'ctx, 'a> { } else { (self.intrinsics.imported_table_size, table) }; - let table_index = self - .intrinsics - .i32_ty - .const_int(table_index as u64, false) - .as_basic_value_enum(); + let table_index = self.intrinsics.i32_ty.const_int(table_index as u64, false); let size = self .builder - .build_call(table_size, &[self.ctx.basic(), table_index], "") + .build_call( + table_size, + &[self.ctx.basic().into(), table_index.into()], + "", + ) .try_as_basic_value() .left() .unwrap(); diff --git a/lib/compiler-llvm/src/translator/intrinsics.rs b/lib/compiler-llvm/src/translator/intrinsics.rs index 4773b3b0c92..ee6753683dd 100644 --- a/lib/compiler-llvm/src/translator/intrinsics.rs +++ b/lib/compiler-llvm/src/translator/intrinsics.rs @@ -5,13 +5,15 @@ //! 
[llvm-intrinsics]: https://llvm.org/docs/LangRef.html#intrinsic-functions use crate::abi::Abi; +use inkwell::values::BasicMetadataValueEnum; use inkwell::{ attributes::{Attribute, AttributeLoc}, builder::Builder, context::Context, module::{Linkage, Module}, types::{ - BasicType, BasicTypeEnum, FloatType, IntType, PointerType, StructType, VectorType, VoidType, + BasicMetadataTypeEnum, BasicType, BasicTypeEnum, FloatType, IntType, PointerType, + StructType, VectorType, VoidType, }, values::{ BasicValue, BasicValueEnum, FloatValue, FunctionValue, InstructionValue, IntValue, @@ -71,11 +73,42 @@ pub struct Intrinsics<'ctx> { pub ctpop_i64: FunctionValue<'ctx>, pub ctpop_i8x16: FunctionValue<'ctx>, + pub fp_rounding_md: BasicMetadataValueEnum<'ctx>, + pub fp_exception_md: BasicMetadataValueEnum<'ctx>, + pub fp_ogt_md: BasicMetadataValueEnum<'ctx>, + pub fp_olt_md: BasicMetadataValueEnum<'ctx>, + pub fp_uno_md: BasicMetadataValueEnum<'ctx>, + + pub add_f32: FunctionValue<'ctx>, + pub add_f64: FunctionValue<'ctx>, + pub add_f32x4: FunctionValue<'ctx>, + pub add_f64x2: FunctionValue<'ctx>, + + pub sub_f32: FunctionValue<'ctx>, + pub sub_f64: FunctionValue<'ctx>, + pub sub_f32x4: FunctionValue<'ctx>, + pub sub_f64x2: FunctionValue<'ctx>, + + pub mul_f32: FunctionValue<'ctx>, + pub mul_f64: FunctionValue<'ctx>, + pub mul_f32x4: FunctionValue<'ctx>, + pub mul_f64x2: FunctionValue<'ctx>, + + pub div_f32: FunctionValue<'ctx>, + pub div_f64: FunctionValue<'ctx>, + pub div_f32x4: FunctionValue<'ctx>, + pub div_f64x2: FunctionValue<'ctx>, + pub sqrt_f32: FunctionValue<'ctx>, pub sqrt_f64: FunctionValue<'ctx>, pub sqrt_f32x4: FunctionValue<'ctx>, pub sqrt_f64x2: FunctionValue<'ctx>, + pub cmp_f32: FunctionValue<'ctx>, + pub cmp_f64: FunctionValue<'ctx>, + pub cmp_f32x4: FunctionValue<'ctx>, + pub cmp_f64x2: FunctionValue<'ctx>, + pub ceil_f32: FunctionValue<'ctx>, pub ceil_f64: FunctionValue<'ctx>, pub ceil_f32x4: FunctionValue<'ctx>, @@ -91,6 +124,9 @@ pub struct Intrinsics<'ctx> { pub trunc_f32x4: FunctionValue<'ctx>, pub trunc_f64x2: FunctionValue<'ctx>, + pub fpext_f32: FunctionValue<'ctx>, + pub fptrunc_f64: FunctionValue<'ctx>, + pub nearbyint_f32: FunctionValue<'ctx>, pub nearbyint_f64: FunctionValue<'ctx>, pub nearbyint_f32x4: FunctionValue<'ctx>, @@ -238,6 +274,8 @@ impl<'ctx> Intrinsics<'ctx> { let f32_ty = context.f32_type(); let f64_ty = context.f64_type(); + let i1x4_ty = i1_ty.vec_type(4); + let i1x2_ty = i1_ty.vec_type(2); let i1x128_ty = i1_ty.vec_type(128); let i8x16_ty = i8_ty.vec_type(16); let i16x8_ty = i16_ty.vec_type(8); @@ -283,55 +321,141 @@ impl<'ctx> Intrinsics<'ctx> { i32_ty.const_int(15, false), ]; - let i1_ty_basic = i1_ty.as_basic_type_enum(); - let i32_ty_basic = i32_ty.as_basic_type_enum(); - let i64_ty_basic = i64_ty.as_basic_type_enum(); - let f32_ty_basic = f32_ty.as_basic_type_enum(); - let f64_ty_basic = f64_ty.as_basic_type_enum(); - let i8x16_ty_basic = i8x16_ty.as_basic_type_enum(); - let i16x8_ty_basic = i16x8_ty.as_basic_type_enum(); - let f32x4_ty_basic = f32x4_ty.as_basic_type_enum(); - let f64x2_ty_basic = f64x2_ty.as_basic_type_enum(); + let md_ty = context.metadata_type(); + let i8_ptr_ty_basic = i8_ptr_ty.as_basic_type_enum(); + let i1_ty_basic_md: BasicMetadataTypeEnum = i1_ty.into(); + let i32_ty_basic_md: BasicMetadataTypeEnum = i32_ty.into(); + let i64_ty_basic_md: BasicMetadataTypeEnum = i64_ty.into(); + let f32_ty_basic_md: BasicMetadataTypeEnum = f32_ty.into(); + let f64_ty_basic_md: BasicMetadataTypeEnum = f64_ty.into(); + let i8x16_ty_basic_md: 
BasicMetadataTypeEnum = i8x16_ty.into(); + let i16x8_ty_basic_md: BasicMetadataTypeEnum = i16x8_ty.into(); + let f32x4_ty_basic_md: BasicMetadataTypeEnum = f32x4_ty.into(); + let f64x2_ty_basic_md: BasicMetadataTypeEnum = f64x2_ty.into(); + let md_ty_basic_md: BasicMetadataTypeEnum = md_ty.into(); + let ctx_ty = i8_ty; let ctx_ptr_ty = ctx_ty.ptr_type(AddressSpace::Generic); + let ctx_ptr_ty_basic = ctx_ptr_ty.as_basic_type_enum(); + let ctx_ptr_ty_basic_md: BasicMetadataTypeEnum = ctx_ptr_ty.into(); let sigindex_ty = i32_ty; let anyfunc_ty = context.struct_type( - &[ - i8_ptr_ty_basic, - sigindex_ty.as_basic_type_enum(), - ctx_ptr_ty.as_basic_type_enum(), - ], + &[i8_ptr_ty_basic, sigindex_ty.into(), ctx_ptr_ty_basic], false, ); let funcref_ty = anyfunc_ty.ptr_type(AddressSpace::Generic); let externref_ty = funcref_ty; let anyref_ty = i8_ptr_ty; + let anyref_ty_basic_md: BasicMetadataTypeEnum = anyref_ty.into(); + + let ret_i8x16_take_i8x16 = i8x16_ty.fn_type(&[i8x16_ty_basic_md], false); + let ret_i8x16_take_i8x16_i8x16 = + i8x16_ty.fn_type(&[i8x16_ty_basic_md, i8x16_ty_basic_md], false); + let ret_i16x8_take_i16x8_i16x8 = + i16x8_ty.fn_type(&[i16x8_ty_basic_md, i16x8_ty_basic_md], false); + + let ret_i32_take_i32_i1 = i32_ty.fn_type(&[i32_ty_basic_md, i1_ty_basic_md], false); + let ret_i64_take_i64_i1 = i64_ty.fn_type(&[i64_ty_basic_md, i1_ty_basic_md], false); - let ret_i8x16_take_i8x16 = i8x16_ty.fn_type(&[i8x16_ty_basic], false); - let ret_i8x16_take_i8x16_i8x16 = i8x16_ty.fn_type(&[i8x16_ty_basic, i8x16_ty_basic], false); - let ret_i16x8_take_i16x8_i16x8 = i16x8_ty.fn_type(&[i16x8_ty_basic, i16x8_ty_basic], false); + let ret_i32_take_i32 = i32_ty.fn_type(&[i32_ty_basic_md], false); + let ret_i64_take_i64 = i64_ty.fn_type(&[i64_ty_basic_md], false); - let ret_i32_take_i32_i1 = i32_ty.fn_type(&[i32_ty_basic, i1_ty_basic], false); - let ret_i64_take_i64_i1 = i64_ty.fn_type(&[i64_ty_basic, i1_ty_basic], false); + let ret_f32_take_f32 = f32_ty.fn_type(&[f32_ty_basic_md], false); + let ret_f64_take_f64 = f64_ty.fn_type(&[f64_ty_basic_md], false); + let ret_f32x4_take_f32x4 = f32x4_ty.fn_type(&[f32x4_ty_basic_md], false); + let ret_f64x2_take_f64x2 = f64x2_ty.fn_type(&[f64x2_ty_basic_md], false); - let ret_i32_take_i32 = i32_ty.fn_type(&[i32_ty_basic], false); - let ret_i64_take_i64 = i64_ty.fn_type(&[i64_ty_basic], false); + let ret_f32_take_f32_f32 = f32_ty.fn_type(&[f32_ty_basic_md, f32_ty_basic_md], false); + let ret_f64_take_f64_f64 = f64_ty.fn_type(&[f64_ty_basic_md, f64_ty_basic_md], false); + let ret_f32x4_take_f32x4_f32x4 = + f32x4_ty.fn_type(&[f32x4_ty_basic_md, f32x4_ty_basic_md], false); + let ret_f64x2_take_f64x2_f64x2 = + f64x2_ty.fn_type(&[f64x2_ty_basic_md, f64x2_ty_basic_md], false); - let ret_f32_take_f32 = f32_ty.fn_type(&[f32_ty_basic], false); - let ret_f64_take_f64 = f64_ty.fn_type(&[f64_ty_basic], false); - let ret_f32x4_take_f32x4 = f32x4_ty.fn_type(&[f32x4_ty_basic], false); - let ret_f64x2_take_f64x2 = f64x2_ty.fn_type(&[f64x2_ty_basic], false); + let ret_f64_take_f32_md = f64_ty.fn_type(&[f32_ty_basic_md, md_ty_basic_md], false); + let ret_f32_take_f64_md_md = + f32_ty.fn_type(&[f64_ty_basic_md, md_ty_basic_md, md_ty_basic_md], false); + + let ret_i1_take_i1_i1 = i1_ty.fn_type(&[i1_ty_basic_md, i1_ty_basic_md], false); + + let ret_i1_take_f32_f32_md_md = i1_ty.fn_type( + &[ + f32_ty_basic_md, + f32_ty_basic_md, + md_ty_basic_md, + md_ty_basic_md, + ], + false, + ); + let ret_i1_take_f64_f64_md_md = i1_ty.fn_type( + &[ + f64_ty_basic_md, + f64_ty_basic_md, + 
md_ty_basic_md, + md_ty_basic_md, + ], + false, + ); + let ret_i1x4_take_f32x4_f32x4_md_md = i1x4_ty.fn_type( + &[ + f32x4_ty_basic_md, + f32x4_ty_basic_md, + md_ty_basic_md, + md_ty_basic_md, + ], + false, + ); + let ret_i1x2_take_f64x2_f64x2_md_md = i1x2_ty.fn_type( + &[ + f64x2_ty_basic_md, + f64x2_ty_basic_md, + md_ty_basic_md, + md_ty_basic_md, + ], + false, + ); - let ret_f32_take_f32_f32 = f32_ty.fn_type(&[f32_ty_basic, f32_ty_basic], false); - let ret_f64_take_f64_f64 = f64_ty.fn_type(&[f64_ty_basic, f64_ty_basic], false); - let ret_f32x4_take_f32x4_f32x4 = f32x4_ty.fn_type(&[f32x4_ty_basic, f32x4_ty_basic], false); - let ret_f64x2_take_f64x2_f64x2 = f64x2_ty.fn_type(&[f64x2_ty_basic, f64x2_ty_basic], false); + let ret_f32_take_f32_f32_md_md = f32_ty.fn_type( + &[ + f32_ty_basic_md, + f32_ty_basic_md, + md_ty_basic_md, + md_ty_basic_md, + ], + false, + ); + let ret_f64_take_f64_f64_md_md = f64_ty.fn_type( + &[ + f64_ty_basic_md, + f64_ty_basic_md, + md_ty_basic_md, + md_ty_basic_md, + ], + false, + ); + let ret_f32x4_take_f32x4_f32x4_md_md = f32x4_ty.fn_type( + &[ + f32x4_ty_basic_md, + f32x4_ty_basic_md, + md_ty_basic_md, + md_ty_basic_md, + ], + false, + ); + let ret_f64x2_take_f64x2_f64x2_md_md = f64x2_ty.fn_type( + &[ + f64x2_ty_basic_md, + f64x2_ty_basic_md, + md_ty_basic_md, + md_ty_basic_md, + ], + false, + ); - let ret_i1_take_i1_i1 = i1_ty.fn_type(&[i1_ty_basic, i1_ty_basic], false); let intrinsics = Self { ctlz_i32: module.add_function("llvm.ctlz.i32", ret_i32_take_i32_i1, None), ctlz_i64: module.add_function("llvm.ctlz.i64", ret_i64_take_i64_i1, None), @@ -343,6 +467,13 @@ impl<'ctx> Intrinsics<'ctx> { ctpop_i64: module.add_function("llvm.ctpop.i64", ret_i64_take_i64, None), ctpop_i8x16: module.add_function("llvm.ctpop.v16i8", ret_i8x16_take_i8x16, None), + fp_rounding_md: context.metadata_string("round.tonearest").into(), + fp_exception_md: context.metadata_string("fpexcept.strict").into(), + + fp_ogt_md: context.metadata_string("ogt").into(), + fp_olt_md: context.metadata_string("olt").into(), + fp_uno_md: context.metadata_string("uno").into(), + sqrt_f32: module.add_function("llvm.sqrt.f32", ret_f32_take_f32, None), sqrt_f64: module.add_function("llvm.sqrt.f64", ret_f64_take_f64, None), sqrt_f32x4: module.add_function("llvm.sqrt.v4f32", ret_f32x4_take_f32x4, None), @@ -376,6 +507,122 @@ impl<'ctx> Intrinsics<'ctx> { None, ), + add_f32: module.add_function( + "llvm.experimental.constrained.fadd.f32", + ret_f32_take_f32_f32_md_md, + None, + ), + add_f64: module.add_function( + "llvm.experimental.constrained.fadd.f64", + ret_f64_take_f64_f64_md_md, + None, + ), + add_f32x4: module.add_function( + "llvm.experimental.constrained.fadd.v4f32", + ret_f32x4_take_f32x4_f32x4_md_md, + None, + ), + add_f64x2: module.add_function( + "llvm.experimental.constrained.fadd.v2f64", + ret_f64x2_take_f64x2_f64x2_md_md, + None, + ), + + sub_f32: module.add_function( + "llvm.experimental.constrained.fsub.f32", + ret_f32_take_f32_f32_md_md, + None, + ), + sub_f64: module.add_function( + "llvm.experimental.constrained.fsub.f64", + ret_f64_take_f64_f64_md_md, + None, + ), + sub_f32x4: module.add_function( + "llvm.experimental.constrained.fsub.v4f32", + ret_f32x4_take_f32x4_f32x4_md_md, + None, + ), + sub_f64x2: module.add_function( + "llvm.experimental.constrained.fsub.v2f64", + ret_f64x2_take_f64x2_f64x2_md_md, + None, + ), + + mul_f32: module.add_function( + "llvm.experimental.constrained.fmul.f32", + ret_f32_take_f32_f32_md_md, + None, + ), + mul_f64: module.add_function( + 
"llvm.experimental.constrained.fmul.f64", + ret_f64_take_f64_f64_md_md, + None, + ), + mul_f32x4: module.add_function( + "llvm.experimental.constrained.fmul.v4f32", + ret_f32x4_take_f32x4_f32x4_md_md, + None, + ), + mul_f64x2: module.add_function( + "llvm.experimental.constrained.fmul.v2f64", + ret_f64x2_take_f64x2_f64x2_md_md, + None, + ), + + div_f32: module.add_function( + "llvm.experimental.constrained.fdiv.f32", + ret_f32_take_f32_f32_md_md, + None, + ), + div_f64: module.add_function( + "llvm.experimental.constrained.fdiv.f64", + ret_f64_take_f64_f64_md_md, + None, + ), + div_f32x4: module.add_function( + "llvm.experimental.constrained.fdiv.v4f32", + ret_f32x4_take_f32x4_f32x4_md_md, + None, + ), + div_f64x2: module.add_function( + "llvm.experimental.constrained.fdiv.v2f64", + ret_f64x2_take_f64x2_f64x2_md_md, + None, + ), + + cmp_f32: module.add_function( + "llvm.experimental.constrained.fcmp.f32", + ret_i1_take_f32_f32_md_md, + None, + ), + cmp_f64: module.add_function( + "llvm.experimental.constrained.fcmp.f64", + ret_i1_take_f64_f64_md_md, + None, + ), + cmp_f32x4: module.add_function( + "llvm.experimental.constrained.fcmp.v4f32", + ret_i1x4_take_f32x4_f32x4_md_md, + None, + ), + cmp_f64x2: module.add_function( + "llvm.experimental.constrained.fcmp.v2f64", + ret_i1x2_take_f64x2_f64x2_md_md, + None, + ), + + fpext_f32: module.add_function( + "llvm.experimental.constrained.fpext.f64.f32", + ret_f64_take_f32_md, + None, + ), + fptrunc_f64: module.add_function( + "llvm.experimental.constrained.fptrunc.f32.f64", + ret_f32_take_f64_md_md, + None, + ), + fabs_f32: module.add_function("llvm.fabs.f32", ret_f32_take_f32, None), fabs_f64: module.add_function("llvm.fabs.f64", ret_f64_take_f64, None), fabs_f32x4: module.add_function("llvm.fabs.v4f32", ret_f32x4_take_f32x4, None), @@ -526,8 +773,8 @@ impl<'ctx> Intrinsics<'ctx> { "llvm.experimental.stackmap", void_ty.fn_type( &[ - i64_ty_basic, /* id */ - i32_ty_basic, /* numShadowBytes */ + i64_ty_basic_md, /* id */ + i32_ty_basic_md, /* numShadowBytes */ ], true, ), @@ -539,12 +786,12 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_table_copy", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -554,12 +801,12 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_table_init", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -569,11 +816,11 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_table_fill", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - anyref_ty.as_basic_type_enum(), - i32_ty_basic, + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + anyref_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -581,18 +828,18 @@ impl<'ctx> Intrinsics<'ctx> { ), table_size: module.add_function( "wasmer_vm_table_size", - i32_ty.fn_type(&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic], false), + i32_ty.fn_type(&[ctx_ptr_ty_basic_md, i32_ty_basic_md], false), None, ), imported_table_size: module.add_function( "wasmer_vm_imported_table_size", - i32_ty.fn_type(&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic], false), + i32_ty.fn_type(&[ctx_ptr_ty_basic_md, i32_ty_basic_md], false), None, ), 
table_get: module.add_function( "wasmer_vm_table_get", anyref_ty.fn_type( - &[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic, i32_ty_basic], + &[ctx_ptr_ty_basic_md, i32_ty_basic_md, i32_ty_basic_md], false, ), None, @@ -600,7 +847,7 @@ impl<'ctx> Intrinsics<'ctx> { imported_table_get: module.add_function( "wasmer_vm_imported_table_get", anyref_ty.fn_type( - &[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic, i32_ty_basic], + &[ctx_ptr_ty_basic_md, i32_ty_basic_md, i32_ty_basic_md], false, ), None, @@ -609,10 +856,10 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_table_set", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - anyref_ty.as_basic_type_enum(), + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + anyref_ty_basic_md, ], false, ), @@ -622,10 +869,10 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_imported_table_set", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - anyref_ty.as_basic_type_enum(), + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + anyref_ty_basic_md, ], false, ), @@ -635,10 +882,10 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_table_grow", i32_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - anyref_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + anyref_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -648,10 +895,10 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_imported_table_grow", i32_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - anyref_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + anyref_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -661,12 +908,12 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_memory32_init", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -676,11 +923,11 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_memory32_copy", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -690,11 +937,11 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_imported_memory32_copy", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -704,11 +951,11 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_memory32_fill", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -718,11 +965,11 @@ impl<'ctx> Intrinsics<'ctx> { "wasmer_vm_imported_memory32_fill", void_ty.fn_type( &[ - ctx_ptr_ty.as_basic_type_enum(), - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, - i32_ty_basic, + ctx_ptr_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, + i32_ty_basic_md, ], false, ), @@ -730,22 +977,22 @@ impl<'ctx> Intrinsics<'ctx> { ), data_drop: module.add_function( "wasmer_vm_data_drop", - void_ty.fn_type(&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic], false), + void_ty.fn_type(&[ctx_ptr_ty_basic_md, i32_ty_basic_md], false), None, ), 
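The constrained floating-point intrinsics registered earlier in this struct (`llvm.experimental.constrained.fadd/fsub/fmul/fdiv/fcmp/fpext/fptrunc.*`) take trailing metadata operands (the rounding mode and exception behaviour, or the comparison predicate for `fcmp`), which is why the function types are now built from `BasicMetadataTypeEnum` and every `build_call` site in `code.rs` converts its arguments with `.into()`. A self-contained sketch of that pattern, mirroring the calls in this diff; the helper name `build_strict_fadd` is invented for illustration and the exact signatures are those of the forked `wasmer_inkwell` crate this change targets:

```rust
use inkwell::{
    builder::Builder,
    context::Context,
    module::Module,
    values::{BasicMetadataValueEnum, FloatValue},
};

// Sketch only: declare and call the constrained fadd intrinsic the same way
// the hunks above do.
fn build_strict_fadd<'ctx>(
    context: &'ctx Context,
    module: &Module<'ctx>,
    builder: &Builder<'ctx>,
    a: FloatValue<'ctx>,
    b: FloatValue<'ctx>,
) -> FloatValue<'ctx> {
    let f32_ty = context.f32_type();
    let md_ty = context.metadata_type();
    // f32 (f32, f32, metadata, metadata): the two value operands plus the
    // rounding-mode and exception-behaviour metadata.
    let fn_ty = f32_ty.fn_type(
        &[f32_ty.into(), f32_ty.into(), md_ty.into(), md_ty.into()],
        false,
    );
    let add_f32 = module.add_function("llvm.experimental.constrained.fadd.f32", fn_ty, None);
    let round_md: BasicMetadataValueEnum = context.metadata_string("round.tonearest").into();
    let fpexcept_md: BasicMetadataValueEnum = context.metadata_string("fpexcept.strict").into();
    builder
        .build_call(add_f32, &[a.into(), b.into(), round_md, fpexcept_md], "")
        .try_as_basic_value()
        .left()
        .unwrap()
        .into_float_value()
}
```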
func_ref: module.add_function( "wasmer_vm_func_ref", - funcref_ty.fn_type(&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic], false), + funcref_ty.fn_type(&[ctx_ptr_ty_basic_md, i32_ty_basic_md], false), None, ), elem_drop: module.add_function( "wasmer_vm_elem_drop", - void_ty.fn_type(&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic], false), + void_ty.fn_type(&[ctx_ptr_ty_basic_md, i32_ty_basic_md], false), None, ), throw_trap: module.add_function( "wasmer_vm_raise_trap", - void_ty.fn_type(&[i32_ty_basic], false), + void_ty.fn_type(&[i32_ty_basic_md], false), None, ), @@ -757,28 +1004,28 @@ impl<'ctx> Intrinsics<'ctx> { // TODO: this i64 is actually a rust usize vmmemory_definition_ptr_ty: context - .struct_type(&[i8_ptr_ty_basic, i32_ty_basic], false) + .struct_type(&[i8_ptr_ty_basic, i32_ty.into()], false) .ptr_type(AddressSpace::Generic), vmmemory_definition_base_element: 0, vmmemory_definition_current_length_element: 1, memory32_grow_ptr_ty: i32_ty .fn_type( - &[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic, i32_ty_basic], + &[ctx_ptr_ty_basic_md, i32_ty_basic_md, i32_ty_basic_md], false, ) .ptr_type(AddressSpace::Generic), imported_memory32_grow_ptr_ty: i32_ty .fn_type( - &[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic, i32_ty_basic], + &[ctx_ptr_ty_basic_md, i32_ty_basic_md, i32_ty_basic_md], false, ) .ptr_type(AddressSpace::Generic), memory32_size_ptr_ty: i32_ty - .fn_type(&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic], false) + .fn_type(&[ctx_ptr_ty_basic_md, i32_ty_basic_md], false) .ptr_type(AddressSpace::Generic), imported_memory32_size_ptr_ty: i32_ty - .fn_type(&[ctx_ptr_ty.as_basic_type_enum(), i32_ty_basic], false) + .fn_type(&[ctx_ptr_ty_basic_md, i32_ty_basic_md], false) .ptr_type(AddressSpace::Generic), ctx_ptr_ty, diff --git a/lib/compiler-singlepass/src/config.rs b/lib/compiler-singlepass/src/config.rs index d788478a9b9..78496e83d40 100644 --- a/lib/compiler-singlepass/src/config.rs +++ b/lib/compiler-singlepass/src/config.rs @@ -38,10 +38,10 @@ impl Singlepass { self } - /// Enable NaN canonicalization. - /// - /// NaN canonicalization is useful when trying to run WebAssembly - /// deterministically across different architectures. + fn enable_nan_canonicalization(&mut self) { + self.enable_nan_canonicalization = true; + } + pub fn canonicalize_nans(&mut self, enable: bool) -> &mut Self { self.enable_nan_canonicalization = enable; self diff --git a/lib/compiler/src/compiler.rs b/lib/compiler/src/compiler.rs index 9ae25798f64..e36641ea400 100644 --- a/lib/compiler/src/compiler.rs +++ b/lib/compiler/src/compiler.rs @@ -37,6 +37,25 @@ pub trait CompilerConfig { // in case they create an IR that they can verify. } + /// Enable NaN canonicalization. + /// + /// NaN canonicalization is useful when trying to run WebAssembly + /// deterministically across different architectures. + #[deprecated(note = "Please use the canonicalize_nans instead")] + fn enable_nan_canonicalization(&mut self) { + // By default we do nothing, each backend will need to customize this + // in case they create an IR that they can verify. + } + + /// Enable NaN canonicalization. + /// + /// NaN canonicalization is useful when trying to run WebAssembly + /// deterministically across different architectures. + fn canonicalize_nans(&mut self, _enable: bool) { + // By default we do nothing, each backend will need to customize this + // in case they create an IR that they can verify. + } + /// Gets the custom compiler config fn compiler(self: Box) -> Box;
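With the trait methods above, embedders switch NaN canonicalization on or off through any `CompilerConfig` instead of the now-deprecated, parameterless `enable_nan_canonicalization`. A hedged usage sketch from the embedder side; crate and type names (`wasmer::Universal`, `wasmer_compiler_cranelift::Cranelift`) are assumed to match this workspace at the time of the change and should be checked against the published API:

```rust
use wasmer::{Store, Universal};
use wasmer_compiler::CompilerConfig;
use wasmer_compiler_cranelift::Cranelift;

// Sketch: build a store whose compiler canonicalizes NaNs, using the new
// CompilerConfig::canonicalize_nans(bool) introduced in this change.
fn store_with_canonical_nans() -> Store {
    let mut compiler = Cranelift::default();
    // Replaces the deprecated enable_nan_canonicalization(); the explicit
    // bool also allows turning canonicalization back off.
    compiler.canonicalize_nans(true);
    Store::new(&Universal::new(compiler).engine())
}
```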