Skip to content

Commit

Permalink
improv(compiler) More float and native call work (133 tests pass now)
Browse files Browse the repository at this point in the history
  • Loading branch information
ptitSeb committed Jan 14, 2022
1 parent 7cbf2fe commit 8d066a1
Show file tree
Hide file tree
Showing 5 changed files with 571 additions and 229 deletions.
22 changes: 12 additions & 10 deletions lib/compiler-singlepass/src/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -709,8 +709,8 @@ impl<'a, M: Machine> FuncGen<'a, M> {

let params: Vec<_> = params.collect();

// Save used GPRs.
self.machine.push_used_gpr();
// Save used GPRs. Preserve correct stack alignment
let mut used_stack = self.machine.push_used_gpr();
let used_gprs = self.machine.get_used_gprs();
for r in used_gprs.iter() {
let content = self.state.register_values[self.machine.index_from_gpr(*r).0].clone();
Expand All @@ -725,7 +725,7 @@ impl<'a, M: Machine> FuncGen<'a, M> {
// Save used XMM registers.
let used_simds = self.machine.get_used_simd();
if used_simds.len() > 0 {
self.machine.push_used_simd();
used_stack += self.machine.push_used_simd();

for r in used_simds.iter().rev() {
let content =
Expand Down Expand Up @@ -757,15 +757,17 @@ impl<'a, M: Machine> FuncGen<'a, M> {
}

// Align stack to 16 bytes.
if self.machine.round_stack_adjust(8) == 8 {
if (self.get_stack_offset() + used_gprs.len() * 8 + used_simds.len() * 8 + stack_offset)
% 16
!= 0
{
if (self.machine.round_stack_adjust(self.get_stack_offset()) + used_stack + stack_offset)
% 16
!= 0
{
if self.machine.round_stack_adjust(8) == 8 {
self.machine.adjust_stack(8);
stack_offset += 8;
self.state.stack_values.push(MachineValue::Undefined);
} else {
self.machine.emit_push(Size::S64, Location::Imm32(0));
}
stack_offset += 8;
self.state.stack_values.push(MachineValue::Undefined);
}

let mut call_movs: Vec<(Location<M::GPR, M::SIMD>, M::GPR)> = vec![];
Expand Down
156 changes: 127 additions & 29 deletions lib/compiler-singlepass/src/emitter_arm64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ pub trait EmitterARM64 {
fn emit_stur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32);
fn emit_ldur(&mut self, sz: Size, reg: Location, addr: GPR, offset: i32);
fn emit_strdb(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
fn emit_stria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32);
fn emit_stpdb(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32);
fn emit_ldpia(&mut self, sz: Size, reg1: Location, reg2: Location, addr: GPR, offset: u32);
Expand Down Expand Up @@ -182,6 +183,10 @@ pub trait EmitterARM64 {
fn emit_fmin(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);
fn emit_fmax(&mut self, sz: Size, src1: Location, src2: Location, dst: Location);

fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location);
fn emit_ucvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location);
fn emit_fcvt(&mut self, sz_in: Size, src: Location, dst: Location);

fn arch_supports_canonicalize_nan(&self) -> bool {
true
}
Expand Down Expand Up @@ -322,10 +327,11 @@ impl EmitterARM64 for Assembler {
unreachable!();
}
let mult = mult as u32;
if mult == 0 {
unreachable!();
match mult {
0 => dynasm!(self ; ldr X(reg), [X(addr)]),
1 => dynasm!(self ; ldr X(reg), [X(addr), X(r2)]),
_ => dynasm!(self ; ldr X(reg), [X(addr), X(r2), LSL mult]),
}
dynasm!(self ; ldr X(reg), [X(addr), X(r2), LSL mult]);
}
(Size::S64, Location::SIMD(reg), Location::Memory(addr, disp)) => {
let reg = reg.into_index() as u32;
Expand Down Expand Up @@ -410,6 +416,21 @@ impl EmitterARM64 for Assembler {
_ => unreachable!(),
}
}
/// Emit a post-indexed STR: store `reg` at `[addr]`, then bump `addr` by
/// `offset` (AArch64 post-index writeback addressing).
/// Only 64-bit GPR and SIMD sources are supported.
fn emit_stria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
    let base = addr.into_index() as u32;
    // dynasm post-index immediates are signed.
    let post = offset as i32;
    match (sz, reg) {
        (Size::S64, Location::GPR(r)) => {
            let r = r.into_index() as u32;
            dynasm!(self ; str X(r), [X(base)], post);
        }
        (Size::S64, Location::SIMD(r)) => {
            let r = r.into_index() as u32;
            dynasm!(self ; str D(r), [X(base)], post);
        }
        _ => unreachable!(),
    }
}
fn emit_ldria(&mut self, sz: Size, reg: Location, addr: GPR, offset: u32) {
match (sz, reg) {
(Size::S64, Location::GPR(reg)) => {
Expand Down Expand Up @@ -1059,6 +1080,10 @@ impl EmitterARM64 for Assembler {
let dst = dst.into_index() as u32;
dynasm!(self ; cmp X(dst), imm as u32);
}
(Size::S64, Location::Imm64(imm), Location::GPR(dst)) => {
let dst = dst.into_index() as u32;
dynasm!(self ; cmp X(dst), imm as u32);
}
(Size::S32, Location::Imm8(imm), Location::GPR(dst)) => {
let dst = dst.into_index() as u32;
dynasm!(self ; cmp W(dst), imm as u32);
Expand Down Expand Up @@ -1809,7 +1834,7 @@ impl EmitterARM64 for Assembler {
}

fn emit_udf(&mut self) {
dynasm!(self ; udf 0);
dynasm!(self ; udf 0x1234);
}
fn emit_dmb(&mut self) {
dynasm!(self ; dmb ish);
Expand Down Expand Up @@ -1983,6 +2008,81 @@ impl EmitterARM64 for Assembler {
),
}
}

/// Emit SCVTF (signed integer to floating point).
/// `sz_in` picks the integer source width (W vs X register view),
/// `sz_out` picks the float destination precision (S vs D register view).
fn emit_scvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) {
    // Resolve register indices up front; any other operand shape is a bug.
    let (gpr, fpr) = match (src, dst) {
        (Location::GPR(g), Location::SIMD(v)) => {
            (g.into_index() as u32, v.into_index() as u32)
        }
        _ => panic!(
            "singlepass can't emit SCVTF {:?} {:?} {:?} {:?}",
            sz_in, src, sz_out, dst
        ),
    };
    match (sz_in, sz_out) {
        (Size::S32, Size::S32) => dynasm!(self ; scvtf S(fpr), W(gpr)),
        (Size::S64, Size::S32) => dynasm!(self ; scvtf S(fpr), X(gpr)),
        (Size::S32, Size::S64) => dynasm!(self ; scvtf D(fpr), W(gpr)),
        (Size::S64, Size::S64) => dynasm!(self ; scvtf D(fpr), X(gpr)),
        _ => panic!(
            "singlepass can't emit SCVTF {:?} {:?} {:?} {:?}",
            sz_in, src, sz_out, dst
        ),
    }
}
/// Emit UCVTF (unsigned integer to floating point).
/// `sz_in` picks the integer source width (W vs X register view),
/// `sz_out` picks the float destination precision (S vs D register view).
fn emit_ucvtf(&mut self, sz_in: Size, src: Location, sz_out: Size, dst: Location) {
    // Resolve register indices up front; any other operand shape is a bug.
    let (gpr, fpr) = match (src, dst) {
        (Location::GPR(g), Location::SIMD(v)) => {
            (g.into_index() as u32, v.into_index() as u32)
        }
        _ => panic!(
            "singlepass can't emit UCVTF {:?} {:?} {:?} {:?}",
            sz_in, src, sz_out, dst
        ),
    };
    match (sz_in, sz_out) {
        (Size::S32, Size::S32) => dynasm!(self ; ucvtf S(fpr), W(gpr)),
        (Size::S64, Size::S32) => dynasm!(self ; ucvtf S(fpr), X(gpr)),
        (Size::S32, Size::S64) => dynasm!(self ; ucvtf D(fpr), W(gpr)),
        (Size::S64, Size::S64) => dynasm!(self ; ucvtf D(fpr), X(gpr)),
        _ => panic!(
            "singlepass can't emit UCVTF {:?} {:?} {:?} {:?}",
            sz_in, src, sz_out, dst
        ),
    }
}
/// Emit FCVT (float precision conversion) between SIMD registers.
///
/// `sz_in` is the *source* precision: `S32` widens f32 -> f64 (D(dst) <- S(src)),
/// `S64` narrows f64 -> f32 (S(dst) <- D(src)).
///
/// # Panics
/// Panics if the operands are not both SIMD registers or `sz_in` is not
/// `S32`/`S64`.
fn emit_fcvt(&mut self, sz_in: Size, src: Location, dst: Location) {
    match (sz_in, src, dst) {
        (Size::S32, Location::SIMD(src), Location::SIMD(dst)) => {
            let src = src.into_index() as u32;
            let dst = dst.into_index() as u32;
            dynasm!(self ; fcvt D(dst), S(src));
        }
        (Size::S64, Location::SIMD(src), Location::SIMD(dst)) => {
            let src = src.into_index() as u32;
            let dst = dst.into_index() as u32;
            dynasm!(self ; fcvt S(dst), D(src));
        }
        // Bug fix: the panic message previously said "UCVTF" — a copy-paste
        // leftover from emit_ucvtf — which would mislead anyone debugging a
        // failed FCVT emission.
        _ => panic!(
            "singlepass can't emit FCVT {:?} {:?} {:?}",
            sz_in, src, dst
        ),
    }
}
}

pub fn gen_std_trampoline_arm64(
Expand All @@ -1992,7 +2092,7 @@ pub fn gen_std_trampoline_arm64(
let mut a = Assembler::new(0);

let fptr = GPR::X26;
let args = GPR::X8;
let args = GPR::X25;

dynasm!(a
; .arch aarch64
Expand All @@ -2004,7 +2104,7 @@ pub fn gen_std_trampoline_arm64(
; mov X(args as u32), x2
);

let stack_args = sig.params().len().saturating_sub(8);
let stack_args = sig.params().len().saturating_sub(7); //1st arg is ctx, not an actual arg
let mut stack_offset = stack_args as u32 * 8;
if stack_args > 0 {
if stack_offset % 16 != 0 {
Expand All @@ -2016,7 +2116,7 @@ pub fn gen_std_trampoline_arm64(

// Move arguments to their locations.
// `callee_vmctx` is already in the first argument register, so no need to move.
for (i, param) in sig.params().iter().enumerate() {
for (i, param) in sig.params().iter().enumerate().rev() {
let sz = match *param {
Type::I32 | Type::F32 => Size::S32,
Type::I64 | Type::F64 => Size::S64,
Expand All @@ -2036,14 +2136,15 @@ pub fn gen_std_trampoline_arm64(
);
}
_ => {
// using X1 as scratch reg, because the for args is going backward
a.emit_ldr(
sz,
Location::GPR(GPR::X18),
Location::GPR(GPR::X1),
Location::Memory(args, (i * 16) as i32),
);
a.emit_str(
sz,
Location::GPR(GPR::X18),
Location::GPR(GPR::X1),
Location::Memory(GPR::XzrSp, (i as i32 - 7) * 8),
)
}
Expand Down Expand Up @@ -2089,11 +2190,11 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
16,
);

if stack_offset < 256 + 16 {
if stack_offset < 0x1000 + 16 {
a.emit_sub(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::Imm8((stack_offset - 16) as _),
Location::Imm32((stack_offset - 16) as _),
Location::GPR(GPR::XzrSp),
);
} else {
Expand Down Expand Up @@ -2177,7 +2278,7 @@ pub fn gen_std_dynamic_import_trampoline_arm64(
}

// Release values array.
if stack_offset < 256 + 16 {
if stack_offset < 0x1000 + 16 {
a.emit_add(
Size::S64,
Location::GPR(GPR::XzrSp),
Expand Down Expand Up @@ -2232,8 +2333,8 @@ pub fn gen_import_call_trampoline_arm64(
let mut param_locations: Vec<Location> = vec![];

// Allocate stack space for arguments.
let stack_offset: i32 = if sig.params().len() > 5 {
5 * 8
let stack_offset: i32 = if sig.params().len() > 7 {
7 * 8
} else {
(sig.params().len() as i32) * 8
};
Expand All @@ -2243,11 +2344,11 @@ pub fn gen_import_call_trampoline_arm64(
stack_offset
};
if stack_offset > 0 {
if stack_offset < 256 {
if stack_offset < 0x1000 {
a.emit_sub(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::Imm8(stack_offset as u8),
Location::Imm32(stack_offset as u32),
Location::GPR(GPR::XzrSp),
);
} else {
Expand Down Expand Up @@ -2278,30 +2379,27 @@ pub fn gen_import_call_trampoline_arm64(
a.emit_str(Size::S64, Location::GPR(PARAM_REGS[i]), loc);
loc
}
_ => Location::Memory(GPR::XzrSp, stack_offset + ((i - 5) * 8) as i32),
_ => Location::Memory(GPR::XzrSp, stack_offset + ((i - 7) * 8) as i32),
};
param_locations.push(loc);
}

// Copy arguments.
let mut caller_stack_offset: i32 = 0;
let mut argalloc = ArgumentRegisterAllocator::default();
argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext
let mut caller_stack_offset: i32 = 0;
for (i, ty) in sig.params().iter().enumerate() {
let prev_loc = param_locations[i];
let targ = match argalloc.next(*ty, calling_convention) {
Some(ARM64Register::GPR(gpr)) => Location::GPR(gpr),
Some(ARM64Register::NEON(neon)) => Location::SIMD(neon),
None => {
// No register can be allocated. Put this argument on the stack.
a.emit_ldr(Size::S64, Location::GPR(GPR::X20), prev_loc);
a.emit_ldr(Size::S64, Location::GPR(GPR::X16), prev_loc);
a.emit_str(
Size::S64,
Location::GPR(GPR::X20),
Location::Memory(
GPR::XzrSp,
stack_offset + 8 + caller_stack_offset,
),
Location::GPR(GPR::X16),
Location::Memory(GPR::XzrSp, stack_offset + caller_stack_offset),
);
caller_stack_offset += 8;
continue;
Expand All @@ -2312,11 +2410,11 @@ pub fn gen_import_call_trampoline_arm64(

// Restore stack pointer.
if stack_offset > 0 {
if stack_offset < 256 {
if stack_offset < 0x1000 {
a.emit_add(
Size::S64,
Location::GPR(GPR::XzrSp),
Location::Imm8(stack_offset as u8),
Location::Imm32(stack_offset as u32),
Location::GPR(GPR::XzrSp),
);
} else {
Expand All @@ -2339,7 +2437,9 @@ pub fn gen_import_call_trampoline_arm64(
let offset = vmoffsets.vmctx_vmfunction_import(index);
// for ldr, offset needs to be a multiple of 8, wich often is not
// so use ldur, but then offset is limited to -255 .. +255. It will be positive here
let offset = if offset > 255 {
let offset = if offset > 0 && offset < 0x1000 {
offset
} else {
a.emit_mov_imm(Location::GPR(GPR::X16), offset as u64);
a.emit_add(
Size::S64,
Expand All @@ -2348,8 +2448,6 @@ pub fn gen_import_call_trampoline_arm64(
Location::GPR(GPR::X0),
);
0
} else {
offset
};
match calling_convention {
_ => {
Expand Down
8 changes: 4 additions & 4 deletions lib/compiler-singlepass/src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ pub trait Machine {
fn reserve_unused_temp_gpr(&mut self, gpr: Self::GPR) -> Self::GPR;
/// reserve a GPR
fn reserve_gpr(&mut self, gpr: Self::GPR);
/// Push used gpr to the stack
fn push_used_gpr(&mut self);
/// Push used gpr to the stack. Return the bytes taken on the stack
fn push_used_gpr(&mut self) -> usize;
/// Pop used gpr to the stack
fn pop_used_gpr(&mut self);
/// Picks an unused SIMD register.
Expand All @@ -100,8 +100,8 @@ pub trait Machine {
fn reserve_simd(&mut self, simd: Self::SIMD);
/// Releases a temporary XMM register.
fn release_simd(&mut self, simd: Self::SIMD);
/// Push used simd regs to the stack
fn push_used_simd(&mut self);
/// Push used simd regs to the stack. Return bytes taken on the stack
fn push_used_simd(&mut self) -> usize;
/// Pop used simd regs to the stack
fn pop_used_simd(&mut self);
/// Return a rounded stack adjustement value (must be multiple of 16bytes on ARM64 for example)
Expand Down
Loading

0 comments on commit 8d066a1

Please sign in to comment.