Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JitArm64: Fix frsqrte misclassifying large negative inputs as NaN #13241

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,8 @@ void JitArm64::GenerateFrsqrte()
RET();

SetJumpTarget(not_positive_normal_not_zero);
FixupBranch nan_or_inf = TBNZ(ARM64Reg::X1, 62);
FixupBranch negative = TBNZ(ARM64Reg::X1, 63);
TBNZ(ARM64Reg::X1, 62, done); // Branch to done if NaN

// "Normalize" denormal values.
// The simplified calculation used here results in the upper 11 bits being incorrect,
Expand All @@ -375,12 +375,11 @@ void JitArm64::GenerateFrsqrte()
BFI(ARM64Reg::X1, ARM64Reg::X3, 52, 12);
B(positive_normal);

SetJumpTarget(nan_or_inf);
SetJumpTarget(negative);
MOVI2R(ARM64Reg::X2, std::bit_cast<u64>(-std::numeric_limits<double>::infinity()));
CMP(ARM64Reg::X1, ARM64Reg::X2);
B(CCFlags::CC_NEQ, done);
B(CCFlags::CC_HI, done); // Branch to done if NaN

SetJumpTarget(negative);
TBNZ(ARM64Reg::W3, 9, done);
ORRI2R(ARM64Reg::W3, ARM64Reg::W3, FPSCR_FX | FPSCR_VXSQRT, ARM64Reg::W2);
B(store_fpscr);
Expand Down
37 changes: 34 additions & 3 deletions Source/UnitTests/Core/PowerPC/Jit64Common/Frsqrte.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,51 @@ TEST(Jit64, Frsqrte)
Core::DeclareAsCPUThread();
Common::ScopeGuard cpu_thread_guard([] { Core::UndeclareAsCPUThread(); });

TestCommonAsmRoutines routines(Core::System::GetInstance());
Core::System& system = Core::System::GetInstance();
TestCommonAsmRoutines routines(system);

UReg_FPSCR fpscr;

for (const u64 ivalue : double_test_values)
{
SCOPED_TRACE(fmt::format("frsqrte input: {:016x}\n", ivalue));

double dvalue = std::bit_cast<double>(ivalue);

u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));

u64 actual = routines.wrapped_frsqrte(ivalue, fpscr);

fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);

EXPECT_EQ(expected, actual);

for (u32 zx = 0; zx < 2; ++zx)
{
for (u32 vxsqrt = 0; vxsqrt < 2; ++vxsqrt)
{
fpscr = {};
fpscr.ZX = zx;
fpscr.VXSQRT = vxsqrt;

routines.wrapped_frsqrte(ivalue, fpscr);

const u32 value_class = Common::ClassifyDouble(dvalue);

const bool input_is_zero =
value_class == Common::PPC_FPCLASS_NZ || value_class == Common::PPC_FPCLASS_PZ;
const bool input_is_negative = value_class == Common::PPC_FPCLASS_NN ||
value_class == Common::PPC_FPCLASS_ND ||
value_class == Common::PPC_FPCLASS_NINF;

const bool zx_expected = input_is_zero || zx;
const bool vxsqrt_expected = input_is_negative || vxsqrt;
const bool fx_expected = (input_is_zero && !zx) || (input_is_negative && !vxsqrt);

const u32 fpscr_expected = (zx_expected ? FPSCR_ZX : 0) |
(vxsqrt_expected ? FPSCR_VXSQRT : 0) |
(fx_expected ? FPSCR_FX : 0);

EXPECT_EQ(fpscr_expected, fpscr.Hex);
}
}
}
}
26 changes: 22 additions & 4 deletions Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,36 @@ TEST(JitArm64, Fres)
Core::DeclareAsCPUThread();
Common::ScopeGuard cpu_thread_guard([] { Core::UndeclareAsCPUThread(); });

TestFres test(Core::System::GetInstance());
Core::System& system = Core::System::GetInstance();
TestFres test(system);

for (const u64 ivalue : double_test_values)
{
SCOPED_TRACE(fmt::format("fres input: {:016x}\n", ivalue));

const double dvalue = std::bit_cast<double>(ivalue);

const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocal(dvalue));
const u64 actual = test.fres(ivalue);

if (expected != actual)
fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);

EXPECT_EQ(expected, actual);

for (u32 zx = 0; zx < 2; ++zx)
{
UReg_FPSCR& fpscr = system.GetPPCState().fpscr;
fpscr = {};
fpscr.ZX = zx;

test.fres(ivalue);

const bool input_is_zero = (ivalue & (Common::DOUBLE_EXP | Common::DOUBLE_FRAC)) == 0;

const bool zx_expected = input_is_zero || zx;
const bool fx_expected = input_is_zero && !zx;

const u32 fpscr_expected = (zx_expected ? FPSCR_ZX : 0) | (fx_expected ? FPSCR_FX : 0);

EXPECT_EQ(fpscr_expected, fpscr.Hex);
}
}
}
39 changes: 35 additions & 4 deletions Source/UnitTests/Core/PowerPC/JitArm64/Frsqrte.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,49 @@ TEST(JitArm64, Frsqrte)
Core::DeclareAsCPUThread();
Common::ScopeGuard cpu_thread_guard([] { Core::UndeclareAsCPUThread(); });

TestFrsqrte test(Core::System::GetInstance());
Core::System& system = Core::System::GetInstance();
TestFrsqrte test(system);

for (const u64 ivalue : double_test_values)
{
SCOPED_TRACE(fmt::format("frsqrte input: {:016x}\n", ivalue));

const double dvalue = std::bit_cast<double>(ivalue);

const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocalSquareRoot(dvalue));
const u64 actual = test.frsqrte(ivalue);

if (expected != actual)
fmt::print("{:016x} -> {:016x} == {:016x}\n", ivalue, actual, expected);

EXPECT_EQ(expected, actual);

for (u32 zx = 0; zx < 2; ++zx)
{
for (u32 vxsqrt = 0; vxsqrt < 2; ++vxsqrt)
{
UReg_FPSCR& fpscr = system.GetPPCState().fpscr;
fpscr = {};
fpscr.ZX = zx;
fpscr.VXSQRT = vxsqrt;

test.frsqrte(ivalue);

const u32 value_class = Common::ClassifyDouble(dvalue);

const bool input_is_zero =
value_class == Common::PPC_FPCLASS_NZ || value_class == Common::PPC_FPCLASS_PZ;
const bool input_is_negative = value_class == Common::PPC_FPCLASS_NN ||
value_class == Common::PPC_FPCLASS_ND ||
value_class == Common::PPC_FPCLASS_NINF;

const bool zx_expected = input_is_zero || zx;
const bool vxsqrt_expected = input_is_negative || vxsqrt;
const bool fx_expected = (input_is_zero && !zx) || (input_is_negative && !vxsqrt);

const u32 fpscr_expected = (zx_expected ? FPSCR_ZX : 0) |
(vxsqrt_expected ? FPSCR_VXSQRT : 0) |
(fx_expected ? FPSCR_FX : 0);

EXPECT_EQ(fpscr_expected, fpscr.Hex);
}
}
}
}