From 761e95eec01771dc358f425dd5c8c6bcb45c89f9 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Mon, 6 Jul 2020 21:01:24 +0100
Subject: [PATCH] A64: Add unsafe_optimizations option

* Strength reduce FMA unsafely
---
 include/dynarmic/A64/config.h                      |  3 +++
 src/backend/x64/a64_emit_x64.h                     |  2 ++
 src/backend/x64/emit_x64.h                         |  2 ++
 src/backend/x64/emit_x64_floating_point.cpp        | 14 ++++++++++++++
 src/backend/x64/emit_x64_vector_floating_point.cpp | 14 ++++++++++++++
 5 files changed, 35 insertions(+)

diff --git a/include/dynarmic/A64/config.h b/include/dynarmic/A64/config.h
index 04a09f07..3c413efb 100644
--- a/include/dynarmic/A64/config.h
+++ b/include/dynarmic/A64/config.h
@@ -138,6 +138,9 @@ struct UserConfig {
         return (f & optimizations) != no_optimizations;
     }
 
+    /// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
+    bool unsafe_optimizations = false;
+
     /// When set to true, UserCallbacks::DataCacheOperationRaised will be called when any
     /// data cache instruction is executed. Notably DC ZVA will not implicitly do anything.
     /// When set to false, UserCallbacks::DataCacheOperationRaised will never be called.
diff --git a/src/backend/x64/a64_emit_x64.h b/src/backend/x64/a64_emit_x64.h
index 03d57cb3..0730e6b8 100644
--- a/src/backend/x64/a64_emit_x64.h
+++ b/src/backend/x64/a64_emit_x64.h
@@ -29,6 +29,8 @@ struct A64EmitContext final : public EmitContext {
     bool IsSingleStep() const;
     FP::FPCR FPCR(bool fpcr_controlled = true) const override;
 
+    bool UnsafeOptimizations() const override { return conf.unsafe_optimizations; }
+
     const A64::UserConfig& conf;
 };
 
diff --git a/src/backend/x64/emit_x64.h b/src/backend/x64/emit_x64.h
index f6cb486b..2266c032 100644
--- a/src/backend/x64/emit_x64.h
+++ b/src/backend/x64/emit_x64.h
@@ -51,6 +51,8 @@ struct EmitContext {
 
     virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
 
+    virtual bool UnsafeOptimizations() const { return false; }
+
     RegAlloc& reg_alloc;
     IR::Block& block;
 };
diff --git a/src/backend/x64/emit_x64_floating_point.cpp b/src/backend/x64/emit_x64_floating_point.cpp
index 78eec783..2f170bbe 100644
--- a/src/backend/x64/emit_x64_floating_point.cpp
+++ b/src/backend/x64/emit_x64_floating_point.cpp
@@ -637,6 +637,20 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             ctx.reg_alloc.DefineValue(inst, result);
             return;
         }
+
+        if (ctx.UnsafeOptimizations()) {
+            auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
+            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
+
+            FCODE(muls)(operand2, operand3);
+            FCODE(adds)(operand1, operand2);
+
+            ctx.reg_alloc.DefineValue(inst, operand1);
+            return;
+        }
     }
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp
index 038fd0ac..e1ac36a2 100644
--- a/src/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/src/backend/x64/emit_x64_vector_floating_point.cpp
@@ -1021,6 +1021,20 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
             ctx.reg_alloc.DefineValue(inst, result);
             return;
         }
+
+        if (ctx.UnsafeOptimizations()) {
+            auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
+            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
+
+            FCODE(mulp)(operand2, operand3);
+            FCODE(addp)(operand1, operand2);
+
+            ctx.reg_alloc.DefineValue(inst, operand1);
+            return;
+        }
     }
 
     EmitFourOpFallback(code, ctx, inst, fallback_fn);
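
For context (not part of the patch): an embedder opts in through the new
UserConfig field. A minimal sketch, assuming the usual dynarmic embedding
boilerplate; MyCallbacks is a hypothetical stand-in for the embedder's
Dynarmic::A64::UserCallbacks implementation:

    #include <dynarmic/A64/a64.h>
    #include <dynarmic/A64/config.h>

    // Hypothetical UserCallbacks implementation supplied by the embedder.
    MyCallbacks callbacks;

    Dynarmic::A64::UserConfig config;
    config.callbacks = &callbacks;
    config.unsafe_optimizations = true;  // trade emulation accuracy for speed

    Dynarmic::A64::Jit jit{config};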
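Why this optimization is "unsafe": the guest's FMADD/FMLA semantics round
once, while the strength-reduced mul+add emitted above rounds twice. A
standalone sketch (not dynarmic code) of an input where the two disagree:

    #include <cmath>
    #include <cstdio>

    int main() {
        const double x = 1.0 + 0x1p-27;    // x*x == 1 + 2^-26 + 2^-54 exactly
        const double y = -(1.0 + 0x1p-26); // negation of x*x after one rounding

        const double fused = std::fma(x, x, y); // rounds once:  0x1p-54
        const double split = x * x + y;         // rounds twice: 0.0

        std::printf("fused = %a\nsplit = %a\n", fused, split);
    }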