diff --git a/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/backend/x64/emit_x64_vector.cpp index 871aa2e4..6b1549c1 100644 --- a/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -439,6 +439,17 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) { EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pand); } +void EmitX64::EmitVectorAndNot(EmitContext& ctx, IR::Inst* inst) { + auto args = ctx.reg_alloc.GetArgumentInfo(inst); + + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); + + code.pandn(xmm_b, xmm_a); + + ctx.reg_alloc.DefineValue(inst, xmm_b); +} + static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) { if (code.HasHostFeature(HostFeature::GFNI)) { const u64 shift_matrix = shift_amount < 8 diff --git a/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp b/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp index 49a655c0..dd485a63 100644 --- a/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp +++ b/src/dynarmic/frontend/A32/translate/impl/asimd_three_regs.cpp @@ -318,7 +318,7 @@ bool TranslatorVisitor::asimd_VAND_reg(bool D, size_t Vn, size_t Vd, bool N, boo bool TranslatorVisitor::asimd_VBIC_reg(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) { return BitwiseInstruction(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_n, const auto& reg_m) { - return ir.VectorAnd(reg_n, ir.VectorNot(reg_m)); + return ir.VectorAndNot(reg_n, reg_m); }); } @@ -342,19 +342,19 @@ bool TranslatorVisitor::asimd_VEOR_reg(bool D, size_t Vn, size_t Vd, bool N, boo bool TranslatorVisitor::asimd_VBSL(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) { return BitwiseInstruction(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) { - return 
ir.VectorOr(ir.VectorAnd(reg_n, reg_d), ir.VectorAnd(reg_m, ir.VectorNot(reg_d))); + return ir.VectorOr(ir.VectorAnd(reg_n, reg_d), ir.VectorAndNot(reg_m, reg_d)); }); } bool TranslatorVisitor::asimd_VBIT(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) { return BitwiseInstruction(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) { - return ir.VectorOr(ir.VectorAnd(reg_n, reg_m), ir.VectorAnd(reg_d, ir.VectorNot(reg_m))); + return ir.VectorOr(ir.VectorAnd(reg_n, reg_m), ir.VectorAndNot(reg_d, reg_m)); }); } bool TranslatorVisitor::asimd_VBIF(bool D, size_t Vn, size_t Vd, bool N, bool Q, bool M, size_t Vm) { return BitwiseInstruction(*this, D, Vn, Vd, N, Q, M, Vm, [this](const auto& reg_d, const auto& reg_n, const auto& reg_m) { - return ir.VectorOr(ir.VectorAnd(reg_d, reg_m), ir.VectorAnd(reg_n, ir.VectorNot(reg_m))); + return ir.VectorOr(ir.VectorAnd(reg_d, reg_m), ir.VectorAndNot(reg_n, reg_m)); }); } diff --git a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp index e5ea0e22..a8cf1b40 100644 --- a/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp +++ b/src/dynarmic/frontend/A32/translate/impl/asimd_two_regs_shift.cpp @@ -177,7 +177,7 @@ bool TranslatorVisitor::asimd_VSRI(bool D, size_t imm6, size_t Vd, bool L, bool const auto shifted = ir.VectorLogicalShiftRight(esize, reg_m, static_cast<u8>(shift_amount)); const auto mask_vec = ir.VectorBroadcast(esize, I(esize, mask)); - const auto result = ir.VectorOr(ir.VectorAnd(reg_d, ir.VectorNot(mask_vec)), shifted); + const auto result = ir.VectorOr(ir.VectorAndNot(reg_d, mask_vec), shifted); ir.SetVector(d, result); return true; @@ -203,7 +203,7 @@ bool TranslatorVisitor::asimd_VSLI(bool D, size_t imm6, size_t Vd, bool L, bool const auto shifted = ir.VectorLogicalShiftLeft(esize, reg_m, static_cast<u8>(shift_amount)); const auto mask_vec = ir.VectorBroadcast(esize, 
I(esize, mask)); - const auto result = ir.VectorOr(ir.VectorAnd(reg_d, ir.VectorNot(mask_vec)), shifted); + const auto result = ir.VectorOr(ir.VectorAndNot(reg_d, mask_vec), shifted); ir.SetVector(d, result); return true; diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_crypto_four_register.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_crypto_four_register.cpp index 0e1bae79..8464bba5 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_crypto_four_register.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_crypto_four_register.cpp @@ -23,7 +23,7 @@ bool TranslatorVisitor::BCAX(Vec Vm, Vec Va, Vec Vn, Vec Vd) { const IR::U128 m = ir.GetQ(Vm); const IR::U128 n = ir.GetQ(Vn); - const IR::U128 result = ir.VectorEor(n, ir.VectorAnd(m, ir.VectorNot(a))); + const IR::U128 result = ir.VectorEor(n, ir.VectorAndNot(m, a)); ir.SetQ(Vd, result); return true; diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp index 6ffc7f6d..f39ccd0f 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_shift_by_immediate.cpp @@ -350,7 +350,7 @@ bool TranslatorVisitor::SRI_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) const IR::U128 shifted = ir.VectorLogicalShiftRight(esize, operand1, shift_amount); const IR::U128 mask_vec = ir.VectorBroadcast(esize, I(esize, mask)); - const IR::U128 result = ir.VectorOr(ir.VectorAnd(operand2, ir.VectorNot(mask_vec)), shifted); + const IR::U128 result = ir.VectorOr(ir.VectorAndNot(operand2, mask_vec), shifted); V(datasize, Vd, result); return true; @@ -376,7 +376,7 @@ bool TranslatorVisitor::SLI_2(bool Q, Imm<4> immh, Imm<3> immb, Vec Vn, Vec Vd) const IR::U128 shifted = ir.VectorLogicalShiftLeft(esize, operand1, shift_amount); const IR::U128 mask_vec = ir.VectorBroadcast(esize, I(esize, mask)); - const IR::U128 result = 
ir.VectorOr(ir.VectorAnd(operand2, ir.VectorNot(mask_vec)), shifted); + const IR::U128 result = ir.VectorOr(ir.VectorAndNot(operand2, mask_vec), shifted); V(datasize, Vd, result); return true; diff --git a/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp b/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp index 34939c83..5bcf3c73 100644 --- a/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp +++ b/src/dynarmic/frontend/A64/translate/impl/simd_three_same.cpp @@ -773,7 +773,7 @@ bool TranslatorVisitor::BIC_asimd_reg(bool Q, Vec Vm, Vec Vn, Vec Vd) { const IR::U128 operand1 = V(datasize, Vn); const IR::U128 operand2 = V(datasize, Vm); - IR::U128 result = ir.VectorAnd(operand1, ir.VectorNot(operand2)); + IR::U128 result = ir.VectorAndNot(operand1, operand2); if (datasize == 64) { result = ir.VectorZeroUpper(result); } diff --git a/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/ir/ir_emitter.cpp index 4f13c318..84f72203 100644 --- a/src/dynarmic/ir/ir_emitter.cpp +++ b/src/dynarmic/ir/ir_emitter.cpp @@ -958,6 +958,10 @@ U128 IREmitter::VectorAnd(const U128& a, const U128& b) { return Inst(Opcode::VectorAnd, a, b); } +U128 IREmitter::VectorAndNot(const U128& a, const U128& b) { + return Inst(Opcode::VectorAndNot, a, b); +} + U128 IREmitter::VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount) { switch (esize) { case 8: diff --git a/src/dynarmic/ir/ir_emitter.h b/src/dynarmic/ir/ir_emitter.h index f4cf66fd..2814f5ed 100644 --- a/src/dynarmic/ir/ir_emitter.h +++ b/src/dynarmic/ir/ir_emitter.h @@ -240,6 +240,7 @@ public: U128 VectorAbs(size_t esize, const U128& a); U128 VectorAdd(size_t esize, const U128& a, const U128& b); U128 VectorAnd(const U128& a, const U128& b); + U128 VectorAndNot(const U128& a, const U128& b); U128 VectorArithmeticShiftRight(size_t esize, const U128& a, u8 shift_amount); U128 VectorArithmeticVShift(size_t esize, const U128& a, const U128& b); U128 VectorBroadcast(size_t esize, const UAny& a); 
diff --git a/src/dynarmic/ir/opcodes.inc b/src/dynarmic/ir/opcodes.inc index 666f6fa0..57e4ba33 100644 --- a/src/dynarmic/ir/opcodes.inc +++ b/src/dynarmic/ir/opcodes.inc @@ -291,6 +291,7 @@ OPCODE(VectorAdd16, U128, U128 OPCODE(VectorAdd32, U128, U128, U128 ) OPCODE(VectorAdd64, U128, U128, U128 ) OPCODE(VectorAnd, U128, U128, U128 ) +OPCODE(VectorAndNot, U128, U128, U128 ) OPCODE(VectorArithmeticShiftRight8, U128, U128, U8 ) OPCODE(VectorArithmeticShiftRight16, U128, U128, U8 ) OPCODE(VectorArithmeticShiftRight32, U128, U128, U8 )