// Patch summary: adds a vector floating-point square-root IR operation, FPVectorSqrt,
// in 32-bit and 64-bit element variants. Five files are touched:
//
//   src/backend/x64/emit_x64_vector_floating_point.cpp
//     Adds EmitX64::EmitFPVectorSqrt32 and EmitX64::EmitFPVectorSqrt64. Each forwards to
//     EmitTwoOpVectorOperation<esize, DefaultIndexer> with a lambda that captures `this`
//     and calls code.sqrtps(result, operand) (32) or code.sqrtpd(result, operand) (64).
//
//   src/frontend/ir/ir_emitter.cpp
//     Adds IREmitter::FPVectorSqrt(esize, a). It switches on esize: 32 returns
//     Inst(Opcode::FPVectorSqrt32, a) and 64 returns Inst(Opcode::FPVectorSqrt64, a).
//     Any other esize falls through to UNREACHABLE(), followed by `return {}`.
//
//   src/frontend/ir/ir_emitter.h
//     Declares U128 FPVectorSqrt(size_t esize, const U128& a) between the existing
//     FPVectorRSqrtStepFused and FPVectorSub declarations.
//
//   src/frontend/ir/microinstruction.cpp
//     Adds Opcode::FPVectorSqrt32 and Opcode::FPVectorSqrt64 to the case list in
//     Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() that ends in `return true`.
//
//   src/frontend/ir/opcodes.inc
//     Adds OPCODE(FPVectorSqrt32, U128, U128) and OPCODE(FPVectorSqrt64, U128, U128),
//     with the same argument list as the neighbouring FPVectorRSqrtEstimate64 row.
//
// NOTE(review): the patch text below has had its line breaks and indentation collapsed
// into single spaces, and one context line of the ir_emitter.h hunk is split across the
// two physical lines. It presumably will not apply as-is; restore the line structure
// first. The @@ hunk counts (6->18, 6->17, 6->7, 6->8, 6->8) agree with the visible content.
diff --git a/src/backend/x64/emit_x64_vector_floating_point.cpp b/src/backend/x64/emit_x64_vector_floating_point.cpp index 0cc3b0b5..f0f9520f 100644 --- a/src/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/backend/x64/emit_x64_vector_floating_point.cpp @@ -1300,6 +1300,18 @@ void EmitX64::EmitFPVectorRSqrtStepFused64(EmitContext& ctx, IR::Inst* inst) { EmitRSqrtStepFused<64>(code, ctx, inst); } +void EmitX64::EmitFPVectorSqrt32(EmitContext& ctx, IR::Inst* inst) { + EmitTwoOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, [this](const Xbyak::Xmm& result, const Xbyak::Xmm& operand) { + code.sqrtps(result, operand); + }); +} + +void EmitX64::EmitFPVectorSqrt64(EmitContext& ctx, IR::Inst* inst) { + EmitTwoOpVectorOperation<64, DefaultIndexer>(code, ctx, inst, [this](const Xbyak::Xmm& result, const Xbyak::Xmm& operand) { + code.sqrtpd(result, operand); + }); +} + void EmitX64::EmitFPVectorSub32(EmitContext& ctx, IR::Inst* inst) { EmitThreeOpVectorOperation<32, DefaultIndexer>(code, ctx, inst, &Xbyak::CodeGenerator::subps); } diff --git a/src/frontend/ir/ir_emitter.cpp b/src/frontend/ir/ir_emitter.cpp index 6452e7f7..c9302970 100644 --- a/src/frontend/ir/ir_emitter.cpp +++ b/src/frontend/ir/ir_emitter.cpp @@ -2243,6 +2243,17 @@ U128 IREmitter::FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& return {}; } +U128 IREmitter::FPVectorSqrt(size_t esize, const U128& a) { + switch (esize) { + case 32: + return Inst(Opcode::FPVectorSqrt32, a); + case 64: + return Inst(Opcode::FPVectorSqrt64, a); + } + UNREACHABLE(); + return {}; +} + U128 IREmitter::FPVectorSub(size_t esize, const U128& a, const U128& b) { switch (esize) { case 32: diff --git a/src/frontend/ir/ir_emitter.h b/src/frontend/ir/ir_emitter.h index f58c6efa..99d0169b 100644 --- a/src/frontend/ir/ir_emitter.h +++ b/src/frontend/ir/ir_emitter.h @@ -344,6 +344,7 @@ public: U128 FPVectorRoundInt(size_t esize, const U128& operand, FP::RoundingMode rounding, bool exact); U128 
FPVectorRSqrtEstimate(size_t esize, const U128& a); U128 FPVectorRSqrtStepFused(size_t esize, const U128& a, const U128& b); + U128 FPVectorSqrt(size_t esize, const U128& a); U128 FPVectorSub(size_t esize, const U128& a, const U128& b); U128 FPVectorToSignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding); U128 FPVectorToUnsignedFixed(size_t esize, const U128& a, size_t fbits, FP::RoundingMode rounding); diff --git a/src/frontend/ir/microinstruction.cpp b/src/frontend/ir/microinstruction.cpp index 36acf6dc..c3849bab 100644 --- a/src/frontend/ir/microinstruction.cpp +++ b/src/frontend/ir/microinstruction.cpp @@ -333,6 +333,8 @@ bool Inst::ReadsFromAndWritesToFPSRCumulativeExceptionBits() const { case Opcode::FPVectorRSqrtEstimate64: case Opcode::FPVectorRSqrtStepFused32: case Opcode::FPVectorRSqrtStepFused64: + case Opcode::FPVectorSqrt32: + case Opcode::FPVectorSqrt64: case Opcode::FPVectorSub32: case Opcode::FPVectorSub64: return true; diff --git a/src/frontend/ir/opcodes.inc b/src/frontend/ir/opcodes.inc index 7306e3d0..256eeba0 100644 --- a/src/frontend/ir/opcodes.inc +++ b/src/frontend/ir/opcodes.inc @@ -564,6 +564,8 @@ OPCODE(FPVectorRSqrtEstimate32, U128, U128 OPCODE(FPVectorRSqrtEstimate64, U128, U128 ) OPCODE(FPVectorRSqrtStepFused32, U128, U128, U128 ) OPCODE(FPVectorRSqrtStepFused64, U128, U128, U128 ) +OPCODE(FPVectorSqrt32, U128, U128 ) +OPCODE(FPVectorSqrt64, U128, U128 ) OPCODE(FPVectorSub32, U128, U128, U128 ) OPCODE(FPVectorSub64, U128, U128, U128 ) OPCODE(FPVectorToSignedFixed32, U128, U128, U8, U8 )