From 69de50a8786bc817e013e77a17ff68d7e1dd58a5 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sat, 10 Feb 2018 16:24:43 +0000
Subject: [PATCH] emit_x64_vector: Add SSE4.1 implementations for VectorZeroExtend

---
 src/backend_x64/emit_x64_vector.cpp | 57 ++++++++++++++++-------------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/src/backend_x64/emit_x64_vector.cpp b/src/backend_x64/emit_x64_vector.cpp
index 7c206005..adc78b61 100644
--- a/src/backend_x64/emit_x64_vector.cpp
+++ b/src/backend_x64/emit_x64_vector.cpp
@@ -703,45 +703,52 @@ void EmitX64::EmitVectorSub64(EmitContext& ctx, IR::Inst* inst) {
     EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubq);
 }
 
-static void EmitVectorZeroExtend(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) {
+void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
     const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
-
-    code.pxor(zeros, zeros);
-    switch (size) {
-    case 8:
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.pmovzxbw(a, a);
+    } else {
+        const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
+        code.pxor(zeros, zeros);
         code.punpcklbw(a, zeros);
-        break;
-    case 16:
-        code.punpcklwd(a, zeros);
-        break;
-    case 32:
-        code.punpckldq(a, zeros);
-        break;
-    case 64:
-        code.punpcklqdq(a, zeros);
-        break;
     }
-
     ctx.reg_alloc.DefineValue(inst, a);
 }
 
-void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorZeroExtend(code, ctx, inst, 8);
-}
-
 void EmitX64::EmitVectorZeroExtend16(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorZeroExtend(code, ctx, inst, 16);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.pmovzxwd(a, a);
+    } else {
+        const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
+        code.pxor(zeros, zeros);
+        code.punpcklwd(a, zeros);
+    }
+    ctx.reg_alloc.DefineValue(inst, a);
 }
 
 void EmitX64::EmitVectorZeroExtend32(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorZeroExtend(code, ctx, inst, 32);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
+        code.pmovzxdq(a, a);
+    } else {
+        const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
+        code.pxor(zeros, zeros);
+        code.punpckldq(a, zeros);
+    }
+    ctx.reg_alloc.DefineValue(inst, a);
 }
 
 void EmitX64::EmitVectorZeroExtend64(EmitContext& ctx, IR::Inst* inst) {
-    EmitVectorZeroExtend(code, ctx, inst, 64);
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
+    code.pxor(zeros, zeros);
+    code.punpcklqdq(a, zeros);
+    ctx.reg_alloc.DefineValue(inst, a);
 }
 
 void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) {