From 13367a7efde7ca86104f9effcd55b8908654f53d Mon Sep 17 00:00:00 2001 From: MerryMage Date: Thu, 18 Jun 2020 11:27:12 +0100 Subject: [PATCH] A64: Match A32 page_table code Here we increase the similarity between the A64 and A32 front-ends in terms of their page_table handling code. In this commit, we: * Reserve and use r14 as a register to store the page_table pointer. * Align the code to be more similar in structure. * Add a conf member to A32EmitContext. * Remove scratch argument from EmitVAddrLookup. --- src/backend/x64/a32_emit_x64.cpp | 53 +++++------ src/backend/x64/a32_emit_x64.h | 5 +- src/backend/x64/a64_emit_x64.cpp | 140 ++++++++++++++++++------------ src/backend/x64/a64_emit_x64.h | 7 +- src/backend/x64/a64_interface.cpp | 8 +- 5 files changed, 128 insertions(+), 85 deletions(-) diff --git a/src/backend/x64/a32_emit_x64.cpp b/src/backend/x64/a32_emit_x64.cpp index 8a0a3fc0..abfdddfb 100644 --- a/src/backend/x64/a32_emit_x64.cpp +++ b/src/backend/x64/a32_emit_x64.cpp @@ -60,15 +60,15 @@ static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) { ASSERT_FALSE("Should never happen."); } -A32EmitContext::A32EmitContext(RegAlloc& reg_alloc, IR::Block& block) - : EmitContext(reg_alloc, block) {} +A32EmitContext::A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block) + : EmitContext(reg_alloc, block), conf(conf) {} A32::LocationDescriptor A32EmitContext::Location() const { return A32::LocationDescriptor{block.Location()}; } bool A32EmitContext::IsSingleStep() const { - return A32::LocationDescriptor{block.Location()}.SingleStepping(); + return Location().SingleStepping(); } FP::FPCR A32EmitContext::FPCR() const { @@ -105,7 +105,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { }(); RegAlloc reg_alloc{code, A32JitState::SpillCount, SpillToOpArg, gpr_order, any_xmm}; - A32EmitContext ctx{reg_alloc, block}; + A32EmitContext ctx{conf, reg_alloc, block}; // Start emitting. code.align(); @@ -876,11 +876,14 @@ FakeCall A32EmitX64::FastmemCallback(u64 rip_) { return ret; } -static void EmitDetectMisaignedVAddr(BlockOfCode& code, const A32::UserConfig& conf, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg32 vaddr, Xbyak::Reg32 tmp) { - constexpr size_t page_bits = 12; - constexpr size_t page_size = 1 << page_bits; +namespace { - if (bitsize == 8 || (conf.detect_misaligned_access_via_page_table & bitsize) == 0) { +constexpr size_t page_bits = 12; +constexpr size_t page_size = 1 << page_bits; +constexpr size_t page_mask = (1 << page_bits) - 1; + +void EmitDetectMisaignedVAddr(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg32 vaddr, Xbyak::Reg32 tmp) { + if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) { return; } @@ -898,7 +901,7 @@ static void EmitDetectMisaignedVAddr(BlockOfCode& code, const A32::UserConfig& c code.test(vaddr, align_mask); - if (!conf.only_detect_misalignment_via_page_table_on_page_boundary) { + if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) { code.jnz(abort, code.T_NEAR); return; } @@ -920,12 +923,11 @@ static void EmitDetectMisaignedVAddr(BlockOfCode& code, const A32::UserConfig& c code.SwitchToNearCode(); } -static Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, RegAlloc& reg_alloc, const A32::UserConfig& conf, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, std::optional arg_scratch = {}) { - constexpr size_t page_bits = A32::UserConfig::PAGE_BITS; - const Xbyak::Reg64 page = arg_scratch ? *arg_scratch : reg_alloc.ScratchGpr(); - const Xbyak::Reg32 tmp = conf.absolute_offset_page_table ? page.cvt32() : reg_alloc.ScratchGpr().cvt32(); +Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) { + const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32(); - EmitDetectMisaignedVAddr(code, conf, bitsize, abort, vaddr.cvt32(), tmp); + EmitDetectMisaignedVAddr(code, ctx, bitsize, abort, vaddr.cvt32(), tmp); // TODO: This code assumes vaddr has been zext from 32-bits to 64-bits. @@ -933,18 +935,17 @@ static Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, RegAlloc& reg_alloc, con code.shr(tmp, static_cast(page_bits)); code.mov(page, qword[r14 + tmp * sizeof(void*)]); code.test(page, page); - code.jz(abort); - if (conf.absolute_offset_page_table) { + code.jz(abort, code.T_NEAR); + if (ctx.conf.absolute_offset_page_table) { return page + vaddr; } - constexpr size_t page_mask = (1 << page_bits) - 1; code.mov(tmp, vaddr.cvt32()); code.and_(tmp, static_cast(page_mask)); return page + tmp; } template -static void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) { +void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) { switch (bitsize) { case 8: code.movzx(value.cvt32(), code.byte[addr]); @@ -964,7 +965,7 @@ static void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, cons } template -static void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) { +void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) { switch (bitsize) { case 8: code.mov(code.byte[addr], value.cvt8()); @@ -983,6 +984,8 @@ static void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, con } } +} // anonymous namespace + template void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); @@ -1017,14 +1020,14 @@ void A32EmitX64::ReadMemory(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Label abort, end; - const auto src_ptr = EmitVAddrLookup(code, ctx.reg_alloc, conf, bitsize, abort, vaddr, value); + const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); EmitReadMemoryMov(code, value, src_ptr); - code.jmp(end); + code.L(end); code.SwitchToFarCode(); code.L(abort); code.call(wrapped_fn); - code.L(end); + code.jmp(end, code.T_NEAR); code.SwitchToNearCode(); ctx.reg_alloc.DefineValue(inst, value); @@ -1063,14 +1066,14 @@ void A32EmitX64::WriteMemory(A32EmitContext& ctx, IR::Inst* inst) { Xbyak::Label abort, end; - const auto dest_ptr = EmitVAddrLookup(code, ctx.reg_alloc, conf, bitsize, abort, vaddr); + const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); EmitWriteMemoryMov(code, dest_ptr, value); - code.jmp(end); + code.L(end); code.SwitchToFarCode(); code.L(abort); code.call(wrapped_fn); - code.L(end); + code.jmp(end, code.T_NEAR); code.SwitchToNearCode(); } diff --git a/src/backend/x64/a32_emit_x64.h b/src/backend/x64/a32_emit_x64.h index a86f26ce..2b69d7b9 100644 --- a/src/backend/x64/a32_emit_x64.h +++ b/src/backend/x64/a32_emit_x64.h @@ -26,10 +26,13 @@ namespace Dynarmic::Backend::X64 { class RegAlloc; struct A32EmitContext final : public EmitContext { - A32EmitContext(RegAlloc& reg_alloc, IR::Block& block); + A32EmitContext(const A32::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block); + A32::LocationDescriptor Location() const; bool IsSingleStep() const; FP::FPCR FPCR() const override; + + const A32::UserConfig& conf; }; class A32EmitX64 final : public EmitX64 { diff --git a/src/backend/x64/a64_emit_x64.cpp b/src/backend/x64/a64_emit_x64.cpp index 9586254d..8e8adf7e 100644 --- a/src/backend/x64/a64_emit_x64.cpp +++ b/src/backend/x64/a64_emit_x64.cpp @@ -71,7 +71,15 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) { code.EnableWriting(); SCOPE_EXIT { code.DisableWriting(); }; - RegAlloc reg_alloc{code, A64JitState::SpillCount, SpillToOpArg, any_gpr, any_xmm}; + static const std::vector gpr_order = [this]{ + std::vector gprs{any_gpr}; + if (conf.page_table) { + gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14)); + } + return gprs; + }(); + + RegAlloc reg_alloc{code, A64JitState::SpillCount, SpillToOpArg, gpr_order, any_xmm}; A64EmitContext ctx{conf, reg_alloc, block}; // Start emitting. @@ -728,6 +736,7 @@ namespace { constexpr size_t page_bits = 12; constexpr size_t page_size = 1 << page_bits; +constexpr size_t page_mask = (1 << page_bits) - 1; void EmitDetectMisaignedVAddr(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, Xbyak::Reg64 tmp) { if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) { @@ -772,16 +781,15 @@ void EmitDetectMisaignedVAddr(BlockOfCode& code, A64EmitContext& ctx, size_t bit code.SwitchToNearCode(); } -Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, std::optional arg_scratch = {}) { +Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) { const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits; const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits; - const Xbyak::Reg64 page_table = arg_scratch ? *arg_scratch : ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr(); EmitDetectMisaignedVAddr(code, ctx, bitsize, abort, vaddr, tmp); - code.mov(page_table, reinterpret_cast(ctx.conf.page_table)); code.mov(tmp, vaddr); if (unused_top_bits == 0) { code.shr(tmp, int(page_bits)); @@ -799,86 +807,108 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bit code.test(tmp, u32(-(1 << valid_page_index_bits))); code.jnz(abort, code.T_NEAR); } - code.mov(page_table, qword[page_table + tmp * sizeof(void*)]); - code.test(page_table, page_table); + code.mov(page, qword[r14 + tmp * sizeof(void*)]); + code.test(page, page); code.jz(abort, code.T_NEAR); if (ctx.conf.absolute_offset_page_table) { - return page_table + vaddr; + return page + vaddr; } code.mov(tmp, vaddr); - code.and_(tmp, static_cast(page_size - 1)); - return page_table + tmp; + code.and_(tmp, static_cast(page_mask)); + return page + tmp; +} + +template +void EmitReadMemoryMov(BlockOfCode& code, const Xbyak::Reg64& value, const Xbyak::RegExp& addr) { + switch (bitsize) { + case 8: + code.movzx(value.cvt32(), code.byte[addr]); + return; + case 16: + code.movzx(value.cvt32(), word[addr]); + return; + case 32: + code.mov(value.cvt32(), dword[addr]); + return; + case 64: + code.mov(value, qword[addr]); + return; + default: + ASSERT_FALSE("Invalid bitsize"); + } +} + +template +void EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, const Xbyak::Reg64& value) { + switch (bitsize) { + case 8: + code.mov(code.byte[addr], value.cvt8()); + return; + case 16: + code.mov(word[addr], value.cvt16()); + return; + case 32: + code.mov(dword[addr], value.cvt32()); + return; + case 64: + code.mov(qword[addr], value); + return; + default: + ASSERT_FALSE("Invalid bitsize"); + } } } // anonymous namepsace -void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) { - Xbyak::Label abort, end; - +template +void A64EmitX64::EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); const Xbyak::Reg64 value = ctx.reg_alloc.ScratchGpr(); - const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr, value); - switch (bitsize) { - case 8: - code.movzx(value.cvt32(), code.byte[src_ptr]); - break; - case 16: - code.movzx(value.cvt32(), word[src_ptr]); - break; - case 32: - code.mov(value.cvt32(), dword[src_ptr]); - break; - case 64: - code.mov(value, qword[src_ptr]); - break; - } + const auto wrapped_fn = read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; + + Xbyak::Label abort, end; + + const auto src_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); + EmitReadMemoryMov(code, value, src_ptr); code.L(end); code.SwitchToFarCode(); code.L(abort); - code.call(read_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]); + code.call(wrapped_fn); code.jmp(end, code.T_NEAR); code.SwitchToNearCode(); ctx.reg_alloc.DefineValue(inst, value); } -void A64EmitX64::EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize) { - Xbyak::Label abort, end; - +template +void A64EmitX64::EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[0]); const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); + const auto wrapped_fn = write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]; + + Xbyak::Label abort, end; + const auto dest_ptr = EmitVAddrLookup(code, ctx, bitsize, abort, vaddr); - switch (bitsize) { - case 8: - code.mov(code.byte[dest_ptr], value.cvt8()); - break; - case 16: - code.mov(word[dest_ptr], value.cvt16()); - break; - case 32: - code.mov(dword[dest_ptr], value.cvt32()); - break; - case 64: - code.mov(qword[dest_ptr], value); - break; - } + EmitWriteMemoryMov(code, dest_ptr, value); code.L(end); code.SwitchToFarCode(); code.L(abort); - code.call(write_fallbacks[std::make_tuple(bitsize, vaddr.getIdx(), value.getIdx())]); + code.call(wrapped_fn); code.jmp(end, code.T_NEAR); code.SwitchToNearCode(); } void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { if (conf.page_table) { - EmitDirectPageTableMemoryRead(ctx, inst, 8); + EmitDirectPageTableMemoryRead<8>(ctx, inst); return; } @@ -889,7 +919,7 @@ void A64EmitX64::EmitA64ReadMemory8(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) { if (conf.page_table) { - EmitDirectPageTableMemoryRead(ctx, inst, 16); + EmitDirectPageTableMemoryRead<16>(ctx, inst); return; } @@ -900,7 +930,7 @@ void A64EmitX64::EmitA64ReadMemory16(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) { if (conf.page_table) { - EmitDirectPageTableMemoryRead(ctx, inst, 32); + EmitDirectPageTableMemoryRead<32>(ctx, inst); return; } @@ -911,7 +941,7 @@ void A64EmitX64::EmitA64ReadMemory32(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64ReadMemory64(A64EmitContext& ctx, IR::Inst* inst) { if (conf.page_table) { - EmitDirectPageTableMemoryRead(ctx, inst, 64); + EmitDirectPageTableMemoryRead<64>(ctx, inst); return; } @@ -950,7 +980,7 @@ void A64EmitX64::EmitA64ReadMemory128(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { if (conf.page_table) { - EmitDirectPageTableMemoryWrite(ctx, inst, 8); + EmitDirectPageTableMemoryWrite<8>(ctx, inst); return; } @@ -961,7 +991,7 @@ void A64EmitX64::EmitA64WriteMemory8(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) { if (conf.page_table) { - EmitDirectPageTableMemoryWrite(ctx, inst, 16); + EmitDirectPageTableMemoryWrite<16>(ctx, inst); return; } @@ -972,7 +1002,7 @@ void A64EmitX64::EmitA64WriteMemory16(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) { if (conf.page_table) { - EmitDirectPageTableMemoryWrite(ctx, inst, 32); + EmitDirectPageTableMemoryWrite<32>(ctx, inst); return; } @@ -983,7 +1013,7 @@ void A64EmitX64::EmitA64WriteMemory32(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64WriteMemory64(A64EmitContext& ctx, IR::Inst* inst) { if (conf.page_table) { - EmitDirectPageTableMemoryWrite(ctx, inst, 64); + EmitDirectPageTableMemoryWrite<64>(ctx, inst); return; } diff --git a/src/backend/x64/a64_emit_x64.h b/src/backend/x64/a64_emit_x64.h index 7ec7ed80..1090be4c 100644 --- a/src/backend/x64/a64_emit_x64.h +++ b/src/backend/x64/a64_emit_x64.h @@ -5,6 +5,7 @@ #pragma once +#include #include #include @@ -79,8 +80,10 @@ protected: FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr; void GenTerminalHandlers(); - void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize); - void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize); + template + void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst); + template + void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst); template void EmitExclusiveReadMemory(A64EmitContext& ctx, IR::Inst* inst); template diff --git a/src/backend/x64/a64_interface.cpp b/src/backend/x64/a64_interface.cpp index b3d8ceea..b16687d1 100644 --- a/src/backend/x64/a64_interface.cpp +++ b/src/backend/x64/a64_interface.cpp @@ -33,8 +33,12 @@ static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*Lo }; } -static std::function GenRCP(const A64::UserConfig&) { - return [](BlockOfCode&){}; +static std::function GenRCP(const A64::UserConfig& conf) { + return [conf](BlockOfCode& code) { + if (conf.page_table) { + code.mov(code.r14, Common::BitCast(conf.page_table)); + } + }; } struct Jit::Impl final {