/* This file is part of the dynarmic project.
 * Copyright (c) 2022 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <optional>

#include <xbyak/xbyak.h>

#include "dynarmic/backend/x64/a32_emit_x64.h"
#include "dynarmic/backend/x64/a64_emit_x64.h"
#include "dynarmic/backend/x64/exclusive_monitor_friend.h"
#include "dynarmic/common/spin_lock_x64.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/acc_type.h"
namespace Dynarmic::Backend::X64 {

namespace {

using namespace Xbyak::util;

constexpr size_t page_bits = 12;
constexpr size_t page_size = 1 << page_bits;
constexpr size_t page_mask = (1 << page_bits) - 1;
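
// Emits a check of the low bits of vaddr against the natural alignment for an access of
// `bitsize` bits and branches to `abort` on misalignment. When
// only_detect_misalignment_via_page_table_on_page_boundary is set, only misaligned accesses
// that sit at the very end of a page (and so may straddle a page boundary) fall through into
// the abort handler; that boundary test is emitted in far code to keep the hot path short.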
template<typename EmitContext>
void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, Xbyak::Reg64 tmp) {
    if (bitsize == 8 || (ctx.conf.detect_misaligned_access_via_page_table & bitsize) == 0) {
        return;
    }

    const u32 align_mask = [bitsize]() -> u32 {
        switch (bitsize) {
        case 16:
            return 0b1;
        case 32:
            return 0b11;
        case 64:
            return 0b111;
        case 128:
            return 0b1111;
        default:
            UNREACHABLE();
        }
    }();

    code.test(vaddr, align_mask);

    if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
        code.jnz(abort, code.T_NEAR);
        return;
    }

    const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;

    Xbyak::Label detect_boundary, resume;

    code.jnz(detect_boundary, code.T_NEAR);
    code.L(resume);

    code.SwitchToFarCode();
    code.L(detect_boundary);
    code.mov(tmp, vaddr);
    code.and_(tmp, page_align_mask);
    code.cmp(tmp, page_align_mask);
    code.jne(resume, code.T_NEAR);
    // NOTE: We expect to fallthrough into abort code here.
    code.SwitchToNearCode();
}
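
// Translates a guest virtual address into a host pointer via the page table held in r14 and
// returns an address expression for the host location. Branches to `abort` if the page entry is
// null (or fails the pointer-mask test). The A32 specialisation assumes vaddr has already been
// zero-extended from 32 bits.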
template<typename EmitContext>
Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr);

template<>
[[maybe_unused]] Xbyak::RegExp EmitVAddrLookup<A32EmitContext>(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
    const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
    const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32();

    EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp.cvt64());

    // TODO: This code assumes vaddr has been zext from 32-bits to 64-bits.

    code.mov(tmp, vaddr.cvt32());
    code.shr(tmp, static_cast<int>(page_bits));

    code.mov(page, qword[r14 + tmp.cvt64() * sizeof(void*)]);
    if (ctx.conf.page_table_pointer_mask_bits == 0) {
        code.test(page, page);
    } else {
        code.and_(page, ~u32(0) << ctx.conf.page_table_pointer_mask_bits);
    }
    code.jz(abort, code.T_NEAR);
    if (ctx.conf.absolute_offset_page_table) {
        return page + vaddr;
    }
    code.mov(tmp, vaddr.cvt32());
    code.and_(tmp, static_cast<u32>(page_mask));
    return page + tmp.cvt64();
}
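
// The A64 variant additionally has to respect page_table_address_space_bits: depending on
// silently_mirror_page_table, addresses above the configured address space are either wrapped
// (the unused top bits are masked off) or rejected by branching to `abort`.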
template<>
[[maybe_unused]] Xbyak::RegExp EmitVAddrLookup<A64EmitContext>(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
    const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
    const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;

    const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
    const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr();

    EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp);

    if (unused_top_bits == 0) {
        code.mov(tmp, vaddr);
        code.shr(tmp, int(page_bits));
    } else if (ctx.conf.silently_mirror_page_table) {
        if (valid_page_index_bits >= 32) {
            if (code.HasHostFeature(HostFeature::BMI2)) {
                const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr();
                code.mov(bit_count, unused_top_bits);
                code.bzhi(tmp, vaddr, bit_count);
                code.shr(tmp, int(page_bits));
                ctx.reg_alloc.Release(bit_count);
            } else {
                code.mov(tmp, vaddr);
                code.shl(tmp, int(unused_top_bits));
                code.shr(tmp, int(unused_top_bits + page_bits));
            }
        } else {
            code.mov(tmp, vaddr);
            code.shr(tmp, int(page_bits));
            code.and_(tmp, u32((1 << valid_page_index_bits) - 1));
        }
    } else {
        ASSERT(valid_page_index_bits < 32);
        code.mov(tmp, vaddr);
        code.shr(tmp, int(page_bits));
        code.test(tmp, u32(-(1 << valid_page_index_bits)));
        code.jnz(abort, code.T_NEAR);
    }
    code.mov(page, qword[r14 + tmp * sizeof(void*)]);
    if (ctx.conf.page_table_pointer_mask_bits == 0) {
        code.test(page, page);
    } else {
        code.and_(page, ~u32(0) << ctx.conf.page_table_pointer_mask_bits);
    }
    code.jz(abort, code.T_NEAR);
    if (ctx.conf.absolute_offset_page_table) {
        return page + vaddr;
    }
    code.mov(tmp, vaddr);
    code.and_(tmp, static_cast<u32>(page_mask));
    return page + tmp;
}
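
// Computes the host address for a fastmem access: the guest vaddr is added to the fastmem base
// held in r13. The A64 variant masks or bounds-checks the address according to
// fastmem_address_space_bits and silently_mirror_fastmem, setting require_abort_handling
// whenever a check that can branch to `abort` is emitted.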
template<typename EmitContext>
Xbyak::RegExp EmitFastmemVAddr(BlockOfCode& code, EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, bool& require_abort_handling, std::optional<Xbyak::Reg64> tmp = std::nullopt);

template<>
[[maybe_unused]] Xbyak::RegExp EmitFastmemVAddr<A32EmitContext>(BlockOfCode&, A32EmitContext&, Xbyak::Label&, Xbyak::Reg64 vaddr, bool&, std::optional<Xbyak::Reg64>) {
    return r13 + vaddr;
}

template<>
[[maybe_unused]] Xbyak::RegExp EmitFastmemVAddr<A64EmitContext>(BlockOfCode& code, A64EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, bool& require_abort_handling, std::optional<Xbyak::Reg64> tmp) {
    const size_t unused_top_bits = 64 - ctx.conf.fastmem_address_space_bits;

    if (unused_top_bits == 0) {
        return r13 + vaddr;
    } else if (ctx.conf.silently_mirror_fastmem) {
        if (!tmp) {
            tmp = ctx.reg_alloc.ScratchGpr();
        }
        if (unused_top_bits < 32) {
            code.mov(*tmp, vaddr);
            code.shl(*tmp, int(unused_top_bits));
            code.shr(*tmp, int(unused_top_bits));
        } else if (unused_top_bits == 32) {
            code.mov(tmp->cvt32(), vaddr.cvt32());
        } else {
            code.mov(tmp->cvt32(), vaddr.cvt32());
            code.and_(*tmp, u32((1 << ctx.conf.fastmem_address_space_bits) - 1));
        }
        return r13 + *tmp;
    } else {
        if (ctx.conf.fastmem_address_space_bits < 32) {
            code.test(vaddr, u32(-(1 << ctx.conf.fastmem_address_space_bits)));
            code.jnz(abort, code.T_NEAR);
            require_abort_handling = true;
        } else {
            // TODO: Consider having TEST as above but coalesce 64-bit constant in register allocator
            if (!tmp) {
                tmp = ctx.reg_alloc.ScratchGpr();
            }
            code.mov(*tmp, vaddr);
            code.shr(*tmp, int(ctx.conf.fastmem_address_space_bits));
            code.jnz(abort, code.T_NEAR);
            require_abort_handling = true;
        }
        return r13 + vaddr;
    }
}
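
// Emits the actual memory load into the register (or XMM register) identified by value_idx and
// returns the code address of the emitted access (the location recorded for fastmem fault
// patching). Ordered loads are synthesised from atomic read-modify-writes that leave memory
// unchanged: `lock xadd` with a zeroed register for 8..64 bits, and `lock cmpxchg16b` with
// zeroed rax/rbx/rcx/rdx for 128 bits, which provides the required acquire ordering.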
template<std::size_t bitsize>
const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr, bool ordered) {
    if (ordered) {
        if constexpr (bitsize != 128) {
            code.xor_(Xbyak::Reg32{value_idx}, Xbyak::Reg32{value_idx});
        } else {
            code.xor_(eax, eax);
            code.xor_(ebx, ebx);
            code.xor_(ecx, ecx);
            code.xor_(edx, edx);
        }

        const void* fastmem_location = code.getCurr();
        switch (bitsize) {
        case 8:
            code.lock();
            code.xadd(code.byte[addr], Xbyak::Reg32{value_idx}.cvt8());
            break;
        case 16:
            code.lock();
            code.xadd(word[addr], Xbyak::Reg16{value_idx});
            break;
        case 32:
            code.lock();
            code.xadd(dword[addr], Xbyak::Reg32{value_idx});
            break;
        case 64:
            code.lock();
            code.xadd(qword[addr], Xbyak::Reg64{value_idx});
            break;
        case 128:
            code.lock();
            code.cmpxchg16b(xword[addr]);
            if (code.HasHostFeature(HostFeature::SSE41)) {
                code.movq(Xbyak::Xmm{value_idx}, rax);
                code.pinsrq(Xbyak::Xmm{value_idx}, rdx, 1);
            } else {
                code.movq(Xbyak::Xmm{value_idx}, rax);
                code.movq(xmm0, rdx);
                code.punpcklqdq(Xbyak::Xmm{value_idx}, xmm0);
            }
            break;
        default:
            ASSERT_FALSE("Invalid bitsize");
        }
        return fastmem_location;
    }

    const void* fastmem_location = code.getCurr();
    switch (bitsize) {
    case 8:
        code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]);
        break;
    case 16:
        code.movzx(Xbyak::Reg32{value_idx}, word[addr]);
        break;
    case 32:
        code.mov(Xbyak::Reg32{value_idx}, dword[addr]);
        break;
    case 64:
        code.mov(Xbyak::Reg64{value_idx}, qword[addr]);
        break;
    case 128:
        code.movups(Xbyak::Xmm{value_idx}, xword[addr]);
        break;
    default:
        ASSERT_FALSE("Invalid bitsize");
    }
    return fastmem_location;
}
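
// Emits the memory store for the value in the register (or XMM register) identified by value_idx
// and returns the code address of the emitted access. Ordered stores use `xchg` (which carries an
// implicit lock prefix) for 8..64 bits and a `lock cmpxchg16b` retry loop for 128 bits, giving
// the required release ordering.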
template<std::size_t bitsize>
const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int value_idx, bool ordered) {
    if (ordered) {
        if constexpr (bitsize == 128) {
            code.xor_(eax, eax);
            code.xor_(edx, edx);
            if (code.HasHostFeature(HostFeature::SSE41)) {
                code.movq(rbx, Xbyak::Xmm{value_idx});
                code.pextrq(rcx, Xbyak::Xmm{value_idx}, 1);
            } else {
                code.movaps(xmm0, Xbyak::Xmm{value_idx});
                code.movq(rbx, xmm0);
                code.punpckhqdq(xmm0, xmm0);
                code.movq(rcx, xmm0);
            }
        }

        const void* fastmem_location = code.getCurr();
        switch (bitsize) {
        case 8:
            code.xchg(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8());
            break;
        case 16:
            code.xchg(word[addr], Xbyak::Reg16{value_idx});
            break;
        case 32:
            code.xchg(dword[addr], Xbyak::Reg32{value_idx});
            break;
        case 64:
            code.xchg(qword[addr], Xbyak::Reg64{value_idx});
            break;
        case 128: {
            Xbyak::Label loop;
            code.L(loop);
            code.lock();
            code.cmpxchg16b(xword[addr]);
            code.jnz(loop);
            break;
        }
        default:
            ASSERT_FALSE("Invalid bitsize");
        }
        return fastmem_location;
    }

    const void* fastmem_location = code.getCurr();
    switch (bitsize) {
    case 8:
        code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8());
        break;
    case 16:
        code.mov(word[addr], Xbyak::Reg16{value_idx});
        break;
    case 32:
        code.mov(dword[addr], Xbyak::Reg32{value_idx});
        break;
    case 64:
        code.mov(qword[addr], Xbyak::Reg64{value_idx});
        break;
    case 128:
        code.movups(xword[addr], Xbyak::Xmm{value_idx});
        break;
    default:
        ASSERT_FALSE("Invalid bitsize");
    }
    return fastmem_location;
}
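
// Acquires the global exclusive monitor's spin lock (skipped entirely when the
// Unsafe_IgnoreGlobalMonitor optimization is enabled). `pointer` receives the address of the
// lock word; `tmp` is a scratch register for the spin-lock sequence.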
template<typename UserConfig>
void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 pointer, Xbyak::Reg32 tmp) {
    if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
        return;
    }

    code.mov(pointer, Common::BitCast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
    EmitSpinLockLock(code, pointer, tmp);
}
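
// Releases the global exclusive monitor's spin lock acquired by EmitExclusiveLock.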
template<typename UserConfig>
void EmitExclusiveUnlock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 pointer, Xbyak::Reg32 tmp) {
    if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
        return;
    }

    code.mov(pointer, Common::BitCast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
    EmitSpinLockUnlock(code, pointer, tmp);
}
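
// Invalidates every other processor's exclusive reservation on vaddr: for each core except the
// current one, if its monitor address equals vaddr it is overwritten with a sentinel value
// (0xDEAD'DEAD'DEAD'DEAD) so that core's store-exclusive will fail.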
template<typename UserConfig>
void EmitExclusiveTestAndClear(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 vaddr, Xbyak::Reg64 pointer, Xbyak::Reg64 tmp) {
    if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
        return;
    }

    code.mov(tmp, 0xDEAD'DEAD'DEAD'DEAD);
    const size_t processor_count = GetExclusiveMonitorProcessorCount(conf.global_monitor);
    for (size_t processor_index = 0; processor_index < processor_count; processor_index++) {
        if (processor_index == conf.processor_id) {
            continue;
        }
        Xbyak::Label ok;
        code.mov(pointer, Common::BitCast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, processor_index)));
        code.cmp(qword[pointer], vaddr);
        code.jne(ok);
        code.mov(qword[pointer], tmp);
        code.L(ok);
    }
}
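
// True for access types that require acquire/release (ordered) memory semantics.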
inline bool IsOrdered(IR::AccType acctype) {
    return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED;
}

}  // namespace

}  // namespace Dynarmic::Backend::X64