diff --git a/src/core/core.cpp b/src/core/core.cpp index 36fb8cc56..005003ec8 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -821,7 +821,6 @@ void System::serialize(Archive& ar, const unsigned int file_version) { timing->UnlockEventQueue(); memory->SetDSP(*dsp_core); cheat_engine.Connect(cheats_pid); - gpu->Sync(); // Re-register gpu callback, because gsp service changed after service_manager got // serialized diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e59b6ad7e..4de7ed4fc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(video_core STATIC rasterizer_interface.h renderer_base.cpp renderer_base.h + pica/dirty_regs.h pica/geometry_pipeline.cpp pica/geometry_pipeline.h pica/pica_core.cpp diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ecfae2932..11fb9be8f 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -290,10 +290,6 @@ void GPU::WriteReg(VAddr addr, u32 data) { } } -void GPU::Sync() { - impl->renderer->Sync(); -} - VideoCore::RendererBase& GPU::Renderer() { return *impl->renderer; } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2ca608b2b..ee109a254 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -75,9 +75,6 @@ public: /// Writes the provided value to the GPU virtual address. void WriteReg(VAddr addr, u32 data); - /// Synchronizes fixed function renderer state with PICA registers. - void Sync(); - /// Returns a mutable reference to the renderer. 
[[nodiscard]] VideoCore::RendererBase& Renderer(); diff --git a/src/video_core/pica/dirty_regs.h b/src/video_core/pica/dirty_regs.h new file mode 100644 index 000000000..cf0410e92 --- /dev/null +++ b/src/video_core/pica/dirty_regs.h @@ -0,0 +1,110 @@ +// Copyright Citra Emulator Project / Azahar Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/pica/regs_internal.h" + +namespace Pica { + +#define M_R(base, num_bits) (((1ULL << num_bits) - 1) << (PICA_REG_INDEX(base) & 0x3f)) +#define M(base) M_R(base, 1) + +union DirtyRegs { + void Set(u32 reg_id) { + qwords[reg_id >> 6] |= 1ULL << (reg_id & 0x3f); + } + + void Reset() { + qwords.fill(0ULL); + } + + bool CheckClipping() const { + // Checks if GPUREG_FRAGOP_CLIP or GPUREG_FRAGOP_CLIP_DATAi are dirty + static constexpr u64 ClipMask = M_R(rasterizer.clip_enable, 5); + return rasterizer & ClipMask; + } + + bool CheckDepth() const { + // Checks if GPUREG_DEPTHMAP_SCALE or GPUREG_DEPTHMAP_OFFSET are dirty + static constexpr u64 DepthMask = + M(rasterizer.viewport_depth_range) | M(rasterizer.viewport_depth_near_plane); + return rasterizer & DepthMask; + } + + bool CheckLight(u32 index) const { + // Checks if any GPUREG_LIGHTi_* is dirty + return lights[index]; + } + + bool CheckFogColor() const { + // Checks if GPUREG_FOG_COLOR is dirty + static constexpr u64 FogColorMask = M(texturing.fog_color); + return texenv & FogColorMask; + } + + bool CheckTexUnits() const { + // Checks if GPUREG_TEXUNITi_BORDER_COLOR or GPUREG_TEXUNITi_LOD are dirty + static constexpr u64 TexUnitMask = + M(texturing.texture0.border_color) | M(texturing.texture0.lod) | + M(texturing.texture1.border_color) | M(texturing.texture1.lod) | + M(texturing.texture2.border_color) | M(texturing.texture2.lod); + return tex_units & TexUnitMask; + } + + bool CheckProctex() const { + // Checks if any GPUREG_TEXUNIT3_PROCTEXi reg is dirty + static constexpr u64 
ProctexMask = M_R(texturing.proctex, 6); + return tex_units & ProctexMask; + } + + bool CheckTexEnv() const { + // Checks if GPUREG_TEXENV_BUFFER_COLOR or any GPUREG_TEXENVi_COLOR reg is dirty + static constexpr u64 TexEnvMask = + M(texturing.tev_combiner_buffer_color) | M(texturing.tev_stage0.const_color) | + M(texturing.tev_stage1.const_color) | M(texturing.tev_stage2.const_color) | + M(texturing.tev_stage3.const_color) | M(texturing.tev_stage4.const_color) | + M(texturing.tev_stage5.const_color); + return texenv & TexEnvMask; + } + + bool CheckLightingAmbient() const { + // Checks if GPUREG_LIGHTING_AMBIENT is dirty + static constexpr u64 LightingMask = M(lighting.global_ambient); + return light_lut & LightingMask; + } + + bool CheckBlend() const { + // Checks if GPUREG_BLEND_COLOR or GPUREG_FRAGOP_ALPHA_TEST are dirty + static constexpr u64 BlendMask = + M(framebuffer.output_merger.blend_const) | M(framebuffer.output_merger.alpha_test); + return framebuffer & BlendMask; + } + + bool CheckShadow() const { + // Checks if GPUREG_FRAGOP_SHADOW or GPUREG_TEXUNIT0_SHADOW are dirty + static constexpr u64 ShadowMask1 = M(framebuffer.shadow); + static constexpr u64 ShadowMask2 = M(texturing.shadow); + return (framebuffer & ShadowMask1) || (tex_units & ShadowMask2); + } + + struct { + u64 misc; + u64 rasterizer; + u64 tex_units; + u64 texenv; + u64 framebuffer; + std::array lights; + u64 light_lut; + u128 geo_pipeline; + u128 shader; + }; + std::array qwords; +}; +static_assert(sizeof(DirtyRegs) == 12 * sizeof(u64)); + +#undef M +#undef M_R + +} // namespace Pica diff --git a/src/video_core/pica/pica_core.cpp b/src/video_core/pica/pica_core.cpp index e610b42fb..f9f2f4bfb 100644 --- a/src/video_core/pica/pica_core.cpp +++ b/src/video_core/pica/pica_core.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
@@ -149,7 +149,6 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask) { // Track events. if (debug_context) { debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, &id); - SCOPE_EXIT({ debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, &id); }); } switch (id) { @@ -363,7 +362,8 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask) { auto& lut_config = regs.internal.lighting.lut_config; ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); - lighting.luts[lut_config.type][lut_config.index].raw = value; + const u32 prev = std::exchange(lighting.luts[lut_config.type][lut_config.index].raw, value); + lighting.lut_dirty |= (prev != value) << lut_config.type; lut_config.index.Assign(lut_config.index + 1); break; } @@ -376,7 +376,9 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(texturing.fog_lut_data[5]): case PICA_REG_INDEX(texturing.fog_lut_data[6]): case PICA_REG_INDEX(texturing.fog_lut_data[7]): { - fog.lut[regs.internal.texturing.fog_lut_offset % 128].raw = value; + const u32 prev = + std::exchange(fog.lut[regs.internal.texturing.fog_lut_offset % 128].raw, value); + fog.lut_dirty |= prev != value; regs.internal.texturing.fog_lut_offset.Assign(regs.internal.texturing.fog_lut_offset + 1); break; } @@ -390,22 +392,28 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(texturing.proctex_lut_data[6]): case PICA_REG_INDEX(texturing.proctex_lut_data[7]): { auto& index = regs.internal.texturing.proctex_lut_config.index; + const auto lut_table = regs.internal.texturing.proctex_lut_config.ref_table.Value(); - switch (regs.internal.texturing.proctex_lut_config.ref_table.Value()) { + const auto sync_lut = [&](auto& proctex_table) { + const u32 prev = std::exchange(proctex_table[index % proctex_table.size()].raw, value); + proctex.table_dirty |= (prev != value) << u32(lut_table); + }; + + switch (lut_table) { case 
TexturingRegs::ProcTexLutTable::Noise: - proctex.noise_table[index % proctex.noise_table.size()].raw = value; + sync_lut(proctex.noise_table); break; case TexturingRegs::ProcTexLutTable::ColorMap: - proctex.color_map_table[index % proctex.color_map_table.size()].raw = value; + sync_lut(proctex.color_map_table); break; case TexturingRegs::ProcTexLutTable::AlphaMap: - proctex.alpha_map_table[index % proctex.alpha_map_table.size()].raw = value; + sync_lut(proctex.alpha_map_table); break; case TexturingRegs::ProcTexLutTable::Color: - proctex.color_table[index % proctex.color_table.size()].raw = value; + sync_lut(proctex.color_table); break; case TexturingRegs::ProcTexLutTable::ColorDiff: - proctex.color_diff_table[index % proctex.color_diff_table.size()].raw = value; + sync_lut(proctex.color_diff_table); break; } index.Assign(index + 1); @@ -415,8 +423,11 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask) { break; } - // Notify the rasterizer an internal register was updated. - rasterizer->NotifyPicaRegisterChanged(id); + dirty_regs.Set(id); + + if (debug_context) { + debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, &id); + } } void PicaCore::SubmitImmediate(u32 value) { @@ -460,8 +471,6 @@ void PicaCore::DrawImmediate() { if (debug_context) { debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, std::addressof(immediate.input_vertex)); - SCOPE_EXIT( - { debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); }); } ShaderUnit shader_unit; @@ -486,6 +495,10 @@ void PicaCore::DrawImmediate() { // Flush the immediate triangle. rasterizer->DrawTriangles(); immediate.current_attribute = 0; + + if (debug_context) { + debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } } void PicaCore::DrawArrays(bool is_indexed) { @@ -494,8 +507,6 @@ void PicaCore::DrawArrays(bool is_indexed) { // Track vertex in the debug recorder. 
if (debug_context) { debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); - SCOPE_EXIT( - { debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); }); } const bool accelerate_draw = [this] { @@ -530,6 +541,10 @@ void PicaCore::DrawArrays(bool is_indexed) { // Draw emitted triangles. rasterizer->DrawTriangles(); + + if (debug_context) { + debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } } void PicaCore::LoadVertices(bool is_indexed) { diff --git a/src/video_core/pica/pica_core.h b/src/video_core/pica/pica_core.h index 579bb2109..ba71c43d9 100644 --- a/src/video_core/pica/pica_core.h +++ b/src/video_core/pica/pica_core.h @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "core/hle/service/gsp/gsp_interrupt.h" +#include "video_core/pica/dirty_regs.h" #include "video_core/pica/geometry_pipeline.h" #include "video_core/pica/packed_attribute.h" #include "video_core/pica/primitive_assembly.h" @@ -118,6 +119,8 @@ public: }; struct ProcTex { + static constexpr u8 TableAllDirty = 0xFF; + union ValueEntry { u32 raw; @@ -168,6 +171,14 @@ public: std::array alpha_map_table; std::array color_table; std::array color_diff_table; + union { + u8 table_dirty = TableAllDirty; + BitField<0, 1, u8> noise_lut_dirty; + BitField<2, 1, u8> color_map_dirty; + BitField<3, 1, u8> alpha_map_dirty; + BitField<4, 1, u8> lut_dirty; + BitField<5, 1, u8> diff_lut_dirty; + }; private: friend class boost::serialization::access; @@ -178,6 +189,8 @@ public: }; struct Lighting { + static constexpr u32 LutAllDirty = 0xFFFFFF; + union LutEntry { // Used for raw access u32 raw; @@ -205,6 +218,7 @@ public: }; std::array, 24> luts; + u32 lut_dirty = LutAllDirty; private: friend class boost::serialization::access; @@ 
-232,6 +246,7 @@ public: }; std::array lut; + bool lut_dirty = true; private: friend class boost::serialization::access; @@ -243,7 +258,7 @@ public: RegsLcd regs_lcd{}; Regs regs{}; - // TODO: Move these to a separate shader scheduler class + DirtyRegs dirty_regs{}; GeometryShaderUnit gs_unit; ShaderSetup vs_setup; ShaderSetup gs_setup; diff --git a/src/video_core/pica/shader_setup.cpp b/src/video_core/pica/shader_setup.cpp index 9eca8f372..70808b02f 100644 --- a/src/video_core/pica/shader_setup.cpp +++ b/src/video_core/pica/shader_setup.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -18,13 +18,15 @@ ShaderSetup::~ShaderSetup() = default; void ShaderSetup::WriteUniformBoolReg(u32 value) { const auto bits = BitSet32(value); for (u32 i = 0; i < uniforms.b.size(); ++i) { - uniforms.b[i] = bits[i]; + const bool prev = std::exchange(uniforms.b[i], bits[i]); + uniforms_dirty |= prev != bits[i]; } } void ShaderSetup::WriteUniformIntReg(u32 index, const Common::Vec4 values) { ASSERT(index < uniforms.i.size()); - uniforms.i[index] = values; + const auto prev = std::exchange(uniforms.i[index], values); + uniforms_dirty |= prev != values; } std::optional ShaderSetup::WriteUniformFloatReg(ShaderRegs& config, u32 value) { @@ -41,7 +43,8 @@ std::optional ShaderSetup::WriteUniformFloatReg(ShaderRegs& config, u32 val } const u32 index = uniform_setup.index.Value(); - uniforms.f[index] = uniform; + const auto prev = std::exchange(uniforms.f[index], uniform); + uniforms_dirty |= prev != uniform; uniform_setup.index.Assign(index + 1); return index; } diff --git a/src/video_core/pica/shader_setup.h b/src/video_core/pica/shader_setup.h index 196d528bf..ad4981b95 100644 --- a/src/video_core/pica/shader_setup.h +++ b/src/video_core/pica/shader_setup.h @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright 
Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -80,6 +80,7 @@ public: SwizzleData swizzle_data{}; u32 entry_point{}; const void* cached_shader{}; + bool uniforms_dirty = true; private: bool program_code_hash_dirty{true}; diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 60a336f72..c6ebfdb70 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -54,9 +54,7 @@ RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::OutputVertex& } RasterizerAccelerated::RasterizerAccelerated(Memory::MemorySystem& memory_, Pica::PicaCore& pica_) - : memory{memory_}, pica{pica_}, regs{pica.regs.internal} { - fs_uniform_block_data.lighting_lut_dirty.fill(true); -} + : memory{memory_}, pica{pica_}, regs{pica.regs.internal} {} /** * This is a helper function to resolve an issue when interpolating opposite quaternions. See below @@ -130,734 +128,132 @@ RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray return {vertex_min, vertex_max, vs_input_size}; } -void RasterizerAccelerated::SyncEntireState() { - // Sync renderer-specific fixed-function state - SyncFixedState(); +void RasterizerAccelerated::SyncDrawUniforms() { + auto& dirty = pica.dirty_regs; - // Sync uniforms - SyncClipPlane(); - SyncDepthScale(); - SyncDepthOffset(); - SyncAlphaTest(); - SyncCombinerColor(); - auto& tev_stages = regs.texturing.GetTevStages(); - for (std::size_t index = 0; index < tev_stages.size(); ++index) { - SyncTevConstColor(index, tev_stages[index]); + // The register that contains the flip bit also contains the framebuffer dimensions + // that we don't depend on. 
So avoid the dirty table and check manually + const bool is_flipped = regs.framebuffer.framebuffer.IsFlipped(); + const bool prev_flipped = std::exchange(vs_data.flip_viewport, is_flipped); + vs_data_dirty = is_flipped != prev_flipped; + + // Sync clip plane uniforms + if (dirty.CheckClipping()) { + const auto raw_clip_coef = regs.rasterizer.GetClipCoef(); + vs_data.enable_clip1 = regs.rasterizer.clip_enable != 0; + vs_data.clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), + raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; + vs_data_dirty = true; } - SyncGlobalAmbient(); - for (u32 light_index = 0; light_index < 8; light_index++) { - SyncLightSpecular0(light_index); - SyncLightSpecular1(light_index); - SyncLightDiffuse(light_index); - SyncLightAmbient(light_index); - SyncLightPosition(light_index); - SyncLightDistanceAttenuationBias(light_index); - SyncLightDistanceAttenuationScale(light_index); + // Sync depth testing uniforms + if (dirty.CheckDepth()) { + fs_data.depth_scale = f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32(); + fs_data.depth_offset = f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32(); + fs_data_dirty = true; } - SyncFogColor(); - SyncProcTexNoise(); - SyncProcTexBias(); - SyncShadowBias(); - SyncShadowTextureBias(); - - for (u32 tex_index = 0; tex_index < 3; tex_index++) { - SyncTextureLodBias(tex_index); + // Sync alpha testing and blending uniforms + if (dirty.CheckBlend()) { + fs_data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref; + fs_data.blend_color = ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw); + fs_data_dirty = true; } -} -void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { - switch (id) { - // Depth modifiers - case PICA_REG_INDEX(rasterizer.viewport_depth_range): - SyncDepthScale(); - break; - case PICA_REG_INDEX(rasterizer.viewport_depth_near_plane): - SyncDepthOffset(); - break; - - // Depth buffering - case 
PICA_REG_INDEX(rasterizer.depthmap_enable): - shader_dirty = true; - break; - - // Shadow texture - case PICA_REG_INDEX(texturing.shadow): - SyncShadowTextureBias(); - break; - - // Fog state - case PICA_REG_INDEX(texturing.fog_color): - SyncFogColor(); - break; - case PICA_REG_INDEX(texturing.fog_lut_data[0]): - case PICA_REG_INDEX(texturing.fog_lut_data[1]): - case PICA_REG_INDEX(texturing.fog_lut_data[2]): - case PICA_REG_INDEX(texturing.fog_lut_data[3]): - case PICA_REG_INDEX(texturing.fog_lut_data[4]): - case PICA_REG_INDEX(texturing.fog_lut_data[5]): - case PICA_REG_INDEX(texturing.fog_lut_data[6]): - case PICA_REG_INDEX(texturing.fog_lut_data[7]): - fs_uniform_block_data.fog_lut_dirty = true; - break; - - // ProcTex state - case PICA_REG_INDEX(texturing.proctex): - case PICA_REG_INDEX(texturing.proctex_lut): - case PICA_REG_INDEX(texturing.proctex_lut_offset): - SyncProcTexBias(); - shader_dirty = true; - break; - - case PICA_REG_INDEX(texturing.proctex_noise_u): - case PICA_REG_INDEX(texturing.proctex_noise_v): - case PICA_REG_INDEX(texturing.proctex_noise_frequency): - SyncProcTexNoise(); - break; - - case PICA_REG_INDEX(texturing.proctex_lut_data[0]): - case PICA_REG_INDEX(texturing.proctex_lut_data[1]): - case PICA_REG_INDEX(texturing.proctex_lut_data[2]): - case PICA_REG_INDEX(texturing.proctex_lut_data[3]): - case PICA_REG_INDEX(texturing.proctex_lut_data[4]): - case PICA_REG_INDEX(texturing.proctex_lut_data[5]): - case PICA_REG_INDEX(texturing.proctex_lut_data[6]): - case PICA_REG_INDEX(texturing.proctex_lut_data[7]): - using Pica::TexturingRegs; - switch (regs.texturing.proctex_lut_config.ref_table.Value()) { - case TexturingRegs::ProcTexLutTable::Noise: - fs_uniform_block_data.proctex_noise_lut_dirty = true; - break; - case TexturingRegs::ProcTexLutTable::ColorMap: - fs_uniform_block_data.proctex_color_map_dirty = true; - break; - case TexturingRegs::ProcTexLutTable::AlphaMap: - fs_uniform_block_data.proctex_alpha_map_dirty = true; - break; - case 
TexturingRegs::ProcTexLutTable::Color: - fs_uniform_block_data.proctex_lut_dirty = true; - break; - case TexturingRegs::ProcTexLutTable::ColorDiff: - fs_uniform_block_data.proctex_diff_lut_dirty = true; - break; + // Sync texture unit uniforms + if (dirty.CheckTexUnits()) { + const auto pica_textures = regs.texturing.GetTextures(); + for (u32 tex_index = 0; tex_index < 3; tex_index++) { + const auto& config = pica_textures[tex_index].config; + fs_data.tex_lod_bias[tex_index] = config.lod.bias / 256.0f; + fs_data.tex_border_color[tex_index] = ColorRGBA8(config.border_color.raw); } - break; - - // Fragment operation mode - case PICA_REG_INDEX(framebuffer.output_merger.fragment_operation_mode): - shader_dirty = true; - break; - - // Alpha test - case PICA_REG_INDEX(framebuffer.output_merger.alpha_test): - SyncAlphaTest(); - shader_dirty = true; - break; - - case PICA_REG_INDEX(framebuffer.shadow): - SyncShadowBias(); - break; - - // Scissor test - case PICA_REG_INDEX(rasterizer.scissor_test.mode): - shader_dirty = true; - break; - - case PICA_REG_INDEX(texturing.main_config): - shader_dirty = true; - break; - - // Texture 0 type - case PICA_REG_INDEX(texturing.texture0.type): - shader_dirty = true; - break; - - // TEV stages - // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input) - case PICA_REG_INDEX(texturing.tev_stage0.color_source1): - case PICA_REG_INDEX(texturing.tev_stage0.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage0.color_op): - case PICA_REG_INDEX(texturing.tev_stage0.color_scale): - case PICA_REG_INDEX(texturing.tev_stage1.color_source1): - case PICA_REG_INDEX(texturing.tev_stage1.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage1.color_op): - case PICA_REG_INDEX(texturing.tev_stage1.color_scale): - case PICA_REG_INDEX(texturing.tev_stage2.color_source1): - case PICA_REG_INDEX(texturing.tev_stage2.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage2.color_op): - case 
PICA_REG_INDEX(texturing.tev_stage2.color_scale): - case PICA_REG_INDEX(texturing.tev_stage3.color_source1): - case PICA_REG_INDEX(texturing.tev_stage3.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage3.color_op): - case PICA_REG_INDEX(texturing.tev_stage3.color_scale): - case PICA_REG_INDEX(texturing.tev_stage4.color_source1): - case PICA_REG_INDEX(texturing.tev_stage4.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage4.color_op): - case PICA_REG_INDEX(texturing.tev_stage4.color_scale): - case PICA_REG_INDEX(texturing.tev_stage5.color_source1): - case PICA_REG_INDEX(texturing.tev_stage5.color_modifier1): - case PICA_REG_INDEX(texturing.tev_stage5.color_op): - case PICA_REG_INDEX(texturing.tev_stage5.color_scale): - case PICA_REG_INDEX(texturing.tev_combiner_buffer_input): - shader_dirty = true; - break; - case PICA_REG_INDEX(texturing.tev_stage0.const_r): - SyncTevConstColor(0, regs.texturing.tev_stage0); - break; - case PICA_REG_INDEX(texturing.tev_stage1.const_r): - SyncTevConstColor(1, regs.texturing.tev_stage1); - break; - case PICA_REG_INDEX(texturing.tev_stage2.const_r): - SyncTevConstColor(2, regs.texturing.tev_stage2); - break; - case PICA_REG_INDEX(texturing.tev_stage3.const_r): - SyncTevConstColor(3, regs.texturing.tev_stage3); - break; - case PICA_REG_INDEX(texturing.tev_stage4.const_r): - SyncTevConstColor(4, regs.texturing.tev_stage4); - break; - case PICA_REG_INDEX(texturing.tev_stage5.const_r): - SyncTevConstColor(5, regs.texturing.tev_stage5); - break; - - // TEV combiner buffer color - case PICA_REG_INDEX(texturing.tev_combiner_buffer_color): - SyncCombinerColor(); - break; - - // Fragment lighting switches - case PICA_REG_INDEX(lighting.disable): - case PICA_REG_INDEX(lighting.max_light_index): - case PICA_REG_INDEX(lighting.config0): - case PICA_REG_INDEX(lighting.config1): - case PICA_REG_INDEX(lighting.abs_lut_input): - case PICA_REG_INDEX(lighting.lut_input): - case PICA_REG_INDEX(lighting.lut_scale): - case 
PICA_REG_INDEX(lighting.light_enable): - break; - - // Fragment lighting specular 0 color - case PICA_REG_INDEX(lighting.light[0].specular_0): - SyncLightSpecular0(0); - break; - case PICA_REG_INDEX(lighting.light[1].specular_0): - SyncLightSpecular0(1); - break; - case PICA_REG_INDEX(lighting.light[2].specular_0): - SyncLightSpecular0(2); - break; - case PICA_REG_INDEX(lighting.light[3].specular_0): - SyncLightSpecular0(3); - break; - case PICA_REG_INDEX(lighting.light[4].specular_0): - SyncLightSpecular0(4); - break; - case PICA_REG_INDEX(lighting.light[5].specular_0): - SyncLightSpecular0(5); - break; - case PICA_REG_INDEX(lighting.light[6].specular_0): - SyncLightSpecular0(6); - break; - case PICA_REG_INDEX(lighting.light[7].specular_0): - SyncLightSpecular0(7); - break; - - // Fragment lighting specular 1 color - case PICA_REG_INDEX(lighting.light[0].specular_1): - SyncLightSpecular1(0); - break; - case PICA_REG_INDEX(lighting.light[1].specular_1): - SyncLightSpecular1(1); - break; - case PICA_REG_INDEX(lighting.light[2].specular_1): - SyncLightSpecular1(2); - break; - case PICA_REG_INDEX(lighting.light[3].specular_1): - SyncLightSpecular1(3); - break; - case PICA_REG_INDEX(lighting.light[4].specular_1): - SyncLightSpecular1(4); - break; - case PICA_REG_INDEX(lighting.light[5].specular_1): - SyncLightSpecular1(5); - break; - case PICA_REG_INDEX(lighting.light[6].specular_1): - SyncLightSpecular1(6); - break; - case PICA_REG_INDEX(lighting.light[7].specular_1): - SyncLightSpecular1(7); - break; - - // Fragment lighting diffuse color - case PICA_REG_INDEX(lighting.light[0].diffuse): - SyncLightDiffuse(0); - break; - case PICA_REG_INDEX(lighting.light[1].diffuse): - SyncLightDiffuse(1); - break; - case PICA_REG_INDEX(lighting.light[2].diffuse): - SyncLightDiffuse(2); - break; - case PICA_REG_INDEX(lighting.light[3].diffuse): - SyncLightDiffuse(3); - break; - case PICA_REG_INDEX(lighting.light[4].diffuse): - SyncLightDiffuse(4); - break; - case 
PICA_REG_INDEX(lighting.light[5].diffuse): - SyncLightDiffuse(5); - break; - case PICA_REG_INDEX(lighting.light[6].diffuse): - SyncLightDiffuse(6); - break; - case PICA_REG_INDEX(lighting.light[7].diffuse): - SyncLightDiffuse(7); - break; - - // Fragment lighting ambient color - case PICA_REG_INDEX(lighting.light[0].ambient): - SyncLightAmbient(0); - break; - case PICA_REG_INDEX(lighting.light[1].ambient): - SyncLightAmbient(1); - break; - case PICA_REG_INDEX(lighting.light[2].ambient): - SyncLightAmbient(2); - break; - case PICA_REG_INDEX(lighting.light[3].ambient): - SyncLightAmbient(3); - break; - case PICA_REG_INDEX(lighting.light[4].ambient): - SyncLightAmbient(4); - break; - case PICA_REG_INDEX(lighting.light[5].ambient): - SyncLightAmbient(5); - break; - case PICA_REG_INDEX(lighting.light[6].ambient): - SyncLightAmbient(6); - break; - case PICA_REG_INDEX(lighting.light[7].ambient): - SyncLightAmbient(7); - break; - - // Fragment lighting position - case PICA_REG_INDEX(lighting.light[0].x): - case PICA_REG_INDEX(lighting.light[0].z): - SyncLightPosition(0); - break; - case PICA_REG_INDEX(lighting.light[1].x): - case PICA_REG_INDEX(lighting.light[1].z): - SyncLightPosition(1); - break; - case PICA_REG_INDEX(lighting.light[2].x): - case PICA_REG_INDEX(lighting.light[2].z): - SyncLightPosition(2); - break; - case PICA_REG_INDEX(lighting.light[3].x): - case PICA_REG_INDEX(lighting.light[3].z): - SyncLightPosition(3); - break; - case PICA_REG_INDEX(lighting.light[4].x): - case PICA_REG_INDEX(lighting.light[4].z): - SyncLightPosition(4); - break; - case PICA_REG_INDEX(lighting.light[5].x): - case PICA_REG_INDEX(lighting.light[5].z): - SyncLightPosition(5); - break; - case PICA_REG_INDEX(lighting.light[6].x): - case PICA_REG_INDEX(lighting.light[6].z): - SyncLightPosition(6); - break; - case PICA_REG_INDEX(lighting.light[7].x): - case PICA_REG_INDEX(lighting.light[7].z): - SyncLightPosition(7); - break; - - // Fragment spot lighting direction - case 
PICA_REG_INDEX(lighting.light[0].spot_x): - case PICA_REG_INDEX(lighting.light[0].spot_z): - SyncLightSpotDirection(0); - break; - case PICA_REG_INDEX(lighting.light[1].spot_x): - case PICA_REG_INDEX(lighting.light[1].spot_z): - SyncLightSpotDirection(1); - break; - case PICA_REG_INDEX(lighting.light[2].spot_x): - case PICA_REG_INDEX(lighting.light[2].spot_z): - SyncLightSpotDirection(2); - break; - case PICA_REG_INDEX(lighting.light[3].spot_x): - case PICA_REG_INDEX(lighting.light[3].spot_z): - SyncLightSpotDirection(3); - break; - case PICA_REG_INDEX(lighting.light[4].spot_x): - case PICA_REG_INDEX(lighting.light[4].spot_z): - SyncLightSpotDirection(4); - break; - case PICA_REG_INDEX(lighting.light[5].spot_x): - case PICA_REG_INDEX(lighting.light[5].spot_z): - SyncLightSpotDirection(5); - break; - case PICA_REG_INDEX(lighting.light[6].spot_x): - case PICA_REG_INDEX(lighting.light[6].spot_z): - SyncLightSpotDirection(6); - break; - case PICA_REG_INDEX(lighting.light[7].spot_x): - case PICA_REG_INDEX(lighting.light[7].spot_z): - SyncLightSpotDirection(7); - break; - - // Fragment lighting light source config - case PICA_REG_INDEX(lighting.light[0].config): - case PICA_REG_INDEX(lighting.light[1].config): - case PICA_REG_INDEX(lighting.light[2].config): - case PICA_REG_INDEX(lighting.light[3].config): - case PICA_REG_INDEX(lighting.light[4].config): - case PICA_REG_INDEX(lighting.light[5].config): - case PICA_REG_INDEX(lighting.light[6].config): - case PICA_REG_INDEX(lighting.light[7].config): - shader_dirty = true; - break; - - // Fragment lighting distance attenuation bias - case PICA_REG_INDEX(lighting.light[0].dist_atten_bias): - SyncLightDistanceAttenuationBias(0); - break; - case PICA_REG_INDEX(lighting.light[1].dist_atten_bias): - SyncLightDistanceAttenuationBias(1); - break; - case PICA_REG_INDEX(lighting.light[2].dist_atten_bias): - SyncLightDistanceAttenuationBias(2); - break; - case PICA_REG_INDEX(lighting.light[3].dist_atten_bias): - 
SyncLightDistanceAttenuationBias(3); - break; - case PICA_REG_INDEX(lighting.light[4].dist_atten_bias): - SyncLightDistanceAttenuationBias(4); - break; - case PICA_REG_INDEX(lighting.light[5].dist_atten_bias): - SyncLightDistanceAttenuationBias(5); - break; - case PICA_REG_INDEX(lighting.light[6].dist_atten_bias): - SyncLightDistanceAttenuationBias(6); - break; - case PICA_REG_INDEX(lighting.light[7].dist_atten_bias): - SyncLightDistanceAttenuationBias(7); - break; - - // Fragment lighting distance attenuation scale - case PICA_REG_INDEX(lighting.light[0].dist_atten_scale): - SyncLightDistanceAttenuationScale(0); - break; - case PICA_REG_INDEX(lighting.light[1].dist_atten_scale): - SyncLightDistanceAttenuationScale(1); - break; - case PICA_REG_INDEX(lighting.light[2].dist_atten_scale): - SyncLightDistanceAttenuationScale(2); - break; - case PICA_REG_INDEX(lighting.light[3].dist_atten_scale): - SyncLightDistanceAttenuationScale(3); - break; - case PICA_REG_INDEX(lighting.light[4].dist_atten_scale): - SyncLightDistanceAttenuationScale(4); - break; - case PICA_REG_INDEX(lighting.light[5].dist_atten_scale): - SyncLightDistanceAttenuationScale(5); - break; - case PICA_REG_INDEX(lighting.light[6].dist_atten_scale): - SyncLightDistanceAttenuationScale(6); - break; - case PICA_REG_INDEX(lighting.light[7].dist_atten_scale): - SyncLightDistanceAttenuationScale(7); - break; - - // Fragment lighting global ambient color (emission + ambient * ambient) - case PICA_REG_INDEX(lighting.global_ambient): - SyncGlobalAmbient(); - break; - - // Fragment lighting lookup tables - case PICA_REG_INDEX(lighting.lut_data[0]): - case PICA_REG_INDEX(lighting.lut_data[1]): - case PICA_REG_INDEX(lighting.lut_data[2]): - case PICA_REG_INDEX(lighting.lut_data[3]): - case PICA_REG_INDEX(lighting.lut_data[4]): - case PICA_REG_INDEX(lighting.lut_data[5]): - case PICA_REG_INDEX(lighting.lut_data[6]): - case PICA_REG_INDEX(lighting.lut_data[7]): { - const auto& lut_config = regs.lighting.lut_config; - 
fs_uniform_block_data.lighting_lut_dirty[lut_config.type] = true; - fs_uniform_block_data.lighting_lut_dirty_any = true; - break; + fs_data_dirty = true; } - // Texture LOD biases - case PICA_REG_INDEX(texturing.texture0.lod.bias): - SyncTextureLodBias(0); - break; - case PICA_REG_INDEX(texturing.texture1.lod.bias): - SyncTextureLodBias(1); - break; - case PICA_REG_INDEX(texturing.texture2.lod.bias): - SyncTextureLodBias(2); - break; - - // Texture borders - case PICA_REG_INDEX(texturing.texture0.border_color): - SyncTextureBorderColor(0); - break; - case PICA_REG_INDEX(texturing.texture1.border_color): - SyncTextureBorderColor(1); - break; - case PICA_REG_INDEX(texturing.texture2.border_color): - SyncTextureBorderColor(2); - break; - - // Clipping plane - case PICA_REG_INDEX(rasterizer.clip_enable): - case PICA_REG_INDEX(rasterizer.clip_coef[0]): - case PICA_REG_INDEX(rasterizer.clip_coef[1]): - case PICA_REG_INDEX(rasterizer.clip_coef[2]): - case PICA_REG_INDEX(rasterizer.clip_coef[3]): - SyncClipPlane(); - break; + // Sync texenv uniforms + if (dirty.CheckTexEnv()) { + const auto tev_stages = regs.texturing.GetTevStages(); + for (std::size_t index = 0; index < tev_stages.size(); ++index) { + fs_data.const_color[index] = ColorRGBA8(tev_stages[index].const_color); + } + fs_data.tev_combiner_buffer_color = + ColorRGBA8(regs.texturing.tev_combiner_buffer_color.raw); + fs_data_dirty = true; } - // Forward registers that map to fixed function API features to the video backend - NotifyFixedFunctionPicaRegisterChanged(id); -} - -void RasterizerAccelerated::SyncDepthScale() { - const f32 depth_scale = f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32(); - - if (depth_scale != fs_uniform_block_data.data.depth_scale) { - fs_uniform_block_data.data.depth_scale = depth_scale; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncDepthOffset() { - const f32 depth_offset = f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32(); 
- - if (depth_offset != fs_uniform_block_data.data.depth_offset) { - fs_uniform_block_data.data.depth_offset = depth_offset; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncFogColor() { - const auto& fog_color_regs = regs.texturing.fog_color; - const Common::Vec3f fog_color = { - fog_color_regs.r.Value() / 255.0f, - fog_color_regs.g.Value() / 255.0f, - fog_color_regs.b.Value() / 255.0f, - }; - - if (fog_color != fs_uniform_block_data.data.fog_color) { - fs_uniform_block_data.data.fog_color = fog_color; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncProcTexNoise() { - const Common::Vec2f proctex_noise_f = { - Pica::f16::FromRaw(regs.texturing.proctex_noise_frequency.u).ToFloat32(), - Pica::f16::FromRaw(regs.texturing.proctex_noise_frequency.v).ToFloat32(), - }; - const Common::Vec2f proctex_noise_a = { - regs.texturing.proctex_noise_u.amplitude / 4095.0f, - regs.texturing.proctex_noise_v.amplitude / 4095.0f, - }; - const Common::Vec2f proctex_noise_p = { - Pica::f16::FromRaw(regs.texturing.proctex_noise_u.phase).ToFloat32(), - Pica::f16::FromRaw(regs.texturing.proctex_noise_v.phase).ToFloat32(), - }; - - if (proctex_noise_f != fs_uniform_block_data.data.proctex_noise_f || - proctex_noise_a != fs_uniform_block_data.data.proctex_noise_a || - proctex_noise_p != fs_uniform_block_data.data.proctex_noise_p) { - fs_uniform_block_data.data.proctex_noise_f = proctex_noise_f; - fs_uniform_block_data.data.proctex_noise_a = proctex_noise_a; - fs_uniform_block_data.data.proctex_noise_p = proctex_noise_p; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncProcTexBias() { - const auto proctex_bias = Pica::f16::FromRaw(regs.texturing.proctex.bias_low | - (regs.texturing.proctex_lut.bias_high << 8)) - .ToFloat32(); - if (proctex_bias != fs_uniform_block_data.data.proctex_bias) { - fs_uniform_block_data.data.proctex_bias = proctex_bias; - fs_uniform_block_data.dirty = true; - } -} - -void 
RasterizerAccelerated::SyncAlphaTest() { - if (regs.framebuffer.output_merger.alpha_test.ref != - static_cast(fs_uniform_block_data.data.alphatest_ref)) { - fs_uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncCombinerColor() { - const auto combiner_color = ColorRGBA8(regs.texturing.tev_combiner_buffer_color.raw); - if (combiner_color != fs_uniform_block_data.data.tev_combiner_buffer_color) { - fs_uniform_block_data.data.tev_combiner_buffer_color = combiner_color; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncTevConstColor( - const std::size_t stage_index, const Pica::TexturingRegs::TevStageConfig& tev_stage) { - const auto const_color = ColorRGBA8(tev_stage.const_color); - - if (const_color == fs_uniform_block_data.data.const_color[stage_index]) { - return; + // Sync global lighting uniforms + if (dirty.CheckLightingAmbient()) { + fs_data.lighting_global_ambient = LightColor(regs.lighting.global_ambient); + fs_data_dirty = true; } - fs_uniform_block_data.data.const_color[stage_index] = const_color; - fs_uniform_block_data.dirty = true; -} + // Sync light uniforms + for (u32 light_index = 0; light_index < 8; light_index++) { + if (!dirty.CheckLight(light_index)) { + continue; + } -void RasterizerAccelerated::SyncGlobalAmbient() { - const auto color = LightColor(regs.lighting.global_ambient); - if (color != fs_uniform_block_data.data.lighting_global_ambient) { - fs_uniform_block_data.data.lighting_global_ambient = color; - fs_uniform_block_data.dirty = true; + const auto& light = regs.lighting.light[light_index]; + fs_data.light_src[light_index].specular_0 = LightColor(light.specular_0); + fs_data.light_src[light_index].specular_1 = LightColor(light.specular_1); + fs_data.light_src[light_index].diffuse = LightColor(light.diffuse); + fs_data.light_src[light_index].ambient = LightColor(light.ambient); + 
fs_data.light_src[light_index].position = { + Pica::f16::FromRaw(light.x).ToFloat32(), + Pica::f16::FromRaw(light.y).ToFloat32(), + Pica::f16::FromRaw(light.z).ToFloat32(), + }; + fs_data.light_src[light_index].spot_direction = { + light.spot_x / 2047.0f, light.spot_y / 2047.0f, light.spot_z / 2047.0f}; + fs_data.light_src[light_index].dist_atten_bias = + Pica::f20::FromRaw(light.dist_atten_bias).ToFloat32(); + fs_data.light_src[light_index].dist_atten_scale = + Pica::f20::FromRaw(light.dist_atten_scale).ToFloat32(); + fs_data_dirty = true; } -} -void RasterizerAccelerated::SyncLightSpecular0(int light_index) { - const auto color = LightColor(regs.lighting.light[light_index].specular_0); - if (color != fs_uniform_block_data.data.light_src[light_index].specular_0) { - fs_uniform_block_data.data.light_src[light_index].specular_0 = color; - fs_uniform_block_data.dirty = true; + // Sync fog uniforms + if (dirty.CheckFogColor()) { + fs_data.fog_color = { + regs.texturing.fog_color.r.Value() / 255.0f, + regs.texturing.fog_color.g.Value() / 255.0f, + regs.texturing.fog_color.b.Value() / 255.0f, + }; + fs_data_dirty = true; } -} -void RasterizerAccelerated::SyncLightSpecular1(int light_index) { - const auto color = LightColor(regs.lighting.light[light_index].specular_1); - if (color != fs_uniform_block_data.data.light_src[light_index].specular_1) { - fs_uniform_block_data.data.light_src[light_index].specular_1 = color; - fs_uniform_block_data.dirty = true; + // Sync proctex uniforms + if (dirty.CheckProctex()) { + fs_data.proctex_noise_f = { + Pica::f16::FromRaw(regs.texturing.proctex_noise_frequency.u).ToFloat32(), + Pica::f16::FromRaw(regs.texturing.proctex_noise_frequency.v).ToFloat32(), + }; + fs_data.proctex_noise_a = { + regs.texturing.proctex_noise_u.amplitude / 4095.0f, + regs.texturing.proctex_noise_v.amplitude / 4095.0f, + }; + fs_data.proctex_noise_p = { + Pica::f16::FromRaw(regs.texturing.proctex_noise_u.phase).ToFloat32(), + 
Pica::f16::FromRaw(regs.texturing.proctex_noise_v.phase).ToFloat32(), + }; + fs_data.proctex_bias = Pica::f16::FromRaw(regs.texturing.proctex.bias_low | + (regs.texturing.proctex_lut.bias_high << 8)) + .ToFloat32(); + fs_data_dirty = true; } -} -void RasterizerAccelerated::SyncLightDiffuse(int light_index) { - const auto color = LightColor(regs.lighting.light[light_index].diffuse); - if (color != fs_uniform_block_data.data.light_src[light_index].diffuse) { - fs_uniform_block_data.data.light_src[light_index].diffuse = color; - fs_uniform_block_data.dirty = true; + // Sync shadow uniforms + if (dirty.CheckShadow()) { + const auto& shadow = regs.framebuffer.shadow; + fs_data.shadow_bias_constant = Pica::f16::FromRaw(shadow.constant).ToFloat32(); + fs_data.shadow_bias_linear = Pica::f16::FromRaw(shadow.linear).ToFloat32(); + fs_data.shadow_texture_bias = regs.texturing.shadow.bias << 1; + fs_data_dirty = true; } -} -void RasterizerAccelerated::SyncLightAmbient(int light_index) { - const auto color = LightColor(regs.lighting.light[light_index].ambient); - if (color != fs_uniform_block_data.data.light_src[light_index].ambient) { - fs_uniform_block_data.data.light_src[light_index].ambient = color; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncLightPosition(int light_index) { - const Common::Vec3f position = { - Pica::f16::FromRaw(regs.lighting.light[light_index].x).ToFloat32(), - Pica::f16::FromRaw(regs.lighting.light[light_index].y).ToFloat32(), - Pica::f16::FromRaw(regs.lighting.light[light_index].z).ToFloat32(), - }; - - if (position != fs_uniform_block_data.data.light_src[light_index].position) { - fs_uniform_block_data.data.light_src[light_index].position = position; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncLightSpotDirection(int light_index) { - const auto& light = regs.lighting.light[light_index]; - const auto spot_direction = - Common::Vec3f{light.spot_x / 2047.0f, light.spot_y / 2047.0f, 
light.spot_z / 2047.0f}; - - if (spot_direction != fs_uniform_block_data.data.light_src[light_index].spot_direction) { - fs_uniform_block_data.data.light_src[light_index].spot_direction = spot_direction; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncLightDistanceAttenuationBias(int light_index) { - const f32 dist_atten_bias = - Pica::f20::FromRaw(regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); - - if (dist_atten_bias != fs_uniform_block_data.data.light_src[light_index].dist_atten_bias) { - fs_uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncLightDistanceAttenuationScale(int light_index) { - const f32 dist_atten_scale = - Pica::f20::FromRaw(regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); - - if (dist_atten_scale != fs_uniform_block_data.data.light_src[light_index].dist_atten_scale) { - fs_uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncShadowBias() { - const auto& shadow = regs.framebuffer.shadow; - const f32 constant = Pica::f16::FromRaw(shadow.constant).ToFloat32(); - const f32 linear = Pica::f16::FromRaw(shadow.linear).ToFloat32(); - - if (constant != fs_uniform_block_data.data.shadow_bias_constant || - linear != fs_uniform_block_data.data.shadow_bias_linear) { - fs_uniform_block_data.data.shadow_bias_constant = constant; - fs_uniform_block_data.data.shadow_bias_linear = linear; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncShadowTextureBias() { - const s32 bias = regs.texturing.shadow.bias << 1; - if (bias != fs_uniform_block_data.data.shadow_texture_bias) { - fs_uniform_block_data.data.shadow_texture_bias = bias; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncTextureLodBias(int tex_index) { - const auto 
pica_textures = regs.texturing.GetTextures(); - const f32 bias = pica_textures[tex_index].config.lod.bias / 256.0f; - if (bias != fs_uniform_block_data.data.tex_lod_bias[tex_index]) { - fs_uniform_block_data.data.tex_lod_bias[tex_index] = bias; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncTextureBorderColor(int tex_index) { - const auto pica_textures = regs.texturing.GetTextures(); - const auto params = pica_textures[tex_index].config; - const Common::Vec4f border_color = ColorRGBA8(params.border_color.raw); - if (border_color != fs_uniform_block_data.data.tex_border_color[tex_index]) { - fs_uniform_block_data.data.tex_border_color[tex_index] = border_color; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerAccelerated::SyncClipPlane() { - const bool enable_clip1 = regs.rasterizer.clip_enable != 0; - const auto raw_clip_coef = regs.rasterizer.GetClipCoef(); - const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), - raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()}; - if (enable_clip1 != (vs_uniform_block_data.data.enable_clip1 != 0) || - new_clip_coef != vs_uniform_block_data.data.clip_coef) { - vs_uniform_block_data.data.enable_clip1 = enable_clip1; - vs_uniform_block_data.data.clip_coef = new_clip_coef; - vs_uniform_block_data.dirty = true; - } + // We have synched all uniforms, reset dirty state. + pica.dirty_regs.Reset(); } } // namespace VideoCore diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index b82961bbf..a73f48eac 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
@@ -27,105 +27,11 @@ public: void AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1, const Pica::OutputVertex& v2) override; - void NotifyPicaRegisterChanged(u32 id) override; - - void SyncEntireState() override; +protected: + /// Sync vertex and fragment uniforms from PICA registers + void SyncDrawUniforms(); protected: - /// Sync fixed-function pipeline state - virtual void SyncFixedState() = 0; - - /// Notifies that a fixed function PICA register changed to the video backend - virtual void NotifyFixedFunctionPicaRegisterChanged(u32 id) = 0; - - /// Syncs the depth scale to match the PICA register - void SyncDepthScale(); - - /// Syncs the depth offset to match the PICA register - void SyncDepthOffset(); - - /// Syncs the fog states to match the PICA register - void SyncFogColor(); - - /// Sync the procedural texture noise configuration to match the PICA register - void SyncProcTexNoise(); - - /// Sync the procedural texture bias configuration to match the PICA register - void SyncProcTexBias(); - - /// Syncs the alpha test states to match the PICA register - void SyncAlphaTest(); - - /// Syncs the TEV combiner color buffer to match the PICA register - void SyncCombinerColor(); - - /// Syncs the TEV constant color to match the PICA register - void SyncTevConstColor(std::size_t tev_index, - const Pica::TexturingRegs::TevStageConfig& tev_stage); - - /// Syncs the lighting global ambient color to match the PICA register - void SyncGlobalAmbient(); - - /// Syncs the specified light's specular 0 color to match the PICA register - void SyncLightSpecular0(int light_index); - - /// Syncs the specified light's specular 1 color to match the PICA register - void SyncLightSpecular1(int light_index); - - /// Syncs the specified light's diffuse color to match the PICA register - void SyncLightDiffuse(int light_index); - - /// Syncs the specified light's ambient color to match the PICA register - void SyncLightAmbient(int light_index); - - /// Syncs the 
specified light's position to match the PICA register - void SyncLightPosition(int light_index); - - /// Syncs the specified spot light direcition to match the PICA register - void SyncLightSpotDirection(int light_index); - - /// Syncs the specified light's distance attenuation bias to match the PICA register - void SyncLightDistanceAttenuationBias(int light_index); - - /// Syncs the specified light's distance attenuation scale to match the PICA register - void SyncLightDistanceAttenuationScale(int light_index); - - /// Syncs the shadow rendering bias to match the PICA register - void SyncShadowBias(); - - /// Syncs the shadow texture bias to match the PICA register - void SyncShadowTextureBias(); - - /// Syncs the texture LOD bias to match the PICA register - void SyncTextureLodBias(int tex_index); - - /// Syncs the texture border color to match the PICA registers - void SyncTextureBorderColor(int tex_index); - - /// Syncs the clip plane state to match the PICA register - void SyncClipPlane(); - -protected: - /// Structure that keeps tracks of the vertex shader uniform state - struct VSUniformBlockData { - Pica::Shader::Generator::VSUniformData data{}; - bool dirty = true; - }; - - /// Structure that keeps tracks of the fragment shader uniform state - struct FSUniformBlockData { - Pica::Shader::Generator::FSUniformData data{}; - std::array lighting_lut_dirty{}; - bool lighting_lut_dirty_any = true; - bool fog_lut_dirty = true; - bool proctex_noise_lut_dirty = true; - bool proctex_color_map_dirty = true; - bool proctex_alpha_map_dirty = true; - bool proctex_lut_dirty = true; - bool proctex_diff_lut_dirty = true; - bool dirty = true; - }; - /// Structure that the hardware rendered vertices are composed of struct HardwareVertex { HardwareVertex() = default; @@ -154,21 +60,12 @@ protected: Memory::MemorySystem& memory; Pica::PicaCore& pica; Pica::RegsInternal& regs; - std::vector vertex_batch; Pica::Shader::UserConfig user_config{}; - bool shader_dirty = true; - - 
VSUniformBlockData vs_uniform_block_data{}; - FSUniformBlockData fs_uniform_block_data{}; - using LightLUT = std::array; - std::array lighting_lut_data{}; - std::array fog_lut_data{}; - std::array proctex_noise_lut_data{}; - std::array proctex_color_map_data{}; - std::array proctex_alpha_map_data{}; - std::array proctex_lut_data{}; - std::array proctex_diff_lut_data{}; + Pica::Shader::Generator::VSUniformData vs_data{}; + Pica::Shader::Generator::FSUniformData fs_data{}; + bool vs_data_dirty = true; + bool fs_data_dirty = true; }; } // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 9f024ad2c..01b82f2f0 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -39,9 +39,6 @@ public: /// Draw the current batch of triangles virtual void DrawTriangles() = 0; - /// Notify rasterizer that the specified PICA register has been changed - virtual void NotifyPicaRegisterChanged(u32 id) = 0; - /// Notify rasterizer that all caches should be flushed to 3DS memory virtual void FlushAll() = 0; @@ -88,8 +85,6 @@ public: switch_disk_resources_callback = callback; } - virtual void SyncEntireState() {} - void SetAccurateMul(bool accurate_mul_) { accurate_mul = accurate_mul_; } diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 55cb90d0b..a44d513fd 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -1,4 +1,4 @@ -// Copyright 2014 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
@@ -60,9 +60,6 @@ public: /// Cleans up after video dumping is ended virtual void CleanupVideoDumping() {} - /// Synchronizes fixed function renderer state - virtual void Sync() {} - /// This is called to notify the rendering backend of a surface change virtual void NotifySurfaceChanged() {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c39a0a10a..6f3d5d79d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -164,8 +164,6 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory, Pica::PicaCore& glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle()); glEnable(GL_BLEND); - - SyncEntireState(); } RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -256,18 +254,113 @@ void RasterizerOpenGL::SwitchDiskResources(u64 title_id) { } } -void RasterizerOpenGL::SyncFixedState() { - SyncClipEnabled(); - SyncCullMode(); - SyncBlendEnabled(); - SyncBlendFuncs(); - SyncBlendColor(); - SyncLogicOp(); - SyncStencilTest(); - SyncDepthTest(); - SyncColorWriteMask(); - SyncStencilWriteMask(); - SyncDepthWriteMask(); +void RasterizerOpenGL::SyncDrawState() { + SyncDrawUniforms(); + + // SyncClipEnabled(); + state.clip_distance[1] = regs.rasterizer.clip_enable != 0; + // SyncCullMode(); + state.cull.enabled = regs.rasterizer.cull_mode != Pica::RasterizerRegs::CullMode::KeepAll; + if (state.cull.enabled) { + state.cull.front_face = + regs.rasterizer.cull_mode == Pica::RasterizerRegs::CullMode::KeepClockWise ? GL_CW + : GL_CCW; + } + // If the framebuffer is flipped, vertex shader flips vertex y, so invert culling + const bool is_flipped = regs.framebuffer.framebuffer.IsFlipped(); + state.cull.mode = is_flipped && state.cull.enabled ? 
GL_FRONT : GL_BACK; + // SyncBlendEnabled(); + state.blend.enabled = (regs.framebuffer.output_merger.alphablend_enable == 1); + // SyncBlendFuncs(); + const bool has_minmax_factor = driver.HasBlendMinMaxFactor(); + state.blend.rgb_equation = PicaToGL::BlendEquation( + regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb, has_minmax_factor); + state.blend.a_equation = PicaToGL::BlendEquation( + regs.framebuffer.output_merger.alpha_blending.blend_equation_a, has_minmax_factor); + state.blend.src_rgb_func = + PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); + state.blend.dst_rgb_func = + PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); + state.blend.src_a_func = + PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a); + state.blend.dst_a_func = + PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a); + if (!has_minmax_factor) { + // Blending with min/max equations is emulated in the fragment shader so + // configure blending to not modify the incoming fragment color. 
+ emulate_minmax_blend = false; + if (state.EmulateColorBlend()) { + emulate_minmax_blend = true; + state.blend.rgb_equation = GL_FUNC_ADD; + state.blend.src_rgb_func = GL_ONE; + state.blend.dst_rgb_func = GL_ZERO; + } + if (state.EmulateAlphaBlend()) { + emulate_minmax_blend = true; + state.blend.a_equation = GL_FUNC_ADD; + state.blend.src_a_func = GL_ONE; + state.blend.dst_a_func = GL_ZERO; + } + } + // SyncBlendColor(); + const auto blend_color = PicaToGL::ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw); + state.blend.color.red = blend_color[0]; + state.blend.color.green = blend_color[1]; + state.blend.color.blue = blend_color[2]; + state.blend.color.alpha = blend_color[3]; + // SyncLogicOp(); + // SyncColorWriteMask(); + state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op); + if (driver.IsOpenGLES() && !regs.framebuffer.output_merger.alphablend_enable && + regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { + // Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. + state.color_mask = {}; + } else { + auto is_color_write_enabled = [&](u32 value) { + return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? 
GL_TRUE + : GL_FALSE; + }; + state.color_mask.red_enabled = + is_color_write_enabled(regs.framebuffer.output_merger.red_enable); + state.color_mask.green_enabled = + is_color_write_enabled(regs.framebuffer.output_merger.green_enable); + state.color_mask.blue_enabled = + is_color_write_enabled(regs.framebuffer.output_merger.blue_enable); + state.color_mask.alpha_enabled = + is_color_write_enabled(regs.framebuffer.output_merger.alpha_enable); + } + // SyncStencilTest(); + state.stencil.test_enabled = + regs.framebuffer.output_merger.stencil_test.enable && + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + state.stencil.test_func = + PicaToGL::CompareFunc(regs.framebuffer.output_merger.stencil_test.func); + state.stencil.test_ref = regs.framebuffer.output_merger.stencil_test.reference_value; + state.stencil.test_mask = regs.framebuffer.output_merger.stencil_test.input_mask; + state.stencil.action_stencil_fail = + PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_stencil_fail); + state.stencil.action_depth_fail = + PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail); + state.stencil.action_depth_pass = + PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass); + // SyncDepthTest(); + state.depth.test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1; + state.depth.test_func = + regs.framebuffer.output_merger.depth_test_enable == 1 + ? PicaToGL::CompareFunc(regs.framebuffer.output_merger.depth_test_func) + : GL_ALWAYS; + // SyncStencilWriteMask(); + state.stencil.write_mask = + (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? 
static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0; + // SyncDepthWriteMask(); + state.depth.write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable) + ? GL_TRUE + : GL_FALSE; } void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, @@ -448,6 +541,7 @@ void RasterizerOpenGL::DrawTriangles() { bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { MICROPROFILE_SCOPE(OpenGL_Drawing); + SyncDrawState(); const bool shadow_rendering = regs.framebuffer.IsShadowRendering(); const bool has_stencil = regs.framebuffer.HasStencil(); @@ -487,12 +581,6 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { state.viewport.width = static_cast(viewport.width); state.viewport.height = static_cast(viewport.height); - // If the framebuffer is flipped, request vertex shader to flip vertex y - const bool is_flipped = regs.framebuffer.framebuffer.IsFlipped(); - vs_uniform_block_data.dirty |= (vs_uniform_block_data.data.flip_viewport != 0) != is_flipped; - vs_uniform_block_data.data.flip_viewport = is_flipped; - state.cull.mode = is_flipped && state.cull.enabled ? GL_FRONT : GL_BACK; - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. 
// Enable scissor test to prevent drawing outside of the framebuffer region const auto draw_rect = fb_helper.DrawRect(); @@ -504,16 +592,14 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { // Update scissor uniforms const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); - if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 || - fs_uniform_block_data.data.scissor_x2 != scissor_x2 || - fs_uniform_block_data.data.scissor_y1 != scissor_y1 || - fs_uniform_block_data.data.scissor_y2 != scissor_y2) { + if (fs_data.scissor_x1 != scissor_x1 || fs_data.scissor_x2 != scissor_x2 || + fs_data.scissor_y1 != scissor_y1 || fs_data.scissor_y2 != scissor_y2) { - fs_uniform_block_data.data.scissor_x1 = scissor_x1; - fs_uniform_block_data.data.scissor_x2 = scissor_x2; - fs_uniform_block_data.data.scissor_y1 = scissor_y1; - fs_uniform_block_data.data.scissor_y2 = scissor_y2; - fs_uniform_block_data.dirty = true; + fs_data.scissor_x1 = scissor_x1; + fs_data.scissor_x2 = scissor_x2; + fs_data.scissor_y1 = scissor_y1; + fs_data.scissor_y2 = scissor_y2; + fs_data_dirty = true; } // Sync and bind the texture surfaces @@ -521,10 +607,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { state.Apply(); // Sync and bind the shader - if (shader_dirty) { - curr_shader_manager->UseFragmentShader(regs, user_config); - shader_dirty = false; - } + curr_shader_manager->UseFragmentShader(regs, user_config); // Sync the LUTs within the texture buffer SyncAndUploadLUTs(); @@ -708,73 +791,6 @@ void RasterizerOpenGL::UnbindSpecial() { state.image_shadow_buffer = 0; } -void RasterizerOpenGL::NotifyFixedFunctionPicaRegisterChanged(u32 id) { - switch (id) { - // Clipping plane - case PICA_REG_INDEX(rasterizer.clip_enable): - SyncClipEnabled(); - break; - - // Culling - case PICA_REG_INDEX(rasterizer.cull_mode): - SyncCullMode(); - break; - - // Blending - case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): - SyncBlendEnabled(); - 
// Update since logic op emulation depends on alpha blend enable. - SyncLogicOp(); - SyncColorWriteMask(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): - SyncBlendFuncs(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.blend_const): - SyncBlendColor(); - break; - - // Sync GL stencil test + stencil write mask - // (Pica stencil test function register also contains a stencil write mask) - case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func): - SyncStencilTest(); - SyncStencilWriteMask(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op): - case PICA_REG_INDEX(framebuffer.framebuffer.depth_format): - SyncStencilTest(); - break; - - // Sync GL depth test + depth and color write mask - // (Pica depth test function register also contains a depth and color write mask) - case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable): - SyncDepthTest(); - SyncDepthWriteMask(); - SyncColorWriteMask(); - break; - - // Sync GL depth and stencil write mask - // (This is a dedicated combined depth / stencil write-enable register) - case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write): - SyncDepthWriteMask(); - SyncStencilWriteMask(); - break; - - // Sync GL color write mask - // (This is a dedicated color write-enable register) - case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write): - SyncColorWriteMask(); - break; - - // Logic op - case PICA_REG_INDEX(framebuffer.output_merger.logic_op): - SyncLogicOp(); - // Update since color write mask is used to emulate no-op. 
- SyncColorWriteMask(); - break; - } -} - void RasterizerOpenGL::FlushAll() { res_cache.FlushAll(); } @@ -852,170 +868,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Pica::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SyncClipEnabled() { - state.clip_distance[1] = regs.rasterizer.clip_enable != 0; -} - -void RasterizerOpenGL::SyncCullMode() { - switch (regs.rasterizer.cull_mode) { - case Pica::RasterizerRegs::CullMode::KeepAll: - state.cull.enabled = false; - break; - case Pica::RasterizerRegs::CullMode::KeepClockWise: - state.cull.enabled = true; - state.cull.front_face = GL_CW; - break; - case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: - state.cull.enabled = true; - state.cull.front_face = GL_CCW; - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unknown cull mode {}", - static_cast(regs.rasterizer.cull_mode.Value())); - UNIMPLEMENTED(); - break; - } -} - -void RasterizerOpenGL::SyncBlendEnabled() { - state.blend.enabled = (regs.framebuffer.output_merger.alphablend_enable == 1); -} - -void RasterizerOpenGL::SyncBlendFuncs() { - const bool has_minmax_factor = driver.HasBlendMinMaxFactor(); - - state.blend.rgb_equation = PicaToGL::BlendEquation( - regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb, has_minmax_factor); - state.blend.a_equation = PicaToGL::BlendEquation( - regs.framebuffer.output_merger.alpha_blending.blend_equation_a, has_minmax_factor); - state.blend.src_rgb_func = - PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); - state.blend.dst_rgb_func = - PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); - state.blend.src_a_func = - PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a); - state.blend.dst_a_func = - PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a); - - if (has_minmax_factor) { - return; - } - - // Blending with min/max equations is emulated in the fragment 
shader so - // configure blending to not modify the incoming fragment color. - emulate_minmax_blend = false; - if (state.EmulateColorBlend()) { - emulate_minmax_blend = true; - state.blend.rgb_equation = GL_FUNC_ADD; - state.blend.src_rgb_func = GL_ONE; - state.blend.dst_rgb_func = GL_ZERO; - } - if (state.EmulateAlphaBlend()) { - emulate_minmax_blend = true; - state.blend.a_equation = GL_FUNC_ADD; - state.blend.src_a_func = GL_ONE; - state.blend.dst_a_func = GL_ZERO; - } -} - -void RasterizerOpenGL::SyncBlendColor() { - const auto blend_color = PicaToGL::ColorRGBA8(regs.framebuffer.output_merger.blend_const.raw); - state.blend.color.red = blend_color[0]; - state.blend.color.green = blend_color[1]; - state.blend.color.blue = blend_color[2]; - state.blend.color.alpha = blend_color[3]; - - if (blend_color != fs_uniform_block_data.data.blend_color) { - fs_uniform_block_data.data.blend_color = blend_color; - fs_uniform_block_data.dirty = true; - } -} - -void RasterizerOpenGL::SyncLogicOp() { - state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op); - - if (driver.IsOpenGLES()) { - if (!regs.framebuffer.output_merger.alphablend_enable) { - if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { - // Color output is disabled by logic operation. We use color write mask to skip - // color but allow depth write. - state.color_mask = {}; - } - } - } -} - -void RasterizerOpenGL::SyncColorWriteMask() { - if (driver.IsOpenGLES()) { - if (!regs.framebuffer.output_merger.alphablend_enable) { - if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { - // Color output is disabled by logic operation. We use color write mask to skip - // color but allow depth write. Return early to avoid overwriting this. - return; - } - } - } - - auto is_color_write_enabled = [&](u32 value) { - return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? 
GL_TRUE - : GL_FALSE; - }; - - state.color_mask.red_enabled = - is_color_write_enabled(regs.framebuffer.output_merger.red_enable); - state.color_mask.green_enabled = - is_color_write_enabled(regs.framebuffer.output_merger.green_enable); - state.color_mask.blue_enabled = - is_color_write_enabled(regs.framebuffer.output_merger.blue_enable); - state.color_mask.alpha_enabled = - is_color_write_enabled(regs.framebuffer.output_merger.alpha_enable); -} - -void RasterizerOpenGL::SyncStencilWriteMask() { - state.stencil.write_mask = - (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) - ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) - : 0; -} - -void RasterizerOpenGL::SyncDepthWriteMask() { - state.depth.write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && - regs.framebuffer.output_merger.depth_write_enable) - ? GL_TRUE - : GL_FALSE; -} - -void RasterizerOpenGL::SyncStencilTest() { - state.stencil.test_enabled = - regs.framebuffer.output_merger.stencil_test.enable && - regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; - state.stencil.test_func = - PicaToGL::CompareFunc(regs.framebuffer.output_merger.stencil_test.func); - state.stencil.test_ref = regs.framebuffer.output_merger.stencil_test.reference_value; - state.stencil.test_mask = regs.framebuffer.output_merger.stencil_test.input_mask; - state.stencil.action_stencil_fail = - PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_stencil_fail); - state.stencil.action_depth_fail = - PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail); - state.stencil.action_depth_pass = - PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass); -} - -void RasterizerOpenGL::SyncDepthTest() { - state.depth.test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || - regs.framebuffer.output_merger.depth_write_enable == 1; - state.depth.test_func = - 
regs.framebuffer.output_merger.depth_test_enable == 1 - ? PicaToGL::CompareFunc(regs.framebuffer.output_merger.depth_test_func) - : GL_ALWAYS; -} - void RasterizerOpenGL::SyncAndUploadLUTsLF() { constexpr std::size_t max_size = sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(Common::Vec2f) * 128; // fog - if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) { + if (!pica.lighting.lut_dirty && !pica.fog.lut_dirty) { return; } @@ -1024,50 +882,37 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() { const auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); - // Sync the lighting luts - if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) { - for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) { - if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) { - std::array new_data; - const auto& source_lut = pica.lighting.luts[index]; - std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), - [](const auto& entry) { - return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; - }); + if (invalidate) { + pica.lighting.lut_dirty = pica.lighting.LutAllDirty; + pica.fog.lut_dirty = true; + } - if (new_data != lighting_lut_data[index] || invalidate) { - lighting_lut_data[index] = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec2f)); - fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = - static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec2f); - } - fs_uniform_block_data.lighting_lut_dirty[index] = false; - } + // Sync the lighting luts + while (pica.lighting.lut_dirty) { + const u32 index = std::countr_zero(pica.lighting.lut_dirty); + pica.lighting.lut_dirty &= ~(1 << index); + + Common::Vec2f* new_data = reinterpret_cast(buffer + 
bytes_used); + const auto& source_lut = pica.lighting.luts[index]; + for (u32 i = 0; i < source_lut.size(); i++) { + new_data[i] = {source_lut[i].ToFloat(), source_lut[i].DiffToFloat()}; } - fs_uniform_block_data.lighting_lut_dirty_any = false; + fs_data.lighting_lut_offset[index / 4][index % 4] = + static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + fs_data_dirty = true; + bytes_used += source_lut.size() * sizeof(Common::Vec2f); } // Sync the fog lut - if (fs_uniform_block_data.fog_lut_dirty || invalidate) { - std::array new_data; - - std::transform( - pica.fog.lut.begin(), pica.fog.lut.end(), new_data.begin(), - [](const auto& entry) { return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; }); - - if (new_data != fog_lut_data || invalidate) { - fog_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec2f)); - fs_uniform_block_data.data.fog_lut_offset = - static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec2f); + if (pica.fog.lut_dirty) { + Common::Vec2f* new_data = reinterpret_cast(buffer + bytes_used); + for (u32 i = 0; i < pica.fog.lut.size(); i++) { + new_data[i] = {pica.fog.lut[i].ToFloat(), pica.fog.lut[i].DiffToFloat()}; } - fs_uniform_block_data.fog_lut_dirty = false; + fs_data.fog_lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + fs_data_dirty = true; + bytes_used += pica.fog.lut.size() * sizeof(Common::Vec2f); + pica.fog.lut_dirty = false; } texture_lf_buffer.Unmap(bytes_used); @@ -1079,10 +924,7 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { sizeof(Common::Vec4f) * 256 + // proctex sizeof(Common::Vec4f) * 256; // proctex diff - if (!fs_uniform_block_data.proctex_noise_lut_dirty && - !fs_uniform_block_data.proctex_color_map_dirty && - !fs_uniform_block_data.proctex_alpha_map_dirty && - !fs_uniform_block_data.proctex_lut_dirty && 
!fs_uniform_block_data.proctex_diff_lut_dirty) { + if (!pica.proctex.table_dirty) { return; } @@ -1090,90 +932,62 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle()); const auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f)); - // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap - const auto sync_proc_tex_value_lut = - [this, buffer = buffer, offset = offset, invalidate = invalidate, &bytes_used]( - const auto& lut, std::array& lut_data, GLint& lut_offset) { - std::array new_data; - std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { - return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; - }); + if (invalidate) { + pica.proctex.table_dirty = pica.proctex.TableAllDirty; + } - if (new_data != lut_data || invalidate) { - lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec2f)); - lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec2f); - } - }; + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap + const auto sync_proc_tex_value_lut = [&](const auto& lut, GLint& lut_offset) { + Common::Vec2f* new_data = reinterpret_cast(buffer + bytes_used); + for (u32 i = 0; i < lut.size(); i++) { + new_data[i] = {lut[i].ToFloat(), lut[i].DiffToFloat()}; + } + lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + fs_data_dirty = true; + bytes_used += lut.size() * sizeof(Common::Vec2f); + }; // Sync the proctex noise lut - if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) { - sync_proc_tex_value_lut(pica.proctex.noise_table, proctex_noise_lut_data, - fs_uniform_block_data.data.proctex_noise_lut_offset); - fs_uniform_block_data.proctex_noise_lut_dirty = false; + if (pica.proctex.noise_lut_dirty) { + 
sync_proc_tex_value_lut(pica.proctex.noise_table, fs_data.proctex_noise_lut_offset); } // Sync the proctex color map - if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) { - sync_proc_tex_value_lut(pica.proctex.color_map_table, proctex_color_map_data, - fs_uniform_block_data.data.proctex_color_map_offset); - fs_uniform_block_data.proctex_color_map_dirty = false; + if (pica.proctex.color_map_dirty) { + sync_proc_tex_value_lut(pica.proctex.color_map_table, fs_data.proctex_color_map_offset); } // Sync the proctex alpha map - if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) { - sync_proc_tex_value_lut(pica.proctex.alpha_map_table, proctex_alpha_map_data, - fs_uniform_block_data.data.proctex_alpha_map_offset); - fs_uniform_block_data.proctex_alpha_map_dirty = false; + if (pica.proctex.alpha_map_dirty) { + sync_proc_tex_value_lut(pica.proctex.alpha_map_table, fs_data.proctex_alpha_map_offset); } // Sync the proctex lut - if (fs_uniform_block_data.proctex_lut_dirty || invalidate) { - std::array new_data; - - std::transform(pica.proctex.color_table.begin(), pica.proctex.color_table.end(), - new_data.begin(), [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_lut_data || invalidate) { - proctex_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec4f)); - fs_uniform_block_data.data.proctex_lut_offset = - static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec4f); + if (pica.proctex.lut_dirty) { + Common::Vec4f* new_data = reinterpret_cast(buffer + bytes_used); + for (u32 i = 0; i < pica.proctex.color_table.size(); i++) { + new_data[i] = pica.proctex.color_table[i].ToVector() / 255.0f; } - fs_uniform_block_data.proctex_lut_dirty = false; + fs_data.proctex_lut_offset = + 
static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + fs_data_dirty = true; + bytes_used += pica.proctex.color_table.size() * sizeof(Common::Vec4f); } // Sync the proctex difference lut - if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) { - std::array new_data; - - std::transform(pica.proctex.color_diff_table.begin(), pica.proctex.color_diff_table.end(), - new_data.begin(), [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_diff_lut_data || invalidate) { - proctex_diff_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec4f)); - fs_uniform_block_data.data.proctex_diff_lut_offset = - static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec4f); + if (pica.proctex.diff_lut_dirty) { + Common::Vec4f* new_data = reinterpret_cast(buffer + bytes_used); + for (u32 i = 0; i < pica.proctex.color_diff_table.size(); i++) { + new_data[i] = pica.proctex.color_diff_table[i].ToVector() / 255.0f; } - fs_uniform_block_data.proctex_diff_lut_dirty = false; + fs_data.proctex_diff_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + fs_data_dirty = true; + bytes_used += pica.proctex.color_diff_table.size() * sizeof(Common::Vec4f); } + pica.proctex.table_dirty = 0; + texture_buffer.Unmap(bytes_used); } @@ -1182,10 +996,8 @@ void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) { state.draw.uniform_buffer = uniform_buffer.GetHandle(); state.Apply(); - const bool sync_vs_pica = accelerate_draw; - const bool sync_vs = vs_uniform_block_data.dirty; - const bool sync_fs = fs_uniform_block_data.dirty; - if (!sync_vs_pica && !sync_vs && !sync_fs) { + const bool sync_vs_pica = accelerate_draw && pica.vs_setup.uniforms_dirty; + if (!sync_vs_pica && !vs_data_dirty && !fs_data_dirty) { 
return; } @@ -1196,30 +1008,29 @@ void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) { const auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size, uniform_buffer_alignment); - if (sync_vs || invalidate) { - std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data, - sizeof(vs_uniform_block_data.data)); + if (vs_data_dirty || invalidate) { + std::memcpy(uniforms + used_bytes, &vs_data, sizeof(vs_data)); glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::VSData, uniform_buffer.GetHandle(), - offset + used_bytes, sizeof(vs_uniform_block_data.data)); - vs_uniform_block_data.dirty = false; + offset + used_bytes, sizeof(vs_data)); + vs_data_dirty = false; used_bytes += uniform_size_aligned_vs; } - if (sync_fs || invalidate) { - std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data, - sizeof(fs_uniform_block_data.data)); + if (fs_data_dirty || invalidate) { + std::memcpy(uniforms + used_bytes, &fs_data, sizeof(fs_data)); glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::FSData, uniform_buffer.GetHandle(), - offset + used_bytes, sizeof(fs_uniform_block_data.data)); - fs_uniform_block_data.dirty = false; + offset + used_bytes, sizeof(fs_data)); + fs_data_dirty = false; used_bytes += uniform_size_aligned_fs; } - if (sync_vs_pica) { + if (sync_vs_pica || invalidate) { VSPicaUniformData vs_uniforms; - vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup); + vs_uniforms.SetFromRegs(pica.vs_setup); std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); glBindBufferRange(GL_UNIFORM_BUFFER, UniformBindings::VSPicaData, uniform_buffer.GetHandle(), offset + used_bytes, sizeof(vs_uniforms)); + pica.vs_setup.uniforms_dirty = false; used_bytes += uniform_size_aligned_vs_pica; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index deb3ccb3f..b529a56b2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -58,41 +58,8 @@ public: bool AccelerateDrawBatch(bool is_indexed) override; private: - void SyncFixedState() override; - void NotifyFixedFunctionPicaRegisterChanged(u32 id) override; - - /// Syncs the clip enabled status to match the PICA register - void SyncClipEnabled(); - - /// Syncs the cull mode to match the PICA register - void SyncCullMode(); - - /// Syncs the blend enabled status to match the PICA register - void SyncBlendEnabled(); - - /// Syncs the blend functions to match the PICA register - void SyncBlendFuncs(); - - /// Syncs the blend color to match the PICA register - void SyncBlendColor(); - - /// Syncs the logic op states to match the PICA register - void SyncLogicOp(); - - /// Syncs the color write mask to match the PICA register state - void SyncColorWriteMask(); - - /// Syncs the stencil write mask to match the PICA register state - void SyncStencilWriteMask(); - - /// Syncs the depth write mask to match the PICA register state - void SyncDepthWriteMask(); - - /// Syncs the stencil test states to match the PICA register - void SyncStencilTest(); - - /// Syncs the depth test states to match the PICA register - void SyncDepthTest(); + /// Syncs pipeline state from PICA registers + void SyncDrawState(); /// Syncs and uploads the lighting, fog and proctex LUTs void SyncAndUploadLUTs(); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2d70f51ba..03c4ce3e3 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -901,8 +901,4 @@ void RendererOpenGL::CleanupVideoDumping() { mailbox->free_cv.notify_one(); } -void RendererOpenGL::Sync() { - rasterizer.SyncEntireState(); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 7885a2f5b..ebbebf606 100644 --- 
a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -1,4 +1,4 @@ -// Copyright 2022 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -53,7 +53,6 @@ public: void TryPresent(int timeout_ms, bool is_secondary) override; void PrepareVideoDumping() override; void CleanupVideoDumping() override; - void Sync() override; private: void InitOpenGLObjects(); diff --git a/src/video_core/renderer_software/renderer_software.h b/src/video_core/renderer_software/renderer_software.h index 4f1d9d370..2c1b61fb6 100644 --- a/src/video_core/renderer_software/renderer_software.h +++ b/src/video_core/renderer_software/renderer_software.h @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -35,7 +35,6 @@ public: void SwapBuffers() override; void TryPresent(int timeout_ms, bool is_secondary) override {} - void Sync() override {} private: void PrepareRenderTarget(); diff --git a/src/video_core/renderer_software/sw_rasterizer.h b/src/video_core/renderer_software/sw_rasterizer.h index 026dc8b76..ab1244af9 100644 --- a/src/video_core/renderer_software/sw_rasterizer.h +++ b/src/video_core/renderer_software/sw_rasterizer.h @@ -1,4 +1,4 @@ -// Copyright 2015 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
@@ -27,7 +27,6 @@ public: void AddTriangle(const Pica::OutputVertex& v0, const Pica::OutputVertex& v1, const Pica::OutputVertex& v2) override; void DrawTriangles() override {} - void NotifyPicaRegisterChanged(u32 id) override {} void FlushAll() override {} void FlushRegion(PAddr addr, u32 size) override {} void InvalidateRegion(PAddr addr, u32 size) override {} diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 0324561f1..12bbf7df3 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -92,10 +92,6 @@ RendererVulkan::~RendererVulkan() { } } -void RendererVulkan::Sync() { - rasterizer.SyncEntireState(); -} - void RendererVulkan::PrepareRendertarget() { const auto& framebuffer_config = pica.regs.framebuffer_config; const auto& regs_lcd = pica.regs_lcd; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index b52142e88..a5ad3a72f 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
@@ -80,7 +80,6 @@ public: void SwapBuffers() override; void TryPresent(int timeout_ms, bool is_secondary) override {} - void Sync() override; private: void ReloadPipeline(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b0ba424bd..adcad826e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -100,8 +100,6 @@ bool GraphicsPipeline::TryBuild(bool wait_built) { bool GraphicsPipeline::Build(bool fail_on_compile_required) { MICROPROFILE_SCOPE(Vulkan_Pipeline); - const vk::Device device = instance.GetDevice(); - std::array bindings; for (u32 i = 0; i < info.vertex_layout.binding_count; i++) { const auto& binding = info.vertex_layout.bindings[i]; @@ -273,7 +271,7 @@ bool GraphicsPipeline::Build(bool fail_on_compile_required) { pipeline_info.flags |= vk::PipelineCreateFlagBits::eFailOnPipelineCompileRequiredEXT; } - auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); + auto result = instance.GetDevice().createGraphicsPipelineUnique(pipeline_cache, pipeline_info); if (result.result == vk::Result::eSuccess) { pipeline = std::move(result.value); } else if (result.result == vk::Result::eErrorPipelineCompileRequiredEXT) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 942f2ec25..89f1a089b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -133,8 +133,6 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& update_queue.AddImageSampler(utility_set, 1, 0, null_surface.ImageView(), null_sampler.Handle()); update_queue.Flush(); - - SyncEntireState(); } RasterizerVulkan::~RasterizerVulkan() = default; @@ -148,17 +146,78 @@ void RasterizerVulkan::LoadDefaultDiskResources( pipeline_cache.LoadDiskCache(); } -void 
RasterizerVulkan::SyncFixedState() { - SyncCullMode(); - SyncBlendEnabled(); - SyncBlendFuncs(); - SyncBlendColor(); - SyncLogicOp(); - SyncStencilTest(); - SyncDepthTest(); - SyncColorWriteMask(); - SyncStencilWriteMask(); - SyncDepthWriteMask(); +void RasterizerVulkan::SyncDrawState() { + SyncDrawUniforms(); + + // SyncCullMode(); + pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode); + // If the framebuffer is flipped, request to also flip vulkan viewport + const bool is_flipped = regs.framebuffer.framebuffer.IsFlipped(); + pipeline_info.rasterization.flip_viewport.Assign(is_flipped); + // SyncBlendEnabled(); + pipeline_info.blending.blend_enable = regs.framebuffer.output_merger.alphablend_enable; + // SyncBlendFuncs(); + pipeline_info.blending.color_blend_eq.Assign( + regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); + pipeline_info.blending.alpha_blend_eq.Assign( + regs.framebuffer.output_merger.alpha_blending.blend_equation_a); + pipeline_info.blending.src_color_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); + pipeline_info.blending.dst_color_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); + pipeline_info.blending.src_alpha_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_source_a); + pipeline_info.blending.dst_alpha_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_dest_a); + // SyncBlendColor(); + pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw; + // SyncLogicOp(); + // SyncColorWriteMask(); + pipeline_info.blending.logic_op = regs.framebuffer.output_merger.logic_op; + const bool is_logic_op_emulated = + instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable; + const bool is_logic_op_noop = + regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp; + if (is_logic_op_emulated && is_logic_op_noop) { + 
// Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. + pipeline_info.blending.color_write_mask = 0; + } else { + const u32 color_mask = regs.framebuffer.framebuffer.allow_color_write != 0 + ? (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF + : 0; + pipeline_info.blending.color_write_mask = color_mask; + } + // SyncStencilTest(); + const auto& stencil_test = regs.framebuffer.output_merger.stencil_test; + const bool test_enable = stencil_test.enable && regs.framebuffer.framebuffer.depth_format == + Pica::FramebufferRegs::DepthFormat::D24S8; + + pipeline_info.depth_stencil.stencil_test_enable.Assign(test_enable); + pipeline_info.depth_stencil.stencil_fail_op.Assign(stencil_test.action_stencil_fail); + pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass); + pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail); + pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func); + pipeline_info.dynamic.stencil_reference = stencil_test.reference_value; + pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask; + // SyncStencilWriteMask(); + pipeline_info.dynamic.stencil_write_mask = + (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0; + // SyncDepthTest(); + const bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1; + const auto compare_op = regs.framebuffer.output_merger.depth_test_enable == 1 + ? 
regs.framebuffer.output_merger.depth_test_func.Value() + : Pica::FramebufferRegs::CompareFunc::Always; + + pipeline_info.depth_stencil.depth_test_enable.Assign(test_enabled); + pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op); + // SyncDepthWriteMask(); + const bool write_enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable); + pipeline_info.depth_stencil.depth_write_enable.Assign(write_enable); } void RasterizerVulkan::SetupVertexArray() { @@ -463,6 +522,7 @@ void RasterizerVulkan::DrawTriangles() { bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { MICROPROFILE_SCOPE(Vulkan_Drawing); + SyncDrawState(); const bool shadow_rendering = regs.framebuffer.IsShadowRendering(); const bool has_stencil = regs.framebuffer.HasStencil(); @@ -487,16 +547,14 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // Update scissor uniforms const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); - if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 || - fs_uniform_block_data.data.scissor_x2 != scissor_x2 || - fs_uniform_block_data.data.scissor_y1 != scissor_y1 || - fs_uniform_block_data.data.scissor_y2 != scissor_y2) { + if (fs_data.scissor_x1 != scissor_x1 || fs_data.scissor_x2 != scissor_x2 || + fs_data.scissor_y1 != scissor_y1 || fs_data.scissor_y2 != scissor_y2) { - fs_uniform_block_data.data.scissor_x1 = scissor_x1; - fs_uniform_block_data.data.scissor_x2 = scissor_x2; - fs_uniform_block_data.data.scissor_y1 = scissor_y1; - fs_uniform_block_data.data.scissor_y2 = scissor_y2; - fs_uniform_block_data.dirty = true; + fs_data.scissor_x1 = scissor_x1; + fs_data.scissor_x2 = scissor_x2; + fs_data.scissor_y1 = scissor_y1; + fs_data.scissor_y2 = scissor_y2; + fs_data_dirty = true; } // Sync and bind the texture surfaces @@ -504,16 +562,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { SyncUtilityTextures(framebuffer); // 
Sync and bind the shader - if (shader_dirty) { - pipeline_cache.UseFragmentShader(regs, user_config); - shader_dirty = false; - } - - // If the framebuffer is flipped, request to also flip vulkan viewport - const bool is_flipped = regs.framebuffer.framebuffer.IsFlipped(); - vs_uniform_block_data.dirty |= (vs_uniform_block_data.data.flip_viewport != 0) != is_flipped; - vs_uniform_block_data.data.flip_viewport = is_flipped; - pipeline_info.rasterization.flip_viewport.Assign(is_flipped); + pipeline_cache.UseFragmentShader(regs, user_config); // Sync the LUTs within the texture buffer SyncAndUploadLUTs(); @@ -667,68 +716,6 @@ void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureCon update_queue.AddImageSampler(texture_set, 0, 0, surface.ImageView(), sampler.Handle()); } -void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) { - switch (id) { - // Culling - case PICA_REG_INDEX(rasterizer.cull_mode): - SyncCullMode(); - break; - - // Blending - case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): - SyncBlendEnabled(); - // Update since logic op emulation depends on alpha blend enable. 
- SyncLogicOp(); - SyncColorWriteMask(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): - SyncBlendFuncs(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.blend_const): - SyncBlendColor(); - break; - - // Sync VK stencil test + stencil write mask - // (Pica stencil test function register also contains a stencil write mask) - case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func): - SyncStencilTest(); - SyncStencilWriteMask(); - break; - case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op): - case PICA_REG_INDEX(framebuffer.framebuffer.depth_format): - SyncStencilTest(); - break; - - // Sync VK depth test + depth and color write mask - // (Pica depth test function register also contains a depth and color write mask) - case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable): - SyncDepthTest(); - SyncDepthWriteMask(); - SyncColorWriteMask(); - break; - - // Sync VK depth and stencil write mask - // (This is a dedicated combined depth / stencil write-enable register) - case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write): - SyncDepthWriteMask(); - SyncStencilWriteMask(); - break; - - // Sync VK color write mask - // (This is a dedicated color write-enable register) - case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write): - SyncColorWriteMask(); - break; - - // Logic op - case PICA_REG_INDEX(framebuffer.output_merger.logic_op): - SyncLogicOp(); - // Update since color write mask is used to emulate no-op. 
- SyncColorWriteMask(); - break; - } -} - void RasterizerVulkan::FlushAll() { res_cache.FlushAll(); } @@ -825,164 +812,49 @@ void RasterizerVulkan::MakeSoftwareVertexLayout() { } } -void RasterizerVulkan::SyncCullMode() { - pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode); -} - -void RasterizerVulkan::SyncBlendEnabled() { - pipeline_info.blending.blend_enable = regs.framebuffer.output_merger.alphablend_enable; -} - -void RasterizerVulkan::SyncBlendFuncs() { - pipeline_info.blending.color_blend_eq.Assign( - regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); - pipeline_info.blending.alpha_blend_eq.Assign( - regs.framebuffer.output_merger.alpha_blending.blend_equation_a); - pipeline_info.blending.src_color_blend_factor.Assign( - regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); - pipeline_info.blending.dst_color_blend_factor.Assign( - regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); - pipeline_info.blending.src_alpha_blend_factor.Assign( - regs.framebuffer.output_merger.alpha_blending.factor_source_a); - pipeline_info.blending.dst_alpha_blend_factor.Assign( - regs.framebuffer.output_merger.alpha_blending.factor_dest_a); -} - -void RasterizerVulkan::SyncBlendColor() { - pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw; -} - -void RasterizerVulkan::SyncLogicOp() { - if (instance.NeedsLogicOpEmulation()) { - // We need this in the fragment shader to emulate logic operations - shader_dirty = true; - } - - pipeline_info.blending.logic_op = regs.framebuffer.output_merger.logic_op; - - const bool is_logic_op_emulated = - instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable; - const bool is_logic_op_noop = - regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp; - if (is_logic_op_emulated && is_logic_op_noop) { - // Color output is disabled by logic operation. 
We use color write mask to skip - // color but allow depth write. - pipeline_info.blending.color_write_mask = 0; - } -} - -void RasterizerVulkan::SyncColorWriteMask() { - const u32 color_mask = regs.framebuffer.framebuffer.allow_color_write != 0 - ? (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF - : 0; - - const bool is_logic_op_emulated = - instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable; - const bool is_logic_op_noop = - regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp; - if (is_logic_op_emulated && is_logic_op_noop) { - // Color output is disabled by logic operation. We use color write mask to skip - // color but allow depth write. Return early to avoid overwriting this. - return; - } - - pipeline_info.blending.color_write_mask = color_mask; -} - -void RasterizerVulkan::SyncStencilWriteMask() { - pipeline_info.dynamic.stencil_write_mask = - (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) - ? 
static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) - : 0; -} - -void RasterizerVulkan::SyncDepthWriteMask() { - const bool write_enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && - regs.framebuffer.output_merger.depth_write_enable); - pipeline_info.depth_stencil.depth_write_enable.Assign(write_enable); -} - -void RasterizerVulkan::SyncStencilTest() { - const auto& stencil_test = regs.framebuffer.output_merger.stencil_test; - const bool test_enable = stencil_test.enable && regs.framebuffer.framebuffer.depth_format == - Pica::FramebufferRegs::DepthFormat::D24S8; - - pipeline_info.depth_stencil.stencil_test_enable.Assign(test_enable); - pipeline_info.depth_stencil.stencil_fail_op.Assign(stencil_test.action_stencil_fail); - pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass); - pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail); - pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func); - pipeline_info.dynamic.stencil_reference = stencil_test.reference_value; - pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask; -} - -void RasterizerVulkan::SyncDepthTest() { - const bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || - regs.framebuffer.output_merger.depth_write_enable == 1; - const auto compare_op = regs.framebuffer.output_merger.depth_test_enable == 1 - ? 
regs.framebuffer.output_merger.depth_test_func.Value() - : Pica::FramebufferRegs::CompareFunc::Always; - - pipeline_info.depth_stencil.depth_test_enable.Assign(test_enabled); - pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op); -} - void RasterizerVulkan::SyncAndUploadLUTsLF() { constexpr std::size_t max_size = sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(Common::Vec2f) * 128; // fog - if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) { + if (!pica.lighting.lut_dirty && !pica.fog.lut_dirty) { return; } std::size_t bytes_used = 0; auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); - // Sync the lighting luts - if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) { - for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) { - if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) { - std::array new_data; - const auto& source_lut = pica.lighting.luts[index]; - std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), - [](const auto& entry) { - return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; - }); + if (invalidate) { + pica.lighting.lut_dirty = pica.lighting.LutAllDirty; + pica.fog.lut_dirty = true; + } - if (new_data != lighting_lut_data[index] || invalidate) { - lighting_lut_data[index] = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec2f)); - fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = - static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec2f); - } - fs_uniform_block_data.lighting_lut_dirty[index] = false; - } + // Sync the lighting luts + while (pica.lighting.lut_dirty) { + u32 index = std::countr_zero(pica.lighting.lut_dirty); + pica.lighting.lut_dirty &= ~(1 << index); + + 
Common::Vec2f* new_data = reinterpret_cast(buffer + bytes_used); + const auto& source_lut = pica.lighting.luts[index]; + for (u32 i = 0; i < source_lut.size(); i++) { + new_data[i] = {source_lut[i].ToFloat(), source_lut[i].DiffToFloat()}; } - fs_uniform_block_data.lighting_lut_dirty_any = false; + fs_data.lighting_lut_offset[index / 4][index % 4] = + static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + fs_data_dirty = true; + bytes_used += source_lut.size() * sizeof(Common::Vec2f); } // Sync the fog lut - if (fs_uniform_block_data.fog_lut_dirty || invalidate) { - std::array new_data; - - std::transform( - pica.fog.lut.begin(), pica.fog.lut.end(), new_data.begin(), - [](const auto& entry) { return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; }); - - if (new_data != fog_lut_data || invalidate) { - fog_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec2f)); - fs_uniform_block_data.data.fog_lut_offset = - static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec2f); + if (pica.fog.lut_dirty) { + Common::Vec2f* new_data = reinterpret_cast(buffer + bytes_used); + for (u32 i = 0; i < pica.fog.lut.size(); i++) { + new_data[i] = {pica.fog.lut[i].ToFloat(), pica.fog.lut[i].DiffToFloat()}; } - fs_uniform_block_data.fog_lut_dirty = false; + fs_data.fog_lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + fs_data_dirty = true; + bytes_used += pica.fog.lut.size() * sizeof(Common::Vec2f); + pica.fog.lut_dirty = false; } texture_lf_buffer.Commit(static_cast(bytes_used)); @@ -995,109 +867,76 @@ void RasterizerVulkan::SyncAndUploadLUTs() { sizeof(Common::Vec4f) * 256 + // proctex sizeof(Common::Vec4f) * 256; // proctex diff - if (!fs_uniform_block_data.proctex_noise_lut_dirty && - !fs_uniform_block_data.proctex_color_map_dirty && - !fs_uniform_block_data.proctex_alpha_map_dirty && - 
!fs_uniform_block_data.proctex_lut_dirty && !fs_uniform_block_data.proctex_diff_lut_dirty) { + if (!pica.proctex.table_dirty) { return; } std::size_t bytes_used = 0; auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f)); - // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap - auto sync_proctex_value_lut = - [this, buffer = buffer, offset = offset, invalidate = invalidate, - &bytes_used](const std::array& lut, - std::array& lut_data, int& lut_offset) { - std::array new_data; - std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { - return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; - }); + if (invalidate) { + pica.proctex.table_dirty = pica.proctex.TableAllDirty; + } - if (new_data != lut_data || invalidate) { - lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec2f)); - lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec2f); + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap + const auto sync_proctex_value_lut = + [&](const std::array& lut, int& lut_offset) { + Common::Vec2f* new_data = reinterpret_cast(buffer + bytes_used); + for (u32 i = 0; i < lut.size(); i++) { + new_data[i] = {lut[i].ToFloat(), lut[i].DiffToFloat()}; } + lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + fs_data_dirty = true; + bytes_used += lut.size() * sizeof(Common::Vec2f); }; // Sync the proctex noise lut - if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) { - sync_proctex_value_lut(proctex.noise_table, proctex_noise_lut_data, - fs_uniform_block_data.data.proctex_noise_lut_offset); - fs_uniform_block_data.proctex_noise_lut_dirty = false; + if (pica.proctex.noise_lut_dirty) { + sync_proctex_value_lut(proctex.noise_table, fs_data.proctex_noise_lut_offset); } // Sync the proctex color map - if
(fs_uniform_block_data.proctex_color_map_dirty || invalidate) { - sync_proctex_value_lut(proctex.color_map_table, proctex_color_map_data, - fs_uniform_block_data.data.proctex_color_map_offset); - fs_uniform_block_data.proctex_color_map_dirty = false; + if (pica.proctex.color_map_dirty) { + sync_proctex_value_lut(proctex.color_map_table, fs_data.proctex_color_map_offset); } // Sync the proctex alpha map - if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) { - sync_proctex_value_lut(proctex.alpha_map_table, proctex_alpha_map_data, - fs_uniform_block_data.data.proctex_alpha_map_offset); - fs_uniform_block_data.proctex_alpha_map_dirty = false; + if (pica.proctex.alpha_map_dirty) { + sync_proctex_value_lut(proctex.alpha_map_table, fs_data.proctex_alpha_map_offset); } // Sync the proctex lut - if (fs_uniform_block_data.proctex_lut_dirty || invalidate) { - std::array new_data; - - std::transform(proctex.color_table.begin(), proctex.color_table.end(), new_data.begin(), - [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_lut_data || invalidate) { - proctex_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec4f)); - fs_uniform_block_data.data.proctex_lut_offset = - static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec4f); + if (pica.proctex.lut_dirty) { + Common::Vec4f* new_data = reinterpret_cast(buffer + bytes_used); + for (u32 i = 0; i < proctex.color_table.size(); i++) { + new_data[i] = proctex.color_table[i].ToVector() / 255.0f; } - fs_uniform_block_data.proctex_lut_dirty = false; + fs_data.proctex_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + fs_data_dirty = true; + bytes_used += proctex.color_table.size() * sizeof(Common::Vec4f); } // Sync the proctex 
difference lut - if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) { - std::array new_data; - - std::transform(proctex.color_diff_table.begin(), proctex.color_diff_table.end(), - new_data.begin(), [](const auto& entry) { - auto rgba = entry.ToVector() / 255.0f; - return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; - }); - - if (new_data != proctex_diff_lut_data || invalidate) { - proctex_diff_lut_data = new_data; - std::memcpy(buffer + bytes_used, new_data.data(), - new_data.size() * sizeof(Common::Vec4f)); - fs_uniform_block_data.data.proctex_diff_lut_offset = - static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); - fs_uniform_block_data.dirty = true; - bytes_used += new_data.size() * sizeof(Common::Vec4f); + if (pica.proctex.diff_lut_dirty) { + Common::Vec4f* new_data = reinterpret_cast(buffer + bytes_used); + for (u32 i = 0; i < proctex.color_diff_table.size(); i++) { + new_data[i] = proctex.color_diff_table[i].ToVector() / 255.0f; } - fs_uniform_block_data.proctex_diff_lut_dirty = false; + fs_data.proctex_diff_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + fs_data_dirty = true; + bytes_used += proctex.color_diff_table.size() * sizeof(Common::Vec4f); } + pica.proctex.table_dirty = 0; + texture_buffer.Commit(static_cast(bytes_used)); } void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { - const bool sync_vs_pica = accelerate_draw; - const bool sync_vs = vs_uniform_block_data.dirty; - const bool sync_fs = fs_uniform_block_data.dirty; - if (!sync_vs_pica && !sync_vs && !sync_fs) { + const bool sync_vs_pica = accelerate_draw && pica.vs_setup.uniforms_dirty; + if (!sync_vs_pica && !vs_data_dirty && !fs_data_dirty) { return; } @@ -1108,30 +947,26 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { u32 used_bytes = 0; - if (sync_vs || invalidate) { - std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data, - sizeof(vs_uniform_block_data.data)); - + if (vs_data_dirty || 
invalidate) { + std::memcpy(uniforms + used_bytes, &vs_data, sizeof(vs_data)); pipeline_cache.UpdateRange(1, offset + used_bytes); - vs_uniform_block_data.dirty = false; + vs_data_dirty = false; used_bytes += uniform_size_aligned_vs; } - if (sync_fs || invalidate) { - std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data, - sizeof(fs_uniform_block_data.data)); - + if (fs_data_dirty || invalidate) { + std::memcpy(uniforms + used_bytes, &fs_data, sizeof(fs_data)); pipeline_cache.UpdateRange(2, offset + used_bytes); - fs_uniform_block_data.dirty = false; + fs_data_dirty = false; used_bytes += uniform_size_aligned_fs; } - if (sync_vs_pica) { + if (sync_vs_pica || invalidate) { VSPicaUniformData vs_uniforms; - vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup); + vs_uniforms.SetFromRegs(pica.vs_setup); std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); - pipeline_cache.UpdateRange(0, offset + used_bytes); + pica.vs_setup.uniforms_dirty = false; used_bytes += uniform_size_aligned_vs_pica; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 63337487c..1944a73f2 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -61,40 +61,9 @@ public: u32 pixel_stride, ScreenInfo& screen_info); bool AccelerateDrawBatch(bool is_indexed) override; - void SyncFixedState() override; - private: - void NotifyFixedFunctionPicaRegisterChanged(u32 id) override; - - /// Syncs the cull mode to match the PICA register - void SyncCullMode(); - - /// Syncs the blend enabled status to match the PICA register - void SyncBlendEnabled(); - - /// Syncs the blend functions to match the PICA register - void SyncBlendFuncs(); - - /// Syncs the blend color to match the PICA register - void SyncBlendColor(); - - /// Syncs the logic op states to match the PICA register - void SyncLogicOp(); - - /// Syncs the color write mask to match the PICA register 
state - void SyncColorWriteMask(); - - /// Syncs the stencil write mask to match the PICA register state - void SyncStencilWriteMask(); - - /// Syncs the depth write mask to match the PICA register state - void SyncDepthWriteMask(); - - /// Syncs the stencil test states to match the PICA register - void SyncStencilTest(); - - /// Syncs the depth test states to match the PICA register - void SyncDepthTest(); + /// Syncs pipeline state from PICA registers + void SyncDrawState(); /// Syncs and uploads the lighting, fog and proctex LUTs void SyncAndUploadLUTs(); diff --git a/src/video_core/renderer_vulkan/vk_render_manager.cpp b/src/video_core/renderer_vulkan/vk_render_manager.cpp index 968390120..7b986e6b2 100644 --- a/src/video_core/renderer_vulkan/vk_render_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_render_manager.cpp @@ -1,8 +1,7 @@ -// Copyright 2024 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include "common/assert.h" #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/renderer_vulkan/vk_instance.h" diff --git a/src/video_core/shader/generator/glsl_shader_decompiler.cpp b/src/video_core/shader/generator/glsl_shader_decompiler.cpp index 017e41a43..eca35131b 100644 --- a/src/video_core/shader/generator/glsl_shader_decompiler.cpp +++ b/src/video_core/shader/generator/glsl_shader_decompiler.cpp @@ -1,8 +1,7 @@ -// Copyright 2017 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include #include #include #include @@ -359,8 +358,8 @@ private: } /// Generates code representing a bool uniform - std::string GetUniformBool(u32 index) const { - return fmt::format("uniforms.b[{}]", index); + std::string GetUniformBool(u32 index, bool invert_test = false) const { + return fmt::format("(uniforms.b & {}u) {} 0u", 1 << index, invert_test ? "==" : "!="); } /** @@ -673,9 +672,8 @@ private: if (instr.opcode.Value() == OpCode::Id::JMPC) { condition = EvaluateCondition(instr.flow_control); } else { - bool invert_test = instr.flow_control.num_instructions & 1; - condition = (invert_test ? "!" : "") + - GetUniformBool(instr.flow_control.bool_uniform_id); + const bool invert_test = instr.flow_control.num_instructions & 1; + condition = GetUniformBool(instr.flow_control.bool_uniform_id, invert_test); } shader.AddLine("if ({}) {{", condition); diff --git a/src/video_core/shader/generator/glsl_shader_gen.cpp b/src/video_core/shader/generator/glsl_shader_gen.cpp index 64747a50f..154ff1908 100644 --- a/src/video_core/shader/generator/glsl_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_shader_gen.cpp @@ -15,19 +15,15 @@ using VSOutputAttributes = Pica::RasterizerRegs::VSOutputAttributes; namespace Pica::Shader::Generator::GLSL { constexpr std::string_view VSPicaUniformBlockDef = R"( -struct pica_uniforms { - bool b[16]; - uvec4 i[4]; - vec4 f[96]; -}; - #ifdef VULKAN layout (set = 0, binding = 0, std140) uniform vs_pica_data { #else layout (binding = 0, std140) uniform vs_pica_data { #endif - pica_uniforms uniforms; -}; + uint b; + uvec4 i[4]; + vec4 f[96]; +} uniforms; )"; constexpr std::string_view VSUniformBlockDef = R"( diff --git a/src/video_core/shader/generator/shader_uniforms.cpp b/src/video_core/shader/generator/shader_uniforms.cpp index f5e471015..fca83272d 100644 --- a/src/video_core/shader/generator/shader_uniforms.cpp +++ b/src/video_core/shader/generator/shader_uniforms.cpp @@ -1,26 +1,26 @@ -// Copyright 2023 Citra Emulator Project +// 
Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include -#include "video_core/pica/regs_shader.h" #include "video_core/pica/shader_setup.h" #include "video_core/shader/generator/shader_uniforms.h" namespace Pica::Shader::Generator { -void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs, const Pica::ShaderSetup& setup) { - std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools), - [](bool value) -> BoolAligned { return {value ? 1 : 0}; }); - std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i), - [](const auto& value) -> Common::Vec4u { - return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()}; - }); - std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f), - [](const auto& value) -> Common::Vec4f { - return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(), - value.w.ToFloat32()}; - }); +void VSPicaUniformData::SetFromRegs(const Pica::ShaderSetup& setup) { + b = 0; + for (u32 j = 0; j < setup.uniforms.b.size(); j++) { + b |= setup.uniforms.b[j] << j; + } + for (u32 j = 0; j < setup.uniforms.i.size(); j++) { + const auto& value = setup.uniforms.i[j]; + i[j] = Common::MakeVec(value.x, value.y, value.z, value.w); + } + for (u32 j = 0; j < setup.uniforms.f.size(); j++) { + const auto& value = setup.uniforms.f[j]; + f[j] = Common::MakeVec(value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(), + value.w.ToFloat32()); + } } } // namespace Pica::Shader::Generator diff --git a/src/video_core/shader/generator/shader_uniforms.h b/src/video_core/shader/generator/shader_uniforms.h index d68d0654e..0d3a963b6 100644 --- a/src/video_core/shader/generator/shader_uniforms.h +++ b/src/video_core/shader/generator/shader_uniforms.h @@ -8,7 +8,6 @@ #include "video_core/pica/regs_lighting.h" namespace Pica { -struct ShaderRegs; struct 
ShaderSetup; } // namespace Pica @@ -69,22 +68,6 @@ static_assert(sizeof(FSUniformData) == 0x530, static_assert(sizeof(FSUniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); -/** - * Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. - * NOTE: the same rule from UniformData also applies here. - */ -struct PicaUniformsData { - void SetFromRegs(const ShaderRegs& regs, const ShaderSetup& setup); - - struct BoolAligned { - alignas(16) int b; - }; - - std::array bools; - alignas(16) std::array i; - alignas(16) std::array f; -}; - struct VSUniformData { u32 enable_clip1; u32 flip_viewport; @@ -95,10 +78,18 @@ static_assert(sizeof(VSUniformData) == 32, static_assert(sizeof(VSUniformData) < 16384, "VSUniformData structure must be less than 16kb as per the OpenGL spec"); +/** + * Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms. + * NOTE: the same rule from UniformData also applies here. + */ struct VSPicaUniformData { - alignas(16) PicaUniformsData uniforms; + void SetFromRegs(const ShaderSetup& setup); + + u32 b; + alignas(16) std::array i; + alignas(16) std::array f; }; -static_assert(sizeof(VSPicaUniformData) == 1856, +static_assert(sizeof(VSPicaUniformData) == 1616, "The size of the VSPicaUniformData does not match the structure in the shader"); static_assert(sizeof(VSPicaUniformData) < 16384, "VSPicaUniformData structure must be less than 16kb as per the OpenGL spec");