From bf0b957c05013f33855e67c31a48e61b1e86d356 Mon Sep 17 00:00:00 2001 From: Feng Chen Date: Tue, 6 Dec 2022 13:45:26 +0800 Subject: [PATCH] video_core: Implement maxwell3d draw manager and split draw logic --- src/video_core/CMakeLists.txt | 2 + src/video_core/buffer_cache/buffer_cache.h | 32 +-- src/video_core/engines/draw_manager.cpp | 191 ++++++++++++++++++ src/video_core/engines/draw_manager.h | 69 +++++++ src/video_core/engines/maxwell_3d.cpp | 171 +--------------- src/video_core/engines/maxwell_3d.h | 25 +-- src/video_core/macro/macro_hle.cpp | 69 ++----- .../renderer_opengl/gl_rasterizer.cpp | 16 +- .../renderer_opengl/gl_shader_cache.cpp | 6 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 6 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- .../renderer_vulkan/vk_rasterizer.cpp | 18 +- 12 files changed, 341 insertions(+), 267 deletions(-) create mode 100644 src/video_core/engines/draw_manager.cpp create mode 100644 src/video_core/engines/draw_manager.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b9bad63ac..33bdae748 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -33,6 +33,8 @@ add_library(video_core STATIC engines/sw_blitter/converter.cpp engines/sw_blitter/converter.h engines/const_buffer_info.h + engines/draw_manager.cpp + engines/draw_manager.h engines/engine_interface.h engines/engine_upload.cpp engines/engine_upload.h diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6881b34c4..502b4d90a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -26,6 +26,7 @@ #include "video_core/control/channel_state_cache.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/dirty_flags.h" +#include "video_core/engines/draw_manager.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -664,9 +665,10 @@ void BufferCache

::BindHostGeometryBuffers(bool is_indexed) { if (is_indexed) { BindHostIndexBuffer(); } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { - const auto& regs = maxwell3d->regs; - if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { - runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count); + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { + runtime.BindQuadArrayIndexBuffer(draw_state.vertex_buffer.first, + draw_state.vertex_buffer.count); } } BindHostVertexBuffers(); @@ -993,28 +995,29 @@ void BufferCache

::BindHostIndexBuffer() { TouchBuffer(buffer, index_buffer.buffer_id); const u32 offset = buffer.Offset(index_buffer.cpu_addr); const u32 size = index_buffer.size; - if (maxwell3d->inline_index_draw_indexes.size()) { + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + if (!draw_state.inline_index_draw_indexes.empty()) { if constexpr (USE_MEMORY_MAPS) { auto upload_staging = runtime.UploadStagingBuffer(size); std::array copies{ {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; std::memcpy(upload_staging.mapped_span.data(), - maxwell3d->inline_index_draw_indexes.data(), size); + draw_state.inline_index_draw_indexes.data(), size); runtime.CopyBuffer(buffer, upload_staging.buffer, copies); } else { - buffer.ImmediateUpload(0, maxwell3d->inline_index_draw_indexes); + buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); } } else { SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); } if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { - const u32 new_offset = offset + maxwell3d->regs.index_buffer.first * - maxwell3d->regs.index_buffer.FormatSizeInBytes(); + const u32 new_offset = + offset + draw_state.index_buffer.first * draw_state.index_buffer.FormatSizeInBytes(); runtime.BindIndexBuffer(buffer, new_offset, size); } else { - runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_buffer.format, - maxwell3d->regs.index_buffer.first, - maxwell3d->regs.index_buffer.count, buffer, offset, size); + runtime.BindIndexBuffer(draw_state.topology, draw_state.index_buffer.format, + draw_state.index_buffer.first, draw_state.index_buffer.count, + buffer, offset, size); } } @@ -1282,15 +1285,16 @@ template void BufferCache

::UpdateIndexBuffer() { // We have to check for the dirty flags and index count // The index count is currently changed without updating the dirty flags - const auto& index_array = maxwell3d->regs.index_buffer; + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + const auto& index_array = draw_state.index_buffer; auto& flags = maxwell3d->dirty.flags; if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { return; } flags[Dirty::IndexBuffer] = false; last_index_count = index_array.count; - if (maxwell3d->inline_index_draw_indexes.size()) { - auto inline_index_size = static_cast(maxwell3d->inline_index_draw_indexes.size()); + if (!draw_state.inline_index_draw_indexes.empty()) { + auto inline_index_size = static_cast(draw_state.inline_index_draw_indexes.size()); index_buffer = Binding{ .cpu_addr = 0, .size = inline_index_size, diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp new file mode 100644 index 000000000..c59524e58 --- /dev/null +++ b/src/video_core/engines/draw_manager.cpp @@ -0,0 +1,191 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/dirty_flags.h" +#include "video_core/engines/draw_manager.h" +#include "video_core/rasterizer_interface.h" + +namespace Tegra::Engines { +DrawManager::DrawManager(Maxwell3D* maxwell3d_) : maxwell3d(maxwell3d_) {} + +void DrawManager::ProcessMethodCall(u32 method, u32 argument) { + const auto& regs{maxwell3d->regs}; + switch (method) { + case MAXWELL3D_REG_INDEX(clear_surface): + return Clear(1); + case MAXWELL3D_REG_INDEX(draw.begin): + return DrawBegin(); + case MAXWELL3D_REG_INDEX(draw.end): + return DrawEnd(); + case MAXWELL3D_REG_INDEX(vertex_buffer.first): + case MAXWELL3D_REG_INDEX(vertex_buffer.count): + case MAXWELL3D_REG_INDEX(index_buffer.first): + break; + case MAXWELL3D_REG_INDEX(index_buffer.count): + draw_state.draw_indexed = true; + break; + case MAXWELL3D_REG_INDEX(index_buffer32_subsequent): + case MAXWELL3D_REG_INDEX(index_buffer16_subsequent): + case MAXWELL3D_REG_INDEX(index_buffer8_subsequent): + draw_state.instance_count++; + [[fallthrough]]; + case MAXWELL3D_REG_INDEX(index_buffer32_first): + case MAXWELL3D_REG_INDEX(index_buffer16_first): + case MAXWELL3D_REG_INDEX(index_buffer8_first): + return DrawIndexSmall(argument); + case MAXWELL3D_REG_INDEX(draw_inline_index): + SetInlineIndexBuffer(argument); + break; + case MAXWELL3D_REG_INDEX(inline_index_2x16.even): + SetInlineIndexBuffer(regs.inline_index_2x16.even); + SetInlineIndexBuffer(regs.inline_index_2x16.odd); + break; + case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): + SetInlineIndexBuffer(regs.inline_index_4x8.index0); + SetInlineIndexBuffer(regs.inline_index_4x8.index1); + SetInlineIndexBuffer(regs.inline_index_4x8.index2); + SetInlineIndexBuffer(regs.inline_index_4x8.index3); + break; + case MAXWELL3D_REG_INDEX(topology_override): + use_topology_override = true; + break; + default: + break; + } +} + +void DrawManager::Clear(u32 layer_count) { + maxwell3d->rasterizer->Clear(layer_count); +} + +void DrawManager::DrawDeferred() { + if (draw_state.draw_mode != DrawMode::Instance || draw_state.instance_count == 0) + return; + DrawEnd(draw_state.instance_count + 1, true); + draw_state.instance_count = 0; +} + +void DrawManager::DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, + u32 base_instance, u32 num_instances) { + draw_state.topology = topology; + draw_state.vertex_buffer.first = vertex_first; + draw_state.vertex_buffer.count = vertex_count; + draw_state.base_instance = base_instance; + ProcessDraw(false, num_instances); +} + +void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, + u32 base_index, u32 base_instance, u32 num_instances) { + const auto& regs{maxwell3d->regs}; + draw_state.topology = topology; + draw_state.index_buffer = regs.index_buffer; + draw_state.index_buffer.first = index_first; + draw_state.index_buffer.count = index_count; + draw_state.base_index = base_index; + draw_state.base_instance = base_instance; + ProcessDraw(true, num_instances); +} + +void DrawManager::SetInlineIndexBuffer(u32 index) { + draw_state.inline_index_draw_indexes.push_back(static_cast(index & 0x000000ff)); + draw_state.inline_index_draw_indexes.push_back(static_cast((index & 0x0000ff00) >> 8)); + draw_state.inline_index_draw_indexes.push_back(static_cast((index & 0x00ff0000) >> 16)); + draw_state.inline_index_draw_indexes.push_back(static_cast((index & 0xff000000) >> 24)); + draw_state.draw_mode = DrawMode::InlineIndex; +} + +void DrawManager::DrawBegin() { + const auto& regs{maxwell3d->regs}; + auto reset_instance_count = regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::First; + auto increment_instance_count = + regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent; + if (reset_instance_count) { + DrawDeferred(); + draw_state.instance_count = 0; + draw_state.draw_mode = DrawMode::General; + } else if (increment_instance_count) { + draw_state.instance_count++; + draw_state.draw_mode = DrawMode::Instance; + } + + draw_state.topology = regs.draw.topology; +} + +void DrawManager::DrawEnd(u32 instance_count, bool force_draw) { + const auto& regs{maxwell3d->regs}; + switch (draw_state.draw_mode) { + case DrawMode::Instance: + if (!force_draw) + break; + [[fallthrough]]; + case DrawMode::General: + draw_state.base_instance = regs.global_base_instance_index; + draw_state.base_index = regs.global_base_vertex_index; + if (draw_state.draw_indexed) { + draw_state.index_buffer = regs.index_buffer; + ProcessDraw(true, instance_count); + } else { + draw_state.vertex_buffer = regs.vertex_buffer; + ProcessDraw(false, instance_count); + } + draw_state.draw_indexed = false; + break; + case DrawMode::InlineIndex: + draw_state.base_instance = regs.global_base_instance_index; + draw_state.base_index = regs.global_base_vertex_index; + draw_state.index_buffer = regs.index_buffer; + draw_state.index_buffer.count = + static_cast(draw_state.inline_index_draw_indexes.size() / 4); + draw_state.index_buffer.format = Maxwell3D::Regs::IndexFormat::UnsignedInt; + ProcessDraw(true, instance_count); + draw_state.inline_index_draw_indexes.clear(); + break; + } +} + +void DrawManager::DrawIndexSmall(u32 argument) { + const auto& regs{maxwell3d->regs}; + IndexBufferSmall index_small_params{argument}; + draw_state.base_instance = regs.global_base_instance_index; + draw_state.base_index = regs.global_base_vertex_index; + draw_state.index_buffer = regs.index_buffer; + draw_state.index_buffer.first = index_small_params.first; + draw_state.index_buffer.count = index_small_params.count; + draw_state.topology = index_small_params.topology; + maxwell3d->dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + ProcessDraw(true, 1); +} + +void DrawManager::ProcessTopologyOverride() { + if (!use_topology_override) + return; + + const auto& regs{maxwell3d->regs}; + switch (regs.topology_override) { + case PrimitiveTopologyOverride::None: + break; + case PrimitiveTopologyOverride::Points: + draw_state.topology = PrimitiveTopology::Points; + break; + case PrimitiveTopologyOverride::Lines: + draw_state.topology = PrimitiveTopology::Lines; + break; + case PrimitiveTopologyOverride::LineStrip: + draw_state.topology = PrimitiveTopology::LineStrip; + break; + default: + draw_state.topology = static_cast(regs.topology_override); + break; + } +} + +void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { + LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology.Value(), + draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count); + + ProcessTopologyOverride(); + + if (maxwell3d->ShouldExecute()) + maxwell3d->rasterizer->Draw(draw_indexed, instance_count); +} +} // namespace Tegra::Engines diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h new file mode 100644 index 000000000..4f67027ca --- /dev/null +++ b/src/video_core/engines/draw_manager.h @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Tegra::Engines { +using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; +using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; +using IndexBuffer = Maxwell3D::Regs::IndexBuffer; +using VertexBuffer = Maxwell3D::Regs::VertexBuffer; +using IndexBufferSmall = Maxwell3D::Regs::IndexBufferSmall; + +class DrawManager { +public: + enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; + struct State { + PrimitiveTopology topology{}; + DrawMode draw_mode{}; + bool draw_indexed{}; + u32 base_index{}; + VertexBuffer vertex_buffer; + IndexBuffer index_buffer; + u32 base_instance{}; + u32 instance_count{}; + std::vector inline_index_draw_indexes; + }; + + explicit DrawManager(Maxwell3D* maxwell_3d); + + void ProcessMethodCall(u32 method, u32 argument); + + void Clear(u32 layer_count); + + void DrawDeferred(); + + void DrawArray(PrimitiveTopology topology, u32 vertex_first, u32 vertex_count, + u32 base_instance, u32 num_instances); + + void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, + u32 base_instance, u32 num_instances); + + const State& GetDrawState() const { + return draw_state; + } + +private: + void SetInlineIndexBuffer(u32 index); + + void DrawBegin(); + + void DrawEnd(u32 instance_count = 1, bool force_draw = false); + + void DrawIndexSmall(u32 argument); + + void ProcessTopologyOverride(); + + void ProcessDraw(bool draw_indexed, u32 instance_count); + + Maxwell3D* maxwell3d{}; + State draw_state{}; + bool use_topology_override{}; +}; +} // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 34bbc72cf..d4ef8d7c5 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,6 +7,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "video_core/dirty_flags.h" +#include "video_core/engines/draw_manager.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -21,8 +22,10 @@ using VideoCore::QueryType; constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) - : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, - upload_state{memory_manager, regs.upload} { + : draw_manager{std::make_unique(this)}, system{system_}, + memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{ + memory_manager, + regs.upload} { dirty.flags.flip(); InitializeRegisterDefaults(); } @@ -116,16 +119,6 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill; shadow_state = regs; - - draw_command[MAXWELL3D_REG_INDEX(draw.end)] = true; - draw_command[MAXWELL3D_REG_INDEX(draw.begin)] = true; - draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.first)] = true; - draw_command[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; - draw_command[MAXWELL3D_REG_INDEX(index_buffer.first)] = true; - draw_command[MAXWELL3D_REG_INDEX(index_buffer.count)] = true; - draw_command[MAXWELL3D_REG_INDEX(draw_inline_index)] = true; - draw_command[MAXWELL3D_REG_INDEX(inline_index_2x16.even)] = true; - draw_command[MAXWELL3D_REG_INDEX(inline_index_4x8.index0)] = true; } void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { @@ -213,29 +206,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return ProcessCBBind(3); case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config): return ProcessCBBind(4); - case MAXWELL3D_REG_INDEX(index_buffer32_first): - regs.index_buffer.count = regs.index_buffer32_first.count; - regs.index_buffer.first = regs.index_buffer32_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - draw_indexed = true; - return ProcessDraw(); - case MAXWELL3D_REG_INDEX(index_buffer16_first): - regs.index_buffer.count = regs.index_buffer16_first.count; - regs.index_buffer.first = regs.index_buffer16_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - draw_indexed = true; - return ProcessDraw(); - case MAXWELL3D_REG_INDEX(index_buffer8_first): - regs.index_buffer.count = regs.index_buffer8_first.count; - regs.index_buffer.first = regs.index_buffer8_first.first; - dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - draw_indexed = true; - return ProcessDraw(); - case MAXWELL3D_REG_INDEX(topology_override): - use_topology_override = true; - return; - case MAXWELL3D_REG_INDEX(clear_surface): - return ProcessClearBuffers(1); case MAXWELL3D_REG_INDEX(report_semaphore.query): return ProcessQueryGet(); case MAXWELL3D_REG_INDEX(render_enable.mode): @@ -254,6 +224,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return rasterizer->FragmentBarrier(); case MAXWELL3D_REG_INDEX(tiled_cache_barrier): return rasterizer->TiledCacheBarrier(); + default: + draw_manager->ProcessMethodCall(method, argument); + break; } } @@ -268,7 +241,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector& parameters) // Execute the current macro. macro_engine->Execute(macro_positions[entry], parameters); - ProcessDeferredDraw(); + draw_manager->DrawDeferred(); } void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { @@ -291,62 +264,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { const u32 argument = ProcessShadowRam(method, method_argument); ProcessDirtyRegisters(method, argument); - if (draw_command[method]) { - regs.reg_array[method] = method_argument; - deferred_draw_method.push_back(method); - auto update_inline_index = [&](const u32 index) { - inline_index_draw_indexes.push_back(static_cast(index & 0x000000ff)); - inline_index_draw_indexes.push_back(static_cast((index & 0x0000ff00) >> 8)); - inline_index_draw_indexes.push_back(static_cast((index & 0x00ff0000) >> 16)); - inline_index_draw_indexes.push_back(static_cast((index & 0xff000000) >> 24)); - draw_mode = DrawMode::InlineIndex; - }; - switch (method) { - case MAXWELL3D_REG_INDEX(draw.begin): { - draw_mode = - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Subsequent) || - (regs.draw.instance_id == Maxwell3D::Regs::Draw::InstanceId::Unchanged) - ? DrawMode::Instance - : DrawMode::General; - break; - } - case MAXWELL3D_REG_INDEX(draw.end): - switch (draw_mode) { - case DrawMode::General: - ProcessDraw(); - break; - case DrawMode::InlineIndex: - regs.index_buffer.count = static_cast(inline_index_draw_indexes.size() / 4); - regs.index_buffer.format = Regs::IndexFormat::UnsignedInt; - draw_indexed = true; - ProcessDraw(); - inline_index_draw_indexes.clear(); - break; - case DrawMode::Instance: - break; - } - break; - case MAXWELL3D_REG_INDEX(index_buffer.count): - draw_indexed = true; - break; - case MAXWELL3D_REG_INDEX(draw_inline_index): - update_inline_index(method_argument); - break; - case MAXWELL3D_REG_INDEX(inline_index_2x16.even): - update_inline_index(regs.inline_index_2x16.even); - update_inline_index(regs.inline_index_2x16.odd); - break; - case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): - update_inline_index(regs.inline_index_4x8.index0); - update_inline_index(regs.inline_index_4x8.index1); - update_inline_index(regs.inline_index_4x8.index2); - update_inline_index(regs.inline_index_4x8.index3); - break; - } - } else { - ProcessDeferredDraw(); - ProcessMethodCall(method, argument, method_argument, is_last_call); - } + ProcessMethodCall(method, argument, method_argument, is_last_call); } void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, @@ -387,35 +305,6 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, } } -void Maxwell3D::ProcessTopologyOverride() { - using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; - using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; - - PrimitiveTopology topology{}; - - switch (regs.topology_override) { - case PrimitiveTopologyOverride::None: - topology = regs.draw.topology; - break; - case PrimitiveTopologyOverride::Points: - topology = PrimitiveTopology::Points; - break; - case PrimitiveTopologyOverride::Lines: - topology = PrimitiveTopology::Lines; - break; - case PrimitiveTopologyOverride::LineStrip: - topology = PrimitiveTopology::LineStrip; - break; - default: - topology = static_cast(regs.topology_override); - break; - } - - if (use_topology_override) { - regs.draw.topology.Assign(topology); - } -} - void Maxwell3D::ProcessMacroUpload(u32 data) { macro_engine->AddCode(regs.load_mme.instruction_ptr++, data); } @@ -625,44 +514,4 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { return regs.reg_array[method]; } -void Maxwell3D::ProcessClearBuffers(u32 layer_count) { - rasterizer->Clear(layer_count); -} - -void Maxwell3D::ProcessDraw(u32 instance_count) { - LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), - draw_indexed ? regs.index_buffer.count : regs.vertex_buffer.count); - - ProcessTopologyOverride(); - - if (ShouldExecute()) { - rasterizer->Draw(draw_indexed, instance_count); - } - - draw_indexed = false; - deferred_draw_method.clear(); -} - -void Maxwell3D::ProcessDeferredDraw() { - if (draw_mode != DrawMode::Instance || deferred_draw_method.empty()) { - return; - } - - const auto method_count = deferred_draw_method.size(); - u32 instance_count = 1; - u32 vertex_buffer_count = 0; - u32 index_buffer_count = 0; - for (size_t index = 0; index < method_count; ++index) { - const u32 method = deferred_draw_method[index]; - if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count)) { - instance_count = ++vertex_buffer_count; - } else if (method == MAXWELL3D_REG_INDEX(index_buffer.count)) { - instance_count = ++index_buffer_count; - } - } - ASSERT_MSG(!(vertex_buffer_count && index_buffer_count), "Instance both indexed and direct?"); - - ProcessDraw(instance_count); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a541cd95f..22b904319 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -37,6 +37,8 @@ class RasterizerInterface; namespace Tegra::Engines { +class DrawManager; + /** * This Engine is known as GF100_3D. Documentation can be found in: * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/3d/clb197.h @@ -2223,6 +2225,7 @@ public: struct IndexBufferSmall { union { + u32 raw; BitField<0, 16, u32> first; BitField<16, 12, u32> count; BitField<28, 4, PrimitiveTopology> topology; @@ -3061,10 +3064,8 @@ public: Tables tables{}; } dirty; - std::vector inline_index_draw_indexes; - - /// Handles a write to the CLEAR_BUFFERS register. - void ProcessClearBuffers(u32 layer_count); + std::unique_ptr draw_manager; + friend class DrawManager; private: void InitializeRegisterDefaults(); @@ -3122,15 +3123,6 @@ private: /// Handles a write to the CB_BIND register. void ProcessCBBind(size_t stage_index); - /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) - void ProcessTopologyOverride(); - - /// Handles deferred draw(e.g., instance draw). - void ProcessDeferredDraw(); - - /// Handles a draw. - void ProcessDraw(u32 instance_count = 1); - /// Returns a query's value or an empty object if the value will be deferred through a cache. std::optional GetQueryResult(); @@ -3153,13 +3145,6 @@ private: Upload::State upload_state; bool execute_on{true}; - bool use_topology_override{false}; - - std::array draw_command{}; - std::vector deferred_draw_method; - enum class DrawMode : u32 { General = 0, Instance, InlineIndex }; - DrawMode draw_mode{DrawMode::General}; - bool draw_indexed{}; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 0f3262edb..8549db2e4 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -5,6 +5,7 @@ #include #include "common/scope_exit.h" #include "video_core/dirty_flags.h" +#include "video_core/engines/draw_manager.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro/macro.h" #include "video_core/macro/macro_hle.h" @@ -18,57 +19,33 @@ using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); - - maxwell3d.regs.draw.topology.Assign( - static_cast(parameters[0] & 0x3ffffff)); - maxwell3d.regs.global_base_instance_index = parameters[5]; - maxwell3d.regs.global_base_vertex_index = parameters[3]; - maxwell3d.regs.index_buffer.count = parameters[1]; - maxwell3d.regs.index_buffer.first = parameters[4]; - - if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, instance_count); - } - maxwell3d.regs.index_buffer.count = 0; + maxwell3d.draw_manager->DrawIndex( + static_cast(parameters[0] & 0x3ffffff), + parameters[4], parameters[1], parameters[3], parameters[5], instance_count); } void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - - maxwell3d.regs.vertex_buffer.first = parameters[3]; - maxwell3d.regs.vertex_buffer.count = parameters[1]; - maxwell3d.regs.global_base_instance_index = parameters[4]; - maxwell3d.regs.draw.topology.Assign( - static_cast(parameters[0])); - - if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(false, instance_count); - } - maxwell3d.regs.vertex_buffer.count = 0; + maxwell3d.draw_manager->DrawArray( + static_cast(parameters[0]), + parameters[3], parameters[1], parameters[4], instance_count); } void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector& parameters) { const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); const u32 element_base = parameters[4]; const u32 base_instance = parameters[5]; - maxwell3d.regs.index_buffer.first = parameters[3]; maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.regs.index_buffer.count = parameters[1]; maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.regs.global_base_vertex_index = element_base; - maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.CallMethod(0x8e3, 0x640, true); maxwell3d.CallMethod(0x8e4, element_base, true); maxwell3d.CallMethod(0x8e5, base_instance, true); - maxwell3d.regs.draw.topology.Assign( - static_cast(parameters[0])); - if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, instance_count); - } + + maxwell3d.draw_manager->DrawIndex( + static_cast(parameters[0]), + parameters[3], parameters[1], element_base, base_instance, instance_count); + maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.regs.index_buffer.count = 0; - maxwell3d.regs.global_base_vertex_index = 0x0; - maxwell3d.regs.global_base_instance_index = 0x0; maxwell3d.CallMethod(0x8e3, 0x640, true); maxwell3d.CallMethod(0x8e4, 0x0, true); maxwell3d.CallMethod(0x8e5, 0x0, true); @@ -79,9 +56,6 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& SCOPE_EXIT({ // Clean everything. maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.regs.index_buffer.count = 0; - maxwell3d.regs.global_base_vertex_index = 0x0; - maxwell3d.regs.global_base_instance_index = 0x0; maxwell3d.CallMethod(0x8e3, 0x640, true); maxwell3d.CallMethod(0x8e4, 0x0, true); maxwell3d.CallMethod(0x8e5, 0x0, true); @@ -93,9 +67,6 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& // Nothing to do. return; } - const auto topology = - static_cast(parameters[2]); - maxwell3d.regs.draw.topology.Assign(topology); const u32 padding = parameters[3]; const std::size_t max_draws = parameters[4]; @@ -106,23 +77,17 @@ void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector& for (std::size_t index = first_draw; index < last_draw; index++) { const std::size_t base = index * indirect_words + 5; - const u32 num_vertices = parameters[base]; - const u32 instance_count = parameters[base + 1]; - const u32 first_index = parameters[base + 2]; const u32 base_vertex = parameters[base + 3]; const u32 base_instance = parameters[base + 4]; - maxwell3d.regs.index_buffer.first = first_index; maxwell3d.regs.vertex_id_base = base_vertex; - maxwell3d.regs.index_buffer.count = num_vertices; - maxwell3d.regs.global_base_vertex_index = base_vertex; - maxwell3d.regs.global_base_instance_index = base_instance; maxwell3d.CallMethod(0x8e3, 0x640, true); maxwell3d.CallMethod(0x8e4, base_vertex, true); maxwell3d.CallMethod(0x8e5, base_instance, true); maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - if (maxwell3d.ShouldExecute()) { - maxwell3d.Rasterizer().Draw(true, instance_count); - } + maxwell3d.draw_manager->DrawIndex( + static_cast(parameters[2]), + parameters[base + 2], parameters[base], base_vertex, base_instance, + parameters[base + 1]); } } @@ -136,7 +101,7 @@ void HLE_EAD26C3E2109B06B(Engines::Maxwell3D& maxwell3d, const std::vector& ASSERT(clear_params.layer == 0); maxwell3d.regs.clear_surface.raw = clear_params.raw; - maxwell3d.ProcessClearBuffers(num_layers); + maxwell3d.draw_manager->Clear(num_layers); } constexpr std::array, 5> hle_funcs{{ diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f71a316b6..64ed6f628 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -224,16 +224,18 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) { SyncState(); - const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d->regs.draw.topology); + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology); BeginTransformFeedback(pipeline, primitive_mode); - const GLuint base_instance = static_cast(maxwell3d->regs.global_base_instance_index); + const GLuint base_instance = static_cast(draw_state.base_instance); const GLsizei num_instances = static_cast(instance_count); if (is_indexed) { - const GLint base_vertex = static_cast(maxwell3d->regs.global_base_vertex_index); - const GLsizei num_vertices = static_cast(maxwell3d->regs.index_buffer.count); + const GLint base_vertex = static_cast(draw_state.base_index); + const GLsizei num_vertices = static_cast(draw_state.index_buffer.count); const GLvoid* const offset = buffer_cache_runtime.IndexOffset(); - const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format); + const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format); if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { glDrawElements(primitive_mode, num_vertices, format, offset); } else if (num_instances == 1 && base_instance == 0) { @@ -252,8 +254,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) { base_instance); } } else { - const GLint base_vertex = static_cast(maxwell3d->regs.vertex_buffer.first); - const GLsizei num_vertices = static_cast(maxwell3d->regs.vertex_buffer.count); + const GLint base_vertex = static_cast(draw_state.vertex_buffer.first); + const GLsizei num_vertices = static_cast(draw_state.vertex_buffer.count); if (num_instances == 1 && base_instance == 0) { glDrawArrays(primitive_mode, base_vertex, num_vertices); } else if (base_instance == 0) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a38060100..a59d0d24e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,6 +22,7 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/profile.h" +#include "video_core/engines/draw_manager.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -327,7 +328,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { const auto& regs{maxwell3d->regs}; graphics_key.raw = 0; graphics_key.early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0); - graphics_key.gs_input_topology.Assign(regs.draw.topology.Value()); + graphics_key.gs_input_topology.Assign(maxwell3d->draw_manager->GetDrawState().topology); graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value()); graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value()); graphics_key.tessellation_clockwise.Assign( @@ -371,7 +372,8 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n // If games are using a small index count, we can assume these are full screen quads. // Usually these shaders are only used once for building textures so we can assume they // can't be built async - if (maxwell3d->regs.index_buffer.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) { + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) { return pipeline; } return nullptr; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index f3f08b42c..24529c80f 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -8,6 +8,7 @@ #include "common/cityhash.h" #include "common/common_types.h" #include "common/polyfill_ranges.h" +#include "video_core/engines/draw_manager.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" @@ -50,12 +51,13 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { const Maxwell& regs = maxwell3d.regs; + const auto topology_ = maxwell3d.draw_manager->GetDrawState().topology; const std::array enabled_lut{ regs.polygon_offset_point_enable, regs.polygon_offset_line_enable, regs.polygon_offset_fill_enable, }; - const u32 topology_index = static_cast(regs.draw.topology.Value()); + const u32 topology_index = static_cast(topology_); raw1 = 0; extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); @@ -78,7 +80,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, Maxwell::Tessellation::OutputPrimitives::Triangles_CW); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.op)); - topology.Assign(regs.draw.topology); + topology.Assign(topology_); msaa_mode.Assign(regs.anti_alias_samples_mode); raw2 = 0; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 38a6b7488..81f5f3e11 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -507,7 +507,8 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const // If games are using a small index count, we can assume these are full screen quads. // Usually these shaders are only used once for building textures so we can assume they // can't be built async - if (maxwell3d->regs.index_buffer.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) { + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) { return pipeline; } return nullptr; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d8ad8815c..8d7a5e400 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -12,6 +12,7 @@ #include "common/scope_exit.h" #include "common/settings.h" #include "video_core/control/channel_state.h" +#include "video_core/engines/draw_manager.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/blit_image.h" @@ -36,6 +37,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using MaxwellDrawState = Tegra::Engines::DrawManager::State; using VideoCommon::ImageViewId; using VideoCommon::ImageViewType; @@ -127,16 +129,16 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3 return scissor; } -DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_indexed) { +DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed) { DrawParams params{ - .base_instance = regs.global_base_instance_index, + .base_instance = draw_state.base_instance, .num_instances = num_instances, - .base_vertex = is_indexed ? regs.global_base_vertex_index : regs.vertex_buffer.first, - .num_vertices = is_indexed ? regs.index_buffer.count : regs.vertex_buffer.count, - .first_index = is_indexed ? regs.index_buffer.first : 0, + .base_vertex = is_indexed ? draw_state.base_index : draw_state.vertex_buffer.first, + .num_vertices = is_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count, + .first_index = is_indexed ? draw_state.index_buffer.first : 0, .is_indexed = is_indexed, }; - if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { + if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { // 6 triangle vertices per quad, base vertex is part of the index // See BindQuadArrayIndexBuffer for more details params.num_vertices = (params.num_vertices / 4) * 6; @@ -195,9 +197,9 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { UpdateDynamicStates(); - const auto& regs{maxwell3d->regs}; + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); const u32 num_instances{instance_count}; - const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_indexed)}; + const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,