MacroHLE: Refactor MacroHLE system.
This commit is contained in:
parent
0f89828073
commit
c541559767
|
@ -1574,7 +1574,11 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
|
||||||
if (!is_async) {
|
if (!is_async) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
uncommitted_ranges.add(base_interval);
|
const bool is_high_accuracy =
|
||||||
|
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
|
||||||
|
if (is_high_accuracy) {
|
||||||
|
uncommitted_ranges.add(base_interval);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -94,10 +94,10 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
||||||
|
|
||||||
if (dma_state.method_count) {
|
if (dma_state.method_count) {
|
||||||
// Data word of methods command
|
// Data word of methods command
|
||||||
|
dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
|
||||||
if (dma_state.non_incrementing) {
|
if (dma_state.non_incrementing) {
|
||||||
const u32 max_write = static_cast<u32>(
|
const u32 max_write = static_cast<u32>(
|
||||||
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
|
||||||
dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
|
|
||||||
CallMultiMethod(&command_header.argument, max_write);
|
CallMultiMethod(&command_header.argument, max_write);
|
||||||
dma_state.method_count -= max_write;
|
dma_state.method_count -= max_write;
|
||||||
dma_state.is_last_call = true;
|
dma_state.is_last_call = true;
|
||||||
|
@ -133,6 +133,8 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
|
||||||
case SubmissionMode::Inline:
|
case SubmissionMode::Inline:
|
||||||
dma_state.method = command_header.method;
|
dma_state.method = command_header.method;
|
||||||
dma_state.subchannel = command_header.subchannel;
|
dma_state.subchannel = command_header.subchannel;
|
||||||
|
dma_state.dma_word_offset = static_cast<u64>(
|
||||||
|
-static_cast<s64>(dma_state.dma_get)); // negate to set address as 0
|
||||||
CallMethod(command_header.arg_count);
|
CallMethod(command_header.arg_count);
|
||||||
dma_state.non_incrementing = true;
|
dma_state.non_incrementing = true;
|
||||||
dma_increment_once = false;
|
dma_increment_once = false;
|
||||||
|
@ -165,8 +167,9 @@ void DmaPusher::CallMethod(u32 argument) const {
|
||||||
dma_state.method_count,
|
dma_state.method_count,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
|
auto subchannel = subchannels[dma_state.subchannel];
|
||||||
dma_state.is_last_call);
|
subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
|
||||||
|
subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -157,7 +157,7 @@ private:
|
||||||
u32 method_count; ///< Current method count
|
u32 method_count; ///< Current method count
|
||||||
u32 length_pending; ///< Large NI command length pending
|
u32 length_pending; ///< Large NI command length pending
|
||||||
GPUVAddr dma_get; ///< Currently read segment
|
GPUVAddr dma_get; ///< Currently read segment
|
||||||
u32 dma_word_offset; ///< Current word offset from address
|
u64 dma_word_offset; ///< Current word offset from address
|
||||||
bool non_incrementing; ///< Current command's NI flag
|
bool non_incrementing; ///< Current command's NI flag
|
||||||
bool is_last_call;
|
bool is_last_call;
|
||||||
};
|
};
|
||||||
|
|
|
@ -91,6 +91,12 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
|
||||||
ProcessDraw(true, num_instances);
|
ProcessDraw(true, num_instances);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Starts a non-indexed indirect draw: records the requested primitive topology in draw_state and
/// hands the draw over to ProcessDrawIndirect.
void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
|
||||||
|
draw_state.topology = topology;
|
||||||
|
|
||||||
|
ProcessDrawIndirect(true); // NOTE(review): meaning of the bool argument is not visible in this diff - confirm
|
||||||
|
}
|
||||||
|
|
||||||
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) {
|
void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) {
|
||||||
const auto& regs{maxwell3d->regs};
|
const auto& regs{maxwell3d->regs};
|
||||||
draw_state.topology = topology;
|
draw_state.topology = topology;
|
||||||
|
|
|
@ -56,6 +56,8 @@ public:
|
||||||
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
|
void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
|
||||||
u32 base_instance, u32 num_instances);
|
u32 base_instance, u32 num_instances);
|
||||||
|
|
||||||
|
void DrawArrayIndirect(PrimitiveTopology topology);
|
||||||
|
|
||||||
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
|
void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
|
||||||
|
|
||||||
const State& GetDrawState() const {
|
const State& GetDrawState() const {
|
||||||
|
|
|
@ -133,15 +133,52 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
||||||
for (size_t i = 0; i < amount; i++) {
|
for (size_t i = 0; i < amount; i++) {
|
||||||
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
|
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
|
||||||
}
|
}
|
||||||
|
macro_segments.emplace_back(current_dma_segment, amount);
|
||||||
|
|
||||||
// Call the macro when there are no more parameters in the command buffer
|
// Call the macro when there are no more parameters in the command buffer
|
||||||
if (is_last_call) {
|
if (is_last_call) {
|
||||||
CallMacroMethod(executing_macro, macro_params);
|
CallMacroMethod(executing_macro, macro_params);
|
||||||
macro_params.clear();
|
macro_params.clear();
|
||||||
macro_addresses.clear();
|
macro_addresses.clear();
|
||||||
|
macro_segments.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Re-reads the pending macro parameters from GPU memory. macro_segments holds, for every batch of
/// parameters that was queued, the GPU address it came from and its length in 32-bit words; each
/// segment with a non-zero address is copied again into the matching slice of macro_params.
void Maxwell3D::RefreshParameters() {
|
||||||
|
size_t current_index = 0; // word index in macro_params where the current segment begins
|
||||||
|
for (auto& segment : macro_segments) {
|
||||||
|
if (segment.first == 0) { // no source address recorded: keep the words already stored
|
||||||
|
current_index += segment.second;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
memory_manager.ReadBlock(segment.first, &macro_params[current_index],
|
||||||
|
sizeof(u32) * segment.second);
|
||||||
|
current_index += segment.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the largest vertex count that any enabled vertex stream can hold. For each enabled
/// stream the byte span between array.Address() and limit.Address() is divided by the larger of
/// the attribute size and the stream stride; the maximum over all streams is returned (0 when no
/// stream is enabled).
u32 Maxwell3D::GetMaxCurrentVertices() {
|
||||||
|
u32 num_vertices = 0;
|
||||||
|
for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
|
||||||
|
const auto& array = regs.vertex_streams[index];
|
||||||
|
if (array.enable == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto& attribute = regs.vertex_attrib_format[index]; // NOTE(review): attribute table indexed by stream index - confirm this mapping
|
||||||
|
if (attribute.constant) {
|
||||||
|
num_vertices = std::max(num_vertices, 1U); // a constant attribute counts as a single vertex
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto& limit = regs.vertex_stream_limits[index];
|
||||||
|
const GPUVAddr gpu_addr_begin = array.Address();
|
||||||
|
const GPUVAddr gpu_addr_end = limit.Address() + 1; // presumably the limit address is inclusive - TODO confirm
|
||||||
|
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); // span truncated to 32 bits
|
||||||
|
num_vertices = std::max(
|
||||||
|
num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); // NOTE(review): divisor is 0 if both size and stride are 0 - confirm that cannot happen
|
||||||
|
}
|
||||||
|
return num_vertices;
|
||||||
|
}
|
||||||
|
|
||||||
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
|
||||||
// Keep track of the register value in shadow_state when requested.
|
// Keep track of the register value in shadow_state when requested.
|
||||||
const auto control = shadow_state.shadow_ram_control;
|
const auto control = shadow_state.shadow_ram_control;
|
||||||
|
|
|
@ -3068,10 +3068,14 @@ public:
|
||||||
friend class DrawManager;
|
friend class DrawManager;
|
||||||
|
|
||||||
std::vector<u8> inline_index_draw_indexes;
|
std::vector<u8> inline_index_draw_indexes;
|
||||||
std::vector<GPUVAddr> macro_addresses;
|
|
||||||
|
|
||||||
Core::System& system;
|
GPUVAddr getMacroAddress(size_t index) const {
|
||||||
MemoryManager& memory_manager;
|
return macro_addresses[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
void RefreshParameters();
|
||||||
|
|
||||||
|
u32 GetMaxCurrentVertices();
|
||||||
|
|
||||||
/// Handles a write to the CLEAR_BUFFERS register.
|
/// Handles a write to the CLEAR_BUFFERS register.
|
||||||
void ProcessClearBuffers(u32 layer_count);
|
void ProcessClearBuffers(u32 layer_count);
|
||||||
|
@ -3135,6 +3139,9 @@ private:
|
||||||
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
||||||
std::optional<u64> GetQueryResult();
|
std::optional<u64> GetQueryResult();
|
||||||
|
|
||||||
|
Core::System& system;
|
||||||
|
MemoryManager& memory_manager;
|
||||||
|
|
||||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||||
|
|
||||||
/// Start offsets of each macro in macro_memory
|
/// Start offsets of each macro in macro_memory
|
||||||
|
@ -3151,6 +3158,14 @@ private:
|
||||||
Upload::State upload_state;
|
Upload::State upload_state;
|
||||||
|
|
||||||
bool execute_on{true};
|
bool execute_on{true};
|
||||||
|
|
||||||
|
std::array<bool, Regs::NUM_REGS> draw_command{};
|
||||||
|
std::vector<u32> deferred_draw_method;
|
||||||
|
enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
|
||||||
|
DrawMode draw_mode{DrawMode::General};
|
||||||
|
bool draw_indexed{};
|
||||||
|
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
|
||||||
|
std::vector<GPUVAddr> macro_addresses;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ASSERT_REG_POSITION(field_name, position) \
|
#define ASSERT_REG_POSITION(field_name, position) \
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include "common/fs/fs.h"
|
#include "common/fs/fs.h"
|
||||||
#include "common/fs/path_util.h"
|
#include "common/fs/path_util.h"
|
||||||
#include "common/settings.h"
|
#include "common/settings.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/macro/macro.h"
|
#include "video_core/macro/macro.h"
|
||||||
#include "video_core/macro/macro_hle.h"
|
#include "video_core/macro/macro_hle.h"
|
||||||
#include "video_core/macro/macro_interpreter.h"
|
#include "video_core/macro/macro_interpreter.h"
|
||||||
|
@ -40,8 +41,8 @@ static void Dump(u64 hash, std::span<const u32> code) {
|
||||||
macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
|
macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
|
MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_)
|
||||||
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
|
: hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d_)}, maxwell3d{maxwell3d_} {}
|
||||||
|
|
||||||
MacroEngine::~MacroEngine() = default;
|
MacroEngine::~MacroEngine() = default;
|
||||||
|
|
||||||
|
@ -61,6 +62,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
|
||||||
if (cache_info.has_hle_program) {
|
if (cache_info.has_hle_program) {
|
||||||
cache_info.hle_program->Execute(parameters, method);
|
cache_info.hle_program->Execute(parameters, method);
|
||||||
} else {
|
} else {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
cache_info.lle_program->Execute(parameters, method);
|
cache_info.lle_program->Execute(parameters, method);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -106,6 +108,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
|
||||||
cache_info.hle_program = std::move(hle_program);
|
cache_info.hle_program = std::move(hle_program);
|
||||||
cache_info.hle_program->Execute(parameters, method);
|
cache_info.hle_program->Execute(parameters, method);
|
||||||
} else {
|
} else {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
cache_info.lle_program->Execute(parameters, method);
|
cache_info.lle_program->Execute(parameters, method);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,6 +137,7 @@ private:
|
||||||
std::unordered_map<u32, CacheInfo> macro_cache;
|
std::unordered_map<u32, CacheInfo> macro_cache;
|
||||||
std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
|
std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
|
||||||
std::unique_ptr<HLEMacro> hle_macros;
|
std::unique_ptr<HLEMacro> hle_macros;
|
||||||
|
Engines::Maxwell3D& maxwell3d;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
|
std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "common/assert.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
#include "video_core/dirty_flags.h"
|
#include "video_core/dirty_flags.h"
|
||||||
#include "video_core/engines/draw_manager.h"
|
#include "video_core/engines/draw_manager.h"
|
||||||
|
@ -15,143 +16,365 @@
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
|
bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
|
||||||
|
switch (topology) {
|
||||||
// HLE'd functions
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
|
||||||
void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
|
||||||
const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop:
|
||||||
maxwell3d.draw_manager->DrawIndex(
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff),
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
|
||||||
parameters[4], parameters[1], parameters[3], parameters[5], instance_count);
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
|
||||||
}
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
|
||||||
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
|
||||||
void HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
|
||||||
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
|
||||||
maxwell3d.draw_manager->DrawArray(
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches:
|
||||||
parameters[3], parameters[1], parameters[4], instance_count);
|
return true;
|
||||||
}
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads:
|
||||||
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
|
||||||
void HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon:
|
||||||
const u32 element_base = parameters[4];
|
default:
|
||||||
const u32 base_instance = parameters[5];
|
return false;
|
||||||
maxwell3d.regs.vertex_id_base = element_base;
|
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
|
||||||
maxwell3d.CallMethod(0x8e4, element_base, true);
|
|
||||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
|
||||||
|
|
||||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
|
||||||
params.is_indexed = true;
|
|
||||||
params.include_count = false;
|
|
||||||
params.count_start_address = 0;
|
|
||||||
params.indirect_start_address = maxwell3d.macro_addresses[1];
|
|
||||||
params.buffer_size = 5 * sizeof(u32);
|
|
||||||
params.max_draw_counts = 1;
|
|
||||||
params.stride = 0;
|
|
||||||
|
|
||||||
maxwell3d.draw_manager->DrawIndexedIndirect(
|
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 0,
|
|
||||||
1U << 18);
|
|
||||||
|
|
||||||
maxwell3d.regs.vertex_id_base = 0x0;
|
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
|
||||||
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
|
||||||
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Multidraw Indexed Indirect
|
|
||||||
void HLE_MultiDrawIndexedIndirect(Engines::Maxwell3D& maxwell3d,
|
|
||||||
const std::vector<u32>& parameters) {
|
|
||||||
const u32 start_indirect = parameters[0];
|
|
||||||
const u32 end_indirect = parameters[1];
|
|
||||||
if (start_indirect >= end_indirect) {
|
|
||||||
// Nothing to do.
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
const auto topology =
|
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
|
||||||
const u32 padding = parameters[3]; // padding is in words
|
|
||||||
|
|
||||||
// size of each indirect segment
|
|
||||||
const u32 indirect_words = 5 + padding;
|
|
||||||
const u32 stride = indirect_words * sizeof(u32);
|
|
||||||
const std::size_t draw_count = end_indirect - start_indirect;
|
|
||||||
u32 lowest_first = std::numeric_limits<u32>::max();
|
|
||||||
u32 highest_limit = std::numeric_limits<u32>::min();
|
|
||||||
for (std::size_t index = 0; index < draw_count; index++) {
|
|
||||||
const std::size_t base = index * indirect_words + 5;
|
|
||||||
const u32 count = parameters[base];
|
|
||||||
const u32 first_index = parameters[base + 2];
|
|
||||||
lowest_first = std::min(lowest_first, first_index);
|
|
||||||
highest_limit = std::max(highest_limit, first_index + count);
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 base_vertex = parameters[8];
|
|
||||||
const u32 base_instance = parameters[9];
|
|
||||||
maxwell3d.regs.vertex_id_base = base_vertex;
|
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
|
||||||
maxwell3d.CallMethod(0x8e4, base_vertex, true);
|
|
||||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
|
||||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
|
||||||
params.is_indexed = true;
|
|
||||||
params.include_count = true;
|
|
||||||
params.count_start_address = maxwell3d.macro_addresses[4];
|
|
||||||
params.indirect_start_address = maxwell3d.macro_addresses[5];
|
|
||||||
params.buffer_size = stride * draw_count;
|
|
||||||
params.max_draw_counts = draw_count;
|
|
||||||
params.stride = stride;
|
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
|
||||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multi-layer Clear
|
class HLEMacroImpl : public CachedMacro {
|
||||||
void HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
|
|
||||||
ASSERT(parameters.size() == 1);
|
|
||||||
|
|
||||||
const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
|
|
||||||
const u32 rt_index = clear_params.RT;
|
|
||||||
const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
|
|
||||||
ASSERT(clear_params.layer == 0);
|
|
||||||
|
|
||||||
maxwell3d.regs.clear_surface.raw = clear_params.raw;
|
|
||||||
maxwell3d.draw_manager->Clear(num_layers);
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{
|
|
||||||
{0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
|
|
||||||
{0x0D61FC9FAAC9FCAD, &HLE_DrawArraysIndirect},
|
|
||||||
{0x0217920100488FF7, &HLE_DrawIndexedIndirect},
|
|
||||||
{0x3F5E74B9C9A50164, &HLE_MultiDrawIndexedIndirect},
|
|
||||||
{0xEAD26C3E2109B06B, &HLE_MultiLayerClear},
|
|
||||||
}};
|
|
||||||
|
|
||||||
class HLEMacroImpl final : public CachedMacro {
|
|
||||||
public:
|
public:
|
||||||
explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
|
explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
|
||||||
: maxwell3d{maxwell3d_}, func{func_} {}
|
|
||||||
|
|
||||||
void Execute(const std::vector<u32>& parameters, u32 method) override {
|
protected:
|
||||||
func(maxwell3d, parameters);
|
/// Advances the call counter and raises check_limit on the calls where the counter wraps to zero.
/// Every wrap moves the wrap period to the next Fibonacci number (fibonacci_pre/fibonacci_post
/// hold the last two terms), so the flag is raised at progressively longer intervals.
void advanceCheck() {
|
||||||
|
current_value = (current_value + 1) % fibonacci_post;
|
||||||
|
check_limit = current_value == 0; // true only on the call that completes a period
|
||||||
|
if (check_limit) {
|
||||||
|
const u32 new_fibonacci = fibonacci_pre + fibonacci_post; // next term becomes the next period
|
||||||
|
fibonacci_pre = fibonacci_post;
|
||||||
|
fibonacci_post = new_fibonacci;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Engines::Maxwell3D& maxwell3d;
|
||||||
|
u32 fibonacci_pre{89};
|
||||||
|
u32 fibonacci_post{144};
|
||||||
|
u32 current_value{fibonacci_post - 1};
|
||||||
|
bool check_limit{};
|
||||||
|
};
|
||||||
|
|
||||||
|
/// HLE macro named after the hash 0x771BB18C62444DA0: issues one instanced indexed draw built from
/// the macro parameters. Words used: [0] topology (low 26 bits), [1] index count, [2] instance
/// count mask, [3] base index, [4] first index, [5] base instance.
class HLE_771BB18C62444DA0 final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
maxwell3d.RefreshParameters(); // reload the parameter words before reading them
|
||||||
|
const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); // parameter word masked with register 0xD1B
|
||||||
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
|
maxwell3d.draw_manager->DrawIndex(
|
||||||
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
|
||||||
|
0x3ffffff),
|
||||||
|
parameters[4], parameters[1], parameters[3], parameters[5], instance_count); // index_first, index_count, base_index, base_instance, num_instances
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class HLE_DrawArraysIndirect final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d_, bool extended_ = false)
|
||||||
|
: HLEMacroImpl(maxwell3d_), extended(extended_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
auto topology =
|
||||||
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||||
|
if (!IsTopologySafe(topology)) {
|
||||||
|
Fallback(parameters);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||||
|
params.is_indexed = false;
|
||||||
|
params.include_count = false;
|
||||||
|
params.count_start_address = 0;
|
||||||
|
params.indirect_start_address = maxwell3d.getMacroAddress(1);
|
||||||
|
params.buffer_size = 4 * sizeof(u32);
|
||||||
|
params.max_draw_counts = 1;
|
||||||
|
params.stride = 0;
|
||||||
|
|
||||||
|
if (extended) {
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
maxwell3d.CallMethod(0x8e4, parameters[4], true);
|
||||||
|
}
|
||||||
|
|
||||||
|
maxwell3d.draw_manager->DrawArrayIndirect(topology);
|
||||||
|
|
||||||
|
if (extended) {
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
maxwell3d.CallMethod(0x8e4, 0, true);
|
||||||
|
}
|
||||||
|
maxwell3d.regs.vertex_buffer.first = 0;
|
||||||
|
maxwell3d.regs.vertex_buffer.count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Engines::Maxwell3D& maxwell3d;
|
void Fallback(const std::vector<u32>& parameters) {
|
||||||
HLEFunction func;
|
SCOPE_EXIT({
|
||||||
|
if (extended) {
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
maxwell3d.CallMethod(0x8e4, 0, true);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||||
|
|
||||||
|
const u32 vertex_first = parameters[3];
|
||||||
|
const u32 vertex_count = parameters[1];
|
||||||
|
|
||||||
|
if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) {
|
||||||
|
ASSERT_MSG(false, "Faulty draw!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 base_instance = parameters[4];
|
||||||
|
if (extended) {
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
maxwell3d.CallMethod(0x8e4, base_instance, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
maxwell3d.draw_manager->DrawArray(
|
||||||
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
||||||
|
vertex_first, vertex_count, base_instance, instance_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool extended;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// HLE implementation of the single indexed indirect draw macro.
/// Parameter words as consumed by this class: [0] topology, [1] index count, [2] instance count
/// mask, [3] first index, [4] base vertex (element base), [5] base instance. Words 1..5 are also
/// the 5-word record the indirect draw is pointed at (getMacroAddress(1)).
/// Topologies rejected by IsTopologySafe take the direct-draw Fallback() path instead.
class HLE_DrawIndexedIndirect final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
auto topology =
|
||||||
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
||||||
|
if (!IsTopologySafe(topology)) {
|
||||||
|
Fallback(parameters);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
advanceCheck(); // only periodically pay for re-reading the parameters
|
||||||
|
if (check_limit) {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
minimum_limit = std::max(parameters[3], minimum_limit); // NOTE(review): [3] is the first index in Fallback() - confirm it is the intended bound
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 base_vertex = parameters[4]; // same slot Fallback() reads; [8] belongs to the multi-draw layout and is past this macro's words
|
||||||
|
const u32 base_instance = parameters[5]; // same slot Fallback() reads; was [9]
|
||||||
|
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true); // presumably selects where the next two writes land - TODO confirm register meaning
|
||||||
|
maxwell3d.CallMethod(0x8e4, base_vertex, true);
|
||||||
|
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
||||||
|
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||||
|
params.is_indexed = true;
|
||||||
|
params.include_count = false;
|
||||||
|
params.count_start_address = 0;
|
||||||
|
params.indirect_start_address = maxwell3d.getMacroAddress(1); // GPU address of parameter word 1
|
||||||
|
params.buffer_size = 5 * sizeof(u32); // one record: words 1..5
|
||||||
|
params.max_draw_counts = 1;
|
||||||
|
params.stride = 0;
|
||||||
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
|
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, minimum_limit);
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true); // write zeros back to the same two slots after the draw
|
||||||
|
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
||||||
|
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Fallback(const std::vector<u32>& parameters) { // direct (non-indirect) draw from freshly re-read parameters
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
|
||||||
|
const u32 element_base = parameters[4];
|
||||||
|
const u32 base_instance = parameters[5];
|
||||||
|
maxwell3d.regs.vertex_id_base = element_base;
|
||||||
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
maxwell3d.CallMethod(0x8e4, element_base, true);
|
||||||
|
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
||||||
|
|
||||||
|
maxwell3d.draw_manager->DrawIndex(
|
||||||
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
||||||
|
parameters[3], parameters[1], element_base, base_instance, instance_count);
|
||||||
|
|
||||||
|
maxwell3d.regs.vertex_id_base = 0x0; // undo the per-draw state set above
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
||||||
|
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 minimum_limit{1 << 18}; // index count passed to DrawIndexedIndirect; only ever raised in Execute()
|
||||||
|
};
|
||||||
|
|
||||||
|
/// HLE macro that clears every layer of a render target in one call. The single parameter word
/// is a ClearSurface value; the layer count is taken from the depth of the render target it
/// selects.
class HLE_MultiLayerClear final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
maxwell3d.RefreshParameters(); // reload the parameter word before reading it
|
||||||
|
ASSERT(parameters.size() == 1);
|
||||||
|
|
||||||
|
const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
|
||||||
|
const u32 rt_index = clear_params.RT;
|
||||||
|
const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; // depth of the selected target is used as the layer count
|
||||||
|
ASSERT(clear_params.layer == 0); // the clear is expected to start at layer 0
|
||||||
|
|
||||||
|
maxwell3d.regs.clear_surface.raw = clear_params.raw;
|
||||||
|
maxwell3d.draw_manager->Clear(num_layers);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_MultiDrawIndexedIndirectCount(Engines::Maxwell3D& maxwell3d_)
|
||||||
|
: HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
const auto topology =
|
||||||
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
||||||
|
if (!IsTopologySafe(topology)) {
|
||||||
|
Fallback(parameters);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
advanceCheck();
|
||||||
|
if (check_limit) {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
}
|
||||||
|
const u32 start_indirect = parameters[0];
|
||||||
|
const u32 end_indirect = parameters[1];
|
||||||
|
if (start_indirect >= end_indirect) {
|
||||||
|
// Nothing to do.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
maxwell3d.regs.draw.topology.Assign(topology);
|
||||||
|
const u32 padding = parameters[3]; // padding is in words
|
||||||
|
|
||||||
|
// size of each indirect segment
|
||||||
|
const u32 indirect_words = 5 + padding;
|
||||||
|
const u32 stride = indirect_words * sizeof(u32);
|
||||||
|
const std::size_t draw_count = end_indirect - start_indirect;
|
||||||
|
u32 lowest_first = std::numeric_limits<u32>::max();
|
||||||
|
u32 highest_limit = std::numeric_limits<u32>::min();
|
||||||
|
for (std::size_t index = 0; index < draw_count; index++) {
|
||||||
|
const std::size_t base = index * indirect_words + 5;
|
||||||
|
const u32 count = parameters[base];
|
||||||
|
const u32 first_index = parameters[base + 2];
|
||||||
|
lowest_first = std::min(lowest_first, first_index);
|
||||||
|
highest_limit = std::max(highest_limit, first_index + count);
|
||||||
|
}
|
||||||
|
if (check_limit) {
|
||||||
|
minimum_limit = std::max(highest_limit, minimum_limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
maxwell3d.regs.index_buffer.first = 0;
|
||||||
|
maxwell3d.regs.index_buffer.count = std::max(highest_limit, minimum_limit);
|
||||||
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
|
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||||
|
params.is_indexed = true;
|
||||||
|
params.include_count = true;
|
||||||
|
params.count_start_address = maxwell3d.getMacroAddress(4);
|
||||||
|
params.indirect_start_address = maxwell3d.getMacroAddress(5);
|
||||||
|
params.buffer_size = stride * draw_count;
|
||||||
|
params.max_draw_counts = draw_count;
|
||||||
|
params.stride = stride;
|
||||||
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
|
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Fallback(const std::vector<u32>& parameters) {
|
||||||
|
SCOPE_EXIT({
|
||||||
|
// Clean everything.
|
||||||
|
// Clean everything.
|
||||||
|
maxwell3d.regs.vertex_id_base = 0x0;
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
||||||
|
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
||||||
|
});
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
const u32 start_indirect = parameters[0];
|
||||||
|
const u32 end_indirect = parameters[1];
|
||||||
|
if (start_indirect >= end_indirect) {
|
||||||
|
// Nothing to do.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const auto topology =
|
||||||
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
||||||
|
maxwell3d.regs.draw.topology.Assign(topology);
|
||||||
|
const u32 padding = parameters[3];
|
||||||
|
const std::size_t max_draws = parameters[4];
|
||||||
|
|
||||||
|
const u32 indirect_words = 5 + padding;
|
||||||
|
const std::size_t first_draw = start_indirect;
|
||||||
|
const std::size_t effective_draws = end_indirect - start_indirect;
|
||||||
|
const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws);
|
||||||
|
|
||||||
|
for (std::size_t index = first_draw; index < last_draw; index++) {
|
||||||
|
const std::size_t base = index * indirect_words + 5;
|
||||||
|
const u32 base_vertex = parameters[base + 3];
|
||||||
|
const u32 base_instance = parameters[base + 4];
|
||||||
|
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||||
|
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
||||||
|
maxwell3d.CallMethod(0x8e4, base_vertex, true);
|
||||||
|
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
||||||
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
|
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
|
||||||
|
base_vertex, base_instance, parameters[base + 1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Floor for the index buffer count written by this macro. Starts at 1 << 12 and, when
// check_limit is set, is raised to the largest `first_index + count` seen so far.
u32 minimum_limit{1 << 12};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
/// Binds the 3D engine and fills `builders` with one factory per recognised macro hash.
/// Each factory constructs the matching HLE macro implementation for the engine it is given.
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
    using Builder = std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>;

    builders.emplace(0x771BB18C62444DA0ULL,
                     Builder([](Engines::Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
                         return std::make_unique<HLE_771BB18C62444DA0>(engine);
                     }));
    builders.emplace(0x0D61FC9FAAC9FCADULL,
                     Builder([](Engines::Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
                         return std::make_unique<HLE_DrawArraysIndirect>(engine);
                     }));
    // Same implementation as the entry above, constructed with its second argument set to true.
    builders.emplace(0x8A4D173EB99A8603ULL,
                     Builder([](Engines::Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
                         return std::make_unique<HLE_DrawArraysIndirect>(engine, true);
                     }));
    builders.emplace(0x0217920100488FF7ULL,
                     Builder([](Engines::Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
                         return std::make_unique<HLE_DrawIndexedIndirect>(engine);
                     }));
    builders.emplace(0x3F5E74B9C9A50164ULL,
                     Builder([](Engines::Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
                         return std::make_unique<HLE_MultiDrawIndexedIndirectCount>(engine);
                     }));
    builders.emplace(0xEAD26C3E2109B06BULL,
                     Builder([](Engines::Maxwell3D& engine) -> std::unique_ptr<CachedMacro> {
                         return std::make_unique<HLE_MultiLayerClear>(engine);
                     }));
}
|
||||||
|
|
||||||
HLEMacro::~HLEMacro() = default;
|
// Destructor is defaulted here, outside the class definition.
HLEMacro::~HLEMacro() = default;
|
||||||
|
|
||||||
/// Looks up the factory registered for `hash` and, if one exists, returns a freshly built
/// HLE macro bound to this object's 3D engine. Returns nullptr for unknown hashes.
std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
    if (const auto entry = builders.find(hash); entry != builders.end()) {
        return entry->second(maxwell3d);
    }
    return nullptr;
}
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
|
@ -3,7 +3,10 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
@ -23,6 +26,8 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Engines::Maxwell3D& maxwell3d;
|
Engines::Maxwell3D& maxwell3d;
|
||||||
|
std::unordered_map<u64, std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>>
|
||||||
|
builders;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
Reference in New Issue