Revert Buffer cache changes and set up additional macros.
This commit is contained in:
parent
18637766ef
commit
ce448ce770
|
@ -170,11 +170,6 @@ public:
|
||||||
void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
|
void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
|
||||||
bool is_written, bool is_image);
|
bool is_written, bool is_image);
|
||||||
|
|
||||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
|
|
||||||
bool synchronize = true,
|
|
||||||
bool mark_as_written = false,
|
|
||||||
bool discard_downloads = false);
|
|
||||||
|
|
||||||
void FlushCachedWrites();
|
void FlushCachedWrites();
|
||||||
|
|
||||||
/// Return true when there are uncommitted buffers to be downloaded
|
/// Return true when there are uncommitted buffers to be downloaded
|
||||||
|
@ -354,8 +349,6 @@ private:
|
||||||
|
|
||||||
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||||
|
|
||||||
bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
|
|
||||||
|
|
||||||
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
||||||
std::span<BufferCopy> copies);
|
std::span<BufferCopy> copies);
|
||||||
|
|
||||||
|
@ -442,7 +435,6 @@ private:
|
||||||
|
|
||||||
std::vector<BufferId> cached_write_buffer_ids;
|
std::vector<BufferId> cached_write_buffer_ids;
|
||||||
|
|
||||||
IntervalSet discarded_ranges;
|
|
||||||
IntervalSet uncommitted_ranges;
|
IntervalSet uncommitted_ranges;
|
||||||
IntervalSet common_ranges;
|
IntervalSet common_ranges;
|
||||||
std::deque<IntervalSet> committed_ranges;
|
std::deque<IntervalSet> committed_ranges;
|
||||||
|
@ -600,17 +592,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
|
||||||
}};
|
}};
|
||||||
|
|
||||||
boost::container::small_vector<IntervalType, 4> tmp_intervals;
|
boost::container::small_vector<IntervalType, 4> tmp_intervals;
|
||||||
const bool is_high_accuracy =
|
|
||||||
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
|
|
||||||
auto mirror = [&](VAddr base_address, VAddr base_address_end) {
|
auto mirror = [&](VAddr base_address, VAddr base_address_end) {
|
||||||
const u64 size = base_address_end - base_address;
|
const u64 size = base_address_end - base_address;
|
||||||
const VAddr diff = base_address - *cpu_src_address;
|
const VAddr diff = base_address - *cpu_src_address;
|
||||||
const VAddr new_base_address = *cpu_dest_address + diff;
|
const VAddr new_base_address = *cpu_dest_address + diff;
|
||||||
const IntervalType add_interval{new_base_address, new_base_address + size};
|
const IntervalType add_interval{new_base_address, new_base_address + size};
|
||||||
tmp_intervals.push_back(add_interval);
|
|
||||||
if (is_high_accuracy) {
|
|
||||||
uncommitted_ranges.add(add_interval);
|
uncommitted_ranges.add(add_interval);
|
||||||
}
|
tmp_intervals.push_back(add_interval);
|
||||||
};
|
};
|
||||||
ForEachWrittenRange(*cpu_src_address, amount, mirror);
|
ForEachWrittenRange(*cpu_src_address, amount, mirror);
|
||||||
// This subtraction in this order is important for overlapping copies.
|
// This subtraction in this order is important for overlapping copies.
|
||||||
|
@ -821,32 +809,6 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
|
||||||
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
|
||||||
std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
|
|
||||||
bool synchronize,
|
|
||||||
bool mark_as_written,
|
|
||||||
bool discard_downloads) {
|
|
||||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
|
||||||
if (!cpu_addr) {
|
|
||||||
return {&slot_buffers[NULL_BUFFER_ID], 0};
|
|
||||||
}
|
|
||||||
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
|
|
||||||
Buffer& buffer = slot_buffers[buffer_id];
|
|
||||||
if (synchronize) {
|
|
||||||
// SynchronizeBuffer(buffer, *cpu_addr, size);
|
|
||||||
SynchronizeBufferNoModified(buffer, *cpu_addr, size);
|
|
||||||
}
|
|
||||||
if (mark_as_written) {
|
|
||||||
MarkWrittenBuffer(buffer_id, *cpu_addr, size);
|
|
||||||
}
|
|
||||||
if (discard_downloads) {
|
|
||||||
IntervalType interval{*cpu_addr, size};
|
|
||||||
ClearDownload(interval);
|
|
||||||
discarded_ranges.subtract(interval);
|
|
||||||
}
|
|
||||||
return {&buffer, buffer.Offset(*cpu_addr)};
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::FlushCachedWrites() {
|
void BufferCache<P>::FlushCachedWrites() {
|
||||||
for (const BufferId buffer_id : cached_write_buffer_ids) {
|
for (const BufferId buffer_id : cached_write_buffer_ids) {
|
||||||
|
@ -862,6 +824,10 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::AccumulateFlushes() {
|
void BufferCache<P>::AccumulateFlushes() {
|
||||||
|
if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
|
||||||
|
uncommitted_ranges.clear();
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (uncommitted_ranges.empty()) {
|
if (uncommitted_ranges.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -877,14 +843,12 @@ template <class P>
|
||||||
void BufferCache<P>::CommitAsyncFlushesHigh() {
|
void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||||
AccumulateFlushes();
|
AccumulateFlushes();
|
||||||
|
|
||||||
for (const auto& interval : discarded_ranges) {
|
|
||||||
common_ranges.subtract(interval);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (committed_ranges.empty()) {
|
if (committed_ranges.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
||||||
|
const bool is_accuracy_normal =
|
||||||
|
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
|
||||||
|
|
||||||
auto it = committed_ranges.begin();
|
auto it = committed_ranges.begin();
|
||||||
while (it != committed_ranges.end()) {
|
while (it != committed_ranges.end()) {
|
||||||
|
@ -909,6 +873,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||||
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
|
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
|
||||||
buffer.ForEachDownloadRangeAndClear(
|
buffer.ForEachDownloadRangeAndClear(
|
||||||
cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
||||||
|
if (is_accuracy_normal) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
const VAddr buffer_addr = buffer.CpuAddr();
|
const VAddr buffer_addr = buffer.CpuAddr();
|
||||||
const auto add_download = [&](VAddr start, VAddr end) {
|
const auto add_download = [&](VAddr start, VAddr end) {
|
||||||
const u64 new_offset = start - buffer_addr;
|
const u64 new_offset = start - buffer_addr;
|
||||||
|
@ -973,7 +940,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::CommitAsyncFlushes() {
|
void BufferCache<P>::CommitAsyncFlushes() {
|
||||||
|
if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
|
||||||
CommitAsyncFlushesHigh();
|
CommitAsyncFlushesHigh();
|
||||||
|
} else {
|
||||||
|
uncommitted_ranges.clear();
|
||||||
|
committed_ranges.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1353,7 +1325,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
||||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||||
const auto& index_array = draw_state.index_buffer;
|
const auto& index_array = draw_state.index_buffer;
|
||||||
auto& flags = maxwell3d->dirty.flags;
|
auto& flags = maxwell3d->dirty.flags;
|
||||||
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
|
if (!flags[Dirty::IndexBuffer]) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
flags[Dirty::IndexBuffer] = false;
|
flags[Dirty::IndexBuffer] = false;
|
||||||
|
@ -1574,12 +1546,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
|
||||||
if (!is_async) {
|
if (!is_async) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const bool is_high_accuracy =
|
|
||||||
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
|
|
||||||
if (is_high_accuracy) {
|
|
||||||
uncommitted_ranges.add(base_interval);
|
uncommitted_ranges.add(base_interval);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
|
BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
|
||||||
|
@ -1771,51 +1739,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
|
||||||
bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
|
||||||
boost::container::small_vector<BufferCopy, 4> copies;
|
|
||||||
u64 total_size_bytes = 0;
|
|
||||||
u64 largest_copy = 0;
|
|
||||||
IntervalSet found_sets{};
|
|
||||||
auto make_copies = [&] {
|
|
||||||
for (auto& interval : found_sets) {
|
|
||||||
const std::size_t sub_size = interval.upper() - interval.lower();
|
|
||||||
const VAddr cpu_addr = interval.lower();
|
|
||||||
copies.push_back(BufferCopy{
|
|
||||||
.src_offset = total_size_bytes,
|
|
||||||
.dst_offset = cpu_addr - buffer.CpuAddr(),
|
|
||||||
.size = sub_size,
|
|
||||||
});
|
|
||||||
total_size_bytes += sub_size;
|
|
||||||
largest_copy = std::max(largest_copy, sub_size);
|
|
||||||
}
|
|
||||||
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
|
|
||||||
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
|
|
||||||
};
|
|
||||||
buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
|
||||||
const VAddr base_adr = buffer.CpuAddr() + range_offset;
|
|
||||||
const VAddr end_adr = base_adr + range_size;
|
|
||||||
const IntervalType add_interval{base_adr, end_adr};
|
|
||||||
found_sets.add(add_interval);
|
|
||||||
});
|
|
||||||
if (found_sets.empty()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
const IntervalType search_interval{cpu_addr, cpu_addr + size};
|
|
||||||
auto it = common_ranges.lower_bound(search_interval);
|
|
||||||
auto it_end = common_ranges.upper_bound(search_interval);
|
|
||||||
if (it == common_ranges.end()) {
|
|
||||||
make_copies();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
while (it != it_end) {
|
|
||||||
found_sets.subtract(*it);
|
|
||||||
it++;
|
|
||||||
}
|
|
||||||
make_copies();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
||||||
std::span<BufferCopy> copies) {
|
std::span<BufferCopy> copies) {
|
||||||
|
|
|
@ -77,12 +77,21 @@ bool DmaPusher::Step() {
|
||||||
command_headers.resize_destructive(command_list_header.size);
|
command_headers.resize_destructive(command_list_header.size);
|
||||||
constexpr u32 MacroRegistersStart = 0xE00;
|
constexpr u32 MacroRegistersStart = 0xE00;
|
||||||
if (dma_state.method < MacroRegistersStart) {
|
if (dma_state.method < MacroRegistersStart) {
|
||||||
|
if (Settings::IsGPULevelHigh()) {
|
||||||
memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
|
memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
|
||||||
command_list_header.size * sizeof(u32));
|
command_list_header.size * sizeof(u32));
|
||||||
} else {
|
} else {
|
||||||
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
|
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
|
||||||
command_list_header.size * sizeof(u32));
|
command_list_header.size * sizeof(u32));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
const size_t copy_size = command_list_header.size * sizeof(u32);
|
||||||
|
if (subchannels[dma_state.subchannel]) {
|
||||||
|
subchannels[dma_state.subchannel]->current_dirty =
|
||||||
|
memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size);
|
||||||
|
}
|
||||||
|
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
|
||||||
|
}
|
||||||
ProcessCommands(command_headers);
|
ProcessCommands(command_headers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ public:
|
||||||
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||||
u32 methods_pending) = 0;
|
u32 methods_pending) = 0;
|
||||||
|
|
||||||
|
bool current_dirty{};
|
||||||
GPUVAddr current_dma_segment;
|
GPUVAddr current_dma_segment;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "common/settings.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/core_timing.h"
|
#include "core/core_timing.h"
|
||||||
#include "video_core/dirty_flags.h"
|
#include "video_core/dirty_flags.h"
|
||||||
|
@ -14,6 +15,7 @@
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/textures/texture.h"
|
#include "video_core/textures/texture.h"
|
||||||
|
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
|
||||||
using VideoCore::QueryType;
|
using VideoCore::QueryType;
|
||||||
|
@ -134,6 +136,8 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
||||||
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
|
macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
|
||||||
}
|
}
|
||||||
macro_segments.emplace_back(current_dma_segment, amount);
|
macro_segments.emplace_back(current_dma_segment, amount);
|
||||||
|
current_macro_dirty |= current_dirty;
|
||||||
|
current_dirty = false;
|
||||||
|
|
||||||
// Call the macro when there are no more parameters in the command buffer
|
// Call the macro when there are no more parameters in the command buffer
|
||||||
if (is_last_call) {
|
if (is_last_call) {
|
||||||
|
@ -141,10 +145,14 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
|
||||||
macro_params.clear();
|
macro_params.clear();
|
||||||
macro_addresses.clear();
|
macro_addresses.clear();
|
||||||
macro_segments.clear();
|
macro_segments.clear();
|
||||||
|
current_macro_dirty = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::RefreshParameters() {
|
void Maxwell3D::RefreshParametersImpl() {
|
||||||
|
if (!Settings::IsGPULevelHigh()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
size_t current_index = 0;
|
size_t current_index = 0;
|
||||||
for (auto& segment : macro_segments) {
|
for (auto& segment : macro_segments) {
|
||||||
if (segment.first == 0) {
|
if (segment.first == 0) {
|
||||||
|
@ -157,21 +165,6 @@ void Maxwell3D::RefreshParameters() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Maxwell3D::AnyParametersDirty() {
|
|
||||||
size_t current_index = 0;
|
|
||||||
for (auto& segment : macro_segments) {
|
|
||||||
if (segment.first == 0) {
|
|
||||||
current_index += segment.second;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
current_index += segment.second;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 Maxwell3D::GetMaxCurrentVertices() {
|
u32 Maxwell3D::GetMaxCurrentVertices() {
|
||||||
u32 num_vertices = 0;
|
u32 num_vertices = 0;
|
||||||
for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
|
for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
|
||||||
|
@ -332,7 +325,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||||
|
|
||||||
const u32 argument = ProcessShadowRam(method, method_argument);
|
const u32 argument = ProcessShadowRam(method, method_argument);
|
||||||
ProcessDirtyRegisters(method, argument);
|
ProcessDirtyRegisters(method, argument);
|
||||||
|
|
||||||
ProcessMethodCall(method, argument, method_argument, is_last_call);
|
ProcessMethodCall(method, argument, method_argument, is_last_call);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -272,6 +272,7 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
union {
|
union {
|
||||||
|
u32 raw;
|
||||||
BitField<0, 1, Mode> mode;
|
BitField<0, 1, Mode> mode;
|
||||||
BitField<4, 8, u32> pad;
|
BitField<4, 8, u32> pad;
|
||||||
};
|
};
|
||||||
|
@ -1217,10 +1218,12 @@ public:
|
||||||
|
|
||||||
struct Window {
|
struct Window {
|
||||||
union {
|
union {
|
||||||
|
u32 raw_1;
|
||||||
BitField<0, 16, u32> x_min;
|
BitField<0, 16, u32> x_min;
|
||||||
BitField<16, 16, u32> x_max;
|
BitField<16, 16, u32> x_max;
|
||||||
};
|
};
|
||||||
union {
|
union {
|
||||||
|
u32 raw_2;
|
||||||
BitField<0, 16, u32> y_min;
|
BitField<0, 16, u32> y_min;
|
||||||
BitField<16, 16, u32> y_max;
|
BitField<16, 16, u32> y_max;
|
||||||
};
|
};
|
||||||
|
@ -3090,9 +3093,16 @@ public:
|
||||||
return macro_addresses[index];
|
return macro_addresses[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
void RefreshParameters();
|
void RefreshParameters() {
|
||||||
|
if (!current_macro_dirty) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
RefreshParametersImpl();
|
||||||
|
}
|
||||||
|
|
||||||
bool AnyParametersDirty();
|
bool AnyParametersDirty() {
|
||||||
|
return current_macro_dirty;
|
||||||
|
}
|
||||||
|
|
||||||
u32 GetMaxCurrentVertices();
|
u32 GetMaxCurrentVertices();
|
||||||
|
|
||||||
|
@ -3101,6 +3111,9 @@ public:
|
||||||
/// Handles a write to the CLEAR_BUFFERS register.
|
/// Handles a write to the CLEAR_BUFFERS register.
|
||||||
void ProcessClearBuffers(u32 layer_count);
|
void ProcessClearBuffers(u32 layer_count);
|
||||||
|
|
||||||
|
/// Handles a write to the CB_BIND register.
|
||||||
|
void ProcessCBBind(size_t stage_index);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void InitializeRegisterDefaults();
|
void InitializeRegisterDefaults();
|
||||||
|
|
||||||
|
@ -3154,12 +3167,11 @@ private:
|
||||||
void ProcessCBData(u32 value);
|
void ProcessCBData(u32 value);
|
||||||
void ProcessCBMultiData(const u32* start_base, u32 amount);
|
void ProcessCBMultiData(const u32* start_base, u32 amount);
|
||||||
|
|
||||||
/// Handles a write to the CB_BIND register.
|
|
||||||
void ProcessCBBind(size_t stage_index);
|
|
||||||
|
|
||||||
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
||||||
std::optional<u64> GetQueryResult();
|
std::optional<u64> GetQueryResult();
|
||||||
|
|
||||||
|
void RefreshParametersImpl();
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
MemoryManager& memory_manager;
|
MemoryManager& memory_manager;
|
||||||
|
|
||||||
|
@ -3187,6 +3199,7 @@ private:
|
||||||
bool draw_indexed{};
|
bool draw_indexed{};
|
||||||
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
|
std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
|
||||||
std::vector<GPUVAddr> macro_addresses;
|
std::vector<GPUVAddr> macro_addresses;
|
||||||
|
bool current_macro_dirty{};
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ASSERT_REG_POSITION(field_name, position) \
|
#define ASSERT_REG_POSITION(field_name, position) \
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/fs/fs.h"
|
#include "common/fs/fs.h"
|
||||||
#include "common/fs/path_util.h"
|
#include "common/fs/path_util.h"
|
||||||
|
#include "common/microprofile.h"
|
||||||
#include "common/settings.h"
|
#include "common/settings.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/macro/macro.h"
|
#include "video_core/macro/macro.h"
|
||||||
|
@ -22,6 +23,8 @@
|
||||||
#include "video_core/macro/macro_jit_x64.h"
|
#include "video_core/macro/macro_jit_x64.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro hle", MP_RGB(128, 192, 192));
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
static void Dump(u64 hash, std::span<const u32> code) {
|
static void Dump(u64 hash, std::span<const u32> code) {
|
||||||
|
@ -60,6 +63,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
|
||||||
if (compiled_macro != macro_cache.end()) {
|
if (compiled_macro != macro_cache.end()) {
|
||||||
const auto& cache_info = compiled_macro->second;
|
const auto& cache_info = compiled_macro->second;
|
||||||
if (cache_info.has_hle_program) {
|
if (cache_info.has_hle_program) {
|
||||||
|
MICROPROFILE_SCOPE(MacroHLE);
|
||||||
cache_info.hle_program->Execute(parameters, method);
|
cache_info.hle_program->Execute(parameters, method);
|
||||||
} else {
|
} else {
|
||||||
maxwell3d.RefreshParameters();
|
maxwell3d.RefreshParameters();
|
||||||
|
@ -106,6 +110,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
|
||||||
if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
|
if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
|
||||||
cache_info.has_hle_program = true;
|
cache_info.has_hle_program = true;
|
||||||
cache_info.hle_program = std::move(hle_program);
|
cache_info.hle_program = std::move(hle_program);
|
||||||
|
MICROPROFILE_SCOPE(MacroHLE);
|
||||||
cache_info.hle_program->Execute(parameters, method);
|
cache_info.hle_program->Execute(parameters, method);
|
||||||
} else {
|
} else {
|
||||||
maxwell3d.RefreshParameters();
|
maxwell3d.RefreshParameters();
|
||||||
|
|
|
@ -86,7 +86,7 @@ public:
|
||||||
|
|
||||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
|
auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
|
||||||
if (!IsTopologySafe(topology)) {
|
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
|
||||||
Fallback(parameters);
|
Fallback(parameters);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -117,8 +117,8 @@ private:
|
||||||
void Fallback(const std::vector<u32>& parameters) {
|
void Fallback(const std::vector<u32>& parameters) {
|
||||||
SCOPE_EXIT({
|
SCOPE_EXIT({
|
||||||
if (extended) {
|
if (extended) {
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
maxwell3d.CallMethod(0x8e4, 0, true);
|
maxwell3d.replace_table.clear();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
maxwell3d.RefreshParameters();
|
maxwell3d.RefreshParameters();
|
||||||
|
@ -127,7 +127,8 @@ private:
|
||||||
const u32 vertex_first = parameters[3];
|
const u32 vertex_first = parameters[3];
|
||||||
const u32 vertex_count = parameters[1];
|
const u32 vertex_count = parameters[1];
|
||||||
|
|
||||||
if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) {
|
if (maxwell3d.AnyParametersDirty() &&
|
||||||
|
maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) {
|
||||||
ASSERT_MSG(false, "Faulty draw!");
|
ASSERT_MSG(false, "Faulty draw!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -157,7 +158,7 @@ public:
|
||||||
|
|
||||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
|
auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
|
||||||
if (!IsTopologySafe(topology)) {
|
if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
|
||||||
Fallback(parameters);
|
Fallback(parameters);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -169,7 +170,11 @@ public:
|
||||||
}
|
}
|
||||||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
||||||
const u32 base_size = std::max<u32>(minimum_limit, estimate);
|
const u32 base_size = std::max<u32>(minimum_limit, estimate);
|
||||||
maxwell3d.regs.draw.topology.Assign(topology);
|
const u32 element_base = parameters[4];
|
||||||
|
const u32 base_instance = parameters[5];
|
||||||
|
maxwell3d.regs.vertex_id_base = element_base;
|
||||||
|
maxwell3d.regs.global_base_vertex_index = element_base;
|
||||||
|
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||||
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||||
|
@ -186,6 +191,9 @@ public:
|
||||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
|
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
|
||||||
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
maxwell3d.replace_table.clear();
|
maxwell3d.replace_table.clear();
|
||||||
|
maxwell3d.regs.vertex_id_base = 0x0;
|
||||||
|
maxwell3d.regs.global_base_vertex_index = 0x0;
|
||||||
|
maxwell3d.regs.global_base_instance_index = 0x0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -195,6 +203,8 @@ private:
|
||||||
const u32 element_base = parameters[4];
|
const u32 element_base = parameters[4];
|
||||||
const u32 base_instance = parameters[5];
|
const u32 base_instance = parameters[5];
|
||||||
maxwell3d.regs.vertex_id_base = element_base;
|
maxwell3d.regs.vertex_id_base = element_base;
|
||||||
|
maxwell3d.regs.global_base_vertex_index = element_base;
|
||||||
|
maxwell3d.regs.global_base_instance_index = base_instance;
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||||
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||||
|
@ -205,6 +215,8 @@ private:
|
||||||
parameters[3], parameters[1], element_base, base_instance, instance_count);
|
parameters[3], parameters[1], element_base, base_instance, instance_count);
|
||||||
|
|
||||||
maxwell3d.regs.vertex_id_base = 0x0;
|
maxwell3d.regs.vertex_id_base = 0x0;
|
||||||
|
maxwell3d.regs.global_base_vertex_index = 0x0;
|
||||||
|
maxwell3d.regs.global_base_instance_index = 0x0;
|
||||||
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
maxwell3d.replace_table.clear();
|
maxwell3d.replace_table.clear();
|
||||||
}
|
}
|
||||||
|
@ -253,7 +265,6 @@ public:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
maxwell3d.regs.draw.topology.Assign(topology);
|
|
||||||
const u32 padding = parameters[3]; // padding is in words
|
const u32 padding = parameters[3]; // padding is in words
|
||||||
|
|
||||||
// size of each indirect segment
|
// size of each indirect segment
|
||||||
|
@ -335,6 +346,83 @@ private:
|
||||||
u32 minimum_limit{1 << 12};
|
u32 minimum_limit{1 << 12};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_C713C83D8F63CCF3(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2;
|
||||||
|
const u32 address = maxwell3d.regs.shadow_scratch[24];
|
||||||
|
auto& const_buffer = maxwell3d.regs.const_buffer;
|
||||||
|
const_buffer.size = 0x7000;
|
||||||
|
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||||
|
const_buffer.address_low = address << 8;
|
||||||
|
const_buffer.offset = offset;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class HLE_D7333D26E0A93EDE final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_D7333D26E0A93EDE(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
const size_t index = parameters[0];
|
||||||
|
const u32 address = maxwell3d.regs.shadow_scratch[42 + index];
|
||||||
|
const u32 size = maxwell3d.regs.shadow_scratch[47 + index];
|
||||||
|
auto& const_buffer = maxwell3d.regs.const_buffer;
|
||||||
|
const_buffer.size = size;
|
||||||
|
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||||
|
const_buffer.address_low = address << 8;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class HLE_BindShader final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_BindShader(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
auto& regs = maxwell3d.regs;
|
||||||
|
const u32 index = parameters[0];
|
||||||
|
if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
regs.pipelines[index & 0xF].offset = parameters[2];
|
||||||
|
maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true;
|
||||||
|
regs.shadow_scratch[28 + index] = parameters[1];
|
||||||
|
regs.shadow_scratch[34 + index] = parameters[2];
|
||||||
|
|
||||||
|
const u32 address = parameters[4];
|
||||||
|
auto& const_buffer = regs.const_buffer;
|
||||||
|
const_buffer.size = 0x10000;
|
||||||
|
const_buffer.address_high = (address >> 24) & 0xFF;
|
||||||
|
const_buffer.address_low = address << 8;
|
||||||
|
|
||||||
|
const size_t bind_group_id = parameters[3] & 0x7F;
|
||||||
|
auto& bind_group = regs.bind_groups[bind_group_id];
|
||||||
|
bind_group.raw_config = 0x11;
|
||||||
|
maxwell3d.ProcessCBBind(bind_group_id);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class HLE_SetRasterBoundingBox final : public HLEMacroImpl {
|
||||||
|
public:
|
||||||
|
explicit HLE_SetRasterBoundingBox(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
|
maxwell3d.RefreshParameters();
|
||||||
|
const u32 raster_mode = parameters[0];
|
||||||
|
auto& regs = maxwell3d.regs;
|
||||||
|
const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable;
|
||||||
|
const u32 scratch_data = maxwell3d.regs.shadow_scratch[52];
|
||||||
|
regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F;
|
||||||
|
regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
||||||
|
@ -368,6 +456,26 @@ HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
|
||||||
[](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
|
[](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
|
||||||
return std::make_unique<HLE_MultiLayerClear>(maxwell3d);
|
return std::make_unique<HLE_MultiLayerClear>(maxwell3d);
|
||||||
}));
|
}));
|
||||||
|
builders.emplace(0xC713C83D8F63CCF3ULL,
|
||||||
|
std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
|
||||||
|
[](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
|
||||||
|
return std::make_unique<HLE_C713C83D8F63CCF3>(maxwell3d);
|
||||||
|
}));
|
||||||
|
builders.emplace(0xD7333D26E0A93EDEULL,
|
||||||
|
std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
|
||||||
|
[](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
|
||||||
|
return std::make_unique<HLE_D7333D26E0A93EDE>(maxwell3d);
|
||||||
|
}));
|
||||||
|
builders.emplace(0xEB29B2A09AA06D38ULL,
|
||||||
|
std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
|
||||||
|
[](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
|
||||||
|
return std::make_unique<HLE_BindShader>(maxwell3d);
|
||||||
|
}));
|
||||||
|
builders.emplace(0xDB1341DBEB4C8AF7ULL,
|
||||||
|
std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
|
||||||
|
[](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
|
||||||
|
return std::make_unique<HLE_SetRasterBoundingBox>(maxwell3d);
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
HLEMacro::~HLEMacro() = default;
|
HLEMacro::~HLEMacro() = default;
|
||||||
|
|
Reference in New Issue