Move buffer bindings to per-channel state
This commit is contained in:
parent
d6db422098
commit
b0bea13ed8
|
@ -2,6 +2,8 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/buffer_cache/buffer_cache_base.h"
|
||||
#include "video_core/control/channel_state_cache.inc"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
|
@ -9,4 +11,6 @@ MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 12
|
|||
MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
|
||||
MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
|
||||
|
||||
template class VideoCommon::ChannelSetupCaches<VideoCommon::BufferCacheChannelInfo>;
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
@ -64,17 +64,22 @@ void BufferCache<P>::RunGarbageCollector() {
|
|||
template <class P>
|
||||
void BufferCache<P>::TickFrame() {
|
||||
// Calculate hits and shots and move hit bits to the right
|
||||
const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
|
||||
const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
|
||||
std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
|
||||
uniform_cache_hits.begin() + 1);
|
||||
std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
|
||||
uniform_cache_shots.begin() + 1);
|
||||
uniform_cache_hits[0] = 0;
|
||||
uniform_cache_shots[0] = 0;
|
||||
|
||||
const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
|
||||
channel_state->uniform_cache_hits.end());
|
||||
const u32 shots = std::reduce(channel_state->uniform_cache_shots.begin(),
|
||||
channel_state->uniform_cache_shots.end());
|
||||
std::copy_n(channel_state->uniform_cache_hits.begin(),
|
||||
channel_state->uniform_cache_hits.size() - 1,
|
||||
channel_state->uniform_cache_hits.begin() + 1);
|
||||
std::copy_n(channel_state->uniform_cache_shots.begin(),
|
||||
channel_state->uniform_cache_shots.size() - 1,
|
||||
channel_state->uniform_cache_shots.begin() + 1);
|
||||
channel_state->uniform_cache_hits[0] = 0;
|
||||
channel_state->uniform_cache_shots[0] = 0;
|
||||
|
||||
const bool skip_preferred = hits * 256 < shots * 251;
|
||||
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||
channel_state->uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||
|
||||
// If we can obtain the memory info, use it instead of the estimate.
|
||||
if (runtime.CanReportMemoryUsage()) {
|
||||
|
@ -164,10 +169,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
|
|||
BufferId buffer_a;
|
||||
BufferId buffer_b;
|
||||
do {
|
||||
has_deleted_buffers = false;
|
||||
channel_state->has_deleted_buffers = false;
|
||||
buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
|
||||
buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
|
||||
} while (has_deleted_buffers);
|
||||
} while (channel_state->has_deleted_buffers);
|
||||
auto& src_buffer = slot_buffers[buffer_a];
|
||||
auto& dest_buffer = slot_buffers[buffer_b];
|
||||
SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
|
||||
|
@ -272,30 +277,30 @@ void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
|
|||
.size = size,
|
||||
.buffer_id = BufferId{},
|
||||
};
|
||||
uniform_buffers[stage][index] = binding;
|
||||
channel_state->uniform_buffers[stage][index] = binding;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
|
||||
uniform_buffers[stage][index] = NULL_BINDING;
|
||||
channel_state->uniform_buffers[stage][index] = NULL_BINDING;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
|
||||
MICROPROFILE_SCOPE(GPU_PrepareBuffers);
|
||||
do {
|
||||
has_deleted_buffers = false;
|
||||
channel_state->has_deleted_buffers = false;
|
||||
DoUpdateGraphicsBuffers(is_indexed);
|
||||
} while (has_deleted_buffers);
|
||||
} while (channel_state->has_deleted_buffers);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateComputeBuffers() {
|
||||
MICROPROFILE_SCOPE(GPU_PrepareBuffers);
|
||||
do {
|
||||
has_deleted_buffers = false;
|
||||
channel_state->has_deleted_buffers = false;
|
||||
DoUpdateComputeBuffers();
|
||||
} while (has_deleted_buffers);
|
||||
} while (channel_state->has_deleted_buffers);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -338,98 +343,102 @@ template <class P>
|
|||
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
|
||||
const UniformBufferSizes* sizes) {
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
if (enabled_uniform_buffer_masks != mask) {
|
||||
if (channel_state->enabled_uniform_buffer_masks != mask) {
|
||||
if constexpr (IS_OPENGL) {
|
||||
fast_bound_uniform_buffers.fill(0);
|
||||
channel_state->fast_bound_uniform_buffers.fill(0);
|
||||
}
|
||||
dirty_uniform_buffers.fill(~u32{0});
|
||||
uniform_buffer_binding_sizes.fill({});
|
||||
channel_state->dirty_uniform_buffers.fill(~u32{0});
|
||||
channel_state->uniform_buffer_binding_sizes.fill({});
|
||||
}
|
||||
}
|
||||
enabled_uniform_buffer_masks = mask;
|
||||
uniform_buffer_sizes = sizes;
|
||||
channel_state->enabled_uniform_buffer_masks = mask;
|
||||
channel_state->uniform_buffer_sizes = sizes;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
|
||||
const ComputeUniformBufferSizes* sizes) {
|
||||
enabled_compute_uniform_buffer_mask = mask;
|
||||
compute_uniform_buffer_sizes = sizes;
|
||||
channel_state->enabled_compute_uniform_buffer_mask = mask;
|
||||
channel_state->compute_uniform_buffer_sizes = sizes;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
|
||||
enabled_storage_buffers[stage] = 0;
|
||||
written_storage_buffers[stage] = 0;
|
||||
channel_state->enabled_storage_buffers[stage] = 0;
|
||||
channel_state->written_storage_buffers[stage] = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
|
||||
u32 cbuf_offset, bool is_written) {
|
||||
enabled_storage_buffers[stage] |= 1U << ssbo_index;
|
||||
written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
|
||||
channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
|
||||
const auto& cbufs = maxwell3d->state.shader_stages[stage];
|
||||
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
|
||||
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
|
||||
channel_state->storage_buffers[stage][ssbo_index] =
|
||||
StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
|
||||
enabled_texture_buffers[stage] = 0;
|
||||
written_texture_buffers[stage] = 0;
|
||||
image_texture_buffers[stage] = 0;
|
||||
channel_state->enabled_texture_buffers[stage] = 0;
|
||||
channel_state->written_texture_buffers[stage] = 0;
|
||||
channel_state->image_texture_buffers[stage] = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
|
||||
u32 size, PixelFormat format, bool is_written,
|
||||
bool is_image) {
|
||||
enabled_texture_buffers[stage] |= 1U << tbo_index;
|
||||
written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
|
||||
channel_state->enabled_texture_buffers[stage] |= 1U << tbo_index;
|
||||
channel_state->written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
|
||||
channel_state->image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
|
||||
}
|
||||
texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
||||
channel_state->texture_buffers[stage][tbo_index] =
|
||||
GetTextureBufferBinding(gpu_addr, size, format);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindComputeStorageBuffers() {
|
||||
enabled_compute_storage_buffers = 0;
|
||||
written_compute_storage_buffers = 0;
|
||||
image_compute_texture_buffers = 0;
|
||||
channel_state->enabled_compute_storage_buffers = 0;
|
||||
channel_state->written_compute_storage_buffers = 0;
|
||||
channel_state->image_compute_texture_buffers = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
|
||||
bool is_written) {
|
||||
enabled_compute_storage_buffers |= 1U << ssbo_index;
|
||||
written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
|
||||
channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
|
||||
const auto& launch_desc = kepler_compute->launch_description;
|
||||
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
|
||||
|
||||
const auto& cbufs = launch_desc.const_buffer_config;
|
||||
const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
|
||||
compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
|
||||
channel_state->compute_storage_buffers[ssbo_index] =
|
||||
StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UnbindComputeTextureBuffers() {
|
||||
enabled_compute_texture_buffers = 0;
|
||||
written_compute_texture_buffers = 0;
|
||||
image_compute_texture_buffers = 0;
|
||||
channel_state->enabled_compute_texture_buffers = 0;
|
||||
channel_state->written_compute_texture_buffers = 0;
|
||||
channel_state->image_compute_texture_buffers = 0;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
|
||||
PixelFormat format, bool is_written, bool is_image) {
|
||||
enabled_compute_texture_buffers |= 1U << tbo_index;
|
||||
written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
|
||||
channel_state->enabled_compute_texture_buffers |= 1U << tbo_index;
|
||||
channel_state->written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
|
||||
channel_state->image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
|
||||
}
|
||||
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
|
||||
channel_state->compute_texture_buffers[tbo_index] =
|
||||
GetTextureBufferBinding(gpu_addr, size, format);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -672,10 +681,10 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostIndexBuffer() {
|
||||
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
|
||||
TouchBuffer(buffer, index_buffer.buffer_id);
|
||||
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
|
||||
const u32 size = index_buffer.size;
|
||||
Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
|
||||
TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
|
||||
const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr);
|
||||
const u32 size = channel_state->index_buffer.size;
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
|
||||
if constexpr (USE_MEMORY_MAPS) {
|
||||
|
@ -689,7 +698,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
|
|||
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
|
||||
}
|
||||
} else {
|
||||
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
|
||||
SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size);
|
||||
}
|
||||
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
|
||||
const u32 new_offset =
|
||||
|
@ -706,7 +715,7 @@ template <class P>
|
|||
void BufferCache<P>::BindHostVertexBuffers() {
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
||||
const Binding& binding = vertex_buffers[index];
|
||||
const Binding& binding = channel_state->vertex_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||
|
@ -729,19 +738,19 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
|
|||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
bind_buffer(count_buffer_binding);
|
||||
bind_buffer(channel_state->count_buffer_binding);
|
||||
}
|
||||
bind_buffer(indirect_buffer_binding);
|
||||
bind_buffer(channel_state->indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
||||
u32 dirty = ~0U;
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
dirty = std::exchange(dirty_uniform_buffers[stage], 0);
|
||||
dirty = std::exchange(channel_state->dirty_uniform_buffers[stage], 0);
|
||||
}
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
const bool needs_bind = ((dirty >> index) & 1) != 0;
|
||||
BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
|
||||
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
|
||||
|
@ -753,13 +762,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
|
|||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
|
||||
bool needs_bind) {
|
||||
const Binding& binding = uniform_buffers[stage][index];
|
||||
const Binding& binding = channel_state->uniform_buffers[stage][index];
|
||||
const VAddr cpu_addr = binding.cpu_addr;
|
||||
const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
|
||||
const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
|
||||
size <= uniform_buffer_skip_cache_size &&
|
||||
size <= channel_state->uniform_buffer_skip_cache_size &&
|
||||
!memory_tracker.IsRegionGpuModified(cpu_addr, size);
|
||||
if (use_fast_buffer) {
|
||||
if constexpr (IS_OPENGL) {
|
||||
|
@ -767,11 +776,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
|||
// Fast path for Nvidia
|
||||
const bool should_fast_bind =
|
||||
!HasFastUniformBufferBound(stage, binding_index) ||
|
||||
uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||
if (should_fast_bind) {
|
||||
// We only have to bind when the currently bound buffer is not the fast version
|
||||
fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||
uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
runtime.BindFastUniformBuffer(stage, binding_index, size);
|
||||
}
|
||||
const auto span = ImmediateBufferWithData(cpu_addr, size);
|
||||
|
@ -780,8 +789,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
|||
}
|
||||
}
|
||||
if constexpr (IS_OPENGL) {
|
||||
fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||
uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
}
|
||||
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
|
||||
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
|
||||
|
@ -791,15 +800,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
|||
// Classic cached path
|
||||
const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
|
||||
if (sync_cached) {
|
||||
++uniform_cache_hits[0];
|
||||
++channel_state->uniform_cache_hits[0];
|
||||
}
|
||||
++uniform_cache_shots[0];
|
||||
++channel_state->uniform_cache_shots[0];
|
||||
|
||||
// Skip binding if it's not needed and if the bound buffer is not the fast version
|
||||
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
|
||||
needs_bind |= HasFastUniformBufferBound(stage, binding_index);
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||
}
|
||||
if (!needs_bind) {
|
||||
return;
|
||||
|
@ -807,14 +816,14 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
|||
const u32 offset = buffer.Offset(cpu_addr);
|
||||
if constexpr (IS_OPENGL) {
|
||||
// Fast buffer will be unbound
|
||||
fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
|
||||
channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
|
||||
|
||||
// Mark the index as dirty if offset doesn't match
|
||||
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
|
||||
dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
|
||||
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
|
||||
}
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||
}
|
||||
if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
|
||||
runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
|
||||
|
@ -826,15 +835,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
|||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
|
||||
const Binding& binding = storage_buffers[stage][index];
|
||||
ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
|
||||
const Binding& binding = channel_state->storage_buffers[stage][index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size = binding.size;
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
|
||||
const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
|
||||
if constexpr (NEEDS_BIND_STORAGE_INDEX) {
|
||||
runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
|
||||
++binding_index;
|
||||
|
@ -846,8 +855,8 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
|
||||
const TextureBufferBinding& binding = texture_buffers[stage][index];
|
||||
ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
|
||||
const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
const u32 size = binding.size;
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
@ -855,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
|
|||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const PixelFormat format = binding.format;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
if (((image_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
runtime.BindImageBuffer(buffer, offset, size, format);
|
||||
} else {
|
||||
runtime.BindTextureBuffer(buffer, offset, size, format);
|
||||
|
@ -872,7 +881,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
|
|||
return;
|
||||
}
|
||||
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
|
||||
const Binding& binding = transform_feedback_buffers[index];
|
||||
const Binding& binding = channel_state->transform_feedback_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size = binding.size;
|
||||
|
@ -887,15 +896,16 @@ template <class P>
|
|||
void BufferCache<P>::BindHostComputeUniformBuffers() {
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
// Mark all uniform buffers as dirty
|
||||
dirty_uniform_buffers.fill(~u32{0});
|
||||
fast_bound_uniform_buffers.fill(0);
|
||||
channel_state->dirty_uniform_buffers.fill(~u32{0});
|
||||
channel_state->fast_bound_uniform_buffers.fill(0);
|
||||
}
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
const Binding& binding = compute_uniform_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
const Binding& binding = channel_state->compute_uniform_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
|
||||
const u32 size =
|
||||
std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
|
@ -911,15 +921,16 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
|
|||
template <class P>
|
||||
void BufferCache<P>::BindHostComputeStorageBuffers() {
|
||||
u32 binding_index = 0;
|
||||
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
||||
const Binding& binding = compute_storage_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
|
||||
const Binding& binding = channel_state->compute_storage_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
TouchBuffer(buffer, binding.buffer_id);
|
||||
const u32 size = binding.size;
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
|
||||
const bool is_written =
|
||||
((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
|
||||
if constexpr (NEEDS_BIND_STORAGE_INDEX) {
|
||||
runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
|
||||
++binding_index;
|
||||
|
@ -931,8 +942,8 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostComputeTextureBuffers() {
|
||||
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
|
||||
const TextureBufferBinding& binding = compute_texture_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
|
||||
const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
const u32 size = binding.size;
|
||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||
|
@ -940,7 +951,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
|
|||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
const PixelFormat format = binding.format;
|
||||
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
|
||||
if (((image_compute_texture_buffers >> index) & 1) != 0) {
|
||||
if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
|
||||
runtime.BindImageBuffer(buffer, offset, size, format);
|
||||
} else {
|
||||
runtime.BindTextureBuffer(buffer, offset, size, format);
|
||||
|
@ -954,7 +965,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
|
|||
template <class P>
|
||||
void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
|
||||
do {
|
||||
has_deleted_buffers = false;
|
||||
channel_state->has_deleted_buffers = false;
|
||||
if (is_indexed) {
|
||||
UpdateIndexBuffer();
|
||||
}
|
||||
|
@ -968,7 +979,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
|
|||
if (current_draw_indirect) {
|
||||
UpdateDrawIndirect();
|
||||
}
|
||||
} while (has_deleted_buffers);
|
||||
} while (channel_state->has_deleted_buffers);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
@ -999,7 +1010,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
|||
slot_buffers.erase(inline_buffer_id);
|
||||
inline_buffer_id = CreateBuffer(0, buffer_size);
|
||||
}
|
||||
index_buffer = Binding{
|
||||
channel_state->index_buffer = Binding{
|
||||
.cpu_addr = 0,
|
||||
.size = inline_index_size,
|
||||
.buffer_id = inline_buffer_id,
|
||||
|
@ -1015,10 +1026,10 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
|||
(index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
|
||||
const u32 size = std::min(address_size, draw_size);
|
||||
if (size == 0 || !cpu_addr) {
|
||||
index_buffer = NULL_BINDING;
|
||||
channel_state->index_buffer = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
index_buffer = Binding{
|
||||
channel_state->index_buffer = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = size,
|
||||
.buffer_id = FindBuffer(*cpu_addr, size),
|
||||
|
@ -1051,13 +1062,13 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
|
|||
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
|
||||
u32 size = address_size; // TODO: Analyze stride and number of vertices
|
||||
if (array.enable == 0 || size == 0 || !cpu_addr) {
|
||||
vertex_buffers[index] = NULL_BINDING;
|
||||
channel_state->vertex_buffers[index] = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
|
||||
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
|
||||
}
|
||||
vertex_buffers[index] = Binding{
|
||||
channel_state->vertex_buffers[index] = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = size,
|
||||
.buffer_id = FindBuffer(*cpu_addr, size),
|
||||
|
@ -1079,23 +1090,24 @@ void BufferCache<P>::UpdateDrawIndirect() {
|
|||
};
|
||||
};
|
||||
if (current_draw_indirect->include_count) {
|
||||
update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
|
||||
update(current_draw_indirect->count_start_address, sizeof(u32),
|
||||
channel_state->count_buffer_binding);
|
||||
}
|
||||
update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
|
||||
indirect_buffer_binding);
|
||||
channel_state->indirect_buffer_binding);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
Binding& binding = uniform_buffers[stage][index];
|
||||
ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
|
||||
Binding& binding = channel_state->uniform_buffers[stage][index];
|
||||
if (binding.buffer_id) {
|
||||
// Already updated
|
||||
return;
|
||||
}
|
||||
// Mark as dirty
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
dirty_uniform_buffers[stage] |= 1U << index;
|
||||
channel_state->dirty_uniform_buffers[stage] |= 1U << index;
|
||||
}
|
||||
// Resolve buffer
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
|
@ -1104,10 +1116,10 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
|
||||
const u32 written_mask = written_storage_buffers[stage];
|
||||
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
|
||||
const u32 written_mask = channel_state->written_storage_buffers[stage];
|
||||
ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
|
||||
// Resolve buffer
|
||||
Binding& binding = storage_buffers[stage][index];
|
||||
Binding& binding = channel_state->storage_buffers[stage][index];
|
||||
const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
binding.buffer_id = buffer_id;
|
||||
// Mark buffer as written if needed
|
||||
|
@ -1119,11 +1131,11 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
|
||||
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
|
||||
Binding& binding = texture_buffers[stage][index];
|
||||
ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
|
||||
Binding& binding = channel_state->texture_buffers[stage][index];
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
// Mark buffer as written if needed
|
||||
if (((written_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
|
||||
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
|
||||
}
|
||||
});
|
||||
|
@ -1146,11 +1158,11 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
|
|||
const u32 size = binding.size;
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (binding.enable == 0 || size == 0 || !cpu_addr) {
|
||||
transform_feedback_buffers[index] = NULL_BINDING;
|
||||
channel_state->transform_feedback_buffers[index] = NULL_BINDING;
|
||||
return;
|
||||
}
|
||||
const BufferId buffer_id = FindBuffer(*cpu_addr, size);
|
||||
transform_feedback_buffers[index] = Binding{
|
||||
channel_state->transform_feedback_buffers[index] = Binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = size,
|
||||
.buffer_id = buffer_id,
|
||||
|
@ -1160,8 +1172,8 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateComputeUniformBuffers() {
|
||||
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
Binding& binding = compute_uniform_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
Binding& binding = channel_state->compute_uniform_buffers[index];
|
||||
binding = NULL_BINDING;
|
||||
const auto& launch_desc = kepler_compute->launch_description;
|
||||
if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
|
||||
|
@ -1178,12 +1190,12 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateComputeStorageBuffers() {
|
||||
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
||||
ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
|
||||
// Resolve buffer
|
||||
Binding& binding = compute_storage_buffers[index];
|
||||
Binding& binding = channel_state->compute_storage_buffers[index];
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
// Mark as written if needed
|
||||
if (((written_compute_storage_buffers >> index) & 1) != 0) {
|
||||
if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
|
||||
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
|
||||
}
|
||||
});
|
||||
|
@ -1191,11 +1203,11 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateComputeTextureBuffers() {
|
||||
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
|
||||
Binding& binding = compute_texture_buffers[index];
|
||||
ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
|
||||
Binding& binding = channel_state->compute_texture_buffers[index];
|
||||
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
|
||||
// Mark as written if needed
|
||||
if (((written_compute_texture_buffers >> index) & 1) != 0) {
|
||||
if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
|
||||
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
|
||||
}
|
||||
});
|
||||
|
@ -1610,13 +1622,13 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
|||
const auto replace = [scalar_replace](std::span<Binding> bindings) {
|
||||
std::ranges::for_each(bindings, scalar_replace);
|
||||
};
|
||||
scalar_replace(index_buffer);
|
||||
replace(vertex_buffers);
|
||||
std::ranges::for_each(uniform_buffers, replace);
|
||||
std::ranges::for_each(storage_buffers, replace);
|
||||
replace(transform_feedback_buffers);
|
||||
replace(compute_uniform_buffers);
|
||||
replace(compute_storage_buffers);
|
||||
scalar_replace(channel_state->index_buffer);
|
||||
replace(channel_state->vertex_buffers);
|
||||
std::ranges::for_each(channel_state->uniform_buffers, replace);
|
||||
std::ranges::for_each(channel_state->storage_buffers, replace);
|
||||
replace(channel_state->transform_feedback_buffers);
|
||||
replace(channel_state->compute_uniform_buffers);
|
||||
replace(channel_state->compute_storage_buffers);
|
||||
|
||||
// Mark the whole buffer as CPU written to stop tracking CPU writes
|
||||
if (!do_not_mark) {
|
||||
|
@ -1634,8 +1646,8 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
|||
template <class P>
|
||||
void BufferCache<P>::NotifyBufferDeletion() {
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
dirty_uniform_buffers.fill(~u32{0});
|
||||
uniform_buffer_binding_sizes.fill({});
|
||||
channel_state->dirty_uniform_buffers.fill(~u32{0});
|
||||
channel_state->uniform_buffer_binding_sizes.fill({});
|
||||
}
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
flags[Dirty::IndexBuffer] = true;
|
||||
|
@ -1643,12 +1655,11 @@ void BufferCache<P>::NotifyBufferDeletion() {
|
|||
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
||||
flags[Dirty::VertexBuffer0 + index] = true;
|
||||
}
|
||||
has_deleted_buffers = true;
|
||||
channel_state->has_deleted_buffers = true;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
|
||||
u32 cbuf_index,
|
||||
Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
|
||||
bool is_written) const {
|
||||
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
|
||||
const auto size = [&]() {
|
||||
|
@ -1681,8 +1692,8 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
|
|||
}
|
||||
|
||||
template <class P>
|
||||
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
|
||||
GPUVAddr gpu_addr, u32 size, PixelFormat format) {
|
||||
TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
|
||||
PixelFormat format) {
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
TextureBufferBinding binding;
|
||||
if (!cpu_addr || size == 0) {
|
||||
|
@ -1721,7 +1732,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
|
|||
template <class P>
|
||||
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
|
||||
if constexpr (IS_OPENGL) {
|
||||
return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
|
||||
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
|
||||
} else {
|
||||
// Only OpenGL has fast uniform buffers
|
||||
return false;
|
||||
|
@ -1730,14 +1741,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
|
|||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
|
||||
auto& buffer = slot_buffers[count_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr));
|
||||
auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
template <class P>
|
||||
std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
|
||||
auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr));
|
||||
auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id];
|
||||
return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr));
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
@ -86,8 +86,78 @@ enum class ObtainBufferOperation : u32 {
|
|||
MarkQuery = 3,
|
||||
};
|
||||
|
||||
template <typename P>
|
||||
class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
|
||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
|
||||
|
||||
struct Binding {
|
||||
VAddr cpu_addr{};
|
||||
u32 size{};
|
||||
BufferId buffer_id;
|
||||
};
|
||||
|
||||
struct TextureBufferBinding : Binding {
|
||||
PixelFormat format;
|
||||
};
|
||||
|
||||
static constexpr Binding NULL_BINDING{
|
||||
.cpu_addr = 0,
|
||||
.size = 0,
|
||||
.buffer_id = NULL_BUFFER_ID,
|
||||
};
|
||||
|
||||
class BufferCacheChannelInfo : public ChannelInfo {
|
||||
public:
|
||||
BufferCacheChannelInfo() = delete;
|
||||
BufferCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept : ChannelInfo(state) {}
|
||||
BufferCacheChannelInfo(const BufferCacheChannelInfo& state) = delete;
|
||||
BufferCacheChannelInfo& operator=(const BufferCacheChannelInfo&) = delete;
|
||||
|
||||
Binding index_buffer;
|
||||
std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
|
||||
std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
|
||||
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
||||
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
||||
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
||||
Binding count_buffer_binding;
|
||||
Binding indirect_buffer_binding;
|
||||
|
||||
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
||||
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
||||
std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
|
||||
u32 enabled_compute_uniform_buffer_mask = 0;
|
||||
|
||||
const UniformBufferSizes* uniform_buffer_sizes{};
|
||||
const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_storage_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_storage_buffers{};
|
||||
u32 enabled_compute_storage_buffers = 0;
|
||||
u32 written_compute_storage_buffers = 0;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> image_texture_buffers{};
|
||||
u32 enabled_compute_texture_buffers = 0;
|
||||
u32 written_compute_texture_buffers = 0;
|
||||
u32 image_compute_texture_buffers = 0;
|
||||
|
||||
std::array<u32, 16> uniform_cache_hits{};
|
||||
std::array<u32, 16> uniform_cache_shots{};
|
||||
|
||||
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
||||
|
||||
bool has_deleted_buffers = false;
|
||||
|
||||
std::array<u32, NUM_STAGES> dirty_uniform_buffers{};
|
||||
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
|
||||
std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>
|
||||
uniform_buffer_binding_sizes{};
|
||||
};
|
||||
|
||||
template <class P>
|
||||
class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> {
|
||||
// Page size for caching purposes.
|
||||
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
|
||||
static constexpr u32 CACHING_PAGEBITS = 16;
|
||||
|
@ -104,8 +174,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
|
|||
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
|
||||
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
|
||||
|
||||
static constexpr BufferId NULL_BUFFER_ID{0};
|
||||
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
|
||||
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
|
||||
|
@ -149,8 +217,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
|
|||
using OverlapSection = boost::icl::inter_section<int>;
|
||||
using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
|
||||
|
||||
struct Empty {};
|
||||
|
||||
struct OverlapResult {
|
||||
std::vector<BufferId> ids;
|
||||
VAddr begin;
|
||||
|
@ -158,25 +224,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
|
|||
bool has_stream_leap = false;
|
||||
};
|
||||
|
||||
struct Binding {
|
||||
VAddr cpu_addr{};
|
||||
u32 size{};
|
||||
BufferId buffer_id;
|
||||
};
|
||||
|
||||
struct TextureBufferBinding : Binding {
|
||||
PixelFormat format;
|
||||
};
|
||||
|
||||
static constexpr Binding NULL_BINDING{
|
||||
.cpu_addr = 0,
|
||||
.size = 0,
|
||||
.buffer_id = NULL_BUFFER_ID,
|
||||
};
|
||||
|
||||
public:
|
||||
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
|
||||
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
|
||||
|
||||
|
@ -496,51 +544,6 @@ private:
|
|||
|
||||
u32 last_index_count = 0;
|
||||
|
||||
Binding index_buffer;
|
||||
std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
|
||||
std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
|
||||
std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
|
||||
std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
|
||||
std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
|
||||
Binding count_buffer_binding;
|
||||
Binding indirect_buffer_binding;
|
||||
|
||||
std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
|
||||
std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
|
||||
std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
|
||||
u32 enabled_compute_uniform_buffer_mask = 0;
|
||||
|
||||
const UniformBufferSizes* uniform_buffer_sizes{};
|
||||
const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_storage_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_storage_buffers{};
|
||||
u32 enabled_compute_storage_buffers = 0;
|
||||
u32 written_compute_storage_buffers = 0;
|
||||
|
||||
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> written_texture_buffers{};
|
||||
std::array<u32, NUM_STAGES> image_texture_buffers{};
|
||||
u32 enabled_compute_texture_buffers = 0;
|
||||
u32 written_compute_texture_buffers = 0;
|
||||
u32 image_compute_texture_buffers = 0;
|
||||
|
||||
std::array<u32, 16> uniform_cache_hits{};
|
||||
std::array<u32, 16> uniform_cache_shots{};
|
||||
|
||||
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
||||
|
||||
bool has_deleted_buffers = false;
|
||||
|
||||
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
|
||||
dirty_uniform_buffers{};
|
||||
std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
|
||||
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
|
||||
std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
|
||||
uniform_buffer_binding_sizes{};
|
||||
|
||||
MemoryTracker memory_tracker;
|
||||
IntervalSet uncommitted_ranges;
|
||||
IntervalSet common_ranges;
|
||||
|
|
|
@ -117,7 +117,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
|
|||
for (auto& stage_uniforms : fast_uniforms) {
|
||||
for (OGLBuffer& buffer : stage_uniforms) {
|
||||
buffer.Create();
|
||||
glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
|
||||
glNamedBufferData(buffer.handle, VideoCommon::DEFAULT_SKIP_CACHE_SIZE, nullptr,
|
||||
GL_STREAM_DRAW);
|
||||
}
|
||||
}
|
||||
|
|
Reference in New Issue