buffer_cache: Skip cache on small uploads on Vulkan

Ports the OpenGL optimization that skips the buffer cache for small 3D uniform buffer uploads to Vulkan. This takes advantage of the previously introduced stream buffer. It also fixes instances where the staging buffer offset was being ignored.
2021-01-17 03:16:15 -03:00 · 2021-01-17 03:16:15 -03:00 · a02b4e1df6
parent 35df1d1864
commit a02b4e1df6
3 changed files with 18 additions and 9 deletions
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -660,25 +660,26 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
    const VAddr cpu_addr = binding.cpu_addr;
    const u32 size = binding.size;
    Buffer& buffer = slot_buffers[binding.buffer_id];
    if constexpr (IS_OPENGL) {
    if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
        if constexpr (IS_OPENGL) {
            if (runtime.HasFastBufferSubData()) {
                // Fast path for Nvidia
                if (!HasFastUniformBufferBound(stage, binding_index)) {
                    // We only have to bind when the currently bound buffer is not the fast version
                    fast_bound_uniform_buffers[stage] |= 1U << binding_index;
                    runtime.BindFastUniformBuffer(stage, binding_index, size);
                }
                const auto span = ImmediateBufferWithData(cpu_addr, size);
                runtime.PushFastUniformBuffer(stage, binding_index, span);
            } else {
                // Stream buffer path to avoid stalling on non-Nvidia drivers
                const auto span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
                cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
            }
                return;
            }
        }
        fast_bound_uniform_buffers[stage] |= 1U << binding_index;
        // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
        const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
        cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
        return;
    }
    // Classic cached path
    SynchronizeBuffer(buffer, cpu_addr, size);
    if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -69,6 +69,13 @@ public:
    void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size);
    /// Requests `size` bytes from the staging pool with upload memory usage, binds the
    /// returned buffer region, and returns the request's mapped span.
    /// `stage` and `binding_index` are accepted but not used by this implementation.
    std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
                                          [[maybe_unused]] u32 binding_index, u32 size) {
        const StagingBufferRef staging = staging_pool.Request(size, MemoryUsage::Upload);
        // Bind at the offset reported by the pool, not at the start of the buffer.
        const u32 bind_offset = static_cast<u32>(staging.offset);
        BindBuffer(staging.buffer, bind_offset, size);
        return staging.mapped_span;
    }
    /// Forwards `buffer`, `offset` and `size` unchanged to BindBuffer.
    void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
        BindBuffer(buffer, offset, size);
    }
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -86,7 +86,8 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
        .pNext = nullptr,
        .flags = 0,
        .size = STREAM_BUFFER_SIZE,
-        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,