vk_query_cache: Implement generic query cache on Vulkan
This commit is contained in:
parent
c31382ced5
commit
bcd348f238
|
@ -180,6 +180,8 @@ if (ENABLE_VULKAN)
|
||||||
renderer_vulkan/vk_memory_manager.h
|
renderer_vulkan/vk_memory_manager.h
|
||||||
renderer_vulkan/vk_pipeline_cache.cpp
|
renderer_vulkan/vk_pipeline_cache.cpp
|
||||||
renderer_vulkan/vk_pipeline_cache.h
|
renderer_vulkan/vk_pipeline_cache.h
|
||||||
|
renderer_vulkan/vk_query_cache.cpp
|
||||||
|
renderer_vulkan/vk_query_cache.h
|
||||||
renderer_vulkan/vk_rasterizer.cpp
|
renderer_vulkan/vk_rasterizer.cpp
|
||||||
renderer_vulkan/vk_rasterizer.h
|
renderer_vulkan/vk_rasterizer.h
|
||||||
renderer_vulkan/vk_renderpass_cache.cpp
|
renderer_vulkan/vk_renderpass_cache.cpp
|
||||||
|
|
|
@ -88,7 +88,8 @@ private:
|
||||||
std::shared_ptr<HostCounter> last;
|
std::shared_ptr<HostCounter> last;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
|
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
|
||||||
|
class QueryPool>
|
||||||
class QueryCacheBase {
|
class QueryCacheBase {
|
||||||
public:
|
public:
|
||||||
explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
||||||
|
@ -127,15 +128,25 @@ public:
|
||||||
|
|
||||||
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
||||||
void UpdateCounters() {
|
void UpdateCounters() {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
|
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resets a counter to zero. It doesn't disable the query after resetting.
|
/// Resets a counter to zero. It doesn't disable the query after resetting.
|
||||||
void ResetCounter(VideoCore::QueryType type) {
|
void ResetCounter(VideoCore::QueryType type) {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
Stream(type).Reset();
|
Stream(type).Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Disable all active streams. Expected to be called at the end of a command buffer.
|
||||||
|
void DisableStreams() {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
for (auto& stream : streams) {
|
||||||
|
stream.Update(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns a new host counter.
|
/// Returns a new host counter.
|
||||||
std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
|
std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
|
||||||
VideoCore::QueryType type) {
|
VideoCore::QueryType type) {
|
||||||
|
@ -148,6 +159,9 @@ public:
|
||||||
return streams[static_cast<std::size_t>(type)];
|
return streams[static_cast<std::size_t>(type)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Flushes a memory range to guest memory and removes it from the cache.
|
/// Flushes a memory range to guest memory and removes it from the cache.
|
||||||
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
|
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
|
||||||
|
@ -213,8 +227,16 @@ private:
|
||||||
template <class QueryCache, class HostCounter>
|
template <class QueryCache, class HostCounter>
|
||||||
class HostCounterBase {
|
class HostCounterBase {
|
||||||
public:
|
public:
|
||||||
explicit HostCounterBase(std::shared_ptr<HostCounter> dependency)
|
explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
|
||||||
: dependency{std::move(dependency)} {}
|
: dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
|
||||||
|
// Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
|
||||||
|
static constexpr u64 depth_threshold = 96;
|
||||||
|
if (depth > depth_threshold) {
|
||||||
|
depth = 0;
|
||||||
|
base_result = dependency->Query();
|
||||||
|
dependency = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the current value of the query.
|
/// Returns the current value of the query.
|
||||||
u64 Query() {
|
u64 Query() {
|
||||||
|
@ -222,9 +244,10 @@ public:
|
||||||
return *result;
|
return *result;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 value = BlockingQuery();
|
u64 value = BlockingQuery() + base_result;
|
||||||
if (dependency) {
|
if (dependency) {
|
||||||
value += dependency->Query();
|
value += dependency->Query();
|
||||||
|
dependency = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
return *(result = value);
|
return *(result = value);
|
||||||
|
@ -235,6 +258,10 @@ public:
|
||||||
return result.has_value();
|
return result.has_value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns how many dependencies are nested below this counter.
u64 Depth() const noexcept {
    return depth;
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/// Returns the value of query from the backend API blocking as needed.
|
/// Returns the value of query from the backend API blocking as needed.
|
||||||
virtual u64 BlockingQuery() const = 0;
|
virtual u64 BlockingQuery() const = 0;
|
||||||
|
@ -242,6 +269,8 @@ protected:
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
|
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
|
||||||
std::optional<u64> result; ///< Filled with the already returned value.
|
std::optional<u64> result; ///< Filled with the already returned value.
|
||||||
|
u64 depth; ///< Number of nested dependencies.
|
||||||
|
u64 base_result = 0; ///< Equivalent to nested dependencies value.
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class HostCounter>
|
template <class HostCounter>
|
||||||
|
|
|
@ -31,15 +31,16 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
|
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
|
||||||
: VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
|
: VideoCommon::QueryCacheBase<
|
||||||
HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>(
|
QueryCache, CachedQuery, CounterStream, HostCounter,
|
||||||
gl_rasterizer)},
|
std::vector<OGLQuery>>{system,
|
||||||
|
static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
|
||||||
gl_rasterizer{gl_rasterizer} {}
|
gl_rasterizer{gl_rasterizer} {}
|
||||||
|
|
||||||
QueryCache::~QueryCache() = default;
|
QueryCache::~QueryCache() = default;
|
||||||
|
|
||||||
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
|
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
|
||||||
auto& reserve = queries_reserve[static_cast<std::size_t>(type)];
|
auto& reserve = query_pools[static_cast<std::size_t>(type)];
|
||||||
OGLQuery query;
|
OGLQuery query;
|
||||||
if (reserve.empty()) {
|
if (reserve.empty()) {
|
||||||
query.Create(GetTarget(type));
|
query.Create(GetTarget(type));
|
||||||
|
@ -52,7 +53,7 @@ OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
|
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
|
||||||
queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query));
|
query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool QueryCache::AnyCommandQueued() const noexcept {
|
bool QueryCache::AnyCommandQueued() const noexcept {
|
||||||
|
|
|
@ -6,12 +6,8 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <glad/glad.h>
|
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/query_cache.h"
|
#include "video_core/query_cache.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
@ -30,8 +26,8 @@ class RasterizerOpenGL;
|
||||||
|
|
||||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
||||||
|
|
||||||
class QueryCache final
|
class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
|
||||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
HostCounter, std::vector<OGLQuery>> {
|
||||||
public:
|
public:
|
||||||
explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
|
explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
|
||||||
~QueryCache();
|
~QueryCache();
|
||||||
|
@ -44,7 +40,6 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
RasterizerOpenGL& gl_rasterizer;
|
RasterizerOpenGL& gl_rasterizer;
|
||||||
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
|
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
|
||||||
|
@ -59,7 +54,7 @@ private:
|
||||||
u64 BlockingQuery() const override;
|
u64 BlockingQuery() const override;
|
||||||
|
|
||||||
QueryCache& cache;
|
QueryCache& cache;
|
||||||
VideoCore::QueryType type;
|
const VideoCore::QueryType type;
|
||||||
OGLQuery query;
|
OGLQuery query;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||||
features.depthBiasClamp = true;
|
features.depthBiasClamp = true;
|
||||||
features.geometryShader = true;
|
features.geometryShader = true;
|
||||||
features.tessellationShader = true;
|
features.tessellationShader = true;
|
||||||
|
features.occlusionQueryPrecise = true;
|
||||||
features.fragmentStoresAndAtomics = true;
|
features.fragmentStoresAndAtomics = true;
|
||||||
features.shaderImageGatherExtended = true;
|
features.shaderImageGatherExtended = true;
|
||||||
features.shaderStorageImageWriteWithoutFormat = true;
|
features.shaderStorageImageWriteWithoutFormat = true;
|
||||||
|
@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||||
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
|
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
|
||||||
SetNext(next, bit8_storage);
|
SetNext(next, bit8_storage);
|
||||||
|
|
||||||
|
vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
|
||||||
|
host_query_reset.hostQueryReset = true;
|
||||||
|
SetNext(next, host_query_reset);
|
||||||
|
|
||||||
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
|
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
|
||||||
if (is_float16_supported) {
|
if (is_float16_supported) {
|
||||||
float16_int8.shaderFloat16 = true;
|
float16_int8.shaderFloat16 = true;
|
||||||
|
@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||||
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
|
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
|
||||||
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
|
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
|
||||||
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
||||||
|
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
|
||||||
};
|
};
|
||||||
std::bitset<required_extensions.size()> available_extensions{};
|
std::bitset<required_extensions.size()> available_extensions{};
|
||||||
|
|
||||||
|
@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||||
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
|
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
|
||||||
std::make_pair(features.geometryShader, "geometryShader"),
|
std::make_pair(features.geometryShader, "geometryShader"),
|
||||||
std::make_pair(features.tessellationShader, "tessellationShader"),
|
std::make_pair(features.tessellationShader, "tessellationShader"),
|
||||||
|
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
|
||||||
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
|
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
|
||||||
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
||||||
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
||||||
|
@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
extensions.reserve(13);
|
extensions.reserve(14);
|
||||||
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
|
||||||
|
@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
||||||
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
|
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
|
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
|
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
|
||||||
|
extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
|
||||||
|
|
||||||
[[maybe_unused]] const bool nsight =
|
[[maybe_unused]] const bool nsight =
|
||||||
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
||||||
|
|
|
@ -0,0 +1,122 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_device.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Vulkan query type for each VideoCore::QueryType; GetTarget indexes it by the enum's value.
constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
|
||||||
|
|
||||||
|
/// Translates a guest query type into the Vulkan query type stored in QUERY_TARGETS.
constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
    const auto index = static_cast<std::size_t>(type);
    return QUERY_TARGETS[index];
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
// Constructs the pool, passing GROW_STEP to the VKFencedPool base.
QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
|
||||||
|
|
||||||
|
// Defaulted; the UniqueQueryPool handles held in `pools` are released by their own destructors.
QueryPool::~QueryPool() = default;
|
||||||
|
|
||||||
|
/// Stores the query type and device that Allocate uses when it creates Vulkan query pools.
void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
    type = type_;
    device = &device_;
}
|
||||||
|
|
||||||
|
/// Commits a query slot that is not flagged as in use and marks it as used.
/// @param fence Fence passed to CommitResource for every attempted slot.
/// @returns The Vulkan query pool handle and the slot index inside that pool.
std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
    // Keep committing resources until one is found whose slot is not flagged in `usage`.
    std::size_t index = CommitResource(fence);
    while (usage[index]) {
        index = CommitResource(fence);
    }
    usage[index] = true;

    // Each entry of `pools` holds GROW_STEP slots, so split the flat index accordingly.
    const std::size_t pool_index = index / GROW_STEP;
    const auto slot = static_cast<std::uint32_t>(index % GROW_STEP);
    return {*pools[pool_index], slot};
}
|
||||||
|
|
||||||
|
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
|
||||||
|
usage.resize(end);
|
||||||
|
|
||||||
|
const auto dev = device->GetLogical();
|
||||||
|
const u32 size = static_cast<u32>(end - begin);
|
||||||
|
const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
|
||||||
|
pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clears the in-use flag of a query slot previously returned by Commit.
/// @param query Pool handle and slot index identifying the query.
void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
    // Locate the owning entry of `pools` by comparing raw pool handles.
    const auto same_handle = [query_pool = query.first](auto& pool) { return query_pool == *pool; };
    const auto it = std::find_if(std::begin(pools), std::end(pools), same_handle);
    ASSERT(it != std::end(pools));

    // Rebuild the flat index used by `usage`: pool position times GROW_STEP plus the slot.
    const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
    const auto slot = static_cast<std::ptrdiff_t>(query.second);
    usage[pool_index * GROW_STEP + slot] = false;
}
|
||||||
|
|
||||||
|
/// Builds the cache and initializes one QueryPool per query type with the device.
VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                           const VKDevice& device, VKScheduler& scheduler)
    : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
                                  QueryPool>{system, rasterizer},
      device{device}, scheduler{scheduler} {
    // The position of a pool inside query_pools doubles as its VideoCore::QueryType value.
    std::size_t index = 0;
    for (QueryPool& pool : query_pools) {
        pool.Initialize(device, static_cast<VideoCore::QueryType>(index));
        ++index;
    }
}
|
||||||
|
|
||||||
|
// Defaulted destructor, defined out of line in this translation unit.
VKQueryCache::~VKQueryCache() = default;
|
||||||
|
|
||||||
|
/// Commits a query slot of the given type against the scheduler's current fence.
/// @returns The Vulkan query pool handle and the slot index inside it.
std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
    QueryPool& pool = query_pools[static_cast<std::size_t>(type)];
    return pool.Commit(scheduler.GetFence());
}
|
||||||
|
|
||||||
|
/// Hands a query slot back to the pool that serves the given query type.
void VKQueryCache::Reserve(VideoCore::QueryType type,
                           std::pair<vk::QueryPool, std::uint32_t> query) {
    QueryPool& pool = query_pools[static_cast<std::size_t>(type)];
    pool.Reserve(query);
}
|
||||||
|
|
||||||
|
/// Allocates a query slot, remembers the scheduler tick at creation and records commands that
/// reset the slot and begin a precise query on it.
HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type)
    : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
      type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
    const auto logical = cache.Device().GetLogical();
    const vk::QueryPool pool = query.first;
    const std::uint32_t slot = query.second;
    // Capture by value: the recorded command may run after this constructor has returned.
    cache.Scheduler().Record([logical, pool, slot](vk::CommandBuffer cmdbuf, auto& dld) {
        logical.resetQueryPoolEXT(pool, slot, 1, dld);
        cmdbuf.beginQuery(pool, slot, vk::QueryControlFlagBits::ePrecise, dld);
    });
}
|
||||||
|
|
||||||
|
/// Returns this counter's query slot to the cache so it can be committed again.
HostCounter::~HostCounter() {
    cache.Reserve(type, query);
}
|
||||||
|
|
||||||
|
void HostCounter::EndQuery() {
|
||||||
|
cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
|
||||||
|
cmdbuf.endQuery(query.first, query.second, dld);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads the 64-bit result of this counter's query slot, waiting until it is available.
/// @returns The value written by the device, or zero if the read did not write a result.
u64 HostCounter::BlockingQuery() const {
    // The scheduler tick has not advanced past the one captured at construction, so the
    // commands for this query are presumably still pending; flush them before waiting.
    if (ticks >= cache.Scheduler().Ticks()) {
        cache.Scheduler().Flush();
    }

    const auto dev = cache.Device().GetLogical();
    const auto& dld = cache.Device().GetDispatchLoader();
    // Zero-initialized so a failed read never hands an indeterminate value to the caller.
    u64 value = 0;
    dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
                            vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
    return value;
}
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
|
@ -0,0 +1,104 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/query_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
class RasterizerInterface;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
|
||||||
|
class CachedQuery;
|
||||||
|
class HostCounter;
|
||||||
|
class VKDevice;
|
||||||
|
class VKQueryCache;
|
||||||
|
class VKScheduler;
|
||||||
|
|
||||||
|
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
|
||||||
|
|
||||||
|
class QueryPool final : public VKFencedPool {
|
||||||
|
public:
|
||||||
|
explicit QueryPool();
|
||||||
|
~QueryPool() override;
|
||||||
|
|
||||||
|
void Initialize(const VKDevice& device, VideoCore::QueryType type);
|
||||||
|
|
||||||
|
std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
|
||||||
|
|
||||||
|
void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void Allocate(std::size_t begin, std::size_t end) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static constexpr std::size_t GROW_STEP = 512;
|
||||||
|
|
||||||
|
const VKDevice* device = nullptr;
|
||||||
|
VideoCore::QueryType type = {};
|
||||||
|
|
||||||
|
std::vector<UniqueQueryPool> pools;
|
||||||
|
std::vector<bool> usage;
|
||||||
|
};
|
||||||
|
|
||||||
|
class VKQueryCache final
|
||||||
|
: public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
|
||||||
|
QueryPool> {
|
||||||
|
public:
|
||||||
|
explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
const VKDevice& device, VKScheduler& scheduler);
|
||||||
|
~VKQueryCache();
|
||||||
|
|
||||||
|
std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
|
||||||
|
|
||||||
|
void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
|
||||||
|
|
||||||
|
const VKDevice& Device() const noexcept {
|
||||||
|
return device;
|
||||||
|
}
|
||||||
|
|
||||||
|
VKScheduler& Scheduler() const noexcept {
|
||||||
|
return scheduler;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const VKDevice& device;
|
||||||
|
VKScheduler& scheduler;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Host-side counter backed by a single slot of a Vulkan query pool.
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
public:
    explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type);
    ~HostCounter();

    /// Records a command that ends the query on this counter's slot.
    void EndQuery();

private:
    /// Reads the query result from the device, waiting until it is available.
    u64 BlockingQuery() const override;

    VKQueryCache& cache;
    const VideoCore::QueryType type;
    const std::pair<vk::QueryPool, std::uint32_t> query; ///< Pool handle and slot index.
    const u64 ticks; ///< Scheduler tick read when the counter was constructed.
};
|
||||||
|
|
||||||
|
/// Cached query entry; forwards the guest address and host pointer to the common base.
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
    // The cache and query type arguments are accepted but not used by this backend.
    explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
        : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
};
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
|
@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
|
||||||
staging_pool),
|
staging_pool),
|
||||||
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
|
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
|
||||||
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
|
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
|
||||||
sampler_cache(device) {}
|
sampler_cache(device), query_cache(system, *this, device, scheduler) {
|
||||||
|
scheduler.SetQueryCache(query_cache);
|
||||||
|
}
|
||||||
|
|
||||||
RasterizerVulkan::~RasterizerVulkan() = default;
|
RasterizerVulkan::~RasterizerVulkan() = default;
|
||||||
|
|
||||||
|
@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||||
|
|
||||||
FlushWork();
|
FlushWork();
|
||||||
|
|
||||||
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
|
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
|
||||||
|
|
||||||
|
@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||||
void RasterizerVulkan::Clear() {
|
void RasterizerVulkan::Clear() {
|
||||||
MICROPROFILE_SCOPE(Vulkan_Clearing);
|
MICROPROFILE_SCOPE(Vulkan_Clearing);
|
||||||
|
|
||||||
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
if (!system.GPU().Maxwell3D().ShouldExecute()) {
|
if (!system.GPU().Maxwell3D().ShouldExecute()) {
|
||||||
return;
|
return;
|
||||||
|
@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||||
sampled_views.clear();
|
sampled_views.clear();
|
||||||
image_views.clear();
|
image_views.clear();
|
||||||
|
|
||||||
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
const ComputePipelineCacheKey key{
|
const ComputePipelineCacheKey key{
|
||||||
code_addr,
|
code_addr,
|
||||||
|
@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards a counter reset for the given query type to the query cache.
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
    query_cache.ResetCounter(type);
}
|
||||||
|
|
||||||
|
/// Forwards a query request (address, type and optional timestamp) to the query cache.
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
                             std::optional<u64> timestamp) {
    query_cache.Query(gpu_addr, type, timestamp);
}
|
||||||
|
|
||||||
void RasterizerVulkan::FlushAll() {}
|
void RasterizerVulkan::FlushAll() {}
|
||||||
|
|
||||||
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
|
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
|
||||||
texture_cache.FlushRegion(addr, size);
|
texture_cache.FlushRegion(addr, size);
|
||||||
buffer_cache.FlushRegion(addr, size);
|
buffer_cache.FlushRegion(addr, size);
|
||||||
|
query_cache.FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
|
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
texture_cache.InvalidateRegion(addr, size);
|
texture_cache.InvalidateRegion(addr, size);
|
||||||
pipeline_cache.InvalidateRegion(addr, size);
|
pipeline_cache.InvalidateRegion(addr, size);
|
||||||
buffer_cache.InvalidateRegion(addr, size);
|
buffer_cache.InvalidateRegion(addr, size);
|
||||||
|
query_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
|
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
|
||||||
|
@ -96,7 +97,7 @@ struct ImageView {
|
||||||
vk::ImageLayout* layout = nullptr;
|
vk::ImageLayout* layout = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
|
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
|
||||||
public:
|
public:
|
||||||
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
|
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
|
||||||
VKScreenInfo& screen_info, const VKDevice& device,
|
VKScreenInfo& screen_info, const VKDevice& device,
|
||||||
|
@ -108,6 +109,8 @@ public:
|
||||||
bool DrawMultiBatch(bool is_indexed) override;
|
bool DrawMultiBatch(bool is_indexed) override;
|
||||||
void Clear() override;
|
void Clear() override;
|
||||||
void DispatchCompute(GPUVAddr code_addr) override;
|
void DispatchCompute(GPUVAddr code_addr) override;
|
||||||
|
void ResetCounter(VideoCore::QueryType type) override;
|
||||||
|
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
|
@ -247,6 +250,7 @@ private:
|
||||||
VKPipelineCache pipeline_cache;
|
VKPipelineCache pipeline_cache;
|
||||||
VKBufferCache buffer_cache;
|
VKBufferCache buffer_cache;
|
||||||
VKSamplerCache sampler_cache;
|
VKSamplerCache sampler_cache;
|
||||||
|
VKQueryCache query_cache;
|
||||||
|
|
||||||
std::array<View, Maxwell::NumRenderTargets> color_attachments;
|
std::array<View, Maxwell::NumRenderTargets> color_attachments;
|
||||||
View zeta_attachment;
|
View zeta_attachment;
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "video_core/renderer_vulkan/declarations.h"
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
#include "video_core/renderer_vulkan/vk_device.h"
|
#include "video_core/renderer_vulkan/vk_device.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
|
||||||
|
@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKScheduler::AllocateNewContext() {
|
void VKScheduler::AllocateNewContext() {
|
||||||
|
++ticks;
|
||||||
|
|
||||||
std::unique_lock lock{mutex};
|
std::unique_lock lock{mutex};
|
||||||
current_fence = next_fence;
|
current_fence = next_fence;
|
||||||
next_fence = &resource_manager.CommitFence();
|
next_fence = &resource_manager.CommitFence();
|
||||||
|
@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
|
||||||
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
|
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
|
||||||
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
|
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
|
||||||
device.GetDispatchLoader());
|
device.GetDispatchLoader());
|
||||||
|
// Enable counters once again. These are disabled when a command buffer is finished.
|
||||||
|
if (query_cache) {
|
||||||
|
query_cache->UpdateCounters();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKScheduler::InvalidateState() {
|
void VKScheduler::InvalidateState() {
|
||||||
|
@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKScheduler::EndPendingOperations() {
|
void VKScheduler::EndPendingOperations() {
|
||||||
|
query_cache->DisableStreams();
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
@ -18,6 +19,7 @@ namespace Vulkan {
|
||||||
|
|
||||||
class VKDevice;
|
class VKDevice;
|
||||||
class VKFence;
|
class VKFence;
|
||||||
|
class VKQueryCache;
|
||||||
class VKResourceManager;
|
class VKResourceManager;
|
||||||
|
|
||||||
class VKFenceView {
|
class VKFenceView {
|
||||||
|
@ -67,6 +69,11 @@ public:
|
||||||
/// Binds a pipeline to the current execution context.
|
/// Binds a pipeline to the current execution context.
|
||||||
void BindGraphicsPipeline(vk::Pipeline pipeline);
|
void BindGraphicsPipeline(vk::Pipeline pipeline);
|
||||||
|
|
||||||
|
/// Assigns the query cache.
|
||||||
|
void SetQueryCache(VKQueryCache& query_cache_) {
|
||||||
|
query_cache = &query_cache_;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when viewports have been set in the current command buffer.
|
/// Returns true when viewports have been set in the current command buffer.
|
||||||
bool TouchViewports() {
|
bool TouchViewports() {
|
||||||
return std::exchange(state.viewports, true);
|
return std::exchange(state.viewports, true);
|
||||||
|
@ -112,6 +119,11 @@ public:
|
||||||
return current_fence;
|
return current_fence;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the current command buffer tick.
|
||||||
|
u64 Ticks() const {
|
||||||
|
return ticks;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class Command {
|
class Command {
|
||||||
public:
|
public:
|
||||||
|
@ -205,6 +217,8 @@ private:
|
||||||
|
|
||||||
const VKDevice& device;
|
const VKDevice& device;
|
||||||
VKResourceManager& resource_manager;
|
VKResourceManager& resource_manager;
|
||||||
|
VKQueryCache* query_cache = nullptr;
|
||||||
|
|
||||||
vk::CommandBuffer current_cmdbuf;
|
vk::CommandBuffer current_cmdbuf;
|
||||||
VKFence* current_fence = nullptr;
|
VKFence* current_fence = nullptr;
|
||||||
VKFence* next_fence = nullptr;
|
VKFence* next_fence = nullptr;
|
||||||
|
@ -227,6 +241,7 @@ private:
|
||||||
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
|
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
std::condition_variable cv;
|
std::condition_variable cv;
|
||||||
|
std::atomic<u64> ticks = 0;
|
||||||
bool quit = false;
|
bool quit = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Reference in New Issue