Merge pull request #12412 from ameerj/gl-query-prims
OpenGL: Add GL_PRIMITIVES_GENERATED and GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries
This commit is contained in:
commit
91290b9be4
|
@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessCounterReset() {
|
void Maxwell3D::ProcessCounterReset() {
|
||||||
switch (regs.clear_report_value) {
|
const auto query_type = [clear_report = regs.clear_report_value]() {
|
||||||
case Regs::ClearReport::ZPassPixelCount:
|
switch (clear_report) {
|
||||||
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
|
case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount:
|
||||||
break;
|
return VideoCommon::QueryType::ZPassPixelCount64;
|
||||||
|
case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded:
|
||||||
|
return VideoCommon::QueryType::StreamingPrimitivesSucceeded;
|
||||||
|
case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated:
|
||||||
|
return VideoCommon::QueryType::PrimitivesGenerated;
|
||||||
|
case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut:
|
||||||
|
return VideoCommon::QueryType::VtgPrimitivesOut;
|
||||||
default:
|
default:
|
||||||
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
|
LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report);
|
||||||
break;
|
return VideoCommon::QueryType::Payload;
|
||||||
}
|
}
|
||||||
|
}();
|
||||||
|
rasterizer->ResetCounter(query_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessSyncPoint() {
|
void Maxwell3D::ProcessSyncPoint() {
|
||||||
|
|
|
@ -28,8 +28,11 @@
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
enum class QueryType {
|
enum class QueryType {
|
||||||
SamplesPassed,
|
SamplesPassed,
|
||||||
|
PrimitivesGenerated,
|
||||||
|
TfbPrimitivesWritten,
|
||||||
|
Count,
|
||||||
};
|
};
|
||||||
constexpr std::size_t NumQueryTypes = 1;
|
constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count);
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
||||||
namespace VideoCommon {
|
namespace VideoCommon {
|
||||||
|
@ -44,15 +47,6 @@ public:
|
||||||
explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
|
explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
|
||||||
: cache{cache_}, type{type_} {}
|
: cache{cache_}, type{type_} {}
|
||||||
|
|
||||||
/// Updates the state of the stream, enabling or disabling as needed.
|
|
||||||
void Update(bool enabled) {
|
|
||||||
if (enabled) {
|
|
||||||
Enable();
|
|
||||||
} else {
|
|
||||||
Disable();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Resets the stream to zero. It doesn't disable the query after resetting.
|
/// Resets the stream to zero. It doesn't disable the query after resetting.
|
||||||
void Reset() {
|
void Reset() {
|
||||||
if (current) {
|
if (current) {
|
||||||
|
@ -80,7 +74,6 @@ public:
|
||||||
return current != nullptr;
|
return current != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
/// Enables the stream.
|
/// Enables the stream.
|
||||||
void Enable() {
|
void Enable() {
|
||||||
if (current) {
|
if (current) {
|
||||||
|
@ -97,6 +90,7 @@ private:
|
||||||
last = std::exchange(current, nullptr);
|
last = std::exchange(current, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
QueryCache& cache;
|
QueryCache& cache;
|
||||||
const VideoCore::QueryType type;
|
const VideoCore::QueryType type;
|
||||||
|
|
||||||
|
@ -112,8 +106,14 @@ public:
|
||||||
: rasterizer{rasterizer_},
|
: rasterizer{rasterizer_},
|
||||||
// Use reinterpret_cast instead of static_cast as workaround for
|
// Use reinterpret_cast instead of static_cast as workaround for
|
||||||
// UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
|
// UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
|
||||||
cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this),
|
cpu_memory{cpu_memory_}, streams{{
|
||||||
VideoCore::QueryType::SamplesPassed}}} {
|
{CounterStream{reinterpret_cast<QueryCache&>(*this),
|
||||||
|
VideoCore::QueryType::SamplesPassed}},
|
||||||
|
{CounterStream{reinterpret_cast<QueryCache&>(*this),
|
||||||
|
VideoCore::QueryType::PrimitivesGenerated}},
|
||||||
|
{CounterStream{reinterpret_cast<QueryCache&>(*this),
|
||||||
|
VideoCore::QueryType::TfbPrimitivesWritten}},
|
||||||
|
}} {
|
||||||
(void)slot_async_jobs.insert(); // Null value
|
(void)slot_async_jobs.insert(); // Null value
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,12 +157,11 @@ public:
|
||||||
AsyncFlushQuery(query, timestamp, lock);
|
AsyncFlushQuery(query, timestamp, lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
/// Enables all available GPU counters
|
||||||
void UpdateCounters() {
|
void EnableCounters() {
|
||||||
std::unique_lock lock{mutex};
|
std::unique_lock lock{mutex};
|
||||||
if (maxwell3d) {
|
for (auto& stream : streams) {
|
||||||
const auto& regs = maxwell3d->regs;
|
stream.Enable();
|
||||||
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -176,7 +175,7 @@ public:
|
||||||
void DisableStreams() {
|
void DisableStreams() {
|
||||||
std::unique_lock lock{mutex};
|
std::unique_lock lock{mutex};
|
||||||
for (auto& stream : streams) {
|
for (auto& stream : streams) {
|
||||||
stream.Update(false);
|
stream.Disable();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -353,7 +352,7 @@ private:
|
||||||
|
|
||||||
std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
|
std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
|
||||||
std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
|
std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
|
||||||
};
|
}; // namespace VideoCommon
|
||||||
|
|
||||||
template <class QueryCache, class HostCounter>
|
template <class QueryCache, class HostCounter>
|
||||||
class HostCounterBase {
|
class HostCounterBase {
|
||||||
|
|
|
@ -18,16 +18,27 @@ namespace OpenGL {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
|
|
||||||
|
|
||||||
constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
||||||
return QueryTargets[static_cast<std::size_t>(type)];
|
switch (type) {
|
||||||
|
case VideoCore::QueryType::SamplesPassed:
|
||||||
|
return GL_SAMPLES_PASSED;
|
||||||
|
case VideoCore::QueryType::PrimitivesGenerated:
|
||||||
|
return GL_PRIMITIVES_GENERATED;
|
||||||
|
case VideoCore::QueryType::TfbPrimitivesWritten:
|
||||||
|
return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
UNIMPLEMENTED_MSG("Query type {}", type);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
|
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
|
||||||
: QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
|
: QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {
|
||||||
|
EnableCounters();
|
||||||
|
}
|
||||||
|
|
||||||
QueryCache::~QueryCache() = default;
|
QueryCache::~QueryCache() = default;
|
||||||
|
|
||||||
|
@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) {
|
||||||
auto& stream = cache->Stream(type);
|
auto& stream = cache->Stream(type);
|
||||||
const bool slice_counter = WaitPending() && stream.IsEnabled();
|
const bool slice_counter = WaitPending() && stream.IsEnabled();
|
||||||
if (slice_counter) {
|
if (slice_counter) {
|
||||||
stream.Update(false);
|
stream.Disable();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
|
auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
|
||||||
|
|
||||||
if (slice_counter) {
|
if (slice_counter) {
|
||||||
stream.Update(true);
|
stream.Enable();
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||||
void oglEnable(GLenum cap, bool state) {
|
void oglEnable(GLenum cap, bool state) {
|
||||||
(state ? glEnable : glDisable)(cap);
|
(state ? glEnable : glDisable)(cap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) {
|
||||||
|
switch (type) {
|
||||||
|
case VideoCommon::QueryType::PrimitivesGenerated:
|
||||||
|
case VideoCommon::QueryType::VtgPrimitivesOut:
|
||||||
|
return VideoCore::QueryType::PrimitivesGenerated;
|
||||||
|
case VideoCommon::QueryType::ZPassPixelCount64:
|
||||||
|
return VideoCore::QueryType::SamplesPassed;
|
||||||
|
case VideoCommon::QueryType::StreamingPrimitivesSucceeded:
|
||||||
|
// case VideoCommon::QueryType::StreamingByteCount:
|
||||||
|
// TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride
|
||||||
|
return VideoCore::QueryType::TfbPrimitivesWritten;
|
||||||
|
default:
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||||
|
@ -216,7 +232,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
|
||||||
|
|
||||||
SCOPE_EXIT({ gpu.TickWork(); });
|
SCOPE_EXIT({ gpu.TickWork(); });
|
||||||
gpu_memory->FlushCaching();
|
gpu_memory->FlushCaching();
|
||||||
query_cache.UpdateCounters();
|
|
||||||
|
|
||||||
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
|
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
|
||||||
if (!pipeline) {
|
if (!pipeline) {
|
||||||
|
@ -334,7 +349,6 @@ void RasterizerOpenGL::DrawTexture() {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||||
|
|
||||||
SCOPE_EXIT({ gpu.TickWork(); });
|
SCOPE_EXIT({ gpu.TickWork(); });
|
||||||
query_cache.UpdateCounters();
|
|
||||||
|
|
||||||
texture_cache.SynchronizeGraphicsDescriptors();
|
texture_cache.SynchronizeGraphicsDescriptors();
|
||||||
texture_cache.UpdateRenderTargets(false);
|
texture_cache.UpdateRenderTargets(false);
|
||||||
|
@ -401,21 +415,28 @@ void RasterizerOpenGL::DispatchCompute() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
|
void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
|
||||||
if (type == VideoCommon::QueryType::ZPassPixelCount64) {
|
const auto query_cache_type = MaxwellToVideoCoreQuery(type);
|
||||||
query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed);
|
if (!query_cache_type.has_value()) {
|
||||||
|
UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
query_cache.ResetCounter(*query_cache_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
|
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
|
||||||
if (type == VideoCommon::QueryType::ZPassPixelCount64) {
|
const auto query_cache_type = MaxwellToVideoCoreQuery(type);
|
||||||
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
|
if (!query_cache_type.has_value()) {
|
||||||
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
|
return QueryFallback(gpu_addr, type, flags, payload, subreport);
|
||||||
} else {
|
|
||||||
query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout);
|
||||||
|
const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt;
|
||||||
|
query_cache.Query(gpu_addr, *query_cache_type, timestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
|
VideoCommon::QueryPropertiesFlags flags, u32 payload,
|
||||||
|
u32 subreport) {
|
||||||
if (type != VideoCommon::QueryType::Payload) {
|
if (type != VideoCommon::QueryType::Payload) {
|
||||||
payload = 1u;
|
payload = 1u;
|
||||||
}
|
}
|
||||||
|
|
|
@ -225,6 +225,9 @@ private:
|
||||||
/// End a transform feedback
|
/// End a transform feedback
|
||||||
void EndTransformFeedback();
|
void EndTransformFeedback();
|
||||||
|
|
||||||
|
void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
|
||||||
|
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport);
|
||||||
|
|
||||||
Tegra::GPU& gpu;
|
Tegra::GPU& gpu;
|
||||||
|
|
||||||
const Device& device;
|
const Device& device;
|
||||||
|
|
|
@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
|
void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
|
||||||
|
if (type != VideoCommon::QueryType::ZPassPixelCount64) {
|
||||||
|
LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type);
|
||||||
|
return;
|
||||||
|
}
|
||||||
query_cache.CounterReset(type);
|
query_cache.CounterReset(type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Reference in New Issue