QueryCache: Implement dependant queries.
This commit is contained in:
parent
57401589c2
commit
aa6587d854
|
@ -18,6 +18,7 @@ enum class QueryFlagBits : u32 {
|
||||||
IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
|
IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
|
||||||
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
|
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
|
||||||
IsFence = 1 << 8, ///< Indicates the query is a fence.
|
IsFence = 1 << 8, ///< Indicates the query is a fence.
|
||||||
|
IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
|
||||||
|
|
||||||
|
|
|
@ -489,8 +489,22 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
|
||||||
if (mask == 0) {
|
if (mask == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
impl->ForEachStreamerIn(mask,
|
u64 ran_mask = 0;
|
||||||
[](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); });
|
u64 next_phase = 0;
|
||||||
|
while (mask) {
|
||||||
|
impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) {
|
||||||
|
u64 dep_mask = streamer->GetDependenceMask();
|
||||||
|
if ((dep_mask & ~ran_mask) != 0) {
|
||||||
|
next_phase |= dep_mask;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
u64 index = streamer->GetId();
|
||||||
|
ran_mask |= (1ULL << index);
|
||||||
|
mask &= ~(1ULL << index);
|
||||||
|
streamer->PopUnsyncedQueries();
|
||||||
|
});
|
||||||
|
ran_mask |= next_phase;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Invalidation
|
// Invalidation
|
||||||
|
|
|
@ -70,6 +70,10 @@ public:
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the dependence mask stored in this streamer at construction.
/// PopAsyncFlushes tests it against a bitmask of `1ULL << GetId()` values, so each set
/// bit names a streamer that has to be popped before this one.
u64 GetDependenceMask() const {
    return dependance_mask;
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const size_t id;
|
const size_t id;
|
||||||
const u64 dependance_mask;
|
const u64 dependance_mask;
|
||||||
|
@ -78,7 +82,7 @@ protected:
|
||||||
template <typename QueryType>
|
template <typename QueryType>
|
||||||
class SimpleStreamer : public StreamerInterface {
|
class SimpleStreamer : public StreamerInterface {
|
||||||
public:
|
public:
|
||||||
SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
|
SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {}
|
||||||
virtual ~SimpleStreamer() = default;
|
virtual ~SimpleStreamer() = default;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -525,6 +525,9 @@ private:
|
||||||
vk::Buffer buffer;
|
vk::Buffer buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Traits>
|
||||||
|
class PrimitivesSucceededStreamer;
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
class TFBCounterStreamer : public BaseStreamer {
|
class TFBCounterStreamer : public BaseStreamer {
|
||||||
public:
|
public:
|
||||||
|
@ -537,6 +540,7 @@ public:
|
||||||
current_bank = nullptr;
|
current_bank = nullptr;
|
||||||
counter_buffers.fill(VK_NULL_HANDLE);
|
counter_buffers.fill(VK_NULL_HANDLE);
|
||||||
offsets.fill(0);
|
offsets.fill(0);
|
||||||
|
last_queries.fill(0);
|
||||||
const VkBufferCreateInfo buffer_ci = {
|
const VkBufferCreateInfo buffer_ci = {
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
|
@ -630,7 +634,7 @@ public:
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
const size_t subreport = static_cast<size_t>(*subreport_);
|
const size_t subreport = static_cast<size_t>(*subreport_);
|
||||||
UpdateBuffers();
|
last_queries[subreport] = address;
|
||||||
if ((streams_mask & (1ULL << subreport)) == 0) {
|
if ((streams_mask & (1ULL << subreport)) == 0) {
|
||||||
new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
|
new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
|
||||||
return index;
|
return index;
|
||||||
|
@ -646,6 +650,13 @@ public:
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<VAddr> GetLastQueryStream(size_t stream) {
|
||||||
|
if (last_queries[stream] != 0) {
|
||||||
|
return {last_queries[stream]};
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
bool HasUnsyncedQueries() override {
|
bool HasUnsyncedQueries() override {
|
||||||
return !pending_flush_queries.empty();
|
return !pending_flush_queries.empty();
|
||||||
}
|
}
|
||||||
|
@ -657,6 +668,7 @@ public:
|
||||||
size_t offset_base = staging_ref.offset;
|
size_t offset_base = staging_ref.offset;
|
||||||
for (auto q : pending_flush_queries) {
|
for (auto q : pending_flush_queries) {
|
||||||
auto* query = GetQuery(q);
|
auto* query = GetQuery(q);
|
||||||
|
query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush;
|
||||||
auto& bank = bank_pool.GetBank(query->start_bank_id);
|
auto& bank = bank_pool.GetBank(query->start_bank_id);
|
||||||
bank.Sync(staging_ref, offset_base, query->start_slot, 1);
|
bank.Sync(staging_ref, offset_base, query->start_slot, 1);
|
||||||
offset_base += TFBQueryBank::QUERY_SIZE;
|
offset_base += TFBQueryBank::QUERY_SIZE;
|
||||||
|
@ -741,13 +753,15 @@ private:
|
||||||
cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
|
scheduler.Record([this,
|
||||||
|
total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
|
cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateBuffers() {
|
void UpdateBuffers() {
|
||||||
|
last_queries.fill(0);
|
||||||
runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
|
runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||||
buffers_count = 0;
|
buffers_count = 0;
|
||||||
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
|
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
|
||||||
|
@ -804,6 +818,9 @@ private:
|
||||||
return {current_bank_id, slot};
|
return {current_bank_id, slot};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Traits>
|
||||||
|
friend class PrimitivesSucceededStreamer;
|
||||||
|
|
||||||
static constexpr size_t NUM_STREAMS = 4;
|
static constexpr size_t NUM_STREAMS = 4;
|
||||||
static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
|
static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
|
||||||
|
|
||||||
|
@ -833,9 +850,143 @@ private:
|
||||||
size_t buffers_count{};
|
size_t buffers_count{};
|
||||||
std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
|
std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
|
||||||
std::array<VkDeviceSize, NUM_STREAMS> offsets{};
|
std::array<VkDeviceSize, NUM_STREAMS> offsets{};
|
||||||
|
std::array<VAddr, NUM_STREAMS> last_queries;
|
||||||
u64 streams_mask;
|
u64 streams_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class PrimitivesQueryBase : public VideoCommon::QueryBase {
|
||||||
|
public:
|
||||||
|
// Default constructor
|
||||||
|
PrimitivesQueryBase()
|
||||||
|
: VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{},
|
||||||
|
dependant_index{}, dependant_manage{} {}
|
||||||
|
|
||||||
|
// Parameterized constructor
|
||||||
|
PrimitivesQueryBase(bool is_long, VAddr address)
|
||||||
|
: VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{},
|
||||||
|
dependant_index{}, dependant_manage{} {
|
||||||
|
if (is_long) {
|
||||||
|
flags |= VideoCommon::QueryFlagBits::HasTimestamp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 stride;
|
||||||
|
VAddr dependant_address;
|
||||||
|
size_t dependant_index;
|
||||||
|
bool dependant_manage;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Traits>
|
||||||
|
class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
|
||||||
|
public:
|
||||||
|
PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_,
|
||||||
|
TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_)
|
||||||
|
: VideoCommon::SimpleStreamer<PrimitivesQueryBase>(
|
||||||
|
id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)),
|
||||||
|
runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {}
|
||||||
|
|
||||||
|
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
|
||||||
|
std::optional<u32> subreport_) override {
|
||||||
|
auto index = BuildQuery();
|
||||||
|
auto* new_query = GetQuery(index);
|
||||||
|
new_query->guest_address = address;
|
||||||
|
new_query->value = 0;
|
||||||
|
if (has_timestamp) {
|
||||||
|
new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
|
||||||
|
}
|
||||||
|
if (!subreport_) {
|
||||||
|
new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
const size_t subreport = static_cast<size_t>(*subreport_);
|
||||||
|
auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
|
||||||
|
bool must_manage_dependance = false;
|
||||||
|
if (dependant_address_opt) {
|
||||||
|
new_query->dependant_address = *dependant_address_opt;
|
||||||
|
} else {
|
||||||
|
new_query->dependant_index =
|
||||||
|
tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_);
|
||||||
|
auto* dependant_query = tfb_streamer.GetQuery(new_query->dependant_index);
|
||||||
|
dependant_query->flags |= VideoCommon::QueryFlagBits::IsInvalidated;
|
||||||
|
must_manage_dependance = true;
|
||||||
|
if (True(dependant_query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) {
|
||||||
|
new_query->value = 0;
|
||||||
|
new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
|
||||||
|
if (must_manage_dependance) {
|
||||||
|
tfb_streamer.Free(new_query->dependant_index);
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
new_query->dependant_manage = must_manage_dependance;
|
||||||
|
runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||||
|
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
|
||||||
|
i++) {
|
||||||
|
const auto& tf = regs.transform_feedback;
|
||||||
|
if (tf.controls[i].stream != subreport) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
new_query->stride = tf.controls[i].stride;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
pending_flush_queries.push_back(index);
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasUnsyncedQueries() override {
|
||||||
|
return !pending_flush_queries.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
void PushUnsyncedQueries() override {
|
||||||
|
std::scoped_lock lk(flush_guard);
|
||||||
|
pending_flush_sets.emplace_back(std::move(pending_flush_queries));
|
||||||
|
pending_flush_queries.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void PopUnsyncedQueries() override {
|
||||||
|
std::vector<size_t> flushed_queries;
|
||||||
|
{
|
||||||
|
std::scoped_lock lk(flush_guard);
|
||||||
|
flushed_queries = std::move(pending_flush_sets.front());
|
||||||
|
pending_flush_sets.pop_front();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto q : flushed_queries) {
|
||||||
|
auto* query = GetQuery(q);
|
||||||
|
if (True(query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
|
||||||
|
if (query->dependant_manage) {
|
||||||
|
auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
|
||||||
|
query->value = dependant_query->value / query->stride;
|
||||||
|
tfb_streamer.Free(query->dependant_index);
|
||||||
|
} else {
|
||||||
|
u8* pointer = cpu_memory.GetPointer(query->dependant_address);
|
||||||
|
u32 result;
|
||||||
|
std::memcpy(&result, pointer, sizeof(u32));
|
||||||
|
query->value = static_cast<u64>(result) / query->stride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
QueryCacheRuntime& runtime;
|
||||||
|
TFBCounterStreamer<QueryCacheParams>& tfb_streamer;
|
||||||
|
Core::Memory::Memory& cpu_memory;
|
||||||
|
|
||||||
|
// syncing queue
|
||||||
|
std::vector<size_t> pending_sync;
|
||||||
|
|
||||||
|
// flush levels
|
||||||
|
std::vector<size_t> pending_flush_queries;
|
||||||
|
std::deque<std::vector<size_t>> pending_flush_sets;
|
||||||
|
std::mutex flush_guard;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
struct QueryCacheRuntimeImpl {
|
struct QueryCacheRuntimeImpl {
|
||||||
|
@ -853,6 +1004,8 @@ struct QueryCacheRuntimeImpl {
|
||||||
scheduler, memory_allocator),
|
scheduler, memory_allocator),
|
||||||
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
|
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
|
||||||
scheduler, memory_allocator, staging_pool),
|
scheduler, memory_allocator, staging_pool),
|
||||||
|
primitives_succeeded_streamer(
|
||||||
|
static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_),
|
||||||
hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
|
hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
|
||||||
|
|
||||||
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
|
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
|
||||||
|
@ -889,6 +1042,7 @@ struct QueryCacheRuntimeImpl {
|
||||||
VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
|
VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
|
||||||
SamplesStreamer<QueryCacheParams> sample_streamer;
|
SamplesStreamer<QueryCacheParams> sample_streamer;
|
||||||
TFBCounterStreamer<QueryCacheParams> tfb_streamer;
|
TFBCounterStreamer<QueryCacheParams> tfb_streamer;
|
||||||
|
PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer;
|
||||||
|
|
||||||
std::vector<std::pair<VAddr, VAddr>> little_cache;
|
std::vector<std::pair<VAddr, VAddr>> little_cache;
|
||||||
std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
|
std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
|
||||||
|
@ -1086,6 +1240,8 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp
|
||||||
return &impl->sample_streamer;
|
return &impl->sample_streamer;
|
||||||
case QueryType::StreamingByteCount:
|
case QueryType::StreamingByteCount:
|
||||||
return &impl->tfb_streamer;
|
return &impl->tfb_streamer;
|
||||||
|
case QueryType::StreamingPrimitivesSucceeded:
|
||||||
|
return &impl->primitives_succeeded_streamer;
|
||||||
default:
|
default:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
Reference in New Issue