buffer_cache: Heuristically detect stream buffers
Detect when a memory region has been joined several times and increase the size of the buffer created in those cases. The buffer is assumed to be a "stream buffer"; increasing its size should stop us from constantly recreating it and fragmenting memory.
This commit is contained in:
parent
ec9354d6d9
commit
2b95c137ff
|
@ -251,6 +251,16 @@ public:
|
||||||
flags &= ~BufferFlagBits::Picked;
|
flags &= ~BufferFlagBits::Picked;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Increases the likelihood of this being treated as a stream buffer.
/// Adds `score` to the buffer's accumulated stream score (`stream_score`).
/// @param score Amount added to the current stream score.
|
||||||
|
void IncreaseStreamScore(int score) noexcept {
|
||||||
|
stream_score += score;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the likelihood of this being a stream buffer.
/// The value is the accumulated `stream_score`; a higher score means the
/// buffer is more likely to be a stream buffer.
/// @return The current stream score.
|
||||||
|
[[nodiscard]] int StreamScore() const noexcept {
|
||||||
|
return stream_score;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
|
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
|
||||||
[[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
|
[[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
|
||||||
return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
|
return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
|
||||||
|
@ -574,6 +584,7 @@ private:
|
||||||
VAddr cpu_addr = 0;
|
VAddr cpu_addr = 0;
|
||||||
Words words;
|
Words words;
|
||||||
BufferFlagBits flags{};
|
BufferFlagBits flags{};
|
||||||
|
int stream_score = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -75,6 +75,7 @@ class BufferCache {
|
||||||
std::vector<BufferId> ids;
|
std::vector<BufferId> ids;
|
||||||
VAddr begin;
|
VAddr begin;
|
||||||
VAddr end;
|
VAddr end;
|
||||||
|
bool has_stream_leap = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Binding {
|
struct Binding {
|
||||||
|
@ -228,7 +229,7 @@ private:
|
||||||
|
|
||||||
[[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
|
[[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
|
||||||
|
|
||||||
void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id);
|
void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
|
||||||
|
|
||||||
[[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
|
[[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
|
||||||
|
|
||||||
|
@ -670,7 +671,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
const VAddr cpu_addr = binding.cpu_addr;
|
const VAddr cpu_addr = binding.cpu_addr;
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
|
if (size <= runtime.SkipCacheSize() && !buffer.IsRegionGpuModified(cpu_addr, size)) {
|
||||||
if constexpr (IS_OPENGL) {
|
if constexpr (IS_OPENGL) {
|
||||||
if (runtime.HasFastBufferSubData()) {
|
if (runtime.HasFastBufferSubData()) {
|
||||||
// Fast path for Nvidia
|
// Fast path for Nvidia
|
||||||
|
@ -1000,9 +1001,12 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
|
||||||
template <class P>
|
template <class P>
|
||||||
typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
|
typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
|
||||||
u32 wanted_size) {
|
u32 wanted_size) {
|
||||||
|
static constexpr int STREAM_LEAP_THRESHOLD = 16;
|
||||||
std::vector<BufferId> overlap_ids;
|
std::vector<BufferId> overlap_ids;
|
||||||
VAddr begin = cpu_addr;
|
VAddr begin = cpu_addr;
|
||||||
VAddr end = cpu_addr + wanted_size;
|
VAddr end = cpu_addr + wanted_size;
|
||||||
|
int stream_score = 0;
|
||||||
|
bool has_stream_leap = false;
|
||||||
for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
|
for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
|
||||||
const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
|
const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
|
||||||
if (!overlap_id) {
|
if (!overlap_id) {
|
||||||
|
@ -1012,26 +1016,38 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
|
||||||
if (overlap.IsPicked()) {
|
if (overlap.IsPicked()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
overlap.Pick();
|
|
||||||
overlap_ids.push_back(overlap_id);
|
overlap_ids.push_back(overlap_id);
|
||||||
|
overlap.Pick();
|
||||||
const VAddr overlap_cpu_addr = overlap.CpuAddr();
|
const VAddr overlap_cpu_addr = overlap.CpuAddr();
|
||||||
if (overlap_cpu_addr < begin) {
|
if (overlap_cpu_addr < begin) {
|
||||||
cpu_addr = begin = overlap_cpu_addr;
|
cpu_addr = begin = overlap_cpu_addr;
|
||||||
}
|
}
|
||||||
end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
|
end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
|
||||||
|
|
||||||
|
stream_score += overlap.StreamScore();
|
||||||
|
if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
|
||||||
|
// When this memory region has been joined a bunch of times, we assume it's being used
|
||||||
|
// as a stream buffer. Increase the size to skip constantly recreating buffers.
|
||||||
|
has_stream_leap = true;
|
||||||
|
end += PAGE_SIZE * 256;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return OverlapResult{
|
return OverlapResult{
|
||||||
.ids = std::move(overlap_ids),
|
.ids = std::move(overlap_ids),
|
||||||
.begin = begin,
|
.begin = begin,
|
||||||
.end = end,
|
.end = end,
|
||||||
|
.has_stream_leap = has_stream_leap,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id) {
|
void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
||||||
|
bool accumulate_stream_score) {
|
||||||
Buffer& new_buffer = slot_buffers[new_buffer_id];
|
Buffer& new_buffer = slot_buffers[new_buffer_id];
|
||||||
Buffer& overlap = slot_buffers[overlap_id];
|
Buffer& overlap = slot_buffers[overlap_id];
|
||||||
|
if (accumulate_stream_score) {
|
||||||
|
new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
|
||||||
|
}
|
||||||
std::vector<BufferCopy> copies;
|
std::vector<BufferCopy> copies;
|
||||||
const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
|
const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
|
||||||
overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
|
overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
|
||||||
|
@ -1056,7 +1072,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
|
||||||
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
||||||
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
|
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
|
||||||
for (const BufferId overlap_id : overlap.ids) {
|
for (const BufferId overlap_id : overlap.ids) {
|
||||||
JoinOverlap(new_buffer_id, overlap_id);
|
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
|
||||||
}
|
}
|
||||||
Register(new_buffer_id);
|
Register(new_buffer_id);
|
||||||
return new_buffer_id;
|
return new_buffer_id;
|
||||||
|
|
Reference in New Issue