gl_rasterizer_cache: Implement caching for texture and framebuffer surfaces.
gl_rasterizer_cache: Improved cache management based on Citra's implementation. gl_surface_cache: Add some docstrings.
This commit is contained in:
parent
8af1ae46aa
commit
1dd754590f
|
@ -435,22 +435,35 @@ void RasterizerOpenGL::DrawArrays() {
|
||||||
|
|
||||||
// Mark framebuffer surfaces as dirty
|
// Mark framebuffer surfaces as dirty
|
||||||
if (color_surface != nullptr && write_color_fb) {
|
if (color_surface != nullptr && write_color_fb) {
|
||||||
res_cache.FlushSurface(color_surface);
|
res_cache.MarkSurfaceAsDirty(color_surface);
|
||||||
}
|
}
|
||||||
if (depth_surface != nullptr && write_depth_fb) {
|
if (depth_surface != nullptr && write_depth_fb) {
|
||||||
res_cache.FlushSurface(depth_surface);
|
res_cache.MarkSurfaceAsDirty(depth_surface);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
|
void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAll() {}
|
/// Requests a flush of the whole address range from the surface cache.
void RasterizerOpenGL::FlushAll() {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
|
// The region starts at address 0 and has a size of Kernel::VMManager::MAX_ADDRESS
res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {}
|
/// Forwards a flush request for the region [addr, addr + size) to the surface cache, timed under
/// the OpenGL_CacheManagement profiling scope.
void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
|
res_cache.FlushRegion(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {}
|
/// Forwards an invalidation request for the region described by addr and size to the surface
/// cache, timed under the OpenGL_CacheManagement profiling scope.
void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
|
res_cache.InvalidateRegion(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {}
|
/// Flushes and then invalidates the region described by addr and size in the surface cache,
/// timed under a single OpenGL_CacheManagement profiling scope.
void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
|
// Flush first, so the invalidation below happens after the flush request
res_cache.FlushRegion(addr, size);
|
||||||
|
res_cache.InvalidateRegion(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
|
bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/hle/kernel/process.h"
|
#include "core/hle/kernel/process.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
#include "core/settings.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
|
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
|
||||||
#include "video_core/textures/astc.h"
|
#include "video_core/textures/astc.h"
|
||||||
|
@ -215,7 +216,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
|
||||||
cur_state.Apply();
|
cur_state.Apply();
|
||||||
}
|
}
|
||||||
|
|
||||||
CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) {
|
CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
|
||||||
texture.Create();
|
texture.Create();
|
||||||
const auto& rect{params.GetRect()};
|
const auto& rect{params.GetRect()};
|
||||||
AllocateSurfaceTexture(texture.handle,
|
AllocateSurfaceTexture(texture.handle,
|
||||||
|
@ -370,6 +371,12 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
|
||||||
draw_framebuffer.Create();
|
draw_framebuffer.Create();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Drains the surface cache by unregistering its entries one at a time.
RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
    // Each UnregisterSurface call erases the entry it is given, so always restart from the
    // current first element instead of advancing an iterator.
    for (auto head = surface_cache.begin(); head != surface_cache.end();
         head = surface_cache.begin()) {
        UnregisterSurface(head->second);
    }
}
|
||||||
|
|
||||||
Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
|
Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
|
||||||
return GetSurface(SurfaceParams::CreateForTexture(config));
|
return GetSurface(SurfaceParams::CreateForTexture(config));
|
||||||
}
|
}
|
||||||
|
@ -425,9 +432,17 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
|
||||||
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
|
void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
|
||||||
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
if (Settings::values.use_accurate_framebuffers) {
|
||||||
surface->FlushGLBuffer();
|
// If enabled, always flush dirty surfaces
|
||||||
|
surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
|
||||||
|
surface->FlushGLBuffer();
|
||||||
|
} else {
|
||||||
|
// Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
|
||||||
|
// and flushes are very slow and do not seem to improve accuracy
|
||||||
|
const auto& params{surface->GetSurfaceParams()};
|
||||||
|
Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
|
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
|
||||||
|
@ -441,13 +456,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
|
||||||
Surface surface;
|
Surface surface;
|
||||||
if (search != surface_cache.end()) {
|
if (search != surface_cache.end()) {
|
||||||
surface = search->second;
|
surface = search->second;
|
||||||
|
if (Settings::values.use_accurate_framebuffers) {
|
||||||
|
// Reload the surface from Switch memory
|
||||||
|
LoadSurface(surface);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
surface = std::make_shared<CachedSurface>(params);
|
surface = std::make_shared<CachedSurface>(params);
|
||||||
surface_cache[surface_key] = surface;
|
RegisterSurface(surface);
|
||||||
|
LoadSurface(surface);
|
||||||
}
|
}
|
||||||
|
|
||||||
LoadSurface(surface);
|
|
||||||
|
|
||||||
return surface;
|
return surface;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -476,3 +494,87 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
|
||||||
|
|
||||||
return surfaces[0];
|
return surfaces[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Placeholder for writing cached resources that overlap the region back to memory.
/// Both parameters are currently ignored and the function does nothing.
void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) {
|
||||||
|
// TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should
|
||||||
|
// probably implement this in the future, but for now, the `use_accurate_framebuffers` setting
|
||||||
|
// can be used to always flush.
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unregisters every cached surface that SurfaceParams::IsOverlappingRegion reports as
/// overlapping the region described by addr and size.
void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
    // UnregisterSurface() erases the matching entry from surface_cache. Erasing from an
    // unordered_map invalidates iterators to the erased element, so a range-based for loop
    // would step through a dead iterator. Advance the iterator *before* the entry can be erased.
    for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) {
        // Take our own reference so the surface stays alive while it is being unregistered,
        // even after the cache entry that owned it is gone.
        const Surface surface{iter->second};
        ++iter;

        if (surface->GetSurfaceParams().IsOverlappingRegion(addr, size)) {
            UnregisterSurface(surface);
        }
    }
}
|
||||||
|
|
||||||
|
void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
|
||||||
|
const auto& params{surface->GetSurfaceParams()};
|
||||||
|
const auto& surface_key{SurfaceKey::Create(params)};
|
||||||
|
const auto& search{surface_cache.find(surface_key)};
|
||||||
|
|
||||||
|
if (search != surface_cache.end()) {
|
||||||
|
// Registered already
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
surface_cache[surface_key] = surface;
|
||||||
|
UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
|
||||||
|
const auto& params{surface->GetSurfaceParams()};
|
||||||
|
const auto& surface_key{SurfaceKey::Create(params)};
|
||||||
|
const auto& search{surface_cache.find(surface_key)};
|
||||||
|
|
||||||
|
if (search == surface_cache.end()) {
|
||||||
|
// Unregistered already
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
|
||||||
|
surface_cache.erase(search);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Map, typename Interval>
|
||||||
|
constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
|
||||||
|
return boost::make_iterator_range(map.equal_range(interval));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds `delta` to the per-page surface count of every page touched by [addr, addr + size) and
/// notifies Memory::RasterizerMarkRegionCached for the page runs affected by this update.
/// @param addr  Start address of the region
/// @param size  Size of the region in bytes
/// @param delta Amount to add to the count of each touched page (may be negative)
void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
    const auto page_bits = Tegra::MemoryManager::PAGE_BITS;

    // Page range [first_page, end_page) touched by the region
    const u64 first_page = addr >> page_bits;
    const u64 end_page = ((addr + size - 1) >> page_bits) + 1;
    const auto touched_pages = PageMap::interval_type::right_open(first_page, end_page);

    // Interval maps will erase segments if count reaches 0, so increments are applied before
    // walking the segments and decrements only afterwards.
    if (delta > 0) {
        cached_pages.add({touched_pages, delta});
    }

    for (const auto& segment : RangeFromInterval(cached_pages, touched_pages)) {
        const int count = segment.second;
        const bool became_cached = delta > 0 && count == delta;
        const bool became_uncached = delta < 0 && count == -delta;

        if (!became_cached && !became_uncached) {
            ASSERT(count >= 0);
            continue;
        }

        // Clip the segment to the touched pages and convert page numbers back to addresses
        const auto clipped = segment.first & touched_pages;
        const Tegra::GPUVAddr run_start = boost::icl::first(clipped) << page_bits;
        const Tegra::GPUVAddr run_end = boost::icl::last_next(clipped) << page_bits;
        const u64 run_size = run_end - run_start;

        Memory::RasterizerMarkRegionCached(run_start, run_size, became_cached);
    }

    if (delta < 0) {
        cached_pages.add({touched_pages, delta});
    }
}
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <boost/icl/interval_map.hpp>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/hash.h"
|
#include "common/hash.h"
|
||||||
#include "common/math_util.h"
|
#include "common/math_util.h"
|
||||||
|
@ -19,6 +19,7 @@
|
||||||
class CachedSurface;
|
class CachedSurface;
|
||||||
using Surface = std::shared_ptr<CachedSurface>;
|
using Surface = std::shared_ptr<CachedSurface>;
|
||||||
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
|
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
|
||||||
|
using PageMap = boost::icl::interval_map<u64, int>;
|
||||||
|
|
||||||
struct SurfaceParams {
|
struct SurfaceParams {
|
||||||
enum class PixelFormat {
|
enum class PixelFormat {
|
||||||
|
@ -243,8 +244,10 @@ struct SurfaceParams {
|
||||||
return SurfaceType::Invalid;
|
return SurfaceType::Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the rectangle corresponding to this surface
|
||||||
MathUtil::Rectangle<u32> GetRect() const;
|
MathUtil::Rectangle<u32> GetRect() const;
|
||||||
|
|
||||||
|
/// Returns the size of this surface in bytes, adjusted for compression
|
||||||
size_t SizeInBytes() const {
|
size_t SizeInBytes() const {
|
||||||
const u32 compression_factor{GetCompressionFactor(pixel_format)};
|
const u32 compression_factor{GetCompressionFactor(pixel_format)};
|
||||||
ASSERT(width % compression_factor == 0);
|
ASSERT(width % compression_factor == 0);
|
||||||
|
@ -253,10 +256,18 @@ struct SurfaceParams {
|
||||||
GetFormatBpp(pixel_format) / CHAR_BIT;
|
GetFormatBpp(pixel_format) / CHAR_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the CPU virtual address for this surface
|
||||||
VAddr GetCpuAddr() const;
|
VAddr GetCpuAddr() const;
|
||||||
|
|
||||||
|
/// Returns true if the specified region overlaps with this surface's region in Switch memory
|
||||||
|
bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
|
||||||
|
return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates SurfaceParams from a texture configuration
|
||||||
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
|
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
|
||||||
|
|
||||||
|
/// Creates SurfaceParams from a framebuffer configuration
|
||||||
static SurfaceParams CreateForFramebuffer(
|
static SurfaceParams CreateForFramebuffer(
|
||||||
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
|
const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
|
||||||
|
|
||||||
|
@ -272,6 +283,7 @@ struct SurfaceParams {
|
||||||
size_t size_in_bytes;
|
size_t size_in_bytes;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Hashable variation of SurfaceParams, used for a key in the surface cache
|
||||||
struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
|
struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
|
||||||
static SurfaceKey Create(const SurfaceParams& params) {
|
static SurfaceKey Create(const SurfaceParams& params) {
|
||||||
SurfaceKey res;
|
SurfaceKey res;
|
||||||
|
@ -325,18 +337,43 @@ private:
|
||||||
class RasterizerCacheOpenGL final : NonCopyable {
|
class RasterizerCacheOpenGL final : NonCopyable {
|
||||||
public:
|
public:
|
||||||
RasterizerCacheOpenGL();
|
RasterizerCacheOpenGL();
|
||||||
|
~RasterizerCacheOpenGL();
|
||||||
|
|
||||||
|
/// Get a surface based on the texture configuration
|
||||||
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
|
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
|
||||||
|
|
||||||
|
/// Get the color and depth surfaces based on the framebuffer configuration
|
||||||
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
|
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
|
||||||
const MathUtil::Rectangle<s32>& viewport);
|
const MathUtil::Rectangle<s32>& viewport);
|
||||||
void LoadSurface(const Surface& surface);
|
|
||||||
void FlushSurface(const Surface& surface);
|
/// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
|
||||||
|
void MarkSurfaceAsDirty(const Surface& surface);
|
||||||
|
|
||||||
|
/// Tries to find a framebuffer surface based on the provided CPU address
|
||||||
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
|
Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
|
||||||
|
|
||||||
|
/// Write any cached resources overlapping the region back to memory (if dirty)
|
||||||
|
void FlushRegion(Tegra::GPUVAddr addr, size_t size);
|
||||||
|
|
||||||
|
/// Mark the specified region as being invalidated
|
||||||
|
void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void LoadSurface(const Surface& surface);
|
||||||
Surface GetSurface(const SurfaceParams& params);
|
Surface GetSurface(const SurfaceParams& params);
|
||||||
|
|
||||||
|
/// Register surface into the cache
|
||||||
|
void RegisterSurface(const Surface& surface);
|
||||||
|
|
||||||
|
/// Remove surface from the cache
|
||||||
|
void UnregisterSurface(const Surface& surface);
|
||||||
|
|
||||||
|
/// Increase/decrease the number of surfaces in pages touching the specified region
|
||||||
|
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
|
||||||
|
|
||||||
std::unordered_map<SurfaceKey, Surface> surface_cache;
|
std::unordered_map<SurfaceKey, Surface> surface_cache;
|
||||||
|
PageMap cached_pages;
|
||||||
|
|
||||||
OGLFramebuffer read_framebuffer;
|
OGLFramebuffer read_framebuffer;
|
||||||
OGLFramebuffer draw_framebuffer;
|
OGLFramebuffer draw_framebuffer;
|
||||||
};
|
};
|
||||||
|
|
Reference in New Issue