texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies
This commit is contained in:
parent
228f516bb4
commit
60bf761afb
|
@ -28,6 +28,7 @@ Device::Device() {
|
||||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||||
has_variable_aoffi = TestVariableAoffi();
|
has_variable_aoffi = TestVariableAoffi();
|
||||||
has_component_indexing_bug = TestComponentIndexingBug();
|
has_component_indexing_bug = TestComponentIndexingBug();
|
||||||
|
is_turing_plus = GLAD_GL_NV_mesh_shader;
|
||||||
}
|
}
|
||||||
|
|
||||||
Device::Device(std::nullptr_t) {
|
Device::Device(std::nullptr_t) {
|
||||||
|
|
|
@ -34,6 +34,10 @@ public:
|
||||||
return has_component_indexing_bug;
|
return has_component_indexing_bug;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsTuringGPU() const {
|
||||||
|
return is_turing_plus;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool TestVariableAoffi();
|
static bool TestVariableAoffi();
|
||||||
static bool TestComponentIndexingBug();
|
static bool TestComponentIndexingBug();
|
||||||
|
@ -43,6 +47,7 @@ private:
|
||||||
u32 max_varyings{};
|
u32 max_varyings{};
|
||||||
bool has_variable_aoffi{};
|
bool has_variable_aoffi{};
|
||||||
bool has_component_indexing_bug{};
|
bool has_component_indexing_bug{};
|
||||||
|
bool is_turing_plus{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -82,7 +82,7 @@ struct DrawParameters {
|
||||||
|
|
||||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
ScreenInfo& info)
|
ScreenInfo& info)
|
||||||
: texture_cache{system, *this}, shader_cache{*this, system, emu_window, device},
|
: texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
|
||||||
global_cache{*this}, system{system}, screen_info{info},
|
global_cache{*this}, system{system}, screen_info{info},
|
||||||
buffer_cache(*this, STREAM_BUFFER_SIZE) {
|
buffer_cache(*this, STREAM_BUFFER_SIZE) {
|
||||||
OpenGLState::ApplyDefaultState();
|
OpenGLState::ApplyDefaultState();
|
||||||
|
|
|
@ -148,6 +148,14 @@ void OGLBuffer::Release() {
|
||||||
handle = 0;
|
handle = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OGLBuffer::MakePersistant(std::size_t buffer_size) {
|
||||||
|
if (handle == 0 || buffer_size == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
const GLbitfield flags = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT | GL_MAP_READ_BIT;
|
||||||
|
glNamedBufferStorage(handle, static_cast<GLsizeiptr>(buffer_size), nullptr, flags);
|
||||||
|
}
|
||||||
|
|
||||||
void OGLSync::Create() {
|
void OGLSync::Create() {
|
||||||
if (handle != 0)
|
if (handle != 0)
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -186,6 +186,9 @@ public:
|
||||||
/// Deletes the internal OpenGL resource
|
/// Deletes the internal OpenGL resource
|
||||||
void Release();
|
void Release();
|
||||||
|
|
||||||
|
// Converts the buffer into a persistent storage buffer
|
||||||
|
void MakePersistant(std::size_t buffer_size);
|
||||||
|
|
||||||
GLuint handle = 0;
|
GLuint handle = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "common/bit_util.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
|
@ -435,8 +436,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
|
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
|
||||||
VideoCore::RasterizerInterface& rasterizer)
|
VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
const Device& device)
|
||||||
: TextureCacheBase{system, rasterizer} {
|
: TextureCacheBase{system, rasterizer} {
|
||||||
|
support_info.depth_color_image_copies = !device.IsTuringGPU();
|
||||||
src_framebuffer.Create();
|
src_framebuffer.Create();
|
||||||
dst_framebuffer.Create();
|
dst_framebuffer.Create();
|
||||||
}
|
}
|
||||||
|
@ -449,6 +452,14 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams
|
||||||
|
|
||||||
void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface,
|
void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface,
|
||||||
const VideoCommon::CopyParams& copy_params) {
|
const VideoCommon::CopyParams& copy_params) {
|
||||||
|
if (!support_info.depth_color_image_copies) {
|
||||||
|
const auto& src_params = src_surface->GetSurfaceParams();
|
||||||
|
const auto& dst_params = dst_surface->GetSurfaceParams();
|
||||||
|
if (src_params.type != dst_params.type) {
|
||||||
|
// A fallback is needed
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
const auto src_handle = src_surface->GetTexture();
|
const auto src_handle = src_surface->GetTexture();
|
||||||
const auto src_target = src_surface->GetTarget();
|
const auto src_target = src_surface->GetTarget();
|
||||||
const auto dst_handle = dst_surface->GetTexture();
|
const auto dst_handle = dst_surface->GetTexture();
|
||||||
|
@ -517,4 +528,83 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view,
|
||||||
is_linear ? GL_LINEAR : GL_NEAREST);
|
is_linear ? GL_LINEAR : GL_NEAREST);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Copies src_surface into dst_surface by staging the pixels in a pixel buffer object:
/// level 0 of the source texture is read into the PBO, then level 0 of the destination texture
/// is written from that same PBO. Uploading into a compressed destination is not implemented.
void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) {
    const auto& src_info = src_surface->GetSurfaceParams();
    const auto& dst_info = dst_surface->GetSurfaceParams();

    const auto src_tuple = GetFormatTuple(src_info.pixel_format, src_info.component_type);
    const auto dst_tuple = GetFormatTuple(dst_info.pixel_format, dst_info.component_type);

    // The staging buffer has to fit whichever of the two surfaces is larger.
    const std::size_t src_bytes = src_surface->GetHostSizeInBytes();
    const std::size_t dst_bytes = dst_surface->GetHostSizeInBytes();
    const GLuint staging_pbo = FetchPBO(std::max(src_bytes, dst_bytes));

    // Download pass: the PBO is bound as the pack target, so the null pointer passed below is
    // the destination argument of the read-back call while that binding is active.
    glBindBuffer(GL_PIXEL_PACK_BUFFER, staging_pbo);
    const auto src_texture = src_surface->GetTexture();
    const auto src_byte_count = static_cast<GLsizei>(src_bytes);
    if (!source_is_compressed(src_tuple)) {
        glGetTextureImage(src_texture, 0, src_tuple.format, src_tuple.type, src_byte_count,
                          nullptr);
    } else {
        glGetCompressedTextureImage(src_texture, 0, src_byte_count, nullptr);
    }
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    // Upload pass: the same PBO is now bound as the unpack target.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging_pbo);

    const auto extent_x = static_cast<GLsizei>(dst_info.width);
    const auto extent_y = static_cast<GLsizei>(dst_info.height);
    const auto extent_z = static_cast<GLsizei>(dst_info.depth);
    if (!dst_tuple.compressed) {
        switch (dst_info.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, extent_x, dst_tuple.format,
                                dst_tuple.type, nullptr);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, extent_x, extent_y,
                                dst_tuple.format, dst_tuple.type, nullptr);
            break;
        // Every layered or volumetric target goes through the same 3D upload call.
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
        case SurfaceTarget::TextureCubemap:
            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, extent_x, extent_y,
                                extent_z, dst_tuple.format, dst_tuple.type, nullptr);
            break;
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(dst_info.target));
            UNREACHABLE();
        }
    } else {
        LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
        UNREACHABLE();
    }
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    glTextureBarrier();
}
|
||||||
|
|
||||||
|
/// Returns the handle of a cached pixel buffer object large enough for buffer_size bytes.
/// Buffers live in copy_pbo_cache keyed by Common::Log2Ceil64(buffer_size); the first request
/// for a key creates the buffer and gives it a capacity of (1 << key) bytes, and later requests
/// that map to the same key reuse it.
/// buffer_size must be non-zero; a zero-sized request is reported as unreachable and yields 0.
GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
    // std::size_t is unsigned, so a "< 0" test can never fire. The request that actually has to
    // be rejected is the empty one: log2 of zero is undefined, so it has no valid bucket.
    if (buffer_size == 0) {
        UNREACHABLE();
        return 0;
    }
    const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
    OGLBuffer& cp = copy_pbo_cache[l2];
    if (cp.handle == 0) {
        // First use of this bucket: allocate the full power-of-two capacity once.
        const std::size_t ceil_size = 1ULL << l2;
        cp.Create();
        cp.MakePersistant(ceil_size);
    }
    return cp.handle;
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/shader_bytecode.h"
|
#include "video_core/engines/shader_bytecode.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
#include "video_core/texture_cache/texture_cache.h"
|
#include "video_core/texture_cache/texture_cache.h"
|
||||||
|
|
||||||
|
@ -129,7 +130,8 @@ private:
|
||||||
|
|
||||||
class TextureCacheOpenGL final : public TextureCacheBase {
|
class TextureCacheOpenGL final : public TextureCacheBase {
|
||||||
public:
|
public:
|
||||||
explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
|
explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
const Device& device);
|
||||||
~TextureCacheOpenGL();
|
~TextureCacheOpenGL();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -141,9 +143,14 @@ protected:
|
||||||
void ImageBlit(View src_view, View dst_view,
|
void ImageBlit(View src_view, View dst_view,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||||
|
|
||||||
|
void BufferCopy(Surface src_surface, Surface dst_surface) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
GLuint FetchPBO(std::size_t buffer_size);
|
||||||
|
|
||||||
OGLFramebuffer src_framebuffer;
|
OGLFramebuffer src_framebuffer;
|
||||||
OGLFramebuffer dst_framebuffer;
|
OGLFramebuffer dst_framebuffer;
|
||||||
|
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -214,6 +214,13 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
// This structure lets the backend tell the texture cache which behaviors it supports,
// so the cache does not have to assume anything about the hardware.
// The backend is RESPONSIBLE for filling in these settings on creation.
struct Support {
    // Value-initialized so that a backend which forgets to set it reads a defined `false`
    // instead of an indeterminate value. `false` makes surface rebuilds between surfaces of
    // different type go through BufferCopy rather than ImageCopy.
    bool depth_color_image_copies{};
} support_info;
|
||||||
|
|
||||||
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
||||||
: system{system}, rasterizer{rasterizer} {
|
: system{system}, rasterizer{rasterizer} {
|
||||||
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
|
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
|
||||||
|
@ -233,6 +240,10 @@ protected:
|
||||||
virtual void ImageBlit(TView src_view, TView dst_view,
|
virtual void ImageBlit(TView src_view, TView dst_view,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
|
const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
|
||||||
|
|
||||||
|
// Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
|
||||||
|
// and reading it from a separate buffer.
|
||||||
|
virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0;
|
||||||
|
|
||||||
void Register(TSurface surface) {
|
void Register(TSurface surface) {
|
||||||
std::lock_guard lock{mutex};
|
std::lock_guard lock{mutex};
|
||||||
|
|
||||||
|
@ -377,9 +388,14 @@ private:
|
||||||
const SurfaceParams& params) {
|
const SurfaceParams& params) {
|
||||||
const auto gpu_addr = current_surface->GetGpuAddr();
|
const auto gpu_addr = current_surface->GetGpuAddr();
|
||||||
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
|
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
|
||||||
std::vector<CopyParams> bricks = current_surface->BreakDown(params);
|
const auto& cr_params = current_surface->GetSurfaceParams();
|
||||||
for (auto& brick : bricks) {
|
if (!support_info.depth_color_image_copies && cr_params.type != params.type) {
|
||||||
ImageCopy(current_surface, new_surface, brick);
|
BufferCopy(current_surface, new_surface);
|
||||||
|
} else {
|
||||||
|
std::vector<CopyParams> bricks = current_surface->BreakDown(params);
|
||||||
|
for (auto& brick : bricks) {
|
||||||
|
ImageCopy(current_surface, new_surface, brick);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Unregister(current_surface);
|
Unregister(current_surface);
|
||||||
Register(new_surface);
|
Register(new_surface);
|
||||||
|
@ -505,7 +521,8 @@ private:
|
||||||
auto topological_result = current_surface->MatchesTopology(params);
|
auto topological_result = current_surface->MatchesTopology(params);
|
||||||
if (topological_result != MatchTopologyResult::FullMatch) {
|
if (topological_result != MatchTopologyResult::FullMatch) {
|
||||||
std::vector<TSurface> overlaps{current_surface};
|
std::vector<TSurface> overlaps{current_surface};
|
||||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result);
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||||
|
topological_result);
|
||||||
}
|
}
|
||||||
MatchStructureResult s_result = current_surface->MatchesStructure(params);
|
MatchStructureResult s_result = current_surface->MatchesStructure(params);
|
||||||
if (s_result != MatchStructureResult::None &&
|
if (s_result != MatchStructureResult::None &&
|
||||||
|
@ -537,7 +554,8 @@ private:
|
||||||
for (auto surface : overlaps) {
|
for (auto surface : overlaps) {
|
||||||
auto topological_result = surface->MatchesTopology(params);
|
auto topological_result = surface->MatchesTopology(params);
|
||||||
if (topological_result != MatchTopologyResult::FullMatch) {
|
if (topological_result != MatchTopologyResult::FullMatch) {
|
||||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result);
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||||
|
topological_result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -555,7 +573,8 @@ private:
|
||||||
return *view;
|
return *view;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch);
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||||
|
MatchTopologyResult::FullMatch);
|
||||||
}
|
}
|
||||||
// Now we check if the candidate is a mipmap/layer of the overlap
|
// Now we check if the candidate is a mipmap/layer of the overlap
|
||||||
std::optional<TView> view =
|
std::optional<TView> view =
|
||||||
|
@ -578,13 +597,15 @@ private:
|
||||||
pair.first->EmplaceView(params, gpu_addr, candidate_size);
|
pair.first->EmplaceView(params, gpu_addr, candidate_size);
|
||||||
if (mirage_view)
|
if (mirage_view)
|
||||||
return {pair.first, *mirage_view};
|
return {pair.first, *mirage_view};
|
||||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch);
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||||
|
MatchTopologyResult::FullMatch);
|
||||||
}
|
}
|
||||||
return {current_surface, *view};
|
return {current_surface, *view};
|
||||||
}
|
}
|
||||||
// The next case is unsafe, so if accurate GPU emulation is enabled, just skip it
|
// The next case is unsafe, so if accurate GPU emulation is enabled, just skip it
|
||||||
if (Settings::values.use_accurate_gpu_emulation) {
|
if (Settings::values.use_accurate_gpu_emulation) {
|
||||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch);
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||||
|
MatchTopologyResult::FullMatch);
|
||||||
}
|
}
|
||||||
// This is the case the texture is a part of the parent.
|
// This is the case the texture is a part of the parent.
|
||||||
if (current_surface->MatchesSubTexture(params, gpu_addr)) {
|
if (current_surface->MatchesSubTexture(params, gpu_addr)) {
|
||||||
|
@ -601,7 +622,8 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We failed all the tests, recycle the overlaps into a new texture.
|
// We failed all the tests, recycle the overlaps into a new texture.
|
||||||
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch);
|
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
|
||||||
|
MatchTopologyResult::FullMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
|
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
|
||||||
|
|
Reference in New Issue