From 1a9df83535589ced8c3bc66ffa620b3cb6d86074 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 26 Mar 2018 20:04:35 -0400 Subject: [PATCH 01/29] renderer_opengl: Only draw the screen if a framebuffer is specified. --- src/video_core/renderer_opengl/renderer_opengl.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1a24855d7..85e91c0e2 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -100,6 +100,8 @@ RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) void RendererOpenGL::SwapBuffers(boost::optional framebuffer) { + Core::System::GetInstance().perf_stats.EndSystemFrame(); + // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.Apply(); @@ -114,20 +116,19 @@ void RendererOpenGL::SwapBuffers(boost::optionalSwapBuffers(); } - DrawScreens(); - - Core::System::GetInstance().perf_stats.EndSystemFrame(); - - // Swap buffers render_window->PollEvents(); - render_window->SwapBuffers(); Core::System::GetInstance().frame_limiter.DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); Core::System::GetInstance().perf_stats.BeginSystemFrame(); + // Restore the rasterizer state prev_state.Apply(); RefreshRasterizerSetting(); } From 94c70693f941484c40fae7c321ef9f3d866de684 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 00:45:24 -0400 Subject: [PATCH 02/29] maxwell: Add RenderTargetFormat enum. --- src/video_core/engines/maxwell_3d.h | 7 ++++--- src/video_core/gpu.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8e2d888e7..3c49cd27f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -11,6 +11,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/textures/texture.h" @@ -167,9 +168,9 @@ public: struct { u32 address_high; u32 address_low; - u32 horiz; - u32 vert; - u32 format; + u32 width; + u32 height; + Tegra::RenderTargetFormat format; u32 block_dimensions; u32 array_mode; u32 layer_stride; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 8183b12e9..ab24504a6 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -13,7 +13,7 @@ namespace Tegra { -enum class RenderTargetFormat { +enum class RenderTargetFormat : u32 { RGBA8_UNORM = 0xD5, }; From 170ac3f9ee3c2f3fe0b22f11e4933373e7742fd6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 00:47:33 -0400 Subject: [PATCH 03/29] gl_rasterizer_cache: Implement GetFramebufferSurfaces. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 97 ++++++++++++++++++- .../renderer_opengl/gl_rasterizer_cache.h | 11 ++- 2 files changed, 104 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 78fa7c051..81b4a64a7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -21,10 +21,13 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/vector_math.h" +#include "core/core.h" #include "core/frontend/emu_window.h" +#include "core/hle/kernel/process.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/utils.h" @@ -1098,9 +1101,97 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) { } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport_rect) { - UNIMPLEMENTED(); - return {}; + bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + const auto& config = regs.rt[0]; + + // TODO(bunnei): This is hard corded to use just the first render buffer + LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); + + // update resolution_scale_factor and reset cache if changed + // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We + // need to fix this before making the renderer multi-threaded. + static u16 resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + } + + MathUtil::Rectangle viewport_clamped{ + static_cast(MathUtil::Clamp(viewport.left, 0, static_cast(config.width))), + static_cast(MathUtil::Clamp(viewport.top, 0, static_cast(config.height))), + static_cast(MathUtil::Clamp(viewport.right, 0, static_cast(config.width))), + static_cast(MathUtil::Clamp(viewport.bottom, 0, static_cast(config.height)))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.width; + color_params.height = config.height; + SurfaceParams depth_params = color_params; + + color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); + color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); + color_params.UpdateParams(); + + ASSERT(!using_depth_fb, "depth buffer is unimplemented"); + // depth_params.addr = config.GetDepthBufferPhysicalAddress(); + // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + // depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + MathUtil::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); } Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 14f3cdc38..0e1c481d7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -116,6 +116,15 @@ struct SurfaceParams { return GetFormatBpp(pixel_format); } + static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { + switch (format) { + case Tegra::RenderTargetFormat::RGBA8_UNORM: + return PixelFormat::RGBA8; + default: + UNREACHABLE(); + } + } + static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: @@ -308,7 +317,7 @@ public: /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - const MathUtil::Rectangle& viewport_rect); + const MathUtil::Rectangle& viewport); /// Get a surface that matches the fill config Surface GetFillSurface(const void* config); From 8041d72a1ff403a4773bfccebdb7e3162061efd4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 00:49:32 -0400 Subject: [PATCH 04/29] gl_rasterizer_cache: MortonCopy Switch-style. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 104 ++++++------------ 1 file changed, 32 insertions(+), 72 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 81b4a64a7..f556dbc41 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -113,65 +113,26 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { template static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 tile_size = bytes_per_pixel * 64; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); - gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; - const VAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); - const VAddr aligned_start = base + Common::AlignUp(start - base, tile_size); - const VAddr aligned_end = base + Common::AlignDown(end - base, tile_size); - - ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); - - const u64 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; - u32 x = static_cast((begin_pixel_index % (stride * 8)) / 8); - u32 y = static_cast((begin_pixel_index / (stride * 8)) * 8); - - gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; - - auto glbuf_next_tile = [&] { - x = (x + 8) % stride; - gl_buffer += 8 * gl_bytes_per_pixel; - if (!x) { - y += 8; - gl_buffer -= stride * 9 * gl_bytes_per_pixel; - } - }; - - u8* tile_buffer = Memory::GetPointer(start); - - if (start < aligned_start && !morton_to_gl) { - std::array tmp_buf; - MortonCopyTile(stride, &tmp_buf[0], gl_buffer); - std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], - std::min(aligned_start, end) - start); - - tile_buffer += aligned_start - start; - glbuf_next_tile(); - } - - const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; - while (tile_buffer < buffer_end) { - MortonCopyTile(stride, tile_buffer, gl_buffer); - tile_buffer += tile_size; - glbuf_next_tile(); - } - - if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { - std::array tmp_buf; - MortonCopyTile(stride, &tmp_buf[0], gl_buffer); - std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); - } + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the + // configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(base), gl_buffer, morton_to_gl); } static constexpr std::array morton_to_gl_fns = { - MortonCopy, // 0 - MortonCopy, // 1 - MortonCopy, // 2 - MortonCopy, // 3 - MortonCopy, // 4 + MortonCopy, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -180,19 +141,19 @@ static constexpr std::array mo nullptr, nullptr, nullptr, - nullptr, // 5 - 13 - MortonCopy, // 14 - nullptr, // 15 - MortonCopy, // 16 - MortonCopy // 17 }; static constexpr std::array gl_to_morton_fns = { - MortonCopy, // 0 - MortonCopy, // 1 - MortonCopy, // 2 - MortonCopy, // 3 - MortonCopy, // 4 + MortonCopy, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -201,11 +162,6 @@ static constexpr std::array gl nullptr, nullptr, nullptr, - nullptr, // 5 - 13 - MortonCopy, // 14 - nullptr, // 15 - MortonCopy, // 16 - MortonCopy // 17 }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -535,8 +491,7 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { ASSERT(type != SurfaceType::Fill); u8* texture_src_data = Memory::GetPointer(addr); - if (texture_src_data == nullptr) - return; + ASSERT(texture_src_data); if (gl_buffer == nullptr) { gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); @@ -551,11 +506,16 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { if (!is_tiled) { ASSERT(type == SurfaceType::Color); const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check + // the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, texture_src_data + start_offset, &gl_buffer[start_offset], true); } else { - ASSERT_MSG(false, "Unimplemented"); + morton_to_gl_fns[static_cast(pixel_format)](stride, height, &gl_buffer[0], addr, + load_start, load_end); } } From ed2134784e173e071a124c768eea5dd12be8425c Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 02:01:03 -0400 Subject: [PATCH 05/29] gl_rasterizer: Implement AnalyzeVertexArray. --- src/video_core/engines/maxwell_3d.h | 35 +++++++++++++++++++ .../renderer_opengl/gl_rasterizer.cpp | 22 +++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 3c49cd27f..869ddde90 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -229,6 +229,41 @@ public: BitField<21, 6, VertexSize> size; BitField<27, 3, VertexType> type; BitField<31, 1, u32> bgra; + + u32 SizeInBytes() const { + switch (size) { + case VertexSize::Size_32_32_32_32: + return 16; + case VertexSize::Size_32_32_32: + return 12; + case VertexSize::Size_16_16_16_16: + return 8; + case VertexSize::Size_32_32: + return 8; + case VertexSize::Size_16_16_16: + return 6; + case VertexSize::Size_8_8_8_8: + return 4; + case VertexSize::Size_16_16: + return 4; + case VertexSize::Size_32: + return 4; + case VertexSize::Size_8_8_8: + return 3; + case VertexSize::Size_8_8: + return 2; + case VertexSize::Size_16: + return 2; + case VertexSize::Size_8: + return 1; + case VertexSize::Size_10_10_10_2: + return 4; + case VertexSize::Size_11_11_10: + return 4; + default: + UNREACHABLE(); + } + } } vertex_attrib_format[NumVertexAttributes]; INSERT_PADDING_WORDS(0xF); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 286491b73..982e84768 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -14,7 +14,10 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/vector_math.h" +#include "core/core.h" +#include "core/hle/kernel/process.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -146,7 +149,24 @@ static constexpr std::array vs_attrib_types{ }; void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const auto& vertex_attributes = regs.vertex_attrib_format; + + if (is_indexed) { + UNREACHABLE(); + } + const u32 vertex_num = regs.vertex_buffer.count; + + vs_input_size = 0; + u32 max_offset{}; + for (const auto& attrib : vertex_attributes) { + if (max_offset >= attrib.offset) { + continue; + } + max_offset = attrib.offset; + vs_input_size = max_offset + attrib.SizeInBytes(); + } + vs_input_size *= vertex_num; } void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { From 33c0bf9dc5939bf996bae1c611690944ac8ffbb2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 02:41:16 -0400 Subject: [PATCH 06/29] Maxwell3D: Call AccelerateDrawBatch on DrawArrays. --- src/video_core/engines/maxwell_3d.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 088d4357e..5359d21a2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,8 +7,11 @@ #include "core/core.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" +#include "video_core/video_core.h" namespace Tegra { namespace Engines { @@ -174,7 +177,9 @@ void Maxwell3D::ProcessQueryGet() { } void Maxwell3D::DrawArrays() { - LOG_WARNING(HW_GPU, "Game requested a DrawArrays, ignoring"); + LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(), + regs.vertex_buffer.count); + auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); if (debug_context) { @@ -184,6 +189,8 @@ void Maxwell3D::DrawArrays() { if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); } + + VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/); } void Maxwell3D::BindTextureInfoBuffer(const std::vector& parameters) { From 0162a2d5cbfb0e0df7f99187a87c031416fd79cc Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 03:59:51 -0400 Subject: [PATCH 07/29] gl_rasterizer: Implement DrawTriangles. --- .../renderer_opengl/gl_rasterizer.cpp | 195 +++++++++++++++++- 1 file changed, 194 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 982e84768..c44ce4a18 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -197,8 +197,201 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { } void RasterizerOpenGL::DrawTriangles() { + if (accelerate_draw == AccelDraw::Disabled) + return; + MICROPROFILE_SCOPE(OpenGL_Drawing); - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + // TODO(bunnei): Implement these + const bool has_stencil = false; + const bool using_color_fb = true; + const bool using_depth_fb = false; + + MathUtil::Rectangle viewport_rect_unscaled{ + static_cast(regs.viewport[0].x), // left + static_cast(regs.viewport[0].y + regs.viewport[0].height), // top + static_cast(regs.viewport[0].x + regs.viewport[0].width), // right + static_cast(regs.viewport[0].y) // bottom + }; + + const bool write_color_fb = + state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; + + const bool write_depth_fb = + (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || + (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0); + + Surface color_surface; + Surface depth_surface; + MathUtil::Rectangle surfaces_rect; + std::tie(color_surface, depth_surface, surfaces_rect) = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); + + const u16 res_scale = color_surface != nullptr + ? color_surface->res_scale + : (depth_surface == nullptr ? 1u : depth_surface->res_scale); + + MathUtil::Rectangle draw_rect{ + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.left) + + viewport_rect_unscaled.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast(MathUtil::Clamp(static_cast(surfaces_rect.bottom) + + viewport_rect_unscaled.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + + // Bind the framebuffer surfaces + state.draw.draw_framebuffer = framebuffer.handle; + state.Apply(); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + color_surface != nullptr ? color_surface->texture.handle : 0, 0); + if (depth_surface != nullptr) { + if (has_stencil) { + // attach both depth and stencil + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + } else { + // attach depth + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + // clear stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } + } else { + // clear both depth and stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } + + // Sync the viewport + state.viewport.x = + static_cast(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale; + state.viewport.y = + static_cast(surfaces_rect.bottom) + viewport_rect_unscaled.bottom * res_scale; + state.viewport.width = static_cast(viewport_rect_unscaled.GetWidth() * res_scale); + state.viewport.height = static_cast(viewport_rect_unscaled.GetHeight() * res_scale); + + // TODO(bunnei): Sync framebuffer_scale uniform here + // TODO(bunnei): Sync scissorbox uniform(s) here + // TODO(bunnei): Sync and bind the texture surfaces + + // Sync and bind the shader + if (shader_dirty) { + SetShader(); + shader_dirty = false; + } + + // Sync the uniform data + if (uniform_block_data.dirty) { + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); + uniform_block_data.dirty = false; + } + + // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable + // scissor test to prevent drawing outside of the framebuffer region + state.scissor.enabled = true; + state.scissor.x = draw_rect.left; + state.scissor.y = draw_rect.bottom; + state.scissor.width = draw_rect.GetWidth(); + state.scissor.height = draw_rect.GetHeight(); + state.Apply(); + + // Draw the vertex batch + GLenum primitive_mode; + switch (regs.draw.topology) { + case Maxwell::PrimitiveTopology::TriangleStrip: + primitive_mode = GL_TRIANGLE_STRIP; + break; + default: + UNREACHABLE(); + } + + const bool is_indexed = accelerate_draw == AccelDraw::Indexed; + + AnalyzeVertexArray(is_indexed); + state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.Apply(); + + size_t buffer_size = static_cast(vs_input_size); + if (is_indexed) { + UNREACHABLE(); + } + buffer_size += sizeof(VSUniformData); + + size_t ptr_pos = 0; + u8* buffer_ptr; + GLintptr buffer_offset; + std::tie(buffer_ptr, buffer_offset) = + stream_buffer->Map(static_cast(buffer_size), 4); + + SetupVertexArray(buffer_ptr, buffer_offset); + ptr_pos += vs_input_size; + + GLintptr index_buffer_offset = 0; + if (is_indexed) { + UNREACHABLE(); + } + + SetupVertexShader(reinterpret_cast(&buffer_ptr[ptr_pos]), + buffer_offset + static_cast(ptr_pos)); + const GLintptr vs_ubo_offset = buffer_offset + static_cast(ptr_pos); + ptr_pos += sizeof(VSUniformData); + + stream_buffer->Unmap(); + + const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { + if (has_ARB_direct_state_access) { + glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); + } else { + glBindBuffer(GL_COPY_WRITE_BUFFER, handle); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); + } + }; + + copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); + + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); + + if (is_indexed) { + UNREACHABLE(); + } else { + glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count); + } + + // Disable scissor test + state.scissor.enabled = false; + + accelerate_draw = AccelDraw::Disabled; + + // Unbind textures for potential future use as framebuffer attachments + for (auto& texture_unit : state.texture_units) { + texture_unit.texture_2d = 0; + } + state.Apply(); + + // Mark framebuffer surfaces as dirty + MathUtil::Rectangle draw_rect_unscaled{ + draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, + draw_rect.bottom / res_scale}; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + color_surface); + } + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + depth_surface); + } } void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} From 0ee38e136376964c0f80362952130258cc661c60 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 04:06:26 -0400 Subject: [PATCH 08/29] gl_rasterizer: Use 32 texture units instead of 3. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 + src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_opengl/gl_state.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c44ce4a18..fae9abd19 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -22,6 +22,7 @@ #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/renderer_opengl.h" +using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PixelFormat = SurfaceParams::PixelFormat; using SurfaceType = SurfaceParams::SurfaceType; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b387f383b..71a5437a2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -139,7 +139,7 @@ private: OGLVertexArray hw_vao; std::array hw_vao_enabled_attributes; - std::array texture_samplers; + std::array texture_samplers; static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; std::unique_ptr vertex_buffer; OGLBuffer uniform_buffer; diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 940575dfa..c1f4efc8c 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -85,7 +85,7 @@ public: struct { GLuint texture_2d; // GL_TEXTURE_BINDING_2D GLuint sampler; // GL_SAMPLER_BINDING - } texture_units[3]; + } texture_units[32]; struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER From 15925b82930cf619f81c26acca9cc8208e009149 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 16:26:14 -0400 Subject: [PATCH 09/29] maxwell_3d: Add VertexAttribute struct and cleanup. --- src/video_core/engines/maxwell_3d.h | 281 ++++++++++++++++------------ 1 file changed, 160 insertions(+), 121 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 869ddde90..c242786da 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -60,88 +60,169 @@ public: Fragment = 4, }; - enum class VertexSize : u32 { - Size_32_32_32_32 = 0x01, - Size_32_32_32 = 0x02, - Size_16_16_16_16 = 0x03, - Size_32_32 = 0x04, - Size_16_16_16 = 0x05, - Size_8_8_8_8 = 0x0a, - Size_16_16 = 0x0f, - Size_32 = 0x12, - Size_8_8_8 = 0x13, - Size_8_8 = 0x18, - Size_16 = 0x1b, - Size_8 = 0x1d, - Size_10_10_10_2 = 0x30, - Size_11_11_10 = 0x31, - }; + struct VertexAttribute { + enum class Size : u32 { + Size_32_32_32_32 = 0x01, + Size_32_32_32 = 0x02, + Size_16_16_16_16 = 0x03, + Size_32_32 = 0x04, + Size_16_16_16 = 0x05, + Size_8_8_8_8 = 0x0a, + Size_16_16 = 0x0f, + Size_32 = 0x12, + Size_8_8_8 = 0x13, + Size_8_8 = 0x18, + Size_16 = 0x1b, + Size_8 = 0x1d, + Size_10_10_10_2 = 0x30, + Size_11_11_10 = 0x31, + }; - static std::string VertexSizeToString(VertexSize vertex_size) { - switch (vertex_size) { - case VertexSize::Size_32_32_32_32: - return "32_32_32_32"; - case VertexSize::Size_32_32_32: - return "32_32_32"; - case VertexSize::Size_16_16_16_16: - return "16_16_16_16"; - case VertexSize::Size_32_32: - return "32_32"; - case VertexSize::Size_16_16_16: - return "16_16_16"; - case VertexSize::Size_8_8_8_8: - return "8_8_8_8"; - case VertexSize::Size_16_16: - return "16_16"; - case VertexSize::Size_32: - return "32"; - case VertexSize::Size_8_8_8: - return "8_8_8"; - case VertexSize::Size_8_8: - return "8_8"; - case VertexSize::Size_16: - return "16"; - case VertexSize::Size_8: - return "8"; - case VertexSize::Size_10_10_10_2: - return "10_10_10_2"; - case VertexSize::Size_11_11_10: - return "11_11_10"; + enum class Type : u32 { + SignedNorm = 1, + UnsignedNorm = 2, + SignedInt = 3, + UnsignedInt = 4, + UnsignedScaled = 5, + SignedScaled = 6, + Float = 7, + }; + + union { + BitField<0, 5, u32> buffer; + BitField<6, 1, u32> constant; + BitField<7, 14, u32> offset; + BitField<21, 6, Size> size; + BitField<27, 3, Type> type; + BitField<31, 1, u32> bgra; + }; + + u32 ComponentCount() const { + switch (size) { + case Size::Size_32_32_32_32: + return 4; + case Size::Size_32_32_32: + return 3; + case Size::Size_16_16_16_16: + return 4; + case Size::Size_32_32: + return 2; + case Size::Size_16_16_16: + return 3; + case Size::Size_8_8_8_8: + return 4; + case Size::Size_16_16: + return 2; + case Size::Size_32: + return 1; + case Size::Size_8_8_8: + return 3; + case Size::Size_8_8: + return 2; + case Size::Size_16: + return 1; + case Size::Size_8: + return 1; + case Size::Size_10_10_10_2: + return 4; + case Size::Size_11_11_10: + return 3; + default: + UNREACHABLE(); + } } - UNIMPLEMENTED(); - return {}; - } - enum class VertexType : u32 { - SignedNorm = 1, - UnsignedNorm = 2, - SignedInt = 3, - UnsignedInt = 4, - UnsignedScaled = 5, - SignedScaled = 6, - Float = 7, - }; - - static std::string VertexTypeToString(VertexType vertex_type) { - switch (vertex_type) { - case VertexType::SignedNorm: - return "SignedNorm"; - case VertexType::UnsignedNorm: - return "UnsignedNorm"; - case VertexType::SignedInt: - return "SignedInt"; - case VertexType::UnsignedInt: - return "UnsignedInt"; - case VertexType::UnsignedScaled: - return "UnsignedScaled"; - case VertexType::SignedScaled: - return "SignedScaled"; - case VertexType::Float: - return "Float"; + u32 SizeInBytes() const { + switch (size) { + case Size::Size_32_32_32_32: + return 16; + case Size::Size_32_32_32: + return 12; + case Size::Size_16_16_16_16: + return 8; + case Size::Size_32_32: + return 8; + case Size::Size_16_16_16: + return 6; + case Size::Size_8_8_8_8: + return 4; + case Size::Size_16_16: + return 4; + case Size::Size_32: + return 4; + case Size::Size_8_8_8: + return 3; + case Size::Size_8_8: + return 2; + case Size::Size_16: + return 2; + case Size::Size_8: + return 1; + case Size::Size_10_10_10_2: + return 4; + case Size::Size_11_11_10: + return 4; + default: + UNREACHABLE(); + } } - UNIMPLEMENTED(); - return {}; - } + + std::string SizeString() const { + switch (size) { + case Size::Size_32_32_32_32: + return "32_32_32_32"; + case Size::Size_32_32_32: + return "32_32_32"; + case Size::Size_16_16_16_16: + return "16_16_16_16"; + case Size::Size_32_32: + return "32_32"; + case Size::Size_16_16_16: + return "16_16_16"; + case Size::Size_8_8_8_8: + return "8_8_8_8"; + case Size::Size_16_16: + return "16_16"; + case Size::Size_32: + return "32"; + case Size::Size_8_8_8: + return "8_8_8"; + case Size::Size_8_8: + return "8_8"; + case Size::Size_16: + return "16"; + case Size::Size_8: + return "8"; + case Size::Size_10_10_10_2: + return "10_10_10_2"; + case Size::Size_11_11_10: + return "11_11_10"; + } + UNREACHABLE(); + return {}; + } + + std::string TypeToString() const { + switch (type) { + case Type::SignedNorm: + return "SignedNorm"; + case Type::UnsignedNorm: + return "UnsignedNorm"; + case Type::SignedInt: + return "SignedInt"; + case Type::UnsignedInt: + return "UnsignedInt"; + case Type::UnsignedScaled: + return "UnsignedScaled"; + case Type::SignedScaled: + return "SignedScaled"; + case Type::Float: + return "Float"; + } + UNREACHABLE(); + return {}; + } + }; enum class PrimitiveTopology : u32 { Points = 0x0, @@ -222,49 +303,7 @@ public: INSERT_PADDING_WORDS(0x5B); - union { - BitField<0, 5, u32> buffer; - BitField<6, 1, u32> constant; - BitField<7, 14, u32> offset; - BitField<21, 6, VertexSize> size; - BitField<27, 3, VertexType> type; - BitField<31, 1, u32> bgra; - - u32 SizeInBytes() const { - switch (size) { - case VertexSize::Size_32_32_32_32: - return 16; - case VertexSize::Size_32_32_32: - return 12; - case VertexSize::Size_16_16_16_16: - return 8; - case VertexSize::Size_32_32: - return 8; - case VertexSize::Size_16_16_16: - return 6; - case VertexSize::Size_8_8_8_8: - return 4; - case VertexSize::Size_16_16: - return 4; - case VertexSize::Size_32: - return 4; - case VertexSize::Size_8_8_8: - return 3; - case VertexSize::Size_8_8: - return 2; - case VertexSize::Size_16: - return 2; - case VertexSize::Size_8: - return 1; - case VertexSize::Size_10_10_10_2: - return 4; - case VertexSize::Size_11_11_10: - return 4; - default: - UNREACHABLE(); - } - } - } vertex_attrib_format[NumVertexAttributes]; + VertexAttribute vertex_attrib_format[NumVertexAttributes]; INSERT_PADDING_WORDS(0xF); From 3754e0fdfdb46a74965427160cde3818ffaa54c4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 21:04:23 -0400 Subject: [PATCH 10/29] maxwell_3d: Use names that match envytools for VertexType. --- src/video_core/engines/maxwell_3d.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index c242786da..9d63fa803 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -202,22 +202,22 @@ public: return {}; } - std::string TypeToString() const { + std::string TypeString() const { switch (type) { case Type::SignedNorm: - return "SignedNorm"; + return "SNORM"; case Type::UnsignedNorm: - return "UnsignedNorm"; + return "UNORM"; case Type::SignedInt: - return "SignedInt"; + return "SINT"; case Type::UnsignedInt: - return "UnsignedInt"; + return "UINT"; case Type::UnsignedScaled: - return "UnsignedScaled"; + return "USCALED"; case Type::SignedScaled: - return "SignedScaled"; + return "SSCALED"; case Type::Float: - return "Float"; + return "FLOAT"; } UNREACHABLE(); return {}; From 4369af6b7e266112cf4dd234f2008ac41df5c00e Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 21:09:30 -0400 Subject: [PATCH 11/29] maxwell_to_gl: Add module and function for decoding VertexType. --- src/video_core/CMakeLists.txt | 1 + .../renderer_opengl/maxwell_to_gl.h | 40 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 src/video_core/renderer_opengl/maxwell_to_gl.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3dab81769..841f27d7f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -31,6 +31,7 @@ add_library(video_core STATIC renderer_opengl/gl_state.h renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.h + renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h textures/decoders.cpp diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h new file mode 100644 index 000000000..54859b5a0 --- /dev/null +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -0,0 +1,40 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" + +namespace MaxwellToGL { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +inline GLenum VertexType(Maxwell::VertexAttribute attrib) { + switch (attrib.type) { + case Maxwell::VertexAttribute::Type::UnsignedNorm: { + + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return GL_UNSIGNED_BYTE; + } + + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString()); + UNREACHABLE(); + return {}; + } + + case Maxwell::VertexAttribute::Type::Float: + return GL_FLOAT; + } + + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString()); + UNREACHABLE(); + return {}; +} + +} // namespace MaxwellToGL From cd8bb6ea9b185abf2bec8706b2a3f0715a46fc1d Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 21:24:34 -0400 Subject: [PATCH 12/29] gl_rasterizer_cache: Fix an ASSERT_MSG. --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f556dbc41..42d3730ee 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1098,7 +1098,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); color_params.UpdateParams(); - ASSERT(!using_depth_fb, "depth buffer is unimplemented"); + ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); // depth_params.addr = config.GetDepthBufferPhysicalAddress(); // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); // depth_params.UpdateParams(); From c2dbdefedf2496b383e4686fdf3227c8c9576b09 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 21:29:47 -0400 Subject: [PATCH 13/29] gl_rasterizer: Implement SetupVertexArray. --- .../renderer_opengl/gl_rasterizer.cpp | 58 ++++++++++++------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fae9abd19..41e4ece1e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -142,37 +142,55 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -static constexpr std::array vs_attrib_types{ - GL_BYTE, // VertexAttributeFormat::BYTE - GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE - GL_SHORT, // VertexAttributeFormat::SHORT - GL_FLOAT // VertexAttributeFormat::FLOAT -}; - void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - const auto& vertex_attributes = regs.vertex_attrib_format; if (is_indexed) { UNREACHABLE(); } - const u32 vertex_num = regs.vertex_buffer.count; - vs_input_size = 0; - u32 max_offset{}; - for (const auto& attrib : vertex_attributes) { - if (max_offset >= attrib.offset) { - continue; - } - max_offset = attrib.offset; - vs_input_size = max_offset + attrib.SizeInBytes(); - } - vs_input_size *= vertex_num; + // TODO(bunnei): Add support for 1+ vertex arrays + vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride; } void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + + state.draw.vertex_array = hw_vao.handle; + state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.Apply(); + + // TODO(bunnei): Add support for 1+ vertex arrays + const auto& vertex_array{regs.vertex_array[0]}; + ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); + ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); + for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { + ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); + } + + // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. + // Enables the first 16 vertex attributes always, as we don't know which ones are actually used + // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now + // to avoid OpenGL errors. + for (unsigned index = 0; index < 16; ++index) { + auto& attrib = regs.vertex_attrib_format[index]; + glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + GL_FALSE, vertex_array.stride, + reinterpret_cast(buffer_offset + attrib.offset)); + glEnableVertexAttribArray(index); + hw_vao_enabled_attributes[index] = true; + } + + // Copy vertex array data + const u32 data_size{vertex_array.stride * regs.vertex_buffer.count}; + const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; + res_cache.FlushRegion(data_addr, data_size, nullptr); + std::memcpy(array_ptr, Memory::GetPointer(data_addr), data_size); + + array_ptr += data_size; + buffer_offset += data_size; } void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { From d732142b667a650e7418ff5d6c985fa333e04c38 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 22:21:14 -0400 Subject: [PATCH 14/29] memory: Add RasterizerMarkRegionCached code and cleanup. --- src/core/memory.cpp | 408 +++++++++++++++++++++----------------------- src/core/memory.h | 5 + 2 files changed, 204 insertions(+), 209 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index d6469dd3d..47c4828f7 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -15,6 +15,7 @@ #include "core/core.h" #include "core/hle/kernel/memory.h" #include "core/hle/kernel/process.h" +#include "core/hle/lock.h" #include "core/memory.h" #include "core/memory_setup.h" #include "video_core/renderer_base.h" @@ -115,91 +116,120 @@ static std::set GetSpecialHandlers(VAddr vaddr, u64 size) { return GetSpecialHandlers(page_table, vaddr, size); } -template -boost::optional ReadSpecial(VAddr addr); +/** + * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned) + * using a VMA from the current process + */ +static u8* GetPointerFromVMA(const Kernel::Process& process, VAddr vaddr) { + u8* direct_pointer = nullptr; + + auto& vm_manager = process.vm_manager; + + auto it = vm_manager.FindVMA(vaddr); + ASSERT(it != vm_manager.vma_map.end()); + + auto& vma = it->second; + switch (vma.type) { + case Kernel::VMAType::AllocatedMemoryBlock: + direct_pointer = vma.backing_block->data() + vma.offset; + break; + case Kernel::VMAType::BackingMemory: + direct_pointer = vma.backing_memory; + break; + case Kernel::VMAType::Free: + return nullptr; + default: + UNREACHABLE(); + } + + return direct_pointer + (vaddr - vma.base); +} + +/** + * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned) + * using a VMA from the current process. + */ +static u8* GetPointerFromVMA(VAddr vaddr) { + return GetPointerFromVMA(*Core::CurrentProcess(), vaddr); +} template T Read(const VAddr vaddr) { - if ((vaddr >> PAGE_BITS) >= PAGE_TABLE_NUM_ENTRIES) { - LOG_ERROR(HW_Memory, "Read%lu after page table @ 0x%016" PRIX64, sizeof(T) * 8, vaddr); - return 0; - } - - const PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; - switch (type) { - case PageType::Unmapped: - LOG_ERROR(HW_Memory, "unmapped Read%zu @ 0x%016" PRIX64, sizeof(T) * 8, vaddr); - return 0; - case PageType::Special: { - if (auto result = ReadSpecial(vaddr)) - return *result; - [[fallthrough]]; - } - case PageType::Memory: { - const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; - ASSERT_MSG(page_pointer, "Mapped memory page without a pointer @ %016" PRIX64, vaddr); - + const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; + if (page_pointer) { + // NOTE: Avoid adding any extra logic to this fast-path block T value; std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); return value; } + + // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state + std::lock_guard lock(HLE::g_hle_lock); + + PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; + switch (type) { + case PageType::Unmapped: + LOG_ERROR(HW_Memory, "unmapped Read%lu @ 0x%08X", sizeof(T) * 8, vaddr); + return 0; + case PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); + break; + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush); + + T value; + std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T)); + return value; + } + default: + UNREACHABLE(); } - UNREACHABLE(); - return 0; } -template -bool WriteSpecial(VAddr addr, const T data); - template void Write(const VAddr vaddr, const T data) { - if ((vaddr >> PAGE_BITS) >= PAGE_TABLE_NUM_ENTRIES) { - LOG_ERROR(HW_Memory, "Write%lu after page table 0x%08X @ 0x%016" PRIX64, sizeof(data) * 8, - (u32)data, vaddr); - return; - } - - const PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; - switch (type) { - case PageType::Unmapped: - LOG_ERROR(HW_Memory, "unmapped Write%zu 0x%08X @ 0x%016" PRIX64, sizeof(data) * 8, - static_cast(data), vaddr); - return; - case PageType::Special: { - if (WriteSpecial(vaddr, data)) - return; - [[fallthrough]]; - } - case PageType::Memory: { - u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; - ASSERT_MSG(page_pointer, "Mapped memory page without a pointer @ %016" PRIX64, vaddr); + u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; + if (page_pointer) { + // NOTE: Avoid adding any extra logic to this fast-path block std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); return; } + + // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state + std::lock_guard lock(HLE::g_hle_lock); + + PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; + switch (type) { + case PageType::Unmapped: + LOG_ERROR(HW_Memory, "unmapped Write%lu 0x%08X @ 0x%08X", sizeof(data) * 8, (u32)data, + vaddr); + return; + case PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); + break; + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); + std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); + break; + } + default: + UNREACHABLE(); } - UNREACHABLE(); } bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) { auto& page_table = process.vm_manager.page_table; - if ((vaddr >> PAGE_BITS) >= PAGE_TABLE_NUM_ENTRIES) + const u8* page_pointer = page_table.pointers[vaddr >> PAGE_BITS]; + if (page_pointer) + return true; + + if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) + return true; + + if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special) return false; - const PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; - switch (type) { - case PageType::Unmapped: - return false; - case PageType::Memory: - return true; - case PageType::Special: { - for (auto handler : GetSpecialHandlers(page_table, vaddr, 1)) - if (auto result = handler->IsValidAddress(vaddr)) - return *result; - return current_page_table->pointers[vaddr >> PAGE_BITS] != nullptr; - } - } - UNREACHABLE(); return false; } @@ -217,7 +247,11 @@ u8* GetPointer(const VAddr vaddr) { return page_pointer + (vaddr & PAGE_MASK); } - LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%016" PRIx64, vaddr); + if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) { + return GetPointerFromVMA(vaddr); + } + + LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr); return nullptr; } @@ -291,6 +325,58 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } +void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) { + if (start == 0) { + return; + } + + u64 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1; + VAddr vaddr = start; + + for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { + PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + + if (cached) { + // Switch page type to cached if now cached + switch (page_type) { + case PageType::Unmapped: + // It is not necessary for a process to have this region mapped into its address + // space, for example, a system module need not have a VRAM mapping. + break; + case PageType::Memory: + page_type = PageType::RasterizerCachedMemory; + current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; + break; + default: + UNREACHABLE(); + } + } else { + // Switch page type to uncached if now uncached + switch (page_type) { + case PageType::Unmapped: + // It is not necessary for a process to have this region mapped into its address + // space, for example, a system module need not have a VRAM mapping. + break; + case PageType::RasterizerCachedMemory: { + u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); + if (pointer == nullptr) { + // It's possible that this function has been called while updating the pagetable + // after unmapping a VMA. In that case the underlying VMA will no longer exist, + // and we should just leave the pagetable entry blank. + page_type = PageType::Unmapped; + } else { + page_type = PageType::Memory; + current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; + } + break; + } + default: + UNREACHABLE(); + } + } + } +} + void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here @@ -344,17 +430,6 @@ u64 Read64(const VAddr addr) { return Read(addr); } -static bool ReadSpecialBlock(const Kernel::Process& process, const VAddr src_addr, - void* dest_buffer, const size_t size) { - auto& page_table = process.vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, src_addr, size)) { - if (handler->ReadBlock(src_addr, dest_buffer, size)) { - return true; - } - } - return false; -} - void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer, const size_t size) { auto& page_table = process.vm_manager.page_table; @@ -364,21 +439,15 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_ size_t page_offset = src_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); const VAddr current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); switch (page_table.attributes[page_index]) { - case PageType::Unmapped: - LOG_ERROR(HW_Memory, - "unmapped ReadBlock @ 0x%016" PRIX64 " (start address = 0x%" PRIx64 - ", size = %zu)", + case PageType::Unmapped: { + LOG_ERROR(HW_Memory, "unmapped ReadBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, src_addr, size); std::memset(dest_buffer, 0, copy_amount); break; - case PageType::Special: { - if (ReadSpecialBlock(process, current_vaddr, dest_buffer, copy_amount)) - break; - [[fallthrough]]; } case PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); @@ -387,6 +456,12 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_ std::memcpy(dest_buffer, src_ptr, copy_amount); break; } + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), + FlushMode::Flush); + std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount); + break; + } default: UNREACHABLE(); } @@ -418,17 +493,6 @@ void Write64(const VAddr addr, const u64 data) { Write(addr, data); } -static bool WriteSpecialBlock(const Kernel::Process& process, const VAddr dest_addr, - const void* src_buffer, const size_t size) { - auto& page_table = process.vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, dest_addr, size)) { - if (handler->WriteBlock(dest_addr, src_buffer, size)) { - return true; - } - } - return false; -} - void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, const size_t size) { auto& page_table = process.vm_manager.page_table; @@ -437,20 +501,16 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi size_t page_offset = dest_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); const VAddr current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); switch (page_table.attributes[page_index]) { - case PageType::Unmapped: + case PageType::Unmapped: { LOG_ERROR(HW_Memory, - "unmapped WriteBlock @ 0x%016" PRIX64 " (start address = 0x%016" PRIX64 - ", size = %zu)", + "unmapped WriteBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, dest_addr, size); break; - case PageType::Special: - if (WriteSpecialBlock(process, current_vaddr, src_buffer, copy_amount)) - break; - [[fallthrough]]; + } case PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); @@ -458,6 +518,12 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi std::memcpy(dest_ptr, src_buffer, copy_amount); break; } + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), + FlushMode::Invalidate); + std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); + break; + } default: UNREACHABLE(); } @@ -473,9 +539,8 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size WriteBlock(*Core::CurrentProcess(), dest_addr, src_buffer, size); } -void ZeroBlock(const VAddr dest_addr, const size_t size) { - const auto& process = *Core::CurrentProcess(); - +void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size_t size) { + auto& page_table = process.vm_manager.page_table; size_t remaining_size = size; size_t page_index = dest_addr >> PAGE_BITS; size_t page_offset = dest_addr & PAGE_MASK; @@ -483,27 +548,28 @@ void ZeroBlock(const VAddr dest_addr, const size_t size) { static const std::array zeros = {}; while (remaining_size > 0) { - const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); const VAddr current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); - switch (current_page_table->attributes[page_index]) { - case PageType::Unmapped: - LOG_ERROR(HW_Memory, - "unmapped ZeroBlock @ 0x%016" PRIX64 " (start address = 0x%016" PRIX64 - ", size = %zu)", + switch (page_table.attributes[page_index]) { + case PageType::Unmapped: { + LOG_ERROR(HW_Memory, "unmapped ZeroBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, dest_addr, size); break; - case PageType::Special: - if (WriteSpecialBlock(process, current_vaddr, zeros.data(), copy_amount)) - break; - [[fallthrough]]; + } case PageType::Memory: { - DEBUG_ASSERT(current_page_table->pointers[page_index]); + DEBUG_ASSERT(page_table.pointers[page_index]); - u8* dest_ptr = current_page_table->pointers[page_index] + page_offset; + u8* dest_ptr = page_table.pointers[page_index] + page_offset; std::memset(dest_ptr, 0, copy_amount); break; } + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), + FlushMode::Invalidate); + std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); + break; + } default: UNREACHABLE(); } @@ -514,37 +580,33 @@ void ZeroBlock(const VAddr dest_addr, const size_t size) { } } -void CopyBlock(VAddr dest_addr, VAddr src_addr, const size_t size) { - const auto& process = *Core::CurrentProcess(); - +void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr, const size_t size) { + auto& page_table = process.vm_manager.page_table; size_t remaining_size = size; size_t page_index = src_addr >> PAGE_BITS; size_t page_offset = src_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); const VAddr current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); - switch (current_page_table->attributes[page_index]) { - case PageType::Unmapped: - LOG_ERROR(HW_Memory, - "unmapped CopyBlock @ 0x%016" PRIX64 " (start address = 0x%016" PRIX64 - ", size = %zu)", + switch (page_table.attributes[page_index]) { + case PageType::Unmapped: { + LOG_ERROR(HW_Memory, "unmapped CopyBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, src_addr, size); - ZeroBlock(dest_addr, copy_amount); + ZeroBlock(process, dest_addr, copy_amount); break; - case PageType::Special: { - std::vector buffer(copy_amount); - if (ReadSpecialBlock(process, current_vaddr, buffer.data(), buffer.size())) { - WriteBlock(dest_addr, buffer.data(), buffer.size()); - break; - } - [[fallthrough]]; } case PageType::Memory: { - DEBUG_ASSERT(current_page_table->pointers[page_index]); - const u8* src_ptr = current_page_table->pointers[page_index] + page_offset; - WriteBlock(dest_addr, src_ptr, copy_amount); + DEBUG_ASSERT(page_table.pointers[page_index]); + const u8* src_ptr = page_table.pointers[page_index] + page_offset; + WriteBlock(process, dest_addr, src_ptr, copy_amount); + break; + } + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(current_vaddr, static_cast(copy_amount), + FlushMode::Flush); + WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount); break; } default: @@ -559,78 +621,6 @@ void CopyBlock(VAddr dest_addr, VAddr src_addr, const size_t size) { } } -template <> -boost::optional ReadSpecial(VAddr addr) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u8))) - if (auto result = handler->Read8(addr)) - return *result; - return {}; -} - -template <> -boost::optional ReadSpecial(VAddr addr) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u16))) - if (auto result = handler->Read16(addr)) - return *result; - return {}; -} - -template <> -boost::optional ReadSpecial(VAddr addr) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u32))) - if (auto result = handler->Read32(addr)) - return *result; - return {}; -} - -template <> -boost::optional ReadSpecial(VAddr addr) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u64))) - if (auto result = handler->Read64(addr)) - return *result; - return {}; -} - -template <> -bool WriteSpecial(VAddr addr, const u8 data) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u8))) - if (handler->Write8(addr, data)) - return true; - return false; -} - -template <> -bool WriteSpecial(VAddr addr, const u16 data) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u16))) - if (handler->Write16(addr, data)) - return true; - return false; -} - -template <> -bool WriteSpecial(VAddr addr, const u32 data) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u32))) - if (handler->Write32(addr, data)) - return true; - return false; -} - -template <> -bool WriteSpecial(VAddr addr, const u64 data) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u64))) - if (handler->Write64(addr, data)) - return true; - return false; -} - boost::optional TryVirtualToPhysicalAddress(const VAddr addr) { if (addr == 0) { return 0; diff --git a/src/core/memory.h b/src/core/memory.h index 4b9c482fe..413a7b4e8 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -254,6 +254,11 @@ enum class FlushMode { FlushAndInvalidate, }; +/** + * Mark each page touching the region as cached. + */ +void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached); + /** * Flushes and invalidates any externally cached rasterizer resources touching the given virtual * address region. From c1ccbf332fdb00151d02907b84106304736fc39a Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 22:24:45 -0400 Subject: [PATCH 15/29] gl_rasterizer_cache: Implement UpdatePagesCachedCount. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 43 ++++++++++++++++--- .../renderer_opengl/gl_rasterizer_cache.h | 2 +- 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 42d3730ee..2ffbd3bab 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -250,8 +250,8 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec static bool FillSurface(const Surface& surface, const u8* fill_data, const MathUtil::Rectangle& fill_rect, GLuint draw_fb_handle) { - ASSERT_MSG(false, "Unimplemented"); - return true; + UNREACHABLE(); + return {}; } SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { @@ -490,8 +490,9 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { ASSERT(type != SurfaceType::Fill); - u8* texture_src_data = Memory::GetPointer(addr); - ASSERT(texture_src_data); + u8* const texture_src_data = Memory::GetPointer(addr); + if (texture_src_data == nullptr) + return; if (gl_buffer == nullptr) { gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); @@ -1056,7 +1057,7 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& } Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return {}; } @@ -1155,7 +1156,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( } Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return {}; } @@ -1399,5 +1400,33 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { } void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { - // ASSERT_MSG(false, "Unimplemented"); + const u64 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u64 page_start = addr >> Memory::PAGE_BITS; + const u64 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0e1c481d7..1f660d30c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -41,7 +41,7 @@ static_assert(std::is_same>; using SurfaceSurfaceRect_Tuple = std::tuple>; -using PageMap = boost::icl::interval_map; +using PageMap = boost::icl::interval_map; enum class ScaleMatch { Exact, // only accept same res scale From 7504df52fcc3dd320091a362ecc0df1c93ec8cfd Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 22:25:38 -0400 Subject: [PATCH 16/29] renderer_opengl: Remove framebuffer RasterizerFlushVirtualRegion hack. --- src/video_core/renderer_opengl/renderer_opengl.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 85e91c0e2..221d0a222 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -142,11 +142,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - // TODO(bunnei): The framebuffer region should only be invalidated if it is written to, not - // every frame. When we find the right place for this, the below line can be removed. - Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, - Memory::FlushMode::Invalidate); - // Framebuffer orientation handling framebuffer_transform_flags = framebuffer.transform_flags; From 0a5832798a0ce33a84e74a6f9aedc90278cb05ac Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 22:38:08 -0400 Subject: [PATCH 17/29] renderer_opengl: Logging, etc. cleanup. --- .../renderer_opengl/gl_rasterizer.cpp | 35 ++++++++++--------- .../renderer_opengl/gl_shader_decompiler.cpp | 2 +- .../renderer_opengl/gl_shader_gen.cpp | 4 +-- .../renderer_opengl/gl_shader_util.cpp | 20 +++++------ .../renderer_opengl/renderer_opengl.cpp | 4 +-- src/video_core/video_core.cpp | 2 +- 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 41e4ece1e..72481509b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -20,6 +20,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -124,14 +125,14 @@ RasterizerOpenGL::RasterizerOpenGL() { glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); } else { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } accelerate_draw = AccelDraw::Disabled; glEnable(GL_BLEND); - LOG_WARNING(HW_GPU, "Sync fixed function OpenGL state here when ready"); + LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); } RasterizerOpenGL::~RasterizerOpenGL() { @@ -200,12 +201,12 @@ void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_ void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_FS); - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { if (!has_ARB_separate_shader_objects) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return false; } @@ -438,17 +439,17 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { MICROPROFILE_SCOPE(OpenGL_Blits); - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return true; } bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return true; } bool RasterizerOpenGL::AccelerateFill(const void* config) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return true; } @@ -529,14 +530,14 @@ void main() { return; } - LOG_ERROR(HW_GPU, "Emulated shaders are not supported! Using a passthrough shader."); + LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); current_shader = &test_shader; if (has_ARB_separate_shader_objects) { test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); } else { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } state.draw.shader_program = test_shader.shader.handle; @@ -549,33 +550,33 @@ void main() { } void RasterizerOpenGL::SyncClipEnabled() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncClipCoef() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncCullMode() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncDepthScale() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncDepthOffset() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendEnabled() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendFuncs() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendColor() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0e0ef18cc..564ea8f9e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -26,7 +26,7 @@ public: sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} std::string Decompile() { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index f242bce1d..8f3c98800 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -8,12 +8,12 @@ namespace GLShader { std::string GenerateVertexShader(const MaxwellVSConfig& config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } std::string GenerateFragmentShader(const MaxwellFSConfig& config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a3ba16761..a6c6204d5 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -38,8 +38,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", - &vertex_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", + &vertex_shader_error[0]); } } } @@ -62,8 +62,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling geometry shader:\n%s", - &geometry_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", + &geometry_shader_error[0]); } } } @@ -86,8 +86,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", + &fragment_shader_error[0]); } } } @@ -128,20 +128,20 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); } } // If the program linking failed at least one of the shaders was probably bad if (result == GL_FALSE) { if (vertex_shader) { - LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); + LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); } if (geometry_shader) { - LOG_ERROR(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); + LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); } if (fragment_shader) { - LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); + LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); } } ASSERT_MSG(result == GL_TRUE, "Shader not linked"); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 221d0a222..82063df72 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -279,7 +279,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, gl_framebuffer_data.resize(texture.width * texture.height * 4); break; default: - UNIMPLEMENTED(); + UNREACHABLE(); } state.texture_units[0].texture_2d = texture.resource.handle; @@ -305,7 +305,7 @@ void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl right = texcoords.left; } else { // Other transformations are unsupported - LOG_CRITICAL(HW_GPU, "unsupported framebuffer_transform_flags=%d", + LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags=%d", framebuffer_transform_flags); UNIMPLEMENTED(); } diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 864691baa..289140f31 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -26,7 +26,7 @@ bool Init(EmuWindow* emu_window) { if (g_renderer->Init()) { LOG_DEBUG(Render, "initialized OK"); } else { - LOG_ERROR(Render, "initialization failed !"); + LOG_CRITICAL(Render, "initialization failed !"); return false; } return true; From 1bfc0dc2dbb0caf02dce673e08a66762afee1457 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 22:42:28 -0400 Subject: [PATCH 18/29] gl_rasterizer: Use passthrough shader for SetupVertexShader. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 72481509b..630b46737 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -196,7 +196,8 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VS); - UNIMPLEMENTED(); + LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); } void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { From d89bfec5f55854daa1ff717e4a5f721bfa08a541 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 22:50:21 -0400 Subject: [PATCH 19/29] rasterizer: Rename DrawTriangles to DrawArrays. --- src/video_core/rasterizer_interface.h | 4 ++-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ++-- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a493e1d60..8239f9aad 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -15,8 +15,8 @@ class RasterizerInterface { public: virtual ~RasterizerInterface() {} - /// Draw the current batch of triangles - virtual void DrawTriangles() = 0; + /// Draw the current batch of vertex arrays + virtual void DrawArrays() = 0; /// Notify rasterizer that the specified Maxwell register has been changed virtual void NotifyMaxwellRegisterChanged(u32 id) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 630b46737..dfc4beb9a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -212,12 +212,12 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { } accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; - DrawTriangles(); + DrawArrays(); return true; } -void RasterizerOpenGL::DrawTriangles() { +void RasterizerOpenGL::DrawArrays() { if (accelerate_draw == AccelDraw::Disabled) return; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 71a5437a2..c889b1aff 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -29,7 +29,7 @@ public: RasterizerOpenGL(); ~RasterizerOpenGL() override; - void DrawTriangles() override; + void DrawArrays() override; void NotifyMaxwellRegisterChanged(u32 id) override; void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; From 527ce12ce4288e0f190002ac2355b8ed471b7a8d Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 24 Mar 2018 23:22:19 -0400 Subject: [PATCH 20/29] maxwel_to_gl: Fix string formatting in log statements. --- src/video_core/renderer_opengl/maxwell_to_gl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 54859b5a0..003ee2fd9 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -23,7 +23,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_BYTE; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString()); + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str()); UNREACHABLE(); return {}; } @@ -32,7 +32,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_FLOAT; } - LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString()); + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str()); UNREACHABLE(); return {}; } From d4fb8a887cd88f2cbbf34b3f97e595070e231997 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 25 Mar 2018 00:01:13 -0400 Subject: [PATCH 21/29] memory: Fix cast for ReadBlock/WriteBlock/ZeroBlock/CopyBlock. --- src/core/memory.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 47c4828f7..291bf066f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -439,7 +439,8 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_ size_t page_offset = src_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = + std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); const VAddr current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); switch (page_table.attributes[page_index]) { @@ -501,7 +502,8 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi size_t page_offset = dest_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = + std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); const VAddr current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); switch (page_table.attributes[page_index]) { @@ -548,7 +550,8 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size static const std::array zeros = {}; while (remaining_size > 0) { - const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = + std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); const VAddr current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); switch (page_table.attributes[page_index]) { @@ -587,7 +590,8 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr, size_t page_offset = src_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = + std::min(static_cast(PAGE_SIZE) - page_offset, remaining_size); const VAddr current_vaddr = static_cast((page_index << PAGE_BITS) + page_offset); switch (page_table.attributes[page_index]) { From a6cab532f89ada46988e391708eb06b3be5ade19 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 25 Mar 2018 00:09:53 -0400 Subject: [PATCH 22/29] gl_rasterizer: Normalize vertex array data as appropriate. --- src/video_core/engines/maxwell_3d.h | 4 ++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 9d63fa803..0e1ae5912 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -222,6 +222,10 @@ public: UNREACHABLE(); return {}; } + + bool IsNormalized() const { + return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); + } }; enum class PrimitiveTopology : u32 { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dfc4beb9a..7950bac9e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -178,7 +178,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), - GL_FALSE, vertex_array.stride, + attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, reinterpret_cast(buffer_offset + attrib.offset)); glEnableVertexAttribArray(index); hw_vao_enabled_attributes[index] = true; From ac19e3d06193d5035694bb9918c705d5eb6762db Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 25 Mar 2018 01:00:41 -0400 Subject: [PATCH 23/29] gl_rasterizer: Use ReadBlock instead of GetPointer for SetupVertexArray. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7950bac9e..c4abbb2cd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -188,7 +188,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { const u32 data_size{vertex_array.stride * regs.vertex_buffer.count}; const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; res_cache.FlushRegion(data_addr, data_size, nullptr); - std::memcpy(array_ptr, Memory::GetPointer(data_addr), data_size); + Memory::ReadBlock(data_addr, array_ptr, data_size); array_ptr += data_size; buffer_offset += data_size; From 666d53299c9cc61d88c0a4ed32cebd7cbbb5b712 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 25 Mar 2018 17:57:53 -0400 Subject: [PATCH 24/29] graphics_surface: Fix merge conflicts. --- src/video_core/gpu.h | 1 + src/yuzu/debugger/graphics/graphics_surface.cpp | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ab24504a6..71a8661b4 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -14,6 +14,7 @@ namespace Tegra { enum class RenderTargetFormat : u32 { + NONE = 0x0, RGBA8_UNORM = 0xD5, }; diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp index 8e6509adc..7ea08c23f 100644 --- a/src/yuzu/debugger/graphics/graphics_surface.cpp +++ b/src/yuzu/debugger/graphics/graphics_surface.cpp @@ -339,9 +339,9 @@ void GraphicsSurfaceWidget::OnUpdate() { static_cast(Source::RenderTarget0)]; surface_address = rt.Address(); - surface_width = rt.horiz; - surface_height = rt.vert; - if (rt.format != 0) { + surface_width = rt.width; + surface_height = rt.height; + if (rt.format != Tegra::RenderTargetFormat::NONE) { surface_format = ConvertToTextureFormat(static_cast(rt.format)); } From 67bc2f5ecd325d8d23d6d3d1ac979c8c78fdd743 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 26 Mar 2018 20:09:01 -0400 Subject: [PATCH 25/29] gl_rasterizer: Move PrimitiveTopology check to MaxwellToGL. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 ++----------- src/video_core/renderer_opengl/maxwell_to_gl.h | 10 ++++++++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c4abbb2cd..487d37a26 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -326,17 +326,7 @@ void RasterizerOpenGL::DrawArrays() { state.Apply(); // Draw the vertex batch - GLenum primitive_mode; - switch (regs.draw.topology) { - case Maxwell::PrimitiveTopology::TriangleStrip: - primitive_mode = GL_TRIANGLE_STRIP; - break; - default: - UNREACHABLE(); - } - const bool is_indexed = accelerate_draw == AccelDraw::Indexed; - AnalyzeVertexArray(is_indexed); state.draw.vertex_buffer = stream_buffer->GetHandle(); state.Apply(); @@ -384,7 +374,8 @@ void RasterizerOpenGL::DrawArrays() { if (is_indexed) { UNREACHABLE(); } else { - glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count); + glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0, + regs.vertex_buffer.count); } // Disable scissor test diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 003ee2fd9..d847317ac 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -37,4 +37,14 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return {}; } +inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::TriangleStrip: + return GL_TRIANGLE_STRIP; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology); + UNREACHABLE(); + return {}; +} + } // namespace MaxwellToGL From d30110348b10e1cf9765a5c7cec294a4e076a3af Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 26 Mar 2018 20:45:10 -0400 Subject: [PATCH 26/29] gl_rasterizer: Add a SyncViewport method. --- src/video_core/engines/maxwell_3d.h | 10 ++++++ .../renderer_opengl/gl_rasterizer.cpp | 35 +++++++++---------- .../renderer_opengl/gl_rasterizer.h | 3 ++ 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0e1ae5912..3066bc606 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -11,6 +11,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/textures/texture.h" @@ -281,6 +282,15 @@ public: }; float depth_range_near; float depth_range_far; + + MathUtil::Rectangle GetRect() const { + return { + static_cast(x), // left + static_cast(y + height), // top + static_cast(x + width), // right + static_cast(y) // bottom + }; + }; } viewport[NumViewports]; INSERT_PADDING_WORDS(0x1D); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 487d37a26..d83c38cf8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -228,13 +228,7 @@ void RasterizerOpenGL::DrawArrays() { const bool has_stencil = false; const bool using_color_fb = true; const bool using_depth_fb = false; - - MathUtil::Rectangle viewport_rect_unscaled{ - static_cast(regs.viewport[0].x), // left - static_cast(regs.viewport[0].y + regs.viewport[0].height), // top - static_cast(regs.viewport[0].x + regs.viewport[0].width), // right - static_cast(regs.viewport[0].y) // bottom - }; + const MathUtil::Rectangle viewport_rect{regs.viewport[0].GetRect()}; const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || @@ -248,7 +242,7 @@ void RasterizerOpenGL::DrawArrays() { Surface depth_surface; MathUtil::Rectangle surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); const u16 res_scale = color_surface != nullptr ? color_surface->res_scale @@ -256,16 +250,16 @@ void RasterizerOpenGL::DrawArrays() { MathUtil::Rectangle draw_rect{ static_cast(MathUtil::Clamp(static_cast(surfaces_rect.left) + - viewport_rect_unscaled.left * res_scale, + viewport_rect.left * res_scale, surfaces_rect.left, surfaces_rect.right)), // Left static_cast(MathUtil::Clamp(static_cast(surfaces_rect.bottom) + - viewport_rect_unscaled.top * res_scale, + viewport_rect.top * res_scale, surfaces_rect.bottom, surfaces_rect.top)), // Top static_cast(MathUtil::Clamp(static_cast(surfaces_rect.left) + - viewport_rect_unscaled.right * res_scale, + viewport_rect.right * res_scale, surfaces_rect.left, surfaces_rect.right)), // Right static_cast(MathUtil::Clamp(static_cast(surfaces_rect.bottom) + - viewport_rect_unscaled.bottom * res_scale, + viewport_rect.bottom * res_scale, surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces @@ -293,12 +287,7 @@ void RasterizerOpenGL::DrawArrays() { } // Sync the viewport - state.viewport.x = - static_cast(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale; - state.viewport.y = - static_cast(surfaces_rect.bottom) + viewport_rect_unscaled.bottom * res_scale; - state.viewport.width = static_cast(viewport_rect_unscaled.GetWidth() * res_scale); - state.viewport.height = static_cast(viewport_rect_unscaled.GetHeight() * res_scale); + SyncViewport(surfaces_rect, res_scale); // TODO(bunnei): Sync framebuffer_scale uniform here // TODO(bunnei): Sync scissorbox uniform(s) here @@ -541,6 +530,16 @@ void main() { } } +void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle& surfaces_rect, u16 res_scale) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const MathUtil::Rectangle viewport_rect{regs.viewport[0].GetRect()}; + + state.viewport.x = static_cast(surfaces_rect.left) + viewport_rect.left * res_scale; + state.viewport.y = static_cast(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; + state.viewport.width = static_cast(viewport_rect.GetWidth() * res_scale); + state.viewport.height = static_cast(viewport_rect.GetHeight() * res_scale); +} + void RasterizerOpenGL::SyncClipEnabled() { UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index c889b1aff..1cd46c96a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -87,6 +87,9 @@ public: private: struct SamplerInfo {}; + /// Syncs the viewport to match the guest state + void SyncViewport(const MathUtil::Rectangle& surfaces_rect, u16 res_scale); + /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); From c33abac275a356a9b6c7a6c8d4214e1f8cb1a80c Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 26 Mar 2018 20:58:19 -0400 Subject: [PATCH 27/29] gl_rasterizer: Move code to bind framebuffer surfaces before draw to its own function. --- .../renderer_opengl/gl_rasterizer.cpp | 49 ++++++++++--------- .../renderer_opengl/gl_rasterizer.h | 4 ++ 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d83c38cf8..911890f16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -263,28 +263,7 @@ void RasterizerOpenGL::DrawArrays() { surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces - state.draw.draw_framebuffer = framebuffer.handle; - state.Apply(); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - color_surface != nullptr ? color_surface->texture.handle : 0, 0); - if (depth_surface != nullptr) { - if (has_stencil) { - // attach both depth and stencil - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); - } else { - // attach depth - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); - // clear stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - } - } else { - // clear both depth and stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - } + BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); // Sync the viewport SyncViewport(surfaces_rect, res_scale); @@ -530,6 +509,32 @@ void main() { } } +void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, + const Surface& depth_surface, bool has_stencil) { + state.draw.draw_framebuffer = framebuffer.handle; + state.Apply(); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + color_surface != nullptr ? color_surface->texture.handle : 0, 0); + if (depth_surface != nullptr) { + if (has_stencil) { + // attach both depth and stencil + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + } else { + // attach depth + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + // clear stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } + } else { + // clear both depth and stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } +} + void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle& surfaces_rect, u16 res_scale) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; const MathUtil::Rectangle viewport_rect{regs.viewport[0].GetRect()}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1cd46c96a..fd53e94cd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -87,6 +87,10 @@ public: private: struct SamplerInfo {}; + /// Binds the framebuffer color and depth surface + void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, + bool has_stencil); + /// Syncs the viewport to match the guest state void SyncViewport(const MathUtil::Rectangle& surfaces_rect, u16 res_scale); From d8f745382b64d6adefec666489ab008090842a9d Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 26 Mar 2018 21:02:31 -0400 Subject: [PATCH 28/29] graphics_surface: Remove superfluous cast. --- src/yuzu/debugger/graphics/graphics_surface.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp index 7ea08c23f..1e4844b57 100644 --- a/src/yuzu/debugger/graphics/graphics_surface.cpp +++ b/src/yuzu/debugger/graphics/graphics_surface.cpp @@ -342,8 +342,7 @@ void GraphicsSurfaceWidget::OnUpdate() { surface_width = rt.width; surface_height = rt.height; if (rt.format != Tegra::RenderTargetFormat::NONE) { - surface_format = - ConvertToTextureFormat(static_cast(rt.format)); + surface_format = ConvertToTextureFormat(rt.format); } break; From 5e343edc9e4606ebdf2cceef7a56336e0a92f69c Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 26 Mar 2018 21:06:37 -0400 Subject: [PATCH 29/29] renderer_opengl: Use better naming for DrawScreens and DrawSingleScreen. --- src/video_core/renderer_opengl/renderer_opengl.cpp | 12 ++++++------ src/video_core/renderer_opengl/renderer_opengl.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 82063df72..78b50b227 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -119,7 +119,7 @@ void RendererOpenGL::SwapBuffers(boost::optionalSwapBuffers(); } @@ -293,8 +293,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, state.Apply(); } -void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, - float h) { +void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, + float h) { const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; auto right = texcoords.right; @@ -330,7 +330,7 @@ void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl /** * Draws the emulated screens to the emulator window. */ -void RendererOpenGL::DrawScreens() { +void RendererOpenGL::DrawScreen() { const auto& layout = render_window->GetFramebufferLayout(); const auto& screen = layout.screen; @@ -346,8 +346,8 @@ void RendererOpenGL::DrawScreens() { glActiveTexture(GL_TEXTURE0); glUniform1i(uniform_color_texture, 0); - DrawSingleScreen(screen_info, (float)screen.left, (float)screen.top, (float)screen.GetWidth(), - (float)screen.GetHeight()); + DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top, + (float)screen.GetWidth(), (float)screen.GetHeight()); m_current_frame++; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 29516baf4..fffd0f9f4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -55,8 +55,8 @@ private: void InitOpenGLObjects(); void ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer); - void DrawScreens(); - void DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, float h); + void DrawScreen(); + void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); void UpdateFramerate(); // Loads framebuffer from emulated memory into the display information structure