From 8cd0d9c000e2c3cb072ca001db13f1c12f2a07ea Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 18:37:14 +0100 Subject: [PATCH 01/34] citra-qt: static-constify a map. --- src/citra_qt/debugger/graphics_breakpoints.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp index 53394b6e6..469c3e268 100644 --- a/src/citra_qt/debugger/graphics_breakpoints.cpp +++ b/src/citra_qt/debugger/graphics_breakpoints.cpp @@ -39,15 +39,16 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const switch (index.column()) { case 0: { - std::map map; - map.insert({Pica::DebugContext::Event::CommandLoaded, tr("Pica command loaded")}); - map.insert({Pica::DebugContext::Event::CommandProcessed, tr("Pica command processed")}); - map.insert({Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch")}); - map.insert({Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch")}); + static const std::map map = { + { Pica::DebugContext::Event::CommandLoaded, tr("Pica command loaded") }, + { Pica::DebugContext::Event::CommandProcessed, tr("Pica command processed") }, + { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") }, + { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") }, + }; _dbg_assert_(Debug_GPU, map.size() == static_cast(Pica::DebugContext::Event::NumEvents)); - return map[event]; + return (map.find(event) != map.end()) ? map.at(event) : QString(); } case 1: From 95be6a09b2d93844f3f71396acc40175fd19332c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 01:18:56 +0100 Subject: [PATCH 02/34] BitField: Add an explicit Assign method. This is useful when doing crazy stuff like inheriting from BitField. 
--- src/common/bit_field.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 9e02210f9..3ec061e63 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -142,7 +142,7 @@ public: __forceinline BitField& operator=(T val) { - storage = (storage & ~GetMask()) | (((StorageType)val << position) & GetMask()); + Assign(val); return *this; } @@ -151,6 +151,10 @@ public: return Value(); } + __forceinline void Assign(const T& value) { + storage = (storage & ~GetMask()) | (((StorageType)value << position) & GetMask()); + } + __forceinline T Value() const { if (std::numeric_limits::is_signed) From fd2539121cddd6177a964770a6985f8880ca1646 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 6 Dec 2014 19:10:08 +0100 Subject: [PATCH 03/34] Pica: Initial support for multitexturing. --- src/citra_qt/debugger/graphics_cmdlists.cpp | 39 +++++++++++--- src/video_core/pica.h | 38 +++++++++++-- src/video_core/rasterizer.cpp | 60 ++++++++++++++------- src/video_core/vertex_shader.h | 9 +++- 4 files changed, 115 insertions(+), 31 deletions(-) diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index 7f97cf143..bdd676470 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -223,9 +223,21 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace& void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt(); - if (COMMAND_IN_RANGE(command_id, texture0)) { - auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(Pica::registers.texture0, - Pica::registers.texture0_format); + if (COMMAND_IN_RANGE(command_id, texture0) || + COMMAND_IN_RANGE(command_id, texture1) || + COMMAND_IN_RANGE(command_id, texture2)) { + + unsigned index; + if 
(COMMAND_IN_RANGE(command_id, texture0)) { + index = 0; + } else if (COMMAND_IN_RANGE(command_id, texture1)) { + index = 1; + } else { + index = 2; + } + auto config = Pica::registers.GetTextures()[index].config; + auto format = Pica::registers.GetTextures()[index].format; + auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); // TODO: Instead, emit a signal here to be caught by the main window widget. auto main_window = static_cast(parent()); @@ -237,10 +249,23 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { QWidget* new_info_widget; const int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toInt(); - if (COMMAND_IN_RANGE(command_id, texture0)) { - u8* src = Memory::GetPointer(Pica::registers.texture0.GetPhysicalAddress()); - auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(Pica::registers.texture0, - Pica::registers.texture0_format); + if (COMMAND_IN_RANGE(command_id, texture0) || + COMMAND_IN_RANGE(command_id, texture1) || + COMMAND_IN_RANGE(command_id, texture2)) { + + unsigned index; + if (COMMAND_IN_RANGE(command_id, texture0)) { + index = 0; + } else if (COMMAND_IN_RANGE(command_id, texture1)) { + index = 1; + } else { + index = 2; + } + auto config = Pica::registers.GetTextures()[index].config; + auto format = Pica::registers.GetTextures()[index].format; + + auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); + u8* src = Memory::GetPointer(config.GetPhysicalAddress()); new_info_widget = new TextureInfoWidget(src, info); } else { new_info_widget = new QWidget; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 4c3791ad9..92a87c086 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -155,12 +155,34 @@ struct Regs { } } - BitField< 0, 1, u32> texturing_enable; + union { + BitField< 0, 1, u32> texture0_enable; + BitField< 1, 1, u32> texture1_enable; + BitField< 2, 1, u32> texture2_enable; + }; TextureConfig 
texture0; INSERT_PADDING_WORDS(0x8); BitField<0, 4, TextureFormat> texture0_format; + INSERT_PADDING_WORDS(0x2); + TextureConfig texture1; + BitField<0, 4, TextureFormat> texture1_format; + INSERT_PADDING_WORDS(0x2); + TextureConfig texture2; + BitField<0, 4, TextureFormat> texture2_format; + INSERT_PADDING_WORDS(0x21); - INSERT_PADDING_WORDS(0x31); + struct FullTextureConfig { + const bool enabled; + const TextureConfig config; + const TextureFormat format; + }; + const std::array GetTextures() const { + return {{ + { static_cast(texture0_enable), texture0, texture0_format }, + { static_cast(texture1_enable), texture1, texture1_format }, + { static_cast(texture2_enable), texture2, texture2_format } + }}; + } // 0xc0-0xff: Texture Combiner (akin to glTexEnv) struct TevStageConfig { @@ -556,9 +578,13 @@ struct Regs { ADD_FIELD(viewport_depth_range); ADD_FIELD(viewport_depth_far_plane); ADD_FIELD(viewport_corner); - ADD_FIELD(texturing_enable); + ADD_FIELD(texture0_enable); ADD_FIELD(texture0); ADD_FIELD(texture0_format); + ADD_FIELD(texture1); + ADD_FIELD(texture1_format); + ADD_FIELD(texture2); + ADD_FIELD(texture2_format); ADD_FIELD(tev_stage0); ADD_FIELD(tev_stage1); ADD_FIELD(tev_stage2); @@ -622,9 +648,13 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); -ASSERT_REG_POSITION(texturing_enable, 0x80); +ASSERT_REG_POSITION(texture0_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); +ASSERT_REG_POSITION(texture1, 0x91); +ASSERT_REG_POSITION(texture1_format, 0x96); +ASSERT_REG_POSITION(texture2, 0x99); +ASSERT_REG_POSITION(texture2_format, 0x9e); ASSERT_REG_POSITION(tev_stage0, 0xc0); ASSERT_REG_POSITION(tev_stage1, 0xc8); ASSERT_REG_POSITION(tev_stage2, 0xd0); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index b7e04a560..2ff6d19a6 100644 
--- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -167,10 +167,22 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) }; - Math::Vec4 texture_color{}; - float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); - float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); - if (registers.texturing_enable) { + Math::Vec2 uv[3]; + uv[0].u() = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); + uv[0].v() = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); + uv[1].u() = GetInterpolatedAttribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u()); + uv[1].v() = GetInterpolatedAttribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v()); + uv[2].u() = GetInterpolatedAttribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u()); + uv[2].v() = GetInterpolatedAttribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v()); + + Math::Vec4 texture_color[3]{}; + for (int i = 0; i < 3; ++i) { + auto texture = registers.GetTextures()[i]; + if (!texture.enabled) + continue; + + _dbg_assert_(GPU, 0 != texture.config.address); + // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each // of which is composed of four 2x2 subtiles each of which is composed of four texels. // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. @@ -189,14 +201,11 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, // 02 03 06 07 18 19 22 23 // 00 01 04 05 16 17 20 21 - // TODO: This is currently hardcoded for RGB8 - u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); - // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. // To be flexible in case different but similar patterns are used, we keep this // somewhat inefficient code around for now. 
- int s = (int)(u * float24::FromFloat32(static_cast(registers.texture0.width))).ToFloat32(); - int t = (int)(v * float24::FromFloat32(static_cast(registers.texture0.height))).ToFloat32(); + int s = (int)(uv[i].u() * float24::FromFloat32(static_cast(texture.config.width))).ToFloat32(); + int t = (int)(uv[i].v() * float24::FromFloat32(static_cast(texture.config.height))).ToFloat32(); int texel_index_within_tile = 0; for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { int sub_tile_width = 1 << block_size_index; @@ -213,14 +222,17 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, int coarse_s = (s / block_width) * block_width; int coarse_t = (t / block_height) * block_height; - const int row_stride = registers.texture0.width * 3; - u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; - texture_color.r() = source_ptr[2]; - texture_color.g() = source_ptr[1]; - texture_color.b() = source_ptr[0]; - texture_color.a() = 0xFF; + // TODO: This is currently hardcoded for RGB8 + u32* texture_data = (u32*)Memory::GetPointer(texture.config.GetPhysicalAddress()); - DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data); + const int row_stride = texture.config.width * 3; + u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; + texture_color[i].r() = source_ptr[2]; + texture_color[i].g() = source_ptr[1]; + texture_color[i].b() = source_ptr[0]; + texture_color[i].a() = 0xFF; + + DebugUtils::DumpTexture(texture.config, (u8*)texture_data); } // Texture environment - consists of 6 stages of color and alpha combining. 
@@ -243,7 +255,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, return primary_color.rgb(); case Source::Texture0: - return texture_color.rgb(); + return texture_color[0].rgb(); + + case Source::Texture1: + return texture_color[1].rgb(); + + case Source::Texture2: + return texture_color[2].rgb(); case Source::Constant: return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; @@ -263,7 +281,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, return primary_color.a(); case Source::Texture0: - return texture_color.a(); + return texture_color[0].a(); + + case Source::Texture1: + return texture_color[1].a(); + + case Source::Texture2: + return texture_color[2].a(); case Source::Constant: return tev_stage.const_a; diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index bfb6fb6e3..c1292fc2d 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -27,15 +27,18 @@ struct OutputVertex { Math::Vec4 dummy; // quaternions (not implemented, yet) Math::Vec4 color; Math::Vec2 tc0; + Math::Vec2 tc1; + float24 pad[6]; + Math::Vec2 tc2; // Padding for optimal alignment - float24 pad[14]; + float24 pad2[4]; // Attributes used to store intermediate results // position after perspective divide Math::Vec3 screenpos; - float24 pad2; + float24 pad3; // Linear interpolation // factor: 0=this, 1=vtx @@ -44,6 +47,8 @@ struct OutputVertex { // TODO: Should perform perspective correct interpolation here... tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); + tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); + tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); From 782592e6d393f4e38db5db58daba3f7fbf1786b4 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 15 Dec 2014 22:09:48 +0100 Subject: [PATCH 04/34] citra-qt: Fix invalid memory read upon program startup. 
This was caused by the framebuffer display widget not checking whether we are actually in a valid emulation state or not. --- src/citra_qt/debugger/graphics_framebuffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index 61b61ef6d..c055299a4 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp @@ -125,7 +125,8 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptrat_breakpoint) + emit Update(); widget()->setEnabled(false); // TODO: Only enable if currently at breakpoint } From c81f1a9ebc9a5f9df9add64e282d9a0c0da96e79 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 6 Dec 2014 21:20:56 +0100 Subject: [PATCH 05/34] Pica/DebugUtils: Add support for RGBA8, RGBA5551, RGBA4 and A8 texture formats. --- src/video_core/debug_utils/debug_utils.cpp | 49 ++++++++++++++++++++-- src/video_core/pica.h | 2 + 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 1a20f19ec..89bf08b99 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -357,7 +357,6 @@ std::unique_ptr FinishPicaTracing() } const Math::Vec4 LookupTexture(const u8* source, int x, int y, const TextureInfo& info) { - _dbg_assert_(Debug_GPU, info.format == Pica::Regs::TextureFormat::RGB8); // Cf. rasterizer code for an explanation of this algorithm. 
int texel_index_within_tile = 0; @@ -376,8 +375,52 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture int coarse_x = (x / block_width) * block_width; int coarse_y = (y / block_height) * block_height; - const u8* source_ptr = source + coarse_x * block_height * 3 + coarse_y * info.stride + texel_index_within_tile * 3; - return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; + switch (info.format) { + case Regs::TextureFormat::RGBA8: + { + const u8* source_ptr = source + coarse_x * block_height * 4 + coarse_y * info.stride + texel_index_within_tile * 4; + return { source_ptr[3], source_ptr[2], source_ptr[1], 255 }; + } + + case Regs::TextureFormat::RGB8: + { + const u8* source_ptr = source + coarse_x * block_height * 3 + coarse_y * info.stride + texel_index_within_tile * 3; + return { source_ptr[2], source_ptr[1], source_ptr[0], 255 }; + } + + case Regs::TextureFormat::RGBA5551: + { + const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2); + u8 r = (source_ptr >> 11) & 0x1F; + u8 g = ((source_ptr) >> 6) & 0x1F; + u8 b = (source_ptr >> 1) & 0x1F; + u8 a = 1; + return Math::MakeVec((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), a * 255); + } + + case Regs::TextureFormat::RGBA4: + { + const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; + u8 r = source_ptr[1] >> 4; + u8 g = source_ptr[1] & 0xFF; + u8 b = source_ptr[0] >> 4; + r = (r << 4) | r; + g = (g << 4) | g; + b = (b << 4) | b; + return { r, g, b, 255 }; + } + + case Regs::TextureFormat::A8: + { + const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; + return { *source_ptr, *source_ptr, *source_ptr, 255 }; + } + + default: + LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format); + _dbg_assert_(HW_GPU, 0); + return {}; + } } TextureInfo 
TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 92a87c086..7ed49caed 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -132,6 +132,8 @@ struct Regs { RGB565 = 3, RGBA4 = 4, + A8 = 8, + // TODO: Support for the other formats is not implemented, yet. // Seems like they are luminance formats and compressed textures. }; From 0fba1d48a6ab7a9fa19ce65ec864da212ceb501a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 4 Dec 2014 17:37:59 +0100 Subject: [PATCH 06/34] Pica: Implement texture wrapping. --- src/video_core/pica.h | 12 +++++++++++- src/video_core/rasterizer.cpp | 21 ++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 7ed49caed..ec20114fe 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -104,6 +104,11 @@ struct Regs { INSERT_PADDING_WORDS(0x17); struct TextureConfig { + enum WrapMode : u32 { + ClampToEdge = 0, + Repeat = 2, + }; + INSERT_PADDING_WORDS(0x1); union { @@ -111,7 +116,12 @@ struct Regs { BitField<16, 16, u32> width; }; - INSERT_PADDING_WORDS(0x2); + union { + BitField< 8, 2, WrapMode> wrap_s; + BitField<11, 2, WrapMode> wrap_t; + }; + + INSERT_PADDING_WORDS(0x1); u32 address; diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 2ff6d19a6..e12f68a7f 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -181,7 +181,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, if (!texture.enabled) continue; - _dbg_assert_(GPU, 0 != texture.config.address); + _dbg_assert_(HW_GPU, 0 != texture.config.address); // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each // of which is composed of four 2x2 subtiles each of which is composed of four texels. 
@@ -206,6 +206,25 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, // somewhat inefficient code around for now. int s = (int)(uv[i].u() * float24::FromFloat32(static_cast(texture.config.width))).ToFloat32(); int t = (int)(uv[i].v() * float24::FromFloat32(static_cast(texture.config.height))).ToFloat32(); + auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { + switch (mode) { + case Regs::TextureConfig::ClampToEdge: + val = std::max(val, 0); + val = std::min(val, (int)size - 1); + return val; + + case Regs::TextureConfig::Repeat: + return (int)(((unsigned)val) % size); + + default: + LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x\n", (int)mode); + _dbg_assert_(HW_GPU, 0); + return 0; + } + }; + s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); + t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); + int texel_index_within_tile = 0; for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { int sub_tile_width = 1 << block_size_index; From 3df88d59b0ba43f1c3360cfdaaccd461cacff72c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 6 Dec 2014 21:52:21 +0100 Subject: [PATCH 07/34] Pica: Merge texture lookup logic for DebugUtils and Rasterizer. This effectively adds support for a lot of texture formats in the rasterizer. 
--- src/citra_qt/debugger/graphics_cmdlists.cpp | 2 +- src/video_core/debug_utils/debug_utils.cpp | 42 +++++++++++++++--- src/video_core/debug_utils/debug_utils.h | 3 +- src/video_core/rasterizer.cpp | 49 ++------------------- 4 files changed, 41 insertions(+), 55 deletions(-) diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index bdd676470..01ff31d44 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -24,7 +24,7 @@ QImage LoadTexture(u8* src, const Pica::DebugUtils::TextureInfo& info) { QImage decoded_image(info.width, info.height, QImage::Format_ARGB32); for (int y = 0; y < info.height; ++y) { for (int x = 0; x < info.width; ++x) { - Math::Vec4 color = Pica::DebugUtils::LookupTexture(src, x, y, info); + Math::Vec4 color = Pica::DebugUtils::LookupTexture(src, x, y, info, true); decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); } } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 89bf08b99..6c26138da 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -356,9 +356,29 @@ std::unique_ptr FinishPicaTracing() return std::move(ret); } -const Math::Vec4 LookupTexture(const u8* source, int x, int y, const TextureInfo& info) { +const Math::Vec4 LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { - // Cf. rasterizer code for an explanation of this algorithm. + // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each + // of which is composed of four 2x2 subtiles each of which is composed of four texels. + // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. 
+ // texels are laid out in a 2x2 subtile like this: + // 2 3 + // 0 1 + // + // The full 8x8 tile has the texels arranged like this: + // + // 42 43 46 47 58 59 62 63 + // 40 41 44 45 56 57 60 61 + // 34 35 38 39 50 51 54 55 + // 32 33 36 37 48 49 52 53 + // 10 11 14 15 26 27 30 31 + // 08 09 12 13 24 25 28 29 + // 02 03 06 07 18 19 22 23 + // 00 01 04 05 16 17 20 21 + + // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. + // To be flexible in case different but similar patterns are used, we keep this + // somewhat inefficient code around for now. int texel_index_within_tile = 0; for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { int sub_tile_width = 1 << block_size_index; @@ -379,7 +399,7 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture case Regs::TextureFormat::RGBA8: { const u8* source_ptr = source + coarse_x * block_height * 4 + coarse_y * info.stride + texel_index_within_tile * 4; - return { source_ptr[3], source_ptr[2], source_ptr[1], 255 }; + return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? 255 : source_ptr[0] }; } case Regs::TextureFormat::RGB8: @@ -394,8 +414,8 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture u8 r = (source_ptr >> 11) & 0x1F; u8 g = ((source_ptr) >> 6) & 0x1F; u8 b = (source_ptr >> 1) & 0x1F; - u8 a = 1; - return Math::MakeVec((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), a * 255); + u8 a = source_ptr & 1; + return Math::MakeVec((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 
255 : (a * 255)); } case Regs::TextureFormat::RGBA4: @@ -404,16 +424,24 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture u8 r = source_ptr[1] >> 4; u8 g = source_ptr[1] & 0xFF; u8 b = source_ptr[0] >> 4; + u8 a = source_ptr[0] & 0xFF; r = (r << 4) | r; g = (g << 4) | g; b = (b << 4) | b; - return { r, g, b, 255 }; + a = (a << 4) | a; + return { r, g, b, disable_alpha ? 255 : a }; } case Regs::TextureFormat::A8: { const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; - return { *source_ptr, *source_ptr, *source_ptr, 255 }; + + // TODO: Better control this... + if (disable_alpha) { + return { *source_ptr, *source_ptr, *source_ptr, 255 }; + } else { + return { 0, 0, 0, *source_ptr }; + } } default: diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 51f14f12f..f950356f3 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -202,7 +202,8 @@ struct TextureInfo { const Pica::Regs::TextureFormat& format); }; -const Math::Vec4 LookupTexture(const u8* source, int x, int y, const TextureInfo& info); +const Math::Vec4 LookupTexture(const u8* source, int x, int y, const TextureInfo& info, + bool disable_alpha = false); void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); void DumpTevStageConfig(const std::array& stages); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index e12f68a7f..aa2bc93ec 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -183,27 +183,6 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, _dbg_assert_(HW_GPU, 0 != texture.config.address); - // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each - // of which is composed of four 2x2 subtiles each of which is composed of four texels. 
- // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. - // texels are laid out in a 2x2 subtile like this: - // 2 3 - // 0 1 - // - // The full 8x8 tile has the texels arranged like this: - // - // 42 43 46 47 58 59 62 63 - // 40 41 44 45 56 57 60 61 - // 34 35 38 39 50 51 54 55 - // 32 33 36 37 48 49 52 53 - // 10 11 14 15 26 27 30 31 - // 08 09 12 13 24 25 28 29 - // 02 03 06 07 18 19 22 23 - // 00 01 04 05 16 17 20 21 - - // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. - // To be flexible in case different but similar patterns are used, we keep this - // somewhat inefficient code around for now. int s = (int)(uv[i].u() * float24::FromFloat32(static_cast(texture.config.width))).ToFloat32(); int t = (int)(uv[i].v() * float24::FromFloat32(static_cast(texture.config.height))).ToFloat32(); auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { @@ -225,32 +204,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); - int texel_index_within_tile = 0; - for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { - int sub_tile_width = 1 << block_size_index; - int sub_tile_height = 1 << block_size_index; - - int sub_tile_index = (s & sub_tile_width) << block_size_index; - sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); - texel_index_within_tile += sub_tile_index; - } - - const int block_width = 8; - const int block_height = 8; - - int coarse_s = (s / block_width) * block_width; - int coarse_t = (t / block_height) * block_height; - - // TODO: This is currently hardcoded for RGB8 - u32* texture_data = (u32*)Memory::GetPointer(texture.config.GetPhysicalAddress()); - - const int row_stride = texture.config.width * 3; - u8* source_ptr = (u8*)texture_data + coarse_s * 
block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; - texture_color[i].r() = source_ptr[2]; - texture_color[i].g() = source_ptr[1]; - texture_color[i].b() = source_ptr[0]; - texture_color[i].a() = 0xFF; + u8* texture_data = Memory::GetPointer(texture.config.GetPhysicalAddress()); + auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); + texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); DebugUtils::DumpTexture(texture.config, (u8*)texture_data); } From 7e210e0229b9caef77c80fea7c056c3913e68129 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 24 Oct 2014 00:58:04 +0200 Subject: [PATCH 08/34] Pica: Further improve Tev emulation. --- src/video_core/debug_utils/debug_utils.cpp | 10 ++++- src/video_core/pica.h | 1 + src/video_core/rasterizer.cpp | 52 +++++++++++++++++----- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 6c26138da..3cc22f436 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -573,20 +573,26 @@ void DumpTevStageConfig(const std::array& stages) const std::map source_map = { { Source::PrimaryColor, "PrimaryColor" }, { Source::Texture0, "Texture0" }, + { Source::Texture1, "Texture1" }, + { Source::Texture2, "Texture2" }, { Source::Constant, "Constant" }, { Source::Previous, "Previous" }, }; const std::map color_modifier_map = { - { ColorModifier::SourceColor, { "%source.rgb" } } + { ColorModifier::SourceColor, { "%source.rgb" } }, + { ColorModifier::SourceAlpha, { "%source.aaa" } }, }; const std::map alpha_modifier_map = { - { AlphaModifier::SourceAlpha, "%source.a" } + { AlphaModifier::SourceAlpha, "%source.a" }, + { AlphaModifier::OneMinusSourceAlpha, "(255 - %source.a)" }, }; std::map combiner_map = { { Operation::Replace, "%source1" }, { Operation::Modulate, "(%source1 * %source2) / 255" }, + { Operation::Add, 
"(%source1 + %source2)" }, + { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, }; auto ReplacePattern = diff --git a/src/video_core/pica.h b/src/video_core/pica.h index ec20114fe..5712439e1 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "common/bit_field.h" #include "common/common_types.h" diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index aa2bc93ec..25efd49c8 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -225,28 +225,29 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, using AlphaModifier = Regs::TevStageConfig::AlphaModifier; using Operation = Regs::TevStageConfig::Operation; - auto GetColorSource = [&](Source source) -> Math::Vec3 { + auto GetColorSource = [&](Source source) -> Math::Vec4 { switch (source) { case Source::PrimaryColor: - return primary_color.rgb(); + return primary_color; case Source::Texture0: - return texture_color[0].rgb(); + return texture_color[0]; case Source::Texture1: - return texture_color[1].rgb(); + return texture_color[1]; case Source::Texture2: - return texture_color[2].rgb(); + return texture_color[2]; case Source::Constant: - return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; + return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; case Source::Previous: - return combiner_output.rgb(); + return combiner_output; default: LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); + _dbg_assert_(HW_GPU, 0); return {}; } }; @@ -273,17 +274,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, default: LOG_ERROR(HW_GPU, "Unknown alpha combiner source %d\n", (int)source); + _dbg_assert_(HW_GPU, 0); return 0; } }; - auto GetColorModifier = [](ColorModifier factor, const Math::Vec3& values) -> Math::Vec3 { + auto GetColorModifier = [](ColorModifier factor, const Math::Vec4& values) -> Math::Vec3 { switch 
(factor) { case ColorModifier::SourceColor: - return values; + return values.rgb(); + + case ColorModifier::SourceAlpha: + return { values.a(), values.a(), values.a() }; + default: LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor); + _dbg_assert_(HW_GPU, 0); return {}; } }; @@ -292,8 +299,13 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, switch (factor) { case AlphaModifier::SourceAlpha: return value; + + case AlphaModifier::OneMinusSourceAlpha: + return 255 - value; + default: - LOG_ERROR(HW_GPU, "Unknown color factor %d\n", (int)factor); + LOG_ERROR(HW_GPU, "Unknown alpha factor %d\n", (int)factor); + _dbg_assert_(HW_GPU, 0); return 0; } }; @@ -306,8 +318,21 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Operation::Modulate: return ((input[0] * input[1]) / 255).Cast(); + case Operation::Add: + { + auto result = input[0] + input[1]; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + return result.Cast(); + } + + case Operation::Lerp: + return ((input[0] * input[2] + input[1] * (Math::MakeVec(255, 255, 255) - input[2]).Cast()) / 255).Cast(); + default: LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); + _dbg_assert_(HW_GPU, 0); return {}; } }; @@ -320,8 +345,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Operation::Modulate: return input[0] * input[1] / 255; + case Operation::Add: + return std::min(255, input[0] + input[1]); + + case Operation::Lerp: + return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; + default: LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); + _dbg_assert_(HW_GPU, 0); return 0; } }; From 40f123b7c0eaf1507d51f6b87192ec2f956e5d5e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Mon, 15 Dec 2014 21:28:45 +0100 Subject: [PATCH 09/34] Pica: Unify ugly address translation hacks. 
--- src/citra_qt/debugger/graphics_cmdlists.cpp | 8 +++--- .../debugger/graphics_framebuffer.cpp | 8 +++--- src/video_core/command_processor.cpp | 4 +-- src/video_core/debug_utils/debug_utils.cpp | 2 +- src/video_core/debug_utils/debug_utils.h | 2 +- src/video_core/pica.h | 25 +++++++++++++------ src/video_core/rasterizer.cpp | 8 +++--- 7 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index 01ff31d44..bf35f035f 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -47,7 +47,7 @@ public: }; TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo& info, QWidget* parent) - : QDockWidget(tr("Texture 0x%1").arg(info.address, 8, 16, QLatin1Char('0'))), + : QDockWidget(tr("Texture 0x%1").arg(info.physical_address, 8, 16, QLatin1Char('0'))), info(info) { QWidget* main_widget = new QWidget; @@ -60,7 +60,7 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo phys_address_spinbox->SetBase(16); phys_address_spinbox->SetRange(0, 0xFFFFFFFF); phys_address_spinbox->SetPrefix("0x"); - phys_address_spinbox->SetValue(info.address); + phys_address_spinbox->SetValue(info.physical_address); connect(phys_address_spinbox, SIGNAL(ValueChanged(qint64)), this, SLOT(OnAddressChanged(qint64))); QComboBox* format_choice = new QComboBox; @@ -125,7 +125,7 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo } void TextureInfoDockWidget::OnAddressChanged(qint64 value) { - info.address = value; + info.physical_address = value; emit UpdatePixmap(ReloadPixmap()); } @@ -150,7 +150,7 @@ void TextureInfoDockWidget::OnStrideChanged(int value) { } QPixmap TextureInfoDockWidget::ReloadPixmap() const { - u8* src = Memory::GetPointer(info.address); + u8* src = Memory::GetPointer(Pica::PAddrToVAddr(info.physical_address)); return QPixmap::fromImage(LoadTexture(src, 
info)); } diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index c055299a4..484be1db5 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp @@ -199,7 +199,7 @@ void GraphicsFramebufferWidget::OnUpdate() auto framebuffer = Pica::registers.framebuffer; using Framebuffer = decltype(framebuffer); - framebuffer_address = framebuffer.GetColorBufferAddress(); + framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); framebuffer_width = framebuffer.GetWidth(); framebuffer_height = framebuffer.GetHeight(); framebuffer_format = static_cast(framebuffer.color_format); @@ -224,7 +224,7 @@ void GraphicsFramebufferWidget::OnUpdate() case Format::RGBA8: { QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); - u32* color_buffer = (u32*)Memory::GetPointer(framebuffer_address); + u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); for (unsigned y = 0; y < framebuffer_height; ++y) { for (unsigned x = 0; x < framebuffer_width; ++x) { u32 value = *(color_buffer + x + y * framebuffer_width); @@ -239,7 +239,7 @@ void GraphicsFramebufferWidget::OnUpdate() case Format::RGB8: { QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); - u8* color_buffer = Memory::GetPointer(framebuffer_address); + u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); for (unsigned y = 0; y < framebuffer_height; ++y) { for (unsigned x = 0; x < framebuffer_width; ++x) { u8* pixel_pointer = color_buffer + x * 3 + y * 3 * framebuffer_width; @@ -254,7 +254,7 @@ void GraphicsFramebufferWidget::OnUpdate() case Format::RGBA5551: { QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32); - u32* color_buffer = (u32*)Memory::GetPointer(framebuffer_address); + u32* color_buffer = (u32*)Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address)); for 
(unsigned y = 0; y < framebuffer_height; ++y) { for (unsigned x = 0; x < framebuffer_width; ++x) { u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index b74cd3261..3d06ac7e6 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -56,7 +56,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); const auto& attribute_config = registers.vertex_attributes; - const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); + const u8* const base_address = Memory::GetPointer(PAddrToVAddr(attribute_config.GetPhysicalBaseAddress())); // Information about internal vertex attributes const u8* vertex_attribute_sources[16]; @@ -116,7 +116,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { input.attr[i][comp] = float24::FromFloat32(srcval); LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", comp, i, vertex, index, - attribute_config.GetBaseAddress(), + PAddrToVAddr(attribute_config.GetPhysicalBaseAddress()), vertex_attribute_sources[i] - base_address, srcdata - vertex_attribute_sources[i], input.attr[i][comp].ToFloat32()); diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 3cc22f436..08ecd4ccb 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -455,7 +455,7 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, const Regs::TextureFormat& format) { TextureInfo info; - info.address = config.GetPhysicalAddress(); + info.physical_address = config.GetPhysicalAddress(); info.width = config.width; info.height = config.height; info.format = format; diff --git 
a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index f950356f3..2a764e121 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -192,7 +192,7 @@ void OnPicaRegWrite(u32 id, u32 value); std::unique_ptr FinishPicaTracing(); struct TextureInfo { - unsigned int address; + PAddr physical_address; int width; int height; int stride; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 5712439e1..7d82d733d 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -127,7 +127,7 @@ struct Regs { u32 address; u32 GetPhysicalAddress() const { - return DecodeAddressRegister(address) - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR; + return DecodeAddressRegister(address); } // texture1 and texture2 store the texture format directly after the address @@ -317,11 +317,11 @@ struct Regs { INSERT_PADDING_WORDS(0x1); - inline u32 GetColorBufferAddress() const { - return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(color_buffer_address)); + inline u32 GetColorBufferPhysicalAddress() const { + return DecodeAddressRegister(color_buffer_address); } - inline u32 GetDepthBufferAddress() const { - return Memory::PhysicalToVirtualAddress(DecodeAddressRegister(depth_buffer_address)); + inline u32 GetDepthBufferPhysicalAddress() const { + return DecodeAddressRegister(depth_buffer_address); } inline u32 GetWidth() const { @@ -345,9 +345,8 @@ struct Regs { BitField<0, 29, u32> base_address; - inline u32 GetBaseAddress() const { - // TODO: Ugly, should fix PhysicalToVirtualAddress instead - return DecodeAddressRegister(base_address) - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR; + u32 GetPhysicalBaseAddress() const { + return DecodeAddressRegister(base_address); } // Descriptor for internal vertex attributes @@ -779,5 +778,15 @@ union CommandHeader { BitField<31, 1, u32> group_commands; }; +// TODO: Ugly, should fix PhysicalToVirtualAddress instead +inline static u32 
PAddrToVAddr(u32 addr) { + if (addr >= Memory::VRAM_PADDR && addr < Memory::VRAM_PADDR + Memory::VRAM_SIZE) { + return addr - Memory::VRAM_PADDR + Memory::VRAM_VADDR; + } else if (addr >= Memory::FCRAM_PADDR && addr < Memory::FCRAM_PADDR + Memory::FCRAM_SIZE) { + return addr - Memory::FCRAM_PADDR + Memory::HEAP_LINEAR_VADDR; + } else { + return 0; + } +} } // namespace diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 25efd49c8..bd79e4413 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -18,7 +18,7 @@ namespace Pica { namespace Rasterizer { static void DrawPixel(int x, int y, const Math::Vec4& color) { - u32* color_buffer = (u32*)Memory::GetPointer(registers.framebuffer.GetColorBufferAddress()); + u32* color_buffer = (u32*)Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())); u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); // Assuming RGBA8 format until actual framebuffer format handling is implemented @@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4& color) { } static u32 GetDepth(int x, int y) { - u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); + u16* depth_buffer = (u16*)Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())); // Assuming 16-bit depth buffer format until actual format handling is implemented return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); } static void SetDepth(int x, int y, u16 value) { - u16* depth_buffer = (u16*)Memory::GetPointer(registers.framebuffer.GetDepthBufferAddress()); + u16* depth_buffer = (u16*)Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())); // Assuming 16-bit depth buffer format until actual format handling is implemented *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; @@ -204,7 +204,7 @@ void ProcessTriangle(const 
VertexShader::OutputVertex& v0, s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); - u8* texture_data = Memory::GetPointer(texture.config.GetPhysicalAddress()); + u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); From 1c972ef3b93252a157ec15d0878a2be3e4b46a0e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 10 Dec 2014 21:51:00 +0100 Subject: [PATCH 10/34] Add support for a ridiculous number of texture formats. --- src/citra_qt/debugger/graphics_cmdlists.cpp | 9 ++- src/video_core/debug_utils/debug_utils.cpp | 65 ++++++++++++++++++++- src/video_core/pica.h | 22 +++++-- 3 files changed, 88 insertions(+), 8 deletions(-) diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index bf35f035f..95187e54d 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -69,6 +69,13 @@ TextureInfoDockWidget::TextureInfoDockWidget(const Pica::DebugUtils::TextureInfo format_choice->addItem(tr("RGBA5551")); format_choice->addItem(tr("RGB565")); format_choice->addItem(tr("RGBA4")); + format_choice->addItem(tr("IA8")); + format_choice->addItem(tr("UNK6")); + format_choice->addItem(tr("I8")); + format_choice->addItem(tr("A8")); + format_choice->addItem(tr("IA4")); + format_choice->addItem(tr("UNK10")); + format_choice->addItem(tr("A4")); format_choice->setCurrentIndex(static_cast(info.format)); connect(format_choice, SIGNAL(currentIndexChanged(int)), this, SLOT(OnFormatChanged(int))); @@ -265,7 +272,7 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { auto format = Pica::registers.GetTextures()[index].format; auto info = 
Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); - u8* src = Memory::GetPointer(config.GetPhysicalAddress()); + u8* src = Memory::GetPointer(Pica::PAddrToVAddr(config.GetPhysicalAddress())); new_info_widget = new TextureInfoWidget(src, info); } else { new_info_widget = new QWidget; diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 08ecd4ccb..1a7b851d5 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -418,6 +418,15 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture return Math::MakeVec((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255)); } + case Regs::TextureFormat::RGB565: + { + const u16 source_ptr = *(const u16*)(source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2); + u8 r = (source_ptr >> 11) & 0x1F; + u8 g = ((source_ptr) >> 5) & 0x3F; + u8 b = (source_ptr) & 0x1F; + return Math::MakeVec((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255); + } + case Regs::TextureFormat::RGBA4: { const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; @@ -432,6 +441,26 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture return { r, g, b, disable_alpha ? 255 : a }; } + case Regs::TextureFormat::IA8: + { + const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; + + // TODO: Better control this... + if (disable_alpha) { + return { *source_ptr, *(source_ptr+1), 0, 255 }; + } else { + return { *source_ptr, *source_ptr, *source_ptr, *(source_ptr+1)}; + } + } + + case Regs::TextureFormat::I8: + { + const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; + + // TODO: Better control this... 
+ return { *source_ptr, *source_ptr, *source_ptr, 255 }; + } + case Regs::TextureFormat::A8: { const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; @@ -444,6 +473,40 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture } } + case Regs::TextureFormat::IA4: + { + const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; + + // TODO: Order? + u8 i = (*source_ptr)&0xF; + u8 a = ((*source_ptr) & 0xF0) >> 4; + a |= a << 4; + i |= i << 4; + + // TODO: Better control this... + if (disable_alpha) { + return { i, a, 0, 255 }; + } else { + return { i, i, i, a }; + } + } + + case Regs::TextureFormat::A4: + { + const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; + + // TODO: Order? + u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4); + a |= a << 4; + + // TODO: Better control this... + if (disable_alpha) { + return { *source_ptr, *source_ptr, *source_ptr, 255 }; + } else { + return { 0, 0, 0, *source_ptr }; + } + } + default: LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format); _dbg_assert_(HW_GPU, 0); @@ -459,7 +522,7 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, info.width = config.width; info.height = config.height; info.format = format; - info.stride = Pica::Regs::BytesPerPixel(info.format) * info.width; + info.stride = Pica::Regs::NibblesPerPixel(info.format) * info.width / 2; return info; } diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 7d82d733d..583614328 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -142,29 +142,39 @@ struct Regs { RGBA5551 = 2, RGB565 = 3, RGBA4 = 4, + IA8 = 5, + I8 = 7, A8 = 8, + IA4 = 9, + A4 = 11, // TODO: Support for the other formats is not implemented, yet. // Seems like they are luminance formats and compressed textures. 
}; - static unsigned BytesPerPixel(TextureFormat format) { + static unsigned NibblesPerPixel(TextureFormat format) { switch (format) { case TextureFormat::RGBA8: - return 4; + return 8; case TextureFormat::RGB8: - return 3; + return 6; case TextureFormat::RGBA5551: case TextureFormat::RGB565: case TextureFormat::RGBA4: - return 2; + case TextureFormat::IA8: + return 4; - default: - // placeholder for yet unknown formats + case TextureFormat::A4: return 1; + + case TextureFormat::I8: + case TextureFormat::A8: + case TextureFormat::IA4: + default: // placeholder for yet unknown formats + return 2; } } From 1e960e9ee280eff2e94873bfc6f888a1ccbc30a4 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 7 Dec 2014 00:25:51 +0100 Subject: [PATCH 11/34] Pica/CommandProcessor: Fix vertex decoding if multiple memory areas are accessed for different attributes. --- src/video_core/command_processor.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 3d06ac7e6..d4559fad6 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -56,10 +56,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); const auto& attribute_config = registers.vertex_attributes; - const u8* const base_address = Memory::GetPointer(PAddrToVAddr(attribute_config.GetPhysicalBaseAddress())); + const u32 base_address = attribute_config.GetPhysicalBaseAddress(); // Information about internal vertex attributes - const u8* vertex_attribute_sources[16]; + u32 vertex_attribute_sources[16]; + std::fill(vertex_attribute_sources, &vertex_attribute_sources[16], 0xdeadbeef); u32 vertex_attribute_strides[16]; u32 vertex_attribute_formats[16]; u32 vertex_attribute_elements[16]; @@ -69,7 +70,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { for (int loader = 0; 
loader < 12; ++loader) { const auto& loader_config = attribute_config.attribute_loaders[loader]; - const u8* load_address = base_address + loader_config.data_offset; + u32 load_address = base_address + loader_config.data_offset; // TODO: What happens if a loader overwrites a previous one's data? for (unsigned component = 0; component < loader_config.component_count; ++component) { @@ -87,7 +88,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); const auto& index_info = registers.index_array; - const u8* index_address_8 = (u8*)base_address + index_info.offset; + const u8* index_address_8 = Memory::GetPointer(PAddrToVAddr(base_address + index_info.offset)); const u16* index_address_16 = (u16*)index_address_8; bool index_u16 = (bool)index_info.format; @@ -108,7 +109,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - const u8* srcdata = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; + const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i])); const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata : (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata : (vertex_attribute_formats[i] == 2) ? 
*(s16*)srcdata : @@ -116,9 +117,9 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { input.attr[i][comp] = float24::FromFloat32(srcval); LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", comp, i, vertex, index, - PAddrToVAddr(attribute_config.GetPhysicalBaseAddress()), + attribute_config.GetPhysicalBaseAddress(), vertex_attribute_sources[i] - base_address, - srcdata - vertex_attribute_sources[i], + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], input.attr[i][comp].ToFloat32()); } } From 346012f29e244489681d2cdf2cf6291d04fbed33 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 00:01:08 +0100 Subject: [PATCH 12/34] Pica/CommandProcessor: Add a safety check for invalid (?) GPU configurations. --- src/video_core/command_processor.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d4559fad6..d8bddd569 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -110,6 +110,13 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i])); + + // TODO(neobrain): Ocarina of Time 3D has GetNumTotalAttributes return 8, + // yet only provides 2 valid source data addresses. Need to figure out + // what's wrong there, until then we just continue when address lookup fails + if (srcdata == nullptr) + continue; + const float srcval = (vertex_attribute_formats[i] == 0) ? *(s8*)srcdata : (vertex_attribute_formats[i] == 1) ? *(u8*)srcdata : (vertex_attribute_formats[i] == 2) ? 
*(s16*)srcdata : From cd322e328ec6957b6744ee9b68d29b2c29a554df Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 7 Dec 2014 00:26:48 +0100 Subject: [PATCH 13/34] Pica/PrimitiveAssembly: Implement triangle strips. --- src/video_core/primitive_assembly.cpp | 23 +++++++++++++++-------- src/video_core/primitive_assembly.h | 1 + 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 102693ed9..ff46c7b52 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -30,20 +30,27 @@ void PrimitiveAssembler::SubmitVertex(VertexType& vtx, TriangleHandl } break; + case Regs::TriangleTopology::Strip: case Regs::TriangleTopology::Fan: - if (buffer_index == 2) { - buffer_index = 0; + if (strip_ready) { + // TODO: Should be "buffer[0], buffer[1], vtx" instead! + // Not quite sure why we need this order for things to show up properly. + // Maybe a bug in the rasterizer? 
+ triangle_handler(buffer[1], buffer[0], vtx); + } + buffer[buffer_index] = vtx; - triangle_handler(buffer[0], buffer[1], vtx); - - buffer[1] = vtx; - } else { - buffer[buffer_index++] = vtx; + if (topology == Regs::TriangleTopology::Strip) { + strip_ready |= (buffer_index == 1); + buffer_index = !buffer_index; + } else if (topology == Regs::TriangleTopology::Fan) { + buffer_index = 1; + strip_ready = true; } break; default: - LOG_ERROR(Render_Software, "Unknown triangle topology %x:", (int)topology); + LOG_ERROR(HW_GPU, "Unknown triangle topology %x:", (int)topology); break; } } diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index ea2e2f61e..decf0fd64 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -37,6 +37,7 @@ private: int buffer_index; VertexType buffer[2]; + bool strip_ready = false; }; From 79c29243ed94fb247dfa5a60e1863a8f64f11669 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Wed, 10 Dec 2014 17:31:50 +0100 Subject: [PATCH 14/34] Pica/DebugUtils: Add an event triggered after loading a vertex. 
--- src/citra_qt/debugger/graphics_breakpoints.cpp | 1 + src/video_core/command_processor.cpp | 3 +++ src/video_core/debug_utils/debug_utils.h | 1 + 3 files changed, 5 insertions(+) diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp index 469c3e268..4cb41db22 100644 --- a/src/citra_qt/debugger/graphics_breakpoints.cpp +++ b/src/citra_qt/debugger/graphics_breakpoints.cpp @@ -44,6 +44,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const { Pica::DebugContext::Event::CommandProcessed, tr("Pica command processed") }, { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") }, { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") }, + { Pica::DebugContext::Event::VertexLoaded, tr("Vertex Loaded") } }; _dbg_assert_(Debug_GPU, map.size() == static_cast(Pica::DebugContext::Event::NumEvents)); diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d8bddd569..4f82694fd 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -131,6 +131,9 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } } + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); + // NOTE: When dumping geometry, we simply assume that the first input attribute // corresponds to the position for now. 
DebugUtils::GeometryDumper::Vertex dumped_vertex = { diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 2a764e121..f9be90115 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -26,6 +26,7 @@ public: CommandProcessed, IncomingPrimitiveBatch, FinishedPrimitiveBatch, + VertexLoaded, NumEvents }; From 056a8f9dfa5f3da3bc3dfc0b032283db0ff6ab15 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Thu, 27 Nov 2014 19:09:37 +0100 Subject: [PATCH 15/34] Add nihstro (a 3DS shader tools suite) as a submodule. --- .gitmodules | 3 +++ CMakeLists.txt | 2 ++ externals/nihstro | 1 + 3 files changed, 6 insertions(+) create mode 160000 externals/nihstro diff --git a/.gitmodules b/.gitmodules index 54714e5cd..a9e0a5c1a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "externals/boost"] path = externals/boost url = https://github.com/citra-emu/ext-boost.git +[submodule "externals/nihstro"] + path = externals/nihstro + url = https://github.com/neobrain/nihstro.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 61d5d524a..638b468a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,6 +141,8 @@ set(INI_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/externals/inih") include_directories(${INI_PREFIX}) add_subdirectory(${INI_PREFIX}) +include_directories(externals/nihstro/include) + # process subdirectories if(ENABLE_QT) include_directories(externals/qhexedit) diff --git a/externals/nihstro b/externals/nihstro new file mode 160000 index 000000000..fc71f8684 --- /dev/null +++ b/externals/nihstro @@ -0,0 +1 @@ +Subproject commit fc71f8684d26ccf277ad68809c8bd7273141fe89 From 8ce1d324602001e1102648319a9281ee08a1af95 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 00:32:49 +0100 Subject: [PATCH 16/34] Pica/VertexShader: Remove (now) duplicated shader bytecode definitions in favor of nihstro's ones. 
--- src/video_core/vertex_shader.cpp | 43 +++++-- src/video_core/vertex_shader.h | 209 ------------------------------- 2 files changed, 30 insertions(+), 222 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 477e78cfe..064a703eb 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -8,11 +8,18 @@ #include +#include + #include "debug_utils/debug_utils.h" #include "pica.h" #include "vertex_shader.h" +using nihstro::Instruction; +using nihstro::RegisterType; +using nihstro::SourceRegister; +using nihstro::SwizzlePattern; + namespace Pica { namespace VertexShader { @@ -70,19 +77,28 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); - const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] - : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x - : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x - : nullptr; - const float24* src2_ = (instr.common.src2 < 0x10) ? 
state.input_register_table[instr.common.src2.GetIndex()] - : &state.temporary_registers[instr.common.src2.GetIndex()].x; + auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { + switch (source_reg.GetRegisterType()) { + case RegisterType::Input: + return state.input_register_table[source_reg.GetIndex()]; + + case RegisterType::Temporary: + return &state.temporary_registers[source_reg.GetIndex()].x; + + case RegisterType::FloatUniform: + return &shader_uniforms.f[source_reg.GetIndex()].x; + } + }; + bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] : (instr.common.dest < 0x10) ? nullptr : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] : nullptr; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - const bool negate_src1 = (swizzle.negate != 0); + const bool negate_src1 = (swizzle.negate_src1 != 0); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -192,7 +208,9 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::RET: + // NOP is currently used as a heuristic for leaving from a function. + // TODO: This is completely incorrect. + case Instruction::OpCode::NOP: if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { exit_loop = true; } else { @@ -209,17 +227,16 @@ static void ProcessShaderCode(VertexShaderState& state) { _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); *++state.call_stack_pointer = state.program_counter - shader_memory; - // TODO: Does this offset refer to the beginning of shader memory? 
- state.program_counter = &shader_memory[instr.flow_control.offset_words]; + state.program_counter = &shader_memory[instr.flow_control.dest_offset]; break; - case Instruction::OpCode::FLS: - // TODO: Do whatever needs to be done here? + case Instruction::OpCode::END: + // TODO break; default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); + (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); break; } diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index c1292fc2d..131769808 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -66,215 +66,6 @@ struct OutputVertex { static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); -union Instruction { - enum class OpCode : u32 { - ADD = 0x0, - DP3 = 0x1, - DP4 = 0x2, - - MUL = 0x8, - - MAX = 0xC, - MIN = 0xD, - RCP = 0xE, - RSQ = 0xF, - - MOV = 0x13, - - RET = 0x21, - FLS = 0x22, // Flush - CALL = 0x24, - }; - - std::string GetOpCodeName() const { - std::map map = { - { OpCode::ADD, "ADD" }, - { OpCode::DP3, "DP3" }, - { OpCode::DP4, "DP4" }, - { OpCode::MUL, "MUL" }, - { OpCode::MAX, "MAX" }, - { OpCode::MIN, "MIN" }, - { OpCode::RCP, "RCP" }, - { OpCode::RSQ, "RSQ" }, - { OpCode::MOV, "MOV" }, - { OpCode::RET, "RET" }, - { OpCode::FLS, "FLS" }, - }; - auto it = map.find(opcode); - if (it == map.end()) - return "UNK"; - else - return it->second; - } - - u32 hex; - - BitField<0x1a, 0x6, OpCode> opcode; - - // General notes: - // - // When two input registers are used, one of them uses a 5-bit index while the other - // one uses a 7-bit index. This is because at most one floating point uniform may be used - // as an input. - - - // Format used e.g. by arithmetic instructions and comparisons - // "src1" and "src2" specify register indices (i.e. 
indices referring to groups of 4 floats), - // while "dest" addresses individual floats. - union { - BitField<0x00, 0x5, u32> operand_desc_id; - - template - struct SourceRegister : BitFieldType { - enum RegisterType { - Input, - Temporary, - FloatUniform - }; - - RegisterType GetRegisterType() const { - if (BitFieldType::Value() < 0x10) - return Input; - else if (BitFieldType::Value() < 0x20) - return Temporary; - else - return FloatUniform; - } - - int GetIndex() const { - if (GetRegisterType() == Input) - return BitFieldType::Value(); - else if (GetRegisterType() == Temporary) - return BitFieldType::Value() - 0x10; - else // if (GetRegisterType() == FloatUniform) - return BitFieldType::Value() - 0x20; - } - - std::string GetRegisterName() const { - std::map type = { - { Input, "i" }, - { Temporary, "t" }, - { FloatUniform, "f" }, - }; - return type[GetRegisterType()] + std::to_string(GetIndex()); - } - }; - - SourceRegister> src2; - SourceRegister> src1; - - struct : BitField<0x15, 0x5, u32> - { - enum RegisterType { - Output, - Temporary, - Unknown - }; - RegisterType GetRegisterType() const { - if (Value() < 0x8) - return Output; - else if (Value() < 0x10) - return Unknown; - else - return Temporary; - } - int GetIndex() const { - if (GetRegisterType() == Output) - return Value(); - else if (GetRegisterType() == Temporary) - return Value() - 0x10; - else - return Value(); - } - std::string GetRegisterName() const { - std::map type = { - { Output, "o" }, - { Temporary, "t" }, - { Unknown, "u" } - }; - return type[GetRegisterType()] + std::to_string(GetIndex()); - } - } dest; - } common; - - // Format used for flow control instructions ("if") - union { - BitField<0x00, 0x8, u32> num_instructions; - BitField<0x0a, 0xc, u32> offset_words; - } flow_control; -}; -static_assert(std::is_standard_layout::value, "Structure is not using standard layout!"); - -union SwizzlePattern { - u32 hex; - - enum class Selector : u32 { - x = 0, - y = 1, - z = 2, - w = 3 - }; - - 
Selector GetSelectorSrc1(int comp) const { - Selector selectors[] = { - src1_selector_0, src1_selector_1, src1_selector_2, src1_selector_3 - }; - return selectors[comp]; - } - - Selector GetSelectorSrc2(int comp) const { - Selector selectors[] = { - src2_selector_0, src2_selector_1, src2_selector_2, src2_selector_3 - }; - return selectors[comp]; - } - - bool DestComponentEnabled(int i) const { - return (dest_mask & (0x8 >> i)) != 0; - } - - std::string SelectorToString(bool src2) const { - std::map map = { - { Selector::x, "x" }, - { Selector::y, "y" }, - { Selector::z, "z" }, - { Selector::w, "w" } - }; - std::string ret; - for (int i = 0; i < 4; ++i) { - ret += map.at(src2 ? GetSelectorSrc2(i) : GetSelectorSrc1(i)); - } - return ret; - } - - std::string DestMaskToString() const { - std::string ret; - for (int i = 0; i < 4; ++i) { - if (!DestComponentEnabled(i)) - ret += "_"; - else - ret += "xyzw"[i]; - } - return ret; - } - - // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x - BitField< 0, 4, u32> dest_mask; - - BitField< 4, 1, u32> negate; // negates src1 - - BitField< 5, 2, Selector> src1_selector_3; - BitField< 7, 2, Selector> src1_selector_2; - BitField< 9, 2, Selector> src1_selector_1; - BitField<11, 2, Selector> src1_selector_0; - - BitField<14, 2, Selector> src2_selector_3; - BitField<16, 2, Selector> src2_selector_2; - BitField<18, 2, Selector> src2_selector_1; - BitField<20, 2, Selector> src2_selector_0; - - BitField<31, 1, u32> flag; // not sure what this means, maybe it's the sign? -}; void SubmitShaderMemoryChange(u32 addr, u32 value); void SubmitSwizzleDataChange(u32 addr, u32 value); From cc5746abfe838fa130dd8be58219e00ae292a8fe Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 01:12:29 +0100 Subject: [PATCH 17/34] Pica/DebugUtils: Replace duplicated SHBIN structures in favor of nihstro's ones. 
--- src/video_core/debug_utils/debug_utils.cpp | 69 +++------------------- 1 file changed, 8 insertions(+), 61 deletions(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 1a7b851d5..7e1cfb92c 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -14,6 +14,8 @@ #include #endif +#include + #include "common/log.h" #include "common/file_util.h" @@ -22,6 +24,10 @@ #include "debug_utils.h" +using nihstro::DVLBHeader; +using nihstro::DVLEHeader; +using nihstro::DVLPHeader; + namespace Pica { void DebugContext::OnEvent(Event event, void* data) { @@ -98,65 +104,6 @@ void GeometryDumper::Dump() { } } -#pragma pack(1) -struct DVLBHeader { - enum : u32 { - MAGIC_WORD = 0x424C5644, // "DVLB" - }; - - u32 magic_word; - u32 num_programs; -// u32 dvle_offset_table[]; -}; -static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size"); - -struct DVLPHeader { - enum : u32 { - MAGIC_WORD = 0x504C5644, // "DVLP" - }; - - u32 magic_word; - u32 version; - u32 binary_offset; // relative to DVLP start - u32 binary_size_words; - u32 swizzle_patterns_offset; - u32 swizzle_patterns_num_entries; - u32 unk2; -}; -static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size"); - -struct DVLEHeader { - enum : u32 { - MAGIC_WORD = 0x454c5644, // "DVLE" - }; - - enum class ShaderType : u8 { - VERTEX = 0, - GEOMETRY = 1, - }; - - u32 magic_word; - u16 pad1; - ShaderType type; - u8 pad2; - u32 main_offset_words; // offset within binary blob - u32 endmain_offset_words; - u32 pad3; - u32 pad4; - u32 constant_table_offset; - u32 constant_table_size; // number of entries - u32 label_table_offset; - u32 label_table_size; - u32 output_register_table_offset; - u32 output_register_table_size; - u32 uniform_table_offset; - u32 uniform_table_size; - u32 symbol_table_offset; - u32 symbol_table_size; - -}; -static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size"); -#pragma 
pack() void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, u32 main_offset, const Regs::VSOutputAttributes* output_attributes) @@ -276,8 +223,8 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data dvlp.binary_size_words = binary_size; QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); - dvlp.swizzle_patterns_offset = write_offset - dvlp_offset; - dvlp.swizzle_patterns_num_entries = swizzle_size; + dvlp.swizzle_info_offset = write_offset - dvlp_offset; + dvlp.swizzle_info_num_entries = swizzle_size; u32 dummy = 0; for (unsigned int i = 0; i < swizzle_size; ++i) { QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); From ce36ad454ecd4707a77916fdb79954c8924b50ee Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 17:55:43 +0100 Subject: [PATCH 18/34] Pica/VertexShader: Support negating src2. --- src/video_core/vertex_shader.cpp | 11 +++++++++-- src/video_core/vertex_shader.h | 1 - 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 064a703eb..c5c5261fe 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -10,10 +10,10 @@ #include -#include "debug_utils/debug_utils.h" #include "pica.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" using nihstro::Instruction; using nihstro::RegisterType; @@ -99,6 +99,7 @@ static void ProcessShaderCode(VertexShaderState& state) { const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; const bool negate_src1 = (swizzle.negate_src1 != 0); + const bool negate_src2 = (swizzle.negate_src2 != 0); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -112,12 +113,18 @@ static void ProcessShaderCode(VertexShaderState& state) { src1[2] = src1[2] * float24::FromFloat32(-1); src1[3] = src1[3] * float24::FromFloat32(-1); } - const float24 src2[4] = { 
+ float24 src2[4] = { src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], }; + if (negate_src2) { + src2[0] = src2[0] * float24::FromFloat32(-1); + src2[1] = src2[1] * float24::FromFloat32(-1); + src2[2] = src2[2] * float24::FromFloat32(-1); + src2[3] = src2[3] * float24::FromFloat32(-1); + } switch (instr.opcode) { case Instruction::OpCode::ADD: diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index 131769808..2f6ff5904 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -66,7 +66,6 @@ struct OutputVertex { static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); - void SubmitShaderMemoryChange(u32 addr, u32 value); void SubmitSwizzleDataChange(u32 addr, u32 value); From b85524c760989f3d053d05df6b244b28252b2f4e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 01:20:29 +0100 Subject: [PATCH 19/34] Pica/VertexShader: Some cleanups using std::array. --- src/video_core/vertex_shader.cpp | 21 ++++++++++++++++----- src/video_core/vertex_shader.h | 3 +++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index c5c5261fe..c98c625c2 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -31,8 +31,8 @@ static struct { // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! // For now, we just keep these local arrays around. 
-static u32 shader_memory[1024]; -static u32 swizzle_data[1024]; +static std::array shader_memory; +static std::array swizzle_data; void SubmitShaderMemoryChange(u32 addr, u32 value) { @@ -49,6 +49,17 @@ Math::Vec4& GetFloatUniform(u32 index) return shader_uniforms.f[index]; } +const std::array& GetShaderBinary() +{ + return shader_memory; +} + +const std::array& GetSwizzlePatterns() +{ + return swizzle_data; +} + + struct VertexShaderState { u32* program_counter; @@ -75,7 +86,7 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; - state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); + state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { @@ -233,7 +244,7 @@ static void ProcessShaderCode(VertexShaderState& state) { _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); - *++state.call_stack_pointer = state.program_counter - shader_memory; + *++state.call_stack_pointer = state.program_counter - shader_memory.data(); state.program_counter = &shader_memory[instr.flow_control.dest_offset]; break; @@ -305,7 +316,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.call_stack_pointer = &state.call_stack[0]; ProcessShaderCode(state); - DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, + DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), state.debug.max_opdesc_id, registers.vs_main_offset, registers.vs_output_attributes); diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index 2f6ff5904..be01b24d7 100644 --- a/src/video_core/vertex_shader.h +++ 
b/src/video_core/vertex_shader.h @@ -73,6 +73,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes); Math::Vec4& GetFloatUniform(u32 index); +const std::array& GetShaderBinary(); +const std::array& GetSwizzlePatterns(); + } // namespace } // namespace From cb1804e0aba48826d671afb0500ae5eaeebd5c5a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 18:31:37 +0100 Subject: [PATCH 20/34] Pica/VertexShader: Move code around a bit. --- src/video_core/vertex_shader.cpp | 98 +++++++++++++++++++------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index c98c625c2..33a862b74 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -86,6 +86,8 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; + const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; + state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { @@ -100,47 +102,52 @@ static void ProcessShaderCode(VertexShaderState& state) { return &shader_uniforms.f[source_reg.GetIndex()].x; } }; - bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); - const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); - float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] - : (instr.common.dest < 0x10) ? nullptr - : (instr.common.dest < 0x20) ? 
&state.temporary_registers[instr.common.dest.GetIndex()][0] - : nullptr; - const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - const bool negate_src1 = (swizzle.negate_src1 != 0); - const bool negate_src2 = (swizzle.negate_src2 != 0); + switch (instr.opcode.GetInfo().type) { + case Instruction::OpCodeType::Arithmetic: + { + bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); - float24 src1[4] = { - src1_[(int)swizzle.GetSelectorSrc1(0)], - src1_[(int)swizzle.GetSelectorSrc1(1)], - src1_[(int)swizzle.GetSelectorSrc1(2)], - src1_[(int)swizzle.GetSelectorSrc1(3)], - }; - if (negate_src1) { - src1[0] = src1[0] * float24::FromFloat32(-1); - src1[1] = src1[1] * float24::FromFloat32(-1); - src1[2] = src1[2] * float24::FromFloat32(-1); - src1[3] = src1[3] * float24::FromFloat32(-1); - } - float24 src2[4] = { - src2_[(int)swizzle.GetSelectorSrc2(0)], - src2_[(int)swizzle.GetSelectorSrc2(1)], - src2_[(int)swizzle.GetSelectorSrc2(2)], - src2_[(int)swizzle.GetSelectorSrc2(3)], - }; - if (negate_src2) { - src2[0] = src2[0] * float24::FromFloat32(-1); - src2[1] = src2[1] * float24::FromFloat32(-1); - src2[2] = src2[2] * float24::FromFloat32(-1); - src2[3] = src2[3] * float24::FromFloat32(-1); - } + const bool negate_src1 = (swizzle.negate_src1 != 0); + const bool negate_src2 = (swizzle.negate_src2 != 0); - switch (instr.opcode) { + float24 src1[4] = { + src1_[(int)swizzle.GetSelectorSrc1(0)], + src1_[(int)swizzle.GetSelectorSrc1(1)], + src1_[(int)swizzle.GetSelectorSrc1(2)], + src1_[(int)swizzle.GetSelectorSrc1(3)], + }; + if (negate_src1) { + src1[0] = src1[0] * float24::FromFloat32(-1); + src1[1] = src1[1] * float24::FromFloat32(-1); + src1[2] = src1[2] * float24::FromFloat32(-1); + src1[3] = src1[3] * float24::FromFloat32(-1); 
+ } + float24 src2[4] = { + src2_[(int)swizzle.GetSelectorSrc2(0)], + src2_[(int)swizzle.GetSelectorSrc2(1)], + src2_[(int)swizzle.GetSelectorSrc2(2)], + src2_[(int)swizzle.GetSelectorSrc2(3)], + }; + if (negate_src2) { + src2[0] = src2[0] * float24::FromFloat32(-1); + src2[1] = src2[1] * float24::FromFloat32(-1); + src2[2] = src2[2] * float24::FromFloat32(-1); + src2[3] = src2[3] * float24::FromFloat32(-1); + } + + float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] + : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] + : nullptr; + + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); + + switch (instr.opcode) { case Instruction::OpCode::ADD: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -153,7 +160,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MUL: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -167,7 +173,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); float24 dot = float24::FromFloat32(0.f); int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 
3 : 4; for (int i = 0; i < num_components; ++i) @@ -185,7 +190,6 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal case Instruction::OpCode::RCP: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -201,7 +205,6 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal Square Root case Instruction::OpCode::RSQ: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -216,7 +219,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MOV: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -226,6 +228,17 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + default: + LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", + (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + break; + } + + break; + } + default: + // Process instruction explicitly + switch (instr.opcode) { // NOP is currently used as a heuristic for leaving from a function. // TODO: This is completely incorrect. case Instruction::OpCode::NOP: @@ -256,6 +269,9 @@ static void ProcessShaderCode(VertexShaderState& state) { LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); break; + } + + break; } if (increment_pc) From 67618a2c55e0b6860bbb083962cdd28a543bf82a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 22:50:09 +0100 Subject: [PATCH 21/34] Pica/VertexShader: Add support for MOVA, CMP and IFC. 
--- src/video_core/pica.h | 8 ++ src/video_core/vertex_shader.cpp | 137 +++++++++++++++++++++++++++++-- 2 files changed, 138 insertions(+), 7 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 583614328..87a9e7913 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -771,6 +771,14 @@ struct float24 { return ToFloat32() <= flt.ToFloat32(); } + bool operator == (const float24& flt) const { + return ToFloat32() == flt.ToFloat32(); + } + + bool operator != (const float24& flt) const { + return ToFloat32() != flt.ToFloat32(); + } + private: // Stored as a regular float, merely for convenience // TODO: Perform proper arithmetic on this! diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 33a862b74..5d9203c86 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -59,6 +59,8 @@ const std::array& GetSwizzlePatterns() return swizzle_data; } +// TODO: Is there actually a limit on hardware? +const int if_stack_size = 8; struct VertexShaderState { u32* program_counter; @@ -67,7 +69,11 @@ struct VertexShaderState { float24* output_register_table[7*4]; Math::Vec4 temporary_registers[16]; - bool status_registers[2]; + bool conditional_code[2]; + + // Two Address registers and one loop counter + // TODO: How many bits do these actually have? + s32 address_registers[3]; enum { INVALID_ADDRESS = 0xFFFFFFFF @@ -75,6 +81,12 @@ struct VertexShaderState { u32 call_stack[8]; // TODO: What is the maximal call stack depth? 
u32* call_stack_pointer; + struct IfStackElement { + u32 else_addr; + u32 else_instructions; + } if_stack[if_stack_size]; + IfStackElement* if_stack_pointer; + struct { u32 max_offset; // maximum program counter ever reached u32 max_opdesc_id; // maximum swizzle pattern index ever used @@ -107,11 +119,20 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCodeType::Arithmetic: { bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + if (is_inverted) { + // We don't really support this properly and/or reliably + LOG_ERROR(HW_GPU, "Bad condition..."); + exit(0); + } + + const int address_offset = (instr.common.address_register_index == 0) + ? 0 : state.address_registers[instr.common.address_register_index - 1]; + + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); - const bool negate_src1 = (swizzle.negate_src1 != 0); - const bool negate_src2 = (swizzle.negate_src2 != 0); + const bool negate_src1 = (swizzle.negate_src1 != false); + const bool negate_src2 = (swizzle.negate_src2 != false); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -217,6 +238,19 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::MOVA: + { + for (int i = 0; i < 2; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + // TODO: Figure out how the rounding is done on hardware + state.address_registers[i] = static_cast(src1[i].ToFloat32()); + } + + break; + } + case Instruction::OpCode::MOV: { for (int i = 0; i < 4; ++i) { @@ -228,16 +262,56 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::CMP: + for (int i = 0; i < 2; ++i) { + // TODO: Can you restrict to one compare via dest masking? 
+ + auto compare_op = instr.common.compare_op; + auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); + + switch (op) { + case compare_op.Equal: + state.conditional_code[i] = (src1[i] == src2[i]); + break; + + case compare_op.NotEqual: + state.conditional_code[i] = (src1[i] != src2[i]); + break; + + case compare_op.LessThan: + state.conditional_code[i] = (src1[i] < src2[i]); + break; + + case compare_op.LessEqual: + state.conditional_code[i] = (src1[i] <= src2[i]); + break; + + case compare_op.GreaterThan: + state.conditional_code[i] = (src1[i] > src2[i]); + break; + + case compare_op.GreaterEqual: + state.conditional_code[i] = (src1[i] >= src2[i]); + break; + + default: + LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast(op)); + break; + } + } + break; + default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + _dbg_assert_(HW_GPU, 0); break; } break; } default: - // Process instruction explicitly + // Handle each instruction on its own switch (instr.opcode) { // NOP is currently used as a heuristic for leaving from a function. // TODO: This is completely incorrect. @@ -265,6 +339,44 @@ static void ProcessShaderCode(VertexShaderState& state) { // TODO break; + case Instruction::OpCode::IFC: + { + // TODO: Do we need to consider swizzlers here? 
+ + auto flow_control = instr.flow_control; + bool results[3] = { flow_control.refx == state.conditional_code[0], + flow_control.refy == state.conditional_code[1] }; + + switch (flow_control.op) { + case flow_control.Or: + results[2] = results[0] || results[1]; + break; + + case flow_control.And: + results[2] = results[0] && results[1]; + break; + + case flow_control.JustX: + results[2] = results[0]; + break; + + case flow_control.JustY: + results[2] = results[1]; + break; + } + + if (results[2]) { + ++state.if_stack_pointer; + + state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; + state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; + } else { + state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; + } + + break; + } + default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); @@ -277,6 +389,13 @@ static void ProcessShaderCode(VertexShaderState& state) { if (increment_pc) ++state.program_counter; + if (state.if_stack_pointer >= &state.if_stack[0]) { + if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { + state.program_counter += state.if_stack_pointer->else_instructions; + state.if_stack_pointer--; + } + } + if (exit_loop) break; } @@ -326,11 +445,15 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; } - state.status_registers[0] = false; - state.status_registers[1] = false; + state.conditional_code[0] = false; + state.conditional_code[1] = false; boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); state.call_stack_pointer = &state.call_stack[0]; + std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), + VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); + 
state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly + ProcessShaderCode(state); DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), state.debug.max_opdesc_id, registers.vs_main_offset, From aff808b2fdfd9605179a13eb55b72d68a7cdd8c2 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:20:47 +0100 Subject: [PATCH 22/34] Pica: Add support for boolean uniforms. --- src/video_core/command_processor.cpp | 6 ++++++ src/video_core/pica.h | 8 +++++++- src/video_core/vertex_shader.cpp | 8 +++++++- src/video_core/vertex_shader.h | 1 + 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 4f82694fd..9b8ecf8e3 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -162,6 +162,12 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { break; } + case PICA_REG_INDEX(vs_bool_uniforms): + for (unsigned i = 0; i < 16; ++i) + VertexShader::GetBoolUniform(i) = (registers.vs_bool_uniforms.Value() & (1 << i)); + + break; + case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 87a9e7913..06552a3ef 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -492,7 +492,11 @@ struct Regs { BitField<8, 2, TriangleTopology> triangle_topology; - INSERT_PADDING_WORDS(0x5b); + INSERT_PADDING_WORDS(0x51); + + BitField<0, 16, u32> vs_bool_uniforms; + + INSERT_PADDING_WORDS(0x9); // Offset to shader program entry point (in words) BitField<0, 16, u32> vs_main_offset; @@ -620,6 +624,7 @@ struct Regs { ADD_FIELD(trigger_draw); ADD_FIELD(trigger_draw_indexed); ADD_FIELD(triangle_topology); + ADD_FIELD(vs_bool_uniforms); ADD_FIELD(vs_main_offset); 
ADD_FIELD(vs_input_register_map); ADD_FIELD(vs_uniform_setup); @@ -690,6 +695,7 @@ ASSERT_REG_POSITION(num_vertices, 0x228); ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(triangle_topology, 0x25e); +ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); ASSERT_REG_POSITION(vs_main_offset, 0x2ba); ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 5d9203c86..fbec1bcc8 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -26,8 +26,9 @@ namespace VertexShader { static struct { Math::Vec4 f[96]; -} shader_uniforms; + std::array b; +} shader_uniforms; // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! // For now, we just keep these local arrays around. @@ -49,6 +50,11 @@ Math::Vec4& GetFloatUniform(u32 index) return shader_uniforms.f[index]; } +bool& GetBoolUniform(u32 index) +{ + return shader_uniforms.b[index]; +} + const std::array& GetShaderBinary() { return shader_memory; diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index be01b24d7..047dde046 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -72,6 +72,7 @@ void SubmitSwizzleDataChange(u32 addr, u32 value); OutputVertex RunShader(const InputVertex& input, int num_attributes); Math::Vec4& GetFloatUniform(u32 index); +bool& GetBoolUniform(u32 index); const std::array& GetShaderBinary(); const std::array& GetSwizzlePatterns(); From cd163fb59ae2922d33aa931f51ef5d116c0adc3f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:22:55 +0100 Subject: [PATCH 23/34] Pica/VertexShader: Implement MAX instructions. 
--- src/video_core/vertex_shader.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index fbec1bcc8..742e5a9f2 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -197,6 +197,15 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::MAX: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = std::max(src1[i], src2[i]); + } + break; + case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { From 22afb9d8309f56494d95f6132561a413b8e7895c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:23:41 +0100 Subject: [PATCH 24/34] Pica/VertexShader: Run instruction handlers according to the effective opcode. This allows for proper emulation of the different CMP/LRP/MAD instructions. --- src/video_core/vertex_shader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 742e5a9f2..dd406f9ca 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -172,7 +172,7 @@ static void ProcessShaderCode(VertexShaderState& state) { state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); - switch (instr.opcode) { + switch (instr.opcode.EffectiveOpCode()) { case Instruction::OpCode::ADD: { for (int i = 0; i < 4; ++i) { From 6bd41de276a97fee1d4f07789a33ff49d494a20d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:30:13 +0100 Subject: [PATCH 25/34] Pica/VertexShader: Cleanup flow control logic and implement CMP/IFU instructions. 
--- src/video_core/vertex_shader.cpp | 108 ++++++++++++++++--------------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index dd406f9ca..af9332975 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 // Refer to the license.txt file included. +#include + #include #include @@ -65,9 +67,6 @@ const std::array& GetSwizzlePatterns() return swizzle_data; } -// TODO: Is there actually a limit on hardware? -const int if_stack_size = 8; - struct VertexShaderState { u32* program_counter; @@ -84,14 +83,14 @@ struct VertexShaderState { enum { INVALID_ADDRESS = 0xFFFFFFFF }; - u32 call_stack[8]; // TODO: What is the maximal call stack depth? - u32* call_stack_pointer; - struct IfStackElement { - u32 else_addr; - u32 else_instructions; - } if_stack[if_stack_size]; - IfStackElement* if_stack_pointer; + struct CallStackElement { + u32 final_address; + u32 return_address; + }; + + // TODO: Is there a maximal size for this? + std::stack call_stack; struct { u32 max_offset; // maximum program counter ever reached @@ -101,12 +100,27 @@ struct VertexShaderState { static void ProcessShaderCode(VertexShaderState& state) { while (true) { - bool increment_pc = true; + if (!state.call_stack.empty()) { + if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { + state.program_counter = &shader_memory[state.call_stack.top().return_address]; + state.call_stack.pop(); + + // TODO: Is "trying again" accurate to hardware? 
+ continue; + } + } + bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); + auto call = [&](std::stack& stack, u32 offset, u32 num_instructions, u32 return_offset) { + state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + stack.push({ offset + num_instructions, return_offset }); + }; + u32 binary_offset = state.program_counter - shader_memory.data(); + + state.debug.max_offset = std::max(state.debug.max_offset, 1 + binary_offset); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { @@ -328,30 +342,33 @@ static void ProcessShaderCode(VertexShaderState& state) { default: // Handle each instruction on its own switch (instr.opcode) { - // NOP is currently used as a heuristic for leaving from a function. - // TODO: This is completely incorrect. 
- case Instruction::OpCode::NOP: - if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { - exit_loop = true; - } else { - // Jump back to call stack position, invalidate call stack entry, move up call stack pointer - state.program_counter = &shader_memory[*state.call_stack_pointer]; - *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; - } - + case Instruction::OpCode::END: + exit_loop = true; break; case Instruction::OpCode::CALL: - increment_pc = false; - - _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); - - *++state.call_stack_pointer = state.program_counter - shader_memory.data(); - state.program_counter = &shader_memory[instr.flow_control.dest_offset]; + call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + binary_offset + 1); break; - case Instruction::OpCode::END: - // TODO + case Instruction::OpCode::NOP: + break; + + case Instruction::OpCode::IFU: + if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { + call(state.call_stack, + binary_offset + 1, + instr.flow_control.dest_offset - binary_offset - 1, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); + } else { + call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); + } + break; case Instruction::OpCode::IFC: @@ -381,12 +398,15 @@ static void ProcessShaderCode(VertexShaderState& state) { } if (results[2]) { - ++state.if_stack_pointer; - - state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; - state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; + call(state.call_stack, + binary_offset + 1, + instr.flow_control.dest_offset - binary_offset - 1, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; + 
call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); } break; @@ -401,15 +421,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - if (increment_pc) - ++state.program_counter; - - if (state.if_stack_pointer >= &state.if_stack[0]) { - if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { - state.program_counter += state.if_stack_pointer->else_instructions; - state.if_stack_pointer--; - } - } + ++state.program_counter; if (exit_loop) break; @@ -462,12 +474,6 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.conditional_code[0] = false; state.conditional_code[1] = false; - boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); - state.call_stack_pointer = &state.call_stack[0]; - - std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), - VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); - state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly ProcessShaderCode(state); DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), From d81370682fccda1370ba22026aa21a260b506efd Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 18:49:09 +0100 Subject: [PATCH 26/34] Pica/DebugUtils: Make a number of variables static. Makes for cleaner and faster code. 
--- src/video_core/debug_utils/debug_utils.cpp | 26 +++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 7e1cfb92c..0085c117d 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -580,7 +580,7 @@ void DumpTevStageConfig(const std::array& stages) for (size_t index = 0; index < stages.size(); ++index) { const auto& tev_stage = stages[index]; - const std::map source_map = { + static const std::map source_map = { { Source::PrimaryColor, "PrimaryColor" }, { Source::Texture0, "Texture0" }, { Source::Texture1, "Texture1" }, @@ -589,23 +589,23 @@ void DumpTevStageConfig(const std::array& stages) { Source::Previous, "Previous" }, }; - const std::map color_modifier_map = { + static const std::map color_modifier_map = { { ColorModifier::SourceColor, { "%source.rgb" } }, { ColorModifier::SourceAlpha, { "%source.aaa" } }, }; - const std::map alpha_modifier_map = { + static const std::map alpha_modifier_map = { { AlphaModifier::SourceAlpha, "%source.a" }, { AlphaModifier::OneMinusSourceAlpha, "(255 - %source.a)" }, }; - std::map combiner_map = { + static const std::map combiner_map = { { Operation::Replace, "%source1" }, { Operation::Modulate, "(%source1 * %source2) / 255" }, { Operation::Add, "(%source1 + %source2)" }, { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, }; - auto ReplacePattern = + static auto ReplacePattern = [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { size_t start = input.find(pattern); if (start == std::string::npos) @@ -615,8 +615,8 @@ void DumpTevStageConfig(const std::array& stages) ret.replace(start, pattern.length(), replacement); return ret; }; - auto GetColorSourceStr = - [&source_map,&color_modifier_map,&ReplacePattern](const Source& src, const ColorModifier& modifier) { + static auto GetColorSourceStr = 
+ [](const Source& src, const ColorModifier& modifier) { auto src_it = source_map.find(src); std::string src_str = "Unknown"; if (src_it != source_map.end()) @@ -629,8 +629,8 @@ void DumpTevStageConfig(const std::array& stages) return ReplacePattern(modifier_str, "%source", src_str); }; - auto GetColorCombinerStr = - [&](const Regs::TevStageConfig& tev_stage) { + static auto GetColorCombinerStr = + [](const Regs::TevStageConfig& tev_stage) { auto op_it = combiner_map.find(tev_stage.color_op); std::string op_str = "Unknown op (%source1, %source2, %source3)"; if (op_it != combiner_map.end()) @@ -640,8 +640,8 @@ void DumpTevStageConfig(const std::array& stages) op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2)); return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3)); }; - auto GetAlphaSourceStr = - [&source_map,&alpha_modifier_map,&ReplacePattern](const Source& src, const AlphaModifier& modifier) { + static auto GetAlphaSourceStr = + [](const Source& src, const AlphaModifier& modifier) { auto src_it = source_map.find(src); std::string src_str = "Unknown"; if (src_it != source_map.end()) @@ -654,8 +654,8 @@ void DumpTevStageConfig(const std::array& stages) return ReplacePattern(modifier_str, "%source", src_str); }; - auto GetAlphaCombinerStr = - [&](const Regs::TevStageConfig& tev_stage) { + static auto GetAlphaCombinerStr = + [](const Regs::TevStageConfig& tev_stage) { auto op_it = combiner_map.find(tev_stage.alpha_op); std::string op_str = "Unknown op (%source1, %source2, %source3)"; if (op_it != combiner_map.end()) From e4e9710d1863a1c503ad4274eb8e64fbfdaa2d76 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:04:13 +0100 Subject: [PATCH 27/34] Pica/Rasterizer: Get rid of C-style casts. 
--- src/video_core/rasterizer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index bd79e4413..bf9c36661 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -18,7 +18,7 @@ namespace Pica { namespace Rasterizer { static void DrawPixel(int x, int y, const Math::Vec4& color) { - u32* color_buffer = (u32*)Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress())); + u32* color_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress()))); u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); // Assuming RGBA8 format until actual framebuffer format handling is implemented @@ -26,14 +26,14 @@ static void DrawPixel(int x, int y, const Math::Vec4& color) { } static u32 GetDepth(int x, int y) { - u16* depth_buffer = (u16*)Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())); + u16* depth_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); // Assuming 16-bit depth buffer format until actual format handling is implemented return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); } static void SetDepth(int x, int y, u16 value) { - u16* depth_buffer = (u16*)Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress())); + u16* depth_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); // Assuming 16-bit depth buffer format until actual format handling is implemented *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; @@ -208,7 +208,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); texture_color[i] = DebugUtils::LookupTexture(texture_data, s, 
t, info); - DebugUtils::DumpTexture(texture.config, (u8*)texture_data); + DebugUtils::DumpTexture(texture.config, texture_data); } // Texture environment - consists of 6 stages of color and alpha combining. From 6e275778c9e7e55cabadb14fdabaa51a55348663 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:15:47 +0100 Subject: [PATCH 28/34] Pica/DebugUtils: Better document LookupTexture. --- src/video_core/debug_utils/debug_utils.cpp | 12 ++++++------ src/video_core/debug_utils/debug_utils.h | 11 ++++++++++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 0085c117d..1c08ba350 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -392,8 +392,10 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; - // TODO: Better control this... + // TODO: compoent order not verified + if (disable_alpha) { + // Show intensity as red, alpha as green return { *source_ptr, *(source_ptr+1), 0, 255 }; } else { return { *source_ptr, *source_ptr, *source_ptr, *(source_ptr+1)}; @@ -403,8 +405,6 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture case Regs::TextureFormat::I8: { const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; - - // TODO: Better control this... return { *source_ptr, *source_ptr, *source_ptr, 255 }; } @@ -412,7 +412,6 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + coarse_x * block_height + coarse_y * info.stride + texel_index_within_tile; - // TODO: Better control this... 
if (disable_alpha) { return { *source_ptr, *source_ptr, *source_ptr, 255 }; } else { @@ -424,14 +423,15 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; - // TODO: Order? + // TODO: compoent order not verified + u8 i = (*source_ptr)&0xF; u8 a = ((*source_ptr) & 0xF0) >> 4; a |= a << 4; i |= i << 4; - // TODO: Better control this... if (disable_alpha) { + // Show intensity as red, alpha as green return { i, a, 0, 255 }; } else { return { i, i, i, a }; diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index f9be90115..f361a5385 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -203,8 +203,17 @@ struct TextureInfo { const Pica::Regs::TextureFormat& format); }; -const Math::Vec4 LookupTexture(const u8* source, int x, int y, const TextureInfo& info, +/** + * Lookup texel located at the given coordinates and return an RGBA vector of its color. + * @param source Source pointer to read data from + * @param s,t Texture coordinates to read from + * @param info TextureInfo object describing the texture setup + * @param disable_alpha This is used for debug widgets which use this method to display textures without providing a good way to visualize alpha by themselves. If true, this will return 255 for the alpha component, and either drop the information entirely or store it in an "unused" color channel. + * @todo Eventually we should get rid of the disable_alpha parameter. 
+ */ +const Math::Vec4 LookupTexture(const u8* source, int s, int t, const TextureInfo& info, bool disable_alpha = false); + void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); void DumpTevStageConfig(const std::array& stages); From 88e9efe4b8b370a93bae688dcbe3c03eda905379 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:20:02 +0100 Subject: [PATCH 29/34] Pica/DebugUtils: Fix two warnings. --- src/video_core/debug_utils/debug_utils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 1c08ba350..d9fed58bf 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -346,7 +346,7 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture case Regs::TextureFormat::RGBA8: { const u8* source_ptr = source + coarse_x * block_height * 4 + coarse_y * info.stride + texel_index_within_tile * 4; - return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? 255 : source_ptr[0] }; + return { source_ptr[3], source_ptr[2], source_ptr[1], disable_alpha ? (u8)255 : source_ptr[0] }; } case Regs::TextureFormat::RGB8: @@ -385,7 +385,7 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture g = (g << 4) | g; b = (b << 4) | b; a = (a << 4) | a; - return { r, g, b, disable_alpha ? 255 : a }; + return { r, g, b, disable_alpha ? (u8)255 : a }; } case Regs::TextureFormat::IA8: From 871418e62b079a83d9121dca0ef75b91acbe77cd Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:37:37 +0100 Subject: [PATCH 30/34] Pica/DebugUtils: Further cleanups to LookupTexture. 
--- src/video_core/debug_utils/debug_utils.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index d9fed58bf..328386b7e 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -392,13 +392,13 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + coarse_x * block_height * 2 + coarse_y * info.stride + texel_index_within_tile * 2; - // TODO: compoent order not verified + // TODO: component order not verified if (disable_alpha) { // Show intensity as red, alpha as green - return { *source_ptr, *(source_ptr+1), 0, 255 }; + return { source_ptr[0], source_ptr[1], 0, 255 }; } else { - return { *source_ptr, *source_ptr, *source_ptr, *(source_ptr+1)}; + return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]}; } } @@ -423,9 +423,9 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; - // TODO: compoent order not verified + // TODO: component order not verified - u8 i = (*source_ptr)&0xF; + u8 i = (*source_ptr) & 0xF; u8 a = ((*source_ptr) & 0xF0) >> 4; a |= a << 4; i |= i << 4; @@ -442,11 +442,11 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + coarse_x * block_height / 2 + coarse_y * info.stride + texel_index_within_tile / 2; - // TODO: Order? + // TODO: component order not verified + u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4); a |= a << 4; - // TODO: Better control this... 
if (disable_alpha) { return { *source_ptr, *source_ptr, *source_ptr, 255 }; } else { From ad5db467d7e9a598e7f8e998066bc5ffe99f1436 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:49:17 +0100 Subject: [PATCH 31/34] Pica/VertexShader: Clarify a comment. --- src/video_core/vertex_shader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index af9332975..5ca30ba53 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -140,7 +140,9 @@ static void ProcessShaderCode(VertexShaderState& state) { { bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); if (is_inverted) { - // We don't really support this properly and/or reliably + // TODO: We don't really support this properly: For instance, the address register + // offset needs to be applied to SRC2 instead, etc. + // For now, we just abort in this situation. LOG_ERROR(HW_GPU, "Bad condition..."); exit(0); } From a664574ecbddb643dd12fb9815f4c4526f59f9ff Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:58:21 +0100 Subject: [PATCH 32/34] Pica/VertexShader: Be robust against invalid inputs. More specifically, this also fixes crashes caused by Citra trying to load a src2 register even if the current instruction does not use one. 
--- src/video_core/vertex_shader.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 5ca30ba53..345f3c3fe 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -99,6 +99,10 @@ struct VertexShaderState { }; static void ProcessShaderCode(VertexShaderState& state) { + + // Placeholder for invalid inputs + static float24 dummy_vec4_float24[4]; + while (true) { if (!state.call_stack.empty()) { if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { @@ -132,6 +136,9 @@ static void ProcessShaderCode(VertexShaderState& state) { case RegisterType::FloatUniform: return &shader_uniforms.f[source_reg.GetIndex()].x; + + default: + return dummy_vec4_float24; } }; @@ -182,9 +189,9 @@ static void ProcessShaderCode(VertexShaderState& state) { } float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] - : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x10) ? dummy_vec4_float24 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] - : nullptr; + : dummy_vec4_float24; state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); From 17f31de364df294337963cabad106a5f0a9d302b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 20 Dec 2014 15:19:36 +0100 Subject: [PATCH 33/34] Pica/VertexShader: Small optimization. 
--- src/video_core/vertex_shader.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 345f3c3fe..de963f5e9 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -118,9 +118,9 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - auto call = [&](std::stack& stack, u32 offset, u32 num_instructions, u32 return_offset) { + auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset) { state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset - stack.push({ offset + num_instructions, return_offset }); + state.call_stack.push({ offset + num_instructions, return_offset }); }; u32 binary_offset = state.program_counter - shader_memory.data(); @@ -356,7 +356,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; case Instruction::OpCode::CALL: - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, binary_offset + 1); @@ -367,12 +367,12 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::IFU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state.call_stack, + call(state, binary_offset + 1, instr.flow_control.dest_offset - binary_offset - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, instr.flow_control.dest_offset + instr.flow_control.num_instructions); @@ -407,12 +407,12 @@ static void ProcessShaderCode(VertexShaderState& state) { } if (results[2]) { - call(state.call_stack, + call(state, binary_offset 
+ 1, instr.flow_control.dest_offset - binary_offset - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, instr.flow_control.dest_offset + instr.flow_control.num_instructions); From 08f42c2b8c30d55f5c931f2260a0900ff902735c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 20 Dec 2014 15:31:17 +0100 Subject: [PATCH 34/34] Pica/VertexShader: Promote a log message to critical status. --- src/video_core/vertex_shader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index de963f5e9..4ba69fa51 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -150,7 +150,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // TODO: We don't really support this properly: For instance, the address register // offset needs to be applied to SRC2 instead, etc. // For now, we just abort in this situation. - LOG_ERROR(HW_GPU, "Bad condition..."); + LOG_CRITICAL(HW_GPU, "Bad condition..."); exit(0); }