renderer_software: Multi-thread processing (#6698)
* renderer_software: Multi-thread processing
* Doubles the performance in most cases
* renderer_software: Move memory access out of the raster loop
* Profiling shows this has a significant impact
This commit is contained in:
parent
8b218e1b7d
commit
d1f600601d
|
@ -41,10 +41,22 @@ Framebuffer::Framebuffer(Memory::MemorySystem& memory_, const Pica::FramebufferR
|
||||||
|
|
||||||
Framebuffer::~Framebuffer() = default;
|
Framebuffer::~Framebuffer() = default;
|
||||||
|
|
||||||
void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const {
|
void Framebuffer::Bind() {
|
||||||
const auto& framebuffer = regs.framebuffer;
|
PAddr addr = regs.framebuffer.GetColorBufferPhysicalAddress();
|
||||||
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
if (color_addr != addr) [[unlikely]] {
|
||||||
|
color_addr = addr;
|
||||||
|
color_buffer = memory.GetPhysicalPointer(color_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
|
||||||
|
if (depth_addr != addr) [[unlikely]] {
|
||||||
|
depth_addr = addr;
|
||||||
|
depth_buffer = memory.GetPhysicalPointer(depth_addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Framebuffer::DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const {
|
||||||
|
const auto& framebuffer = regs.framebuffer;
|
||||||
// Similarly to textures, the render framebuffer is laid out from bottom to top, too.
|
// Similarly to textures, the render framebuffer is laid out from bottom to top, too.
|
||||||
// NOTE: The framebuffer height register contains the actual FB height minus one.
|
// NOTE: The framebuffer height register contains the actual FB height minus one.
|
||||||
y = framebuffer.height - y;
|
y = framebuffer.height - y;
|
||||||
|
@ -54,8 +66,7 @@ void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const {
|
||||||
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
|
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
|
||||||
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
|
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
|
||||||
coarse_y * framebuffer.width * bytes_per_pixel;
|
coarse_y * framebuffer.width * bytes_per_pixel;
|
||||||
u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
u8* dst_pixel = color_buffer + dst_offset;
|
||||||
u8* dst_pixel = depth_buffer + dst_offset;
|
|
||||||
|
|
||||||
switch (framebuffer.color_format) {
|
switch (framebuffer.color_format) {
|
||||||
case FramebufferRegs::ColorFormat::RGBA8:
|
case FramebufferRegs::ColorFormat::RGBA8:
|
||||||
|
@ -80,10 +91,8 @@ void Framebuffer::DrawPixel(int x, int y, const Common::Vec4<u8>& color) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const {
|
const Common::Vec4<u8> Framebuffer::GetPixel(u32 x, u32 y) const {
|
||||||
const auto& framebuffer = regs.framebuffer;
|
const auto& framebuffer = regs.framebuffer;
|
||||||
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
|
||||||
|
|
||||||
y = framebuffer.height - y;
|
y = framebuffer.height - y;
|
||||||
|
|
||||||
const u32 coarse_y = y & ~7;
|
const u32 coarse_y = y & ~7;
|
||||||
|
@ -91,7 +100,6 @@ const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const {
|
||||||
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
|
GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
|
||||||
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
|
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
|
||||||
coarse_y * framebuffer.width * bytes_per_pixel;
|
coarse_y * framebuffer.width * bytes_per_pixel;
|
||||||
const u8* color_buffer = memory.GetPhysicalPointer(addr);
|
|
||||||
const u8* src_pixel = color_buffer + src_offset;
|
const u8* src_pixel = color_buffer + src_offset;
|
||||||
|
|
||||||
switch (framebuffer.color_format) {
|
switch (framebuffer.color_format) {
|
||||||
|
@ -114,10 +122,8 @@ const Common::Vec4<u8> Framebuffer::GetPixel(int x, int y) const {
|
||||||
return {0, 0, 0, 0};
|
return {0, 0, 0, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 Framebuffer::GetDepth(int x, int y) const {
|
u32 Framebuffer::GetDepth(u32 x, u32 y) const {
|
||||||
const auto& framebuffer = regs.framebuffer;
|
const auto& framebuffer = regs.framebuffer;
|
||||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
|
||||||
|
|
||||||
y = framebuffer.height - y;
|
y = framebuffer.height - y;
|
||||||
|
|
||||||
const u32 coarse_y = y & ~7;
|
const u32 coarse_y = y & ~7;
|
||||||
|
@ -125,7 +131,6 @@ u32 Framebuffer::GetDepth(int x, int y) const {
|
||||||
const u32 stride = framebuffer.width * bytes_per_pixel;
|
const u32 stride = framebuffer.width * bytes_per_pixel;
|
||||||
|
|
||||||
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||||
const u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
|
||||||
const u8* src_pixel = depth_buffer + src_offset;
|
const u8* src_pixel = depth_buffer + src_offset;
|
||||||
|
|
||||||
switch (framebuffer.depth_format) {
|
switch (framebuffer.depth_format) {
|
||||||
|
@ -143,10 +148,8 @@ u32 Framebuffer::GetDepth(int x, int y) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 Framebuffer::GetStencil(int x, int y) const {
|
u8 Framebuffer::GetStencil(u32 x, u32 y) const {
|
||||||
const auto& framebuffer = regs.framebuffer;
|
const auto& framebuffer = regs.framebuffer;
|
||||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
|
||||||
|
|
||||||
y = framebuffer.height - y;
|
y = framebuffer.height - y;
|
||||||
|
|
||||||
const u32 coarse_y = y & ~7;
|
const u32 coarse_y = y & ~7;
|
||||||
|
@ -154,7 +157,6 @@ u8 Framebuffer::GetStencil(int x, int y) const {
|
||||||
const u32 stride = framebuffer.width * bytes_per_pixel;
|
const u32 stride = framebuffer.width * bytes_per_pixel;
|
||||||
|
|
||||||
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||||
const u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
|
||||||
const u8* src_pixel = depth_buffer + src_offset;
|
const u8* src_pixel = depth_buffer + src_offset;
|
||||||
|
|
||||||
switch (framebuffer.depth_format) {
|
switch (framebuffer.depth_format) {
|
||||||
|
@ -169,10 +171,8 @@ u8 Framebuffer::GetStencil(int x, int y) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Framebuffer::SetDepth(int x, int y, u32 value) const {
|
void Framebuffer::SetDepth(u32 x, u32 y, u32 value) const {
|
||||||
const auto& framebuffer = regs.framebuffer;
|
const auto& framebuffer = regs.framebuffer;
|
||||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
|
||||||
|
|
||||||
y = framebuffer.height - y;
|
y = framebuffer.height - y;
|
||||||
|
|
||||||
const u32 coarse_y = y & ~7;
|
const u32 coarse_y = y & ~7;
|
||||||
|
@ -180,7 +180,6 @@ void Framebuffer::SetDepth(int x, int y, u32 value) const {
|
||||||
const u32 stride = framebuffer.width * bytes_per_pixel;
|
const u32 stride = framebuffer.width * bytes_per_pixel;
|
||||||
|
|
||||||
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||||
u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
|
||||||
u8* dst_pixel = depth_buffer + dst_offset;
|
u8* dst_pixel = depth_buffer + dst_offset;
|
||||||
|
|
||||||
switch (framebuffer.depth_format) {
|
switch (framebuffer.depth_format) {
|
||||||
|
@ -201,10 +200,8 @@ void Framebuffer::SetDepth(int x, int y, u32 value) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Framebuffer::SetStencil(int x, int y, u8 value) const {
|
void Framebuffer::SetStencil(u32 x, u32 y, u8 value) const {
|
||||||
const auto& framebuffer = regs.framebuffer;
|
const auto& framebuffer = regs.framebuffer;
|
||||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
|
||||||
|
|
||||||
y = framebuffer.height - y;
|
y = framebuffer.height - y;
|
||||||
|
|
||||||
const u32 coarse_y = y & ~7;
|
const u32 coarse_y = y & ~7;
|
||||||
|
@ -212,7 +209,6 @@ void Framebuffer::SetStencil(int x, int y, u8 value) const {
|
||||||
const u32 stride = framebuffer.width * bytes_per_pixel;
|
const u32 stride = framebuffer.width * bytes_per_pixel;
|
||||||
|
|
||||||
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||||
u8* depth_buffer = memory.GetPhysicalPointer(addr);
|
|
||||||
u8* dst_pixel = depth_buffer + dst_offset;
|
u8* dst_pixel = depth_buffer + dst_offset;
|
||||||
|
|
||||||
switch (framebuffer.depth_format) {
|
switch (framebuffer.depth_format) {
|
||||||
|
@ -231,7 +227,7 @@ void Framebuffer::SetStencil(int x, int y, u8 value) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Framebuffer::DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const {
|
void Framebuffer::DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const {
|
||||||
const auto& framebuffer = regs.framebuffer;
|
const auto& framebuffer = regs.framebuffer;
|
||||||
const auto& shadow = regs.shadow;
|
const auto& shadow = regs.shadow;
|
||||||
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
|
||||||
|
|
|
@ -23,30 +23,37 @@ public:
|
||||||
explicit Framebuffer(Memory::MemorySystem& memory, const Pica::FramebufferRegs& framebuffer);
|
explicit Framebuffer(Memory::MemorySystem& memory, const Pica::FramebufferRegs& framebuffer);
|
||||||
~Framebuffer();
|
~Framebuffer();
|
||||||
|
|
||||||
|
/// Updates the framebuffer addresses from the PICA registers.
|
||||||
|
void Bind();
|
||||||
|
|
||||||
/// Draws a pixel at the specified coordinates.
|
/// Draws a pixel at the specified coordinates.
|
||||||
void DrawPixel(int x, int y, const Common::Vec4<u8>& color) const;
|
void DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const;
|
||||||
|
|
||||||
/// Returns the current color at the specified coordinates.
|
/// Returns the current color at the specified coordinates.
|
||||||
[[nodiscard]] const Common::Vec4<u8> GetPixel(int x, int y) const;
|
[[nodiscard]] const Common::Vec4<u8> GetPixel(u32 x, u32 y) const;
|
||||||
|
|
||||||
/// Returns the depth value at the specified coordinates.
|
/// Returns the depth value at the specified coordinates.
|
||||||
[[nodiscard]] u32 GetDepth(int x, int y) const;
|
[[nodiscard]] u32 GetDepth(u32 x, u32 y) const;
|
||||||
|
|
||||||
/// Returns the stencil value at the specified coordinates.
|
/// Returns the stencil value at the specified coordinates.
|
||||||
[[nodiscard]] u8 GetStencil(int x, int y) const;
|
[[nodiscard]] u8 GetStencil(u32 x, u32 y) const;
|
||||||
|
|
||||||
/// Stores the provided depth value at the specified coordinates.
|
/// Stores the provided depth value at the specified coordinates.
|
||||||
void SetDepth(int x, int y, u32 value) const;
|
void SetDepth(u32 x, u32 y, u32 value) const;
|
||||||
|
|
||||||
/// Stores the provided stencil value at the specified coordinates.
|
/// Stores the provided stencil value at the specified coordinates.
|
||||||
void SetStencil(int x, int y, u8 value) const;
|
void SetStencil(u32 x, u32 y, u8 value) const;
|
||||||
|
|
||||||
/// Draws a pixel to the shadow buffer.
|
/// Draws a pixel to the shadow buffer.
|
||||||
void DrawShadowMapPixel(int x, int y, u32 depth, u8 stencil) const;
|
void DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Memory::MemorySystem& memory;
|
Memory::MemorySystem& memory;
|
||||||
const Pica::FramebufferRegs& regs;
|
const Pica::FramebufferRegs& regs;
|
||||||
|
PAddr color_addr;
|
||||||
|
u8* color_buffer{};
|
||||||
|
PAddr depth_addr;
|
||||||
|
u8* depth_buffer{};
|
||||||
};
|
};
|
||||||
|
|
||||||
u8 PerformStencilAction(Pica::FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref);
|
u8 PerformStencilAction(Pica::FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref);
|
||||||
|
|
|
@ -96,7 +96,9 @@ private:
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_)
|
RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_)
|
||||||
: memory{memory_}, state{Pica::g_state}, regs{state.regs}, fb{memory, regs.framebuffer} {}
|
: memory{memory_}, state{Pica::g_state}, regs{state.regs},
|
||||||
|
num_sw_threads{std::max(std::thread::hardware_concurrency(), 2U)},
|
||||||
|
sw_workers{num_sw_threads, "SwRenderer workers"}, fb{memory, regs.framebuffer} {}
|
||||||
|
|
||||||
void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||||
const Pica::Shader::OutputVertex& v1,
|
const Pica::Shader::OutputVertex& v1,
|
||||||
|
@ -289,15 +291,18 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||||
|
|
||||||
const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
|
const auto w_inverse = Common::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
|
||||||
|
|
||||||
auto textures = regs.texturing.GetTextures();
|
const auto textures = regs.texturing.GetTextures();
|
||||||
const auto tev_stages = regs.texturing.GetTevStages();
|
const auto tev_stages = regs.texturing.GetTevStages();
|
||||||
|
|
||||||
|
fb.Bind();
|
||||||
|
|
||||||
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
||||||
// TODO: Not sure if looping through x first might be faster
|
// TODO: Not sure if looping through x first might be faster
|
||||||
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
|
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
|
||||||
|
const auto process_scanline = [&, y] {
|
||||||
for (u16 x = min_x + 8; x < max_x; x += 0x10) {
|
for (u16 x = min_x + 8; x < max_x; x += 0x10) {
|
||||||
// Do not process the pixel if it's inside the scissor box and the scissor mode is set
|
// Do not process the pixel if it's inside the scissor box and the scissor mode is
|
||||||
// to Exclude.
|
// set to Exclude.
|
||||||
if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) {
|
if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) {
|
||||||
if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) {
|
if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -360,11 +365,13 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||||
* one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
|
* one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1)
|
||||||
* u = u_over_w / one_over_w
|
* u = u_over_w / one_over_w
|
||||||
*
|
*
|
||||||
* The generalization to three vertices is straightforward in baricentric coordinates.
|
* The generalization to three vertices is straightforward in baricentric
|
||||||
|
*coordinates.
|
||||||
**/
|
**/
|
||||||
const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) {
|
const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) {
|
||||||
auto attr_over_w = Common::MakeVec(attr0, attr1, attr2);
|
auto attr_over_w = Common::MakeVec(attr0, attr1, attr2);
|
||||||
f24 interpolated_attr_over_w = Common::Dot(attr_over_w, baricentric_coordinates);
|
f24 interpolated_attr_over_w =
|
||||||
|
Common::Dot(attr_over_w, baricentric_coordinates);
|
||||||
return interpolated_attr_over_w * interpolated_w_inverse;
|
return interpolated_attr_over_w * interpolated_w_inverse;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -405,9 +412,12 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||||
if (!regs.lighting.disable) {
|
if (!regs.lighting.disable) {
|
||||||
const auto normquat =
|
const auto normquat =
|
||||||
Common::Quaternion<f32>{
|
Common::Quaternion<f32>{
|
||||||
{get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(),
|
{get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x)
|
||||||
get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(),
|
.ToFloat32(),
|
||||||
get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()},
|
get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y)
|
||||||
|
.ToFloat32(),
|
||||||
|
get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z)
|
||||||
|
.ToFloat32()},
|
||||||
get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(),
|
get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(),
|
||||||
}
|
}
|
||||||
.Normalized();
|
.Normalized();
|
||||||
|
@ -417,20 +427,22 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||||
get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
|
get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
|
||||||
get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
|
get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
|
||||||
};
|
};
|
||||||
std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors(
|
std::tie(primary_fragment_color, secondary_fragment_color) =
|
||||||
regs.lighting, state.lighting, normquat, view, texture_color);
|
ComputeFragmentsColors(regs.lighting, state.lighting, normquat, view,
|
||||||
|
texture_color);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write the TEV stages.
|
// Write the TEV stages.
|
||||||
auto combiner_output = WriteTevConfig(texture_color, tev_stages, primary_color,
|
auto combiner_output =
|
||||||
primary_fragment_color, secondary_fragment_color);
|
WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color,
|
||||||
|
secondary_fragment_color);
|
||||||
|
|
||||||
const auto& output_merger = regs.framebuffer.output_merger;
|
const auto& output_merger = regs.framebuffer.output_merger;
|
||||||
if (output_merger.fragment_operation_mode ==
|
if (output_merger.fragment_operation_mode ==
|
||||||
FramebufferRegs::FragmentOperationMode::Shadow) {
|
FramebufferRegs::FragmentOperationMode::Shadow) {
|
||||||
u32 depth_int = static_cast<u32>(depth * 0xFFFFFF);
|
const u32 depth_int = static_cast<u32>(depth * 0xFFFFFF);
|
||||||
// Use green color as the shadow intensity
|
// Use green color as the shadow intensity
|
||||||
u8 stencil = combiner_output.y;
|
const u8 stencil = combiner_output.y;
|
||||||
fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil);
|
fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil);
|
||||||
// Skip the normal output merger pipeline if it is in shadow mode
|
// Skip the normal output merger pipeline if it is in shadow mode
|
||||||
continue;
|
continue;
|
||||||
|
@ -440,7 +452,7 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||||
if (!DoAlphaTest(combiner_output.a())) {
|
if (!DoAlphaTest(combiner_output.a())) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
WriteFog(combiner_output, depth);
|
WriteFog(depth, combiner_output);
|
||||||
if (!DoDepthStencilTest(x, y, depth)) {
|
if (!DoDepthStencilTest(x, y, depth)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -449,7 +461,10 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con
|
||||||
fb.DrawPixel(x >> 4, y >> 4, result);
|
fb.DrawPixel(x >> 4, y >> 4, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
sw_workers.QueueWork(std::move(process_scanline));
|
||||||
}
|
}
|
||||||
|
sw_workers.WaitForRequests();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
|
std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
|
||||||
|
@ -573,7 +588,7 @@ std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor(
|
||||||
}
|
}
|
||||||
|
|
||||||
Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y,
|
Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y,
|
||||||
Common::Vec4<u8>& combiner_output) const {
|
Common::Vec4<u8> combiner_output) const {
|
||||||
const auto dest = fb.GetPixel(x >> 4, y >> 4);
|
const auto dest = fb.GetPixel(x >> 4, y >> 4);
|
||||||
Common::Vec4<u8> blend_output = combiner_output;
|
Common::Vec4<u8> blend_output = combiner_output;
|
||||||
|
|
||||||
|
@ -771,7 +786,7 @@ Common::Vec4<u8> RasterizerSoftware::WriteTevConfig(
|
||||||
return combiner_output;
|
return combiner_output;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerSoftware::WriteFog(Common::Vec4<u8>& combiner_output, float depth) const {
|
void RasterizerSoftware::WriteFog(float depth, Common::Vec4<u8>& combiner_output) const {
|
||||||
/**
|
/**
|
||||||
* Apply fog combiner. Not fully accurate. We'd have to know what data type is used to
|
* Apply fog combiner. Not fully accurate. We'd have to know what data type is used to
|
||||||
* store the depth etc. Using float for now until we know more about Pica datatypes.
|
* store the depth etc. Using float for now until we know more about Pica datatypes.
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
|
#include "common/thread_worker.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/regs_texturing.h"
|
#include "video_core/regs_texturing.h"
|
||||||
#include "video_core/renderer_software/sw_clipper.h"
|
#include "video_core/renderer_software/sw_clipper.h"
|
||||||
|
@ -52,7 +52,7 @@ private:
|
||||||
std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const;
|
std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const;
|
||||||
|
|
||||||
/// Returns the final pixel color with blending or logic ops applied.
|
/// Returns the final pixel color with blending or logic ops applied.
|
||||||
Common::Vec4<u8> PixelColor(u16 x, u16 y, Common::Vec4<u8>& combiner_output) const;
|
Common::Vec4<u8> PixelColor(u16 x, u16 y, Common::Vec4<u8> combiner_output) const;
|
||||||
|
|
||||||
/// Emulates the TEV configuration and returns the combiner output.
|
/// Emulates the TEV configuration and returns the combiner output.
|
||||||
Common::Vec4<u8> WriteTevConfig(
|
Common::Vec4<u8> WriteTevConfig(
|
||||||
|
@ -62,7 +62,7 @@ private:
|
||||||
Common::Vec4<u8> secondary_fragment_color);
|
Common::Vec4<u8> secondary_fragment_color);
|
||||||
|
|
||||||
/// Blends fog to the combiner output if enabled.
|
/// Blends fog to the combiner output if enabled.
|
||||||
void WriteFog(Common::Vec4<u8>& combiner_output, float depth) const;
|
void WriteFog(float depth, Common::Vec4<u8>& combiner_output) const;
|
||||||
|
|
||||||
/// Performs the alpha test. Returns false if the test failed.
|
/// Performs the alpha test. Returns false if the test failed.
|
||||||
bool DoAlphaTest(u8 alpha) const;
|
bool DoAlphaTest(u8 alpha) const;
|
||||||
|
@ -74,6 +74,8 @@ private:
|
||||||
Memory::MemorySystem& memory;
|
Memory::MemorySystem& memory;
|
||||||
Pica::State& state;
|
Pica::State& state;
|
||||||
const Pica::Regs& regs;
|
const Pica::Regs& regs;
|
||||||
|
size_t num_sw_threads;
|
||||||
|
Common::ThreadWorker sw_workers;
|
||||||
Framebuffer fb;
|
Framebuffer fb;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Reference in New Issue