Pica: Implement stencil testing.
This commit is contained in:
parent
302e9a20f3
commit
5e79706db2
|
@ -208,7 +208,32 @@ inline void EncodeD24(u32 value, u8* bytes) {
|
||||||
* @param bytes Pointer where to store the encoded value
|
* @param bytes Pointer where to store the encoded value
|
||||||
*/
|
*/
|
||||||
inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
    // Bytes 0-2 receive the low 24 bits of depth, least significant byte first.
    for (unsigned index = 0; index < 3; ++index) {
        bytes[index] = (depth >> (8 * index)) & 0xFF;
    }

    // Byte 3 carries the stencil value.
    bytes[3] = stencil;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode a 24 bit depth value as D24X8 format (32 bits per pixel with 8 bits unused)
|
||||||
|
* @param depth 24 bit source depth value to encode
|
||||||
|
* @param bytes Pointer where to store the encoded value
|
||||||
|
* @note unused bits will not be modified
|
||||||
|
*/
|
||||||
|
inline void EncodeD24X8(u32 depth, u8* bytes) {
|
||||||
|
bytes[0] = depth & 0xFF;
|
||||||
|
bytes[1] = (depth >> 8) & 0xFF;
|
||||||
|
bytes[2] = (depth >> 16) & 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode an 8 bit stencil value as X24S8 format (32 bits per pixel with 24 bits unused)
|
||||||
|
* @param stencil 8 bit source stencil value to encode
|
||||||
|
* @param bytes Pointer where to store the encoded value
|
||||||
|
* @note unused bits will not be modified
|
||||||
|
*/
|
||||||
|
inline void EncodeX24S8(u8 stencil, u8* bytes) {
|
||||||
|
bytes[3] = stencil;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -420,6 +420,11 @@ struct Regs {
|
||||||
GreaterThanOrEqual = 7,
|
GreaterThanOrEqual = 7,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Operation used to derive the new stencil value of a pixel; selected through the
// action_* fields of stencil_test. Only the encodings handled by PerformStencilAction
// are listed here.
enum class StencilAction : u32 {
    // Leave the stored stencil value unchanged
    Keep = 0,

    // XOR the stored stencil value with the supplied operand
    Xor = 5,
};
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
union {
|
union {
|
||||||
// If false, logic blending is used
|
// If false, logic blending is used
|
||||||
|
@ -454,15 +459,35 @@ struct Regs {
|
||||||
BitField< 8, 8, u32> ref;
|
BitField< 8, 8, u32> ref;
|
||||||
} alpha_test;
|
} alpha_test;
|
||||||
|
|
||||||
|
struct {
|
||||||
union {
|
union {
|
||||||
BitField< 0, 1, u32> stencil_test_enable;
|
// If true, enable stencil testing
|
||||||
BitField< 4, 3, CompareFunc> stencil_test_func;
|
BitField< 0, 1, u32> enable;
|
||||||
BitField< 8, 8, u32> stencil_replacement_value;
|
|
||||||
BitField<16, 8, u32> stencil_reference_value;
|
|
||||||
BitField<24, 8, u32> stencil_mask;
|
|
||||||
} stencil_test;
|
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x1);
|
// Comparison operation for stencil testing
|
||||||
|
BitField< 4, 3, CompareFunc> func;
|
||||||
|
|
||||||
|
// Value to calculate the new stencil value from
|
||||||
|
BitField< 8, 8, u32> replacement_value;
|
||||||
|
|
||||||
|
// Value to compare against for stencil testing
|
||||||
|
BitField<16, 8, u32> reference_value;
|
||||||
|
|
||||||
|
// Mask to apply on stencil test inputs
|
||||||
|
BitField<24, 8, u32> mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
union {
|
||||||
|
// Action to perform when the stencil test fails
|
||||||
|
BitField< 0, 3, StencilAction> action_stencil_fail;
|
||||||
|
|
||||||
|
// Action to perform when stencil testing passed but depth testing fails
|
||||||
|
BitField< 4, 3, StencilAction> action_depth_fail;
|
||||||
|
|
||||||
|
// Action to perform when both stencil and depth testing pass
|
||||||
|
BitField< 8, 3, StencilAction> action_depth_pass;
|
||||||
|
};
|
||||||
|
} stencil_test;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField< 0, 1, u32> depth_test_enable;
|
BitField< 0, 1, u32> depth_test_enable;
|
||||||
|
@ -512,7 +537,7 @@ struct Regs {
|
||||||
struct {
|
struct {
|
||||||
INSERT_PADDING_WORDS(0x6);
|
INSERT_PADDING_WORDS(0x6);
|
||||||
|
|
||||||
DepthFormat depth_format;
|
DepthFormat depth_format; // TODO: Should be a BitField!
|
||||||
BitField<16, 3, ColorFormat> color_format;
|
BitField<16, 3, ColorFormat> color_format;
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x4);
|
INSERT_PADDING_WORDS(0x4);
|
||||||
|
|
|
@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads the stencil component stored for one pixel of the current depth buffer.
 * @param x Horizontal pixel coordinate
 * @param y Vertical pixel coordinate; it is replaced by (framebuffer height - y) before addressing
 * @return The stencil component for D24S8 buffers; 0 (after logging a warning) for every other
 *         depth format
 */
static u8 GetStencil(int x, int y) {
    const auto& fb = g_state.regs.framebuffer;
    const PAddr buffer_addr = fb.GetDepthBufferPhysicalAddress();
    u8* buffer_base = Memory::GetPhysicalPointer(buffer_addr);

    y = fb.height - y;

    // y with its low three bits cleared, i.e. rounded down to a multiple of 8
    // (presumably the first row of the tile containing the pixel - TODO confirm).
    const u32 tile_row_y = y & ~7;
    u32 pixel_size = Pica::Regs::BytesPerDepthPixel(fb.depth_format);
    u32 row_stride = fb.width * pixel_size;

    u32 pixel_offset = VideoCore::GetMortonOffset(x, y, pixel_size) + tile_row_y * row_stride;
    u8* pixel = buffer_base + pixel_offset;

    // D24S8 is the only format handled here that carries a stencil component.
    if (fb.depth_format == Regs::DepthFormat::D24S8) {
        return Color::DecodeD24S8(pixel).y;
    }

    LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", fb.depth_format);
    return 0;
}
|
||||||
|
|
||||||
static void SetDepth(int x, int y, u32 value) {
|
static void SetDepth(int x, int y, u32 value) {
|
||||||
const auto& framebuffer = g_state.regs.framebuffer;
|
const auto& framebuffer = g_state.regs.framebuffer;
|
||||||
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
||||||
|
@ -144,13 +168,15 @@ static void SetDepth(int x, int y, u32 value) {
|
||||||
case Regs::DepthFormat::D16:
|
case Regs::DepthFormat::D16:
|
||||||
Color::EncodeD16(value, dst_pixel);
|
Color::EncodeD16(value, dst_pixel);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Regs::DepthFormat::D24:
|
case Regs::DepthFormat::D24:
|
||||||
Color::EncodeD24(value, dst_pixel);
|
Color::EncodeD24(value, dst_pixel);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Regs::DepthFormat::D24S8:
|
case Regs::DepthFormat::D24S8:
|
||||||
// TODO(Subv): Implement the stencil buffer
|
Color::EncodeD24X8(value, dst_pixel);
|
||||||
Color::EncodeD24S8(value, 0, dst_pixel);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
|
@ -158,6 +184,53 @@ static void SetDepth(int x, int y, u32 value) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void SetStencil(int x, int y, u8 value) {
|
||||||
|
const auto& framebuffer = g_state.regs.framebuffer;
|
||||||
|
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
|
||||||
|
u8* depth_buffer = Memory::GetPhysicalPointer(addr);
|
||||||
|
|
||||||
|
y = framebuffer.height - y;
|
||||||
|
|
||||||
|
const u32 coarse_y = y & ~7;
|
||||||
|
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
|
||||||
|
u32 stride = framebuffer.width * bytes_per_pixel;
|
||||||
|
|
||||||
|
u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
|
||||||
|
u8* dst_pixel = depth_buffer + dst_offset;
|
||||||
|
|
||||||
|
switch (framebuffer.depth_format) {
|
||||||
|
case Pica::Regs::DepthFormat::D16:
|
||||||
|
case Pica::Regs::DepthFormat::D24:
|
||||||
|
// Nothing to do
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Pica::Regs::DepthFormat::D24S8:
|
||||||
|
Color::EncodeX24S8(value, dst_pixel);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not!
|
||||||
|
static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) {
|
||||||
|
switch (action) {
|
||||||
|
case Regs::StencilAction::Keep:
|
||||||
|
return dest;
|
||||||
|
|
||||||
|
case Regs::StencilAction::Xor:
|
||||||
|
return dest ^ ref;
|
||||||
|
|
||||||
|
default:
|
||||||
|
LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
|
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
|
||||||
struct Fix12P4 {
|
struct Fix12P4 {
|
||||||
Fix12P4() {}
|
Fix12P4() {}
|
||||||
|
@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
auto textures = regs.GetTextures();
|
auto textures = regs.GetTextures();
|
||||||
auto tev_stages = regs.GetTevStages();
|
auto tev_stages = regs.GetTevStages();
|
||||||
|
|
||||||
|
bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
|
||||||
|
const auto stencil_test = g_state.regs.output_merger.stencil_test;
|
||||||
|
|
||||||
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
|
||||||
// TODO: Not sure if looping through x first might be faster
|
// TODO: Not sure if looping through x first might be faster
|
||||||
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
|
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
|
||||||
|
@ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& output_merger = regs.output_merger;
|
const auto& output_merger = regs.output_merger;
|
||||||
|
// TODO: Does alpha testing happen before or after stencil?
|
||||||
if (output_merger.alpha_test.enable) {
|
if (output_merger.alpha_test.enable) {
|
||||||
bool pass = false;
|
bool pass = false;
|
||||||
|
|
||||||
|
@ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u8 old_stencil = 0;
|
||||||
|
if (stencil_action_enable) {
|
||||||
|
old_stencil = GetStencil(x >> 4, y >> 4);
|
||||||
|
u8 dest = old_stencil & stencil_test.mask;
|
||||||
|
u8 ref = stencil_test.reference_value & stencil_test.mask;
|
||||||
|
|
||||||
|
bool pass = false;
|
||||||
|
switch (stencil_test.func) {
|
||||||
|
case Regs::CompareFunc::Never:
|
||||||
|
pass = false;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Regs::CompareFunc::Always:
|
||||||
|
pass = true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Regs::CompareFunc::Equal:
|
||||||
|
pass = (ref == dest);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Regs::CompareFunc::NotEqual:
|
||||||
|
pass = (ref != dest);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Regs::CompareFunc::LessThan:
|
||||||
|
pass = (ref < dest);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Regs::CompareFunc::LessThanOrEqual:
|
||||||
|
pass = (ref <= dest);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Regs::CompareFunc::GreaterThan:
|
||||||
|
pass = (ref > dest);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Regs::CompareFunc::GreaterThanOrEqual:
|
||||||
|
pass = (ref >= dest);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!pass) {
|
||||||
|
u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value);
|
||||||
|
SetStencil(x >> 4, y >> 4, new_stencil);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
||||||
if (output_merger.depth_test_enable) {
|
if (output_merger.depth_test_enable) {
|
||||||
unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
|
unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
|
||||||
|
@ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!pass)
|
if (!pass) {
|
||||||
|
if (stencil_action_enable) {
|
||||||
|
u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value);
|
||||||
|
SetStencil(x >> 4, y >> 4, new_stencil);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (output_merger.depth_write_enable)
|
if (output_merger.depth_write_enable)
|
||||||
SetDepth(x >> 4, y >> 4, z);
|
SetDepth(x >> 4, y >> 4, z);
|
||||||
|
|
||||||
|
if (stencil_action_enable) {
|
||||||
|
// TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway?
|
||||||
|
u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value);
|
||||||
|
SetStencil(x >> 4, y >> 4, new_stencil);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto dest = GetPixel(x >> 4, y >> 4);
|
auto dest = GetPixel(x >> 4, y >> 4);
|
||||||
|
|
Reference in New Issue