Pica: Cleanup clipping code and change screenspace z to range from -1..0.
The new depth range seems to better reflect what applications expect, and it makes for cleaner code overall (and is hence more likely to reflect hardware behavior).
This commit is contained in:
parent
70a764d992
commit
365236fa4c
|
@ -15,30 +15,18 @@ namespace Clipper {
|
||||||
|
|
||||||
struct ClippingEdge {
|
struct ClippingEdge {
|
||||||
public:
|
public:
|
||||||
enum Type {
|
ClippingEdge(Math::Vec4<float24> coeffs,
|
||||||
POS_X = 0,
|
Math::Vec4<float24> bias = Math::Vec4<float24>(float24::FromFloat32(0),
|
||||||
NEG_X = 1,
|
float24::FromFloat32(0),
|
||||||
POS_Y = 2,
|
float24::FromFloat32(0),
|
||||||
NEG_Y = 3,
|
float24::FromFloat32(0)))
|
||||||
POS_Z = 4,
|
: coeffs(coeffs),
|
||||||
NEG_Z = 5,
|
bias(bias)
|
||||||
};
|
{
|
||||||
|
}
|
||||||
ClippingEdge(Type type, float24 position) : type(type), pos(position) {}
|
|
||||||
|
|
||||||
bool IsInside(const OutputVertex& vertex) const {
|
bool IsInside(const OutputVertex& vertex) const {
|
||||||
switch (type) {
|
return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
|
||||||
case POS_X: return vertex.pos.x <= pos * vertex.pos.w;
|
|
||||||
case NEG_X: return vertex.pos.x >= pos * vertex.pos.w;
|
|
||||||
case POS_Y: return vertex.pos.y <= pos * vertex.pos.w;
|
|
||||||
case NEG_Y: return vertex.pos.y >= pos * vertex.pos.w;
|
|
||||||
|
|
||||||
// TODO: Check z compares ... should be 0..1 instead?
|
|
||||||
case POS_Z: return vertex.pos.z <= pos * vertex.pos.w;
|
|
||||||
|
|
||||||
default:
|
|
||||||
case NEG_Z: return vertex.pos.z >= pos * vertex.pos.w;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsOutSide(const OutputVertex& vertex) const {
|
bool IsOutSide(const OutputVertex& vertex) const {
|
||||||
|
@ -46,31 +34,17 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
|
OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
|
||||||
auto dotpr = [this](const OutputVertex& vtx) {
|
float24 dp = Math::Dot(v0.pos + bias, coeffs);
|
||||||
switch (type) {
|
float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
|
||||||
case POS_X: return vtx.pos.x - vtx.pos.w;
|
|
||||||
case NEG_X: return -vtx.pos.x - vtx.pos.w;
|
|
||||||
case POS_Y: return vtx.pos.y - vtx.pos.w;
|
|
||||||
case NEG_Y: return -vtx.pos.y - vtx.pos.w;
|
|
||||||
|
|
||||||
// TODO: Verify z clipping
|
|
||||||
case POS_Z: return vtx.pos.z - vtx.pos.w;
|
|
||||||
|
|
||||||
default:
|
|
||||||
case NEG_Z: return -vtx.pos.w;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
float24 dp = dotpr(v0);
|
|
||||||
float24 dp_prev = dotpr(v1);
|
|
||||||
float24 factor = dp_prev / (dp_prev - dp);
|
float24 factor = dp_prev / (dp_prev - dp);
|
||||||
|
|
||||||
return OutputVertex::Lerp(factor, v0, v1);
|
return OutputVertex::Lerp(factor, v0, v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Type type;
|
|
||||||
float24 pos;
|
float24 pos;
|
||||||
|
Math::Vec4<float24> coeffs;
|
||||||
|
Math::Vec4<float24> bias;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void InitScreenCoordinates(OutputVertex& vtx)
|
static void InitScreenCoordinates(OutputVertex& vtx)
|
||||||
|
@ -98,10 +72,9 @@ static void InitScreenCoordinates(OutputVertex& vtx)
|
||||||
vtx.tc2 *= inv_w;
|
vtx.tc2 *= inv_w;
|
||||||
vtx.pos.w = inv_w;
|
vtx.pos.w = inv_w;
|
||||||
|
|
||||||
// TODO: Not sure why the viewport width needs to be divided by 2 but the viewport height does not
|
|
||||||
vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
|
vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
|
||||||
vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
|
vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
|
||||||
vtx.screenpos[2] = viewport.offset_z - vtx.pos.z * inv_w * viewport.zscale;
|
vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
|
void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
|
||||||
|
@ -117,14 +90,29 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
|
||||||
auto* output_list = &buffer_a;
|
auto* output_list = &buffer_a;
|
||||||
auto* input_list = &buffer_b;
|
auto* input_list = &buffer_b;
|
||||||
|
|
||||||
|
// NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value.
|
||||||
|
// TODO: Not sure if this is a valid approach. Also should probably instead use the smallest
|
||||||
|
// epsilon possible within float24 accuracy.
|
||||||
|
static const float24 EPSILON = float24::FromFloat32(0.00001);
|
||||||
|
static const float24 f0 = float24::FromFloat32(0.0);
|
||||||
|
static const float24 f1 = float24::FromFloat32(1.0);
|
||||||
|
static const std::array<ClippingEdge, 7> clipping_edges = {{
|
||||||
|
{ Math::MakeVec( f1, f0, f0, -f1) }, // x = +w
|
||||||
|
{ Math::MakeVec(-f1, f0, f0, -f1) }, // x = -w
|
||||||
|
{ Math::MakeVec( f0, f1, f0, -f1) }, // y = +w
|
||||||
|
{ Math::MakeVec( f0, -f1, f0, -f1) }, // y = -w
|
||||||
|
{ Math::MakeVec( f0, f0, f1, f0) }, // z = 0
|
||||||
|
{ Math::MakeVec( f0, f0, -f1, -f1) }, // z = -w
|
||||||
|
{ Math::MakeVec( f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON) }, // w = EPSILON
|
||||||
|
}};
|
||||||
|
|
||||||
|
// TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
|
||||||
|
// drop the whole primitive instead of clipping the primitive properly. We should test if
|
||||||
|
// this happens on the 3DS, too.
|
||||||
|
|
||||||
// Simple implementation of the Sutherland-Hodgman clipping algorithm.
|
// Simple implementation of the Sutherland-Hodgman clipping algorithm.
|
||||||
// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
|
// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
|
||||||
for (auto edge : { ClippingEdge(ClippingEdge::POS_X, float24::FromFloat32(+1.0)),
|
for (auto edge : clipping_edges) {
|
||||||
ClippingEdge(ClippingEdge::NEG_X, float24::FromFloat32(-1.0)),
|
|
||||||
ClippingEdge(ClippingEdge::POS_Y, float24::FromFloat32(+1.0)),
|
|
||||||
ClippingEdge(ClippingEdge::NEG_Y, float24::FromFloat32(-1.0)),
|
|
||||||
ClippingEdge(ClippingEdge::POS_Z, float24::FromFloat32(+1.0)),
|
|
||||||
ClippingEdge(ClippingEdge::NEG_Z, float24::FromFloat32(-1.0)) }) {
|
|
||||||
|
|
||||||
std::swap(input_list, output_list);
|
std::swap(input_list, output_list);
|
||||||
output_list->clear();
|
output_list->clear();
|
||||||
|
|
|
@ -106,16 +106,17 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||||
ScreenToRasterizerCoordinates(v1.screenpos),
|
ScreenToRasterizerCoordinates(v1.screenpos),
|
||||||
ScreenToRasterizerCoordinates(v2.screenpos) };
|
ScreenToRasterizerCoordinates(v2.screenpos) };
|
||||||
|
|
||||||
if (registers.cull_mode == Regs::CullMode::KeepClockWise) {
|
if (registers.cull_mode == Regs::CullMode::KeepCounterClockWise) {
|
||||||
// Reverse vertex order and use the CCW code path.
|
// Reverse vertex order and use the CW code path.
|
||||||
std::swap(vtxpos[1], vtxpos[2]);
|
std::swap(vtxpos[1], vtxpos[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (registers.cull_mode != Regs::CullMode::KeepAll) {
|
if (registers.cull_mode != Regs::CullMode::KeepAll) {
|
||||||
// Cull away triangles which are wound clockwise.
|
// Cull away triangles which are wound counter-clockwise.
|
||||||
// TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll
|
|
||||||
if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
|
if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0)
|
||||||
return;
|
return;
|
||||||
|
} else {
|
||||||
|
// TODO: Consider a check for degenerate triangles ("SignedArea == 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Proper scissor rect test!
|
// TODO: Proper scissor rect test!
|
||||||
|
@ -475,7 +476,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0,
|
||||||
|
|
||||||
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
// TODO: Does depth indeed only get written even if depth testing is enabled?
|
||||||
if (registers.output_merger.depth_test_enable) {
|
if (registers.output_merger.depth_test_enable) {
|
||||||
u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 +
|
u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
|
||||||
v1.screenpos[2].ToFloat32() * w1 +
|
v1.screenpos[2].ToFloat32() * w1 +
|
||||||
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
||||||
u16 ref_z = GetDepth(x >> 4, y >> 4);
|
u16 ref_z = GetDepth(x >> 4, y >> 4);
|
||||||
|
|
Reference in New Issue