citra-emu
/
citra-canary
Archived
1
0
Fork 0

renderer/vulkan: Emulate custom border colors in shaders when unavailable. (#6878)

This commit is contained in:
Steveice10 2023-08-17 13:22:25 -07:00 committed by GitHub
parent f3d92dd3b8
commit 6ddf4b241f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 322 additions and 195 deletions

View File

@ -12,8 +12,16 @@ set(HASH_FILES
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_util.cpp" "${VIDEO_CORE}/renderer_opengl/gl_shader_util.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_util.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_util.h"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.cpp"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.h"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.cpp"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.h"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h"
"${VIDEO_CORE}/shader/shader.cpp" "${VIDEO_CORE}/shader/shader.cpp"
"${VIDEO_CORE}/shader/shader.h" "${VIDEO_CORE}/shader/shader.h"
"${VIDEO_CORE}/shader/shader_uniforms.cpp"
"${VIDEO_CORE}/shader/shader_uniforms.h"
"${VIDEO_CORE}/pica.cpp" "${VIDEO_CORE}/pica.cpp"
"${VIDEO_CORE}/pica.h" "${VIDEO_CORE}/pica.h"
"${VIDEO_CORE}/regs_framebuffer.h" "${VIDEO_CORE}/regs_framebuffer.h"

View File

@ -599,6 +599,17 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) {
SyncTextureLodBias(2); SyncTextureLodBias(2);
break; break;
// Texture borders
case PICA_REG_INDEX(texturing.texture0.border_color):
SyncTextureBorderColor(0);
break;
case PICA_REG_INDEX(texturing.texture1.border_color):
SyncTextureBorderColor(1);
break;
case PICA_REG_INDEX(texturing.texture2.border_color):
SyncTextureBorderColor(2);
break;
// Clipping plane // Clipping plane
case PICA_REG_INDEX(rasterizer.clip_coef[0]): case PICA_REG_INDEX(rasterizer.clip_coef[0]):
case PICA_REG_INDEX(rasterizer.clip_coef[1]): case PICA_REG_INDEX(rasterizer.clip_coef[1]):
@ -821,6 +832,16 @@ void RasterizerAccelerated::SyncTextureLodBias(int tex_index) {
} }
} }
void RasterizerAccelerated::SyncTextureBorderColor(int tex_index) {
const auto pica_textures = regs.texturing.GetTextures();
const auto params = pica_textures[tex_index].config;
const Common::Vec4f border_color = ColorRGBA8(params.border_color.raw);
if (border_color != uniform_block_data.data.tex_border_color[tex_index]) {
uniform_block_data.data.tex_border_color[tex_index] = border_color;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncClipCoef() { void RasterizerAccelerated::SyncClipCoef() {
const auto raw_clip_coef = regs.rasterizer.GetClipCoef(); const auto raw_clip_coef = regs.rasterizer.GetClipCoef();
const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(), const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(),

View File

@ -97,6 +97,9 @@ protected:
/// Syncs the texture LOD bias to match the PICA register /// Syncs the texture LOD bias to match the PICA register
void SyncTextureLodBias(int tex_index); void SyncTextureLodBias(int tex_index);
/// Syncs the texture border color to match the PICA registers
void SyncTextureBorderColor(int tex_index);
/// Syncs the clip coefficients to match the PICA register /// Syncs the clip coefficients to match the PICA register
void SyncClipCoef(); void SyncClipCoef();

View File

@ -409,7 +409,9 @@ bool Instance::CreateDevice() {
const bool has_extended_dynamic_state = const bool has_extended_dynamic_state =
add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, is_arm || is_qualcomm, add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, is_arm || is_qualcomm,
"it is broken on Qualcomm and ARM drivers"); "it is broken on Qualcomm and ARM drivers");
const bool has_custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); const bool has_custom_border_color =
add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, is_qualcomm,
"it is broken on most Qualcomm driver versions");
const bool has_index_type_uint8 = add_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME); const bool has_index_type_uint8 = add_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
const bool has_pipeline_creation_cache_control = const bool has_pipeline_creation_cache_control =
add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME); add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME);

View File

@ -69,6 +69,17 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0); state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0);
const auto pica_textures = regs.texturing.GetTextures();
for (u32 tex_index = 0; tex_index < 3; tex_index++) {
const auto config = pica_textures[tex_index].config;
state.texture_border_color[tex_index].enable_s.Assign(
!instance.IsCustomBorderColorSupported() &&
config.wrap_s == TexturingRegs::TextureConfig::WrapMode::ClampToBorder);
state.texture_border_color[tex_index].enable_t.Assign(
!instance.IsCustomBorderColorSupported() &&
config.wrap_t == TexturingRegs::TextureConfig::WrapMode::ClampToBorder);
}
// Emulate logic op in the shader if not supported. This is mostly for mobile GPUs // Emulate logic op in the shader if not supported. This is mostly for mobile GPUs
const bool emulate_logic_op = instance.NeedsLogicOpEmulation() && const bool emulate_logic_op = instance.NeedsLogicOpEmulation() &&
!Pica::g_state.regs.framebuffer.output_merger.alphablend_enable; !Pica::g_state.regs.framebuffer.output_merger.alphablend_enable;
@ -284,54 +295,6 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
} }
static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) {
const auto& state = config.state;
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
switch (state.texture0_type) {
case TexturingRegs::TextureConfig::Texture2D:
return "textureLod(tex0, texcoord0, getLod(texcoord0 * "
"vec2(textureSize(tex0, 0))) + tex_lod_bias[0])";
case TexturingRegs::TextureConfig::Projection2D:
// TODO (wwylele): find the exact LOD formula for projection texture
return "textureProj(tex0, vec3(texcoord0, texcoord0_w))";
case TexturingRegs::TextureConfig::TextureCube:
return "texture(tex_cube, vec3(texcoord0, texcoord0_w))";
case TexturingRegs::TextureConfig::Shadow2D:
return "shadowTexture(texcoord0, texcoord0_w)";
case TexturingRegs::TextureConfig::ShadowCube:
return "shadowTextureCube(texcoord0, texcoord0_w)";
case TexturingRegs::TextureConfig::Disabled:
return "vec4(0.0)";
default:
LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type);
UNIMPLEMENTED();
return "texture(tex0, texcoord0)";
}
case 1:
return "textureLod(tex1, texcoord1, getLod(texcoord1 * "
"vec2(textureSize(tex1, 0))) + tex_lod_bias[1])";
case 2:
if (state.texture2_use_coord1)
return "textureLod(tex2, texcoord1, getLod(texcoord1 * "
"vec2(textureSize(tex2, 0))) + tex_lod_bias[2])";
else
return "textureLod(tex2, texcoord2, getLod(texcoord2 * "
"vec2(textureSize(tex2, 0))) + tex_lod_bias[2])";
case 3:
if (state.proctex.enable) {
return "ProcTex()";
} else {
LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it");
return "vec4(0.0)";
}
default:
UNREACHABLE();
return "";
}
}
/// Writes the specified TEV stage source component(s) /// Writes the specified TEV stage source component(s)
static void AppendSource(std::string& out, const PicaFSConfig& config, static void AppendSource(std::string& out, const PicaFSConfig& config,
TevStageConfig::Source source, std::string_view index_name) { TevStageConfig::Source source, std::string_view index_name) {
@ -347,16 +310,16 @@ static void AppendSource(std::string& out, const PicaFSConfig& config,
out += "secondary_fragment_color"; out += "secondary_fragment_color";
break; break;
case Source::Texture0: case Source::Texture0:
out += SampleTexture(config, 0); out += "sampleTexUnit0()";
break; break;
case Source::Texture1: case Source::Texture1:
out += SampleTexture(config, 1); out += "sampleTexUnit1()";
break; break;
case Source::Texture2: case Source::Texture2:
out += SampleTexture(config, 2); out += "sampleTexUnit2()";
break; break;
case Source::Texture3: case Source::Texture3:
out += SampleTexture(config, 3); out += "sampleTexUnit3()";
break; break;
case Source::PreviousBuffer: case Source::PreviousBuffer:
out += "combiner_buffer"; out += "combiner_buffer";
@ -656,7 +619,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) {
// Compute fragment normals and tangents // Compute fragment normals and tangents
const auto perturbation = [&] { const auto perturbation = [&] {
return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector)); return fmt::format("2.0 * (sampleTexUnit{}()).rgb - 1.0", lighting.bump_selector);
}; };
switch (lighting.bump_mode) { switch (lighting.bump_mode) {
@ -700,7 +663,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) {
"vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n";
if (lighting.enable_shadow) { if (lighting.enable_shadow) {
std::string shadow_texture = SampleTexture(config, lighting.shadow_selector); std::string shadow_texture = fmt::format("sampleTexUnit{}()", lighting.shadow_selector);
if (lighting.shadow_invert) { if (lighting.shadow_invert) {
out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture); out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture);
} else { } else {
@ -1310,6 +1273,7 @@ float mix2(vec4 s, vec2 a) {
vec4 shadowTexture(vec2 uv, float w) { vec4 shadowTexture(vec2 uv, float w) {
)"; )";
if (!config.state.shadow_texture_orthographic) { if (!config.state.shadow_texture_orthographic) {
out += "uv /= w;"; out += "uv /= w;";
} }
@ -1344,9 +1308,7 @@ vec4 shadowTextureCube(vec2 uv, float w) {
uv = -c.xy; uv = -c.xy;
if (c.z > 0.0) uv.x = -uv.x; if (c.z > 0.0) uv.x = -uv.x;
} }
)"; uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));
out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));";
out += R"(
vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5); vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5);
vec2 coord_floor = floor(coord); vec2 coord_floor = floor(coord);
vec2 f = coord - coord_floor; vec2 f = coord - coord_floor;
@ -1411,8 +1373,90 @@ vec4 shadowTextureCube(vec2 uv, float w) {
} }
)"; )";
if (config.state.proctex.enable) if (config.state.proctex.enable) {
AppendProcTexSampler(out, config); AppendProcTexSampler(out, config);
}
for (u32 texture_unit = 0; texture_unit < 4; texture_unit++) {
out += fmt::format("vec4 sampleTexUnit{}() {{", texture_unit);
if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) {
out += "return vec4(0.0);}";
continue;
} else if (texture_unit == 3) {
if (state.proctex.enable) {
out += "return ProcTex();}";
} else {
out += "return vec4(0.0);}";
}
continue;
}
u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit;
if (config.state.texture_border_color[texture_unit].enable_s) {
out += fmt::format(R"(
if (texcoord{}.x < 0 || texcoord{}.x > 1) {{
return tex_border_color[{}];
}}
)",
texcoord_num, texcoord_num, texture_unit);
}
if (config.state.texture_border_color[texture_unit].enable_t) {
out += fmt::format(R"(
if (texcoord{}.y < 0 || texcoord{}.y > 1) {{
return tex_border_color[{}];
}}
)",
texcoord_num, texcoord_num, texture_unit);
}
// TODO: 3D border?
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
switch (state.texture0_type) {
case TexturingRegs::TextureConfig::Texture2D:
out += "return textureLod(tex0, texcoord0, getLod(texcoord0 * "
"vec2(textureSize(tex0, 0))) + tex_lod_bias[0]);";
break;
case TexturingRegs::TextureConfig::Projection2D:
// TODO (wwylele): find the exact LOD formula for projection texture
out += "return textureProj(tex0, vec3(texcoord0, texcoord0_w));";
break;
case TexturingRegs::TextureConfig::TextureCube:
out += "return texture(tex_cube, vec3(texcoord0, texcoord0_w));";
break;
case TexturingRegs::TextureConfig::Shadow2D:
out += "return shadowTexture(texcoord0, texcoord0_w);";
break;
case TexturingRegs::TextureConfig::ShadowCube:
out += "return shadowTextureCube(texcoord0, texcoord0_w);";
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type);
UNIMPLEMENTED();
out += "return texture(tex0, texcoord0);";
break;
}
case 1:
out += "return textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, "
"0))) + tex_lod_bias[1]);";
break;
case 2:
if (state.texture2_use_coord1) {
out += "return textureLod(tex2, texcoord1, getLod(texcoord1 * "
"vec2(textureSize(tex2, 0))) + tex_lod_bias[1]);";
} else {
out += "return textureLod(tex2, texcoord2, getLod(texcoord2 * "
"vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);";
}
break;
default:
UNREACHABLE();
break;
}
out += "}";
}
// We round the interpolated primary color to the nearest 1/255th // We round the interpolated primary color to the nearest 1/255th
// This maintains the PICA's 8 bits of precision // This maintains the PICA's 8 bits of precision

View File

@ -57,6 +57,11 @@ struct PicaFSConfigState {
BitField<28, 1, u32> shadow_texture_orthographic; BitField<28, 1, u32> shadow_texture_orthographic;
}; };
union {
BitField<0, 1, u32> enable_s;
BitField<1, 1, u32> enable_t;
} texture_border_color[3];
std::array<TevStageConfigRaw, 6> tev_stages; std::array<TevStageConfigRaw, 6> tev_stages;
struct { struct {

View File

@ -21,8 +21,8 @@ FragmentModule::FragmentModule(Core::TelemetrySession& telemetry_, const PicaFSC
DefineArithmeticTypes(); DefineArithmeticTypes();
DefineUniformStructs(); DefineUniformStructs();
DefineInterface(); DefineInterface();
if (config.state.proctex.enable) { for (u32 i = 0; i < NUM_TEX_UNITS; i++) {
DefineProcTexSampler(); DefineTexSampler(i);
} }
DefineEntryPoint(); DefineEntryPoint();
} }
@ -225,7 +225,8 @@ void FragmentModule::WriteLighting() {
// Compute fragment normals and tangents // Compute fragment normals and tangents
const auto perturbation = [&]() -> Id { const auto perturbation = [&]() -> Id {
const Id texel{SampleTexture(lighting.bump_selector)}; const Id texel{
OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.bump_selector])};
const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)}; const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)};
const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))}; const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))};
return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f)); return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f));
@ -284,7 +285,7 @@ void FragmentModule::WriteLighting() {
Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)}; Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)};
if (lighting.enable_shadow) { if (lighting.enable_shadow) {
shadow = SampleTexture(lighting.shadow_selector); shadow = OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.shadow_selector]);
if (lighting.shadow_invert) { if (lighting.shadow_invert) {
shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow); shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow);
} }
@ -710,89 +711,6 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func)
} }
} }
Id FragmentModule::SampleTexture(u32 texture_unit) {
const PicaFSConfigState& state = config.state;
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
// PICA's LOD formula for 2D textures.
// This LOD formula is the same as the LOD lower limit defined in OpenGL.
// f(x, y) >= max{m_u, m_v, m_w}
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
const auto sample_lod = [this, texture_unit](Id tex_id, Id texcoord_id) {
const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)};
const Id tex_image{OpImage(image2d_id, sampled_image)};
const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
const Id dx_dy_max{
OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
const Id lod{OpLog2(f32_id, dx_dy_max)};
const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))};
const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)};
return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord,
spv::ImageOperandsMask::Lod, biased_lod);
};
const auto sample = [this](Id tex_id, bool projection) {
const Id image_type = tex_id.value == tex_cube_id.value ? image_cube_id : image2d_id;
const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)};
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
const Id coord{OpCompositeConstruct(vec_ids.Get(3),
OpCompositeExtract(f32_id, texcoord0, 0),
OpCompositeExtract(f32_id, texcoord0, 1), texcoord0_w)};
if (projection) {
return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
} else {
return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
}
};
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
switch (state.texture0_type) {
case Pica::TexturingRegs::TextureConfig::Texture2D:
return sample_lod(tex0_id, texcoord0_id);
case Pica::TexturingRegs::TextureConfig::Projection2D:
return sample(tex0_id, true);
case Pica::TexturingRegs::TextureConfig::TextureCube:
return sample(tex_cube_id, false);
case Pica::TexturingRegs::TextureConfig::Shadow2D:
return SampleShadow();
// case Pica::TexturingRegs::TextureConfig::ShadowCube:
// return "shadowTextureCube(texcoord0, texcoord0_w)";
case Pica::TexturingRegs::TextureConfig::Disabled:
return zero_vec;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
UNIMPLEMENTED();
return zero_vec;
}
case 1:
return sample_lod(tex1_id, texcoord1_id);
case 2:
if (state.texture2_use_coord1) {
return sample_lod(tex2_id, texcoord1_id);
} else {
return sample_lod(tex2_id, texcoord2_id);
}
case 3:
if (state.proctex.enable) {
return OpFunctionCall(vec_ids.Get(4), proctex_func);
} else {
LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
return zero_vec;
}
default:
UNREACHABLE();
return void_id;
}
}
Id FragmentModule::CompareShadow(Id pixel, Id z) { Id FragmentModule::CompareShadow(Id pixel, Id z) {
const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))}; const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))};
const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))}; const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))};
@ -802,7 +720,7 @@ Id FragmentModule::CompareShadow(Id pixel, Id z) {
} }
Id FragmentModule::SampleShadow() { Id FragmentModule::SampleShadow() {
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)}; const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord_id[0])};
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)}; const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w), ConstF32(1.f)), const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w), ConstF32(1.f)),
ConstF32(16777215.f))}; ConstF32(16777215.f))};
@ -941,11 +859,145 @@ Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id
return ProcTexLookupLUT(offset, combined); return ProcTexLookupLUT(offset, combined);
} }
void FragmentModule::DefineProcTexSampler() { void FragmentModule::DefineTexSampler(u32 texture_unit) {
const PicaFSConfigState& state = config.state;
const Id func_type{TypeFunction(vec_ids.Get(4))}; const Id func_type{TypeFunction(vec_ids.Get(4))};
proctex_func = OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type); sample_tex_unit_func[texture_unit] =
OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type);
AddLabel(OpLabel()); AddLabel(OpLabel());
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) {
OpReturnValue(zero_vec);
OpFunctionEnd();
return;
}
if (texture_unit == 3) {
if (state.proctex.enable) {
OpReturnValue(ProcTexSampler());
} else {
OpReturnValue(zero_vec);
}
OpFunctionEnd();
return;
}
const Id border_label{OpLabel()};
const Id not_border_label{OpLabel()};
u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit;
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[texcoord_num])};
auto& texture_border_color = state.texture_border_color[texture_unit];
if (texture_border_color.enable_s || texture_border_color.enable_t) {
const Id texcoord_s{OpCompositeExtract(f32_id, texcoord, 0)};
const Id texcoord_t{OpCompositeExtract(f32_id, texcoord, 1)};
const Id s_lt_zero{OpFOrdLessThan(bool_id, texcoord_s, ConstF32(0.0f))};
const Id s_gt_one{OpFOrdGreaterThan(bool_id, texcoord_s, ConstF32(1.0f))};
const Id t_lt_zero{OpFOrdLessThan(bool_id, texcoord_t, ConstF32(0.0f))};
const Id t_gt_one{OpFOrdGreaterThan(bool_id, texcoord_t, ConstF32(1.0f))};
Id cond{};
if (texture_border_color.enable_s && texture_border_color.enable_t) {
cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(4), s_lt_zero, s_gt_one,
t_lt_zero, t_gt_one));
} else if (texture_border_color.enable_s) {
cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), s_lt_zero, s_gt_one));
} else if (texture_border_color.enable_t) {
cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), t_lt_zero, t_gt_one));
}
OpSelectionMerge(not_border_label, spv::SelectionControlMask::MaskNone);
OpBranchConditional(cond, border_label, not_border_label);
AddLabel(border_label);
const Id border_color{
GetShaderDataMember(vec_ids.Get(4), ConstS32(29), ConstU32(texture_unit))};
OpReturnValue(border_color);
AddLabel(not_border_label);
}
// PICA's LOD formula for 2D textures.
// This LOD formula is the same as the LOD lower limit defined in OpenGL.
// f(x, y) >= max{m_u, m_v, m_w}
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
const auto sample_lod = [&](Id tex_id) {
const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)};
const Id tex_image{OpImage(image2d_id, sampled_image)};
const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
const Id dx_dy_max{
OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
const Id lod{OpLog2(f32_id, dx_dy_max)};
const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))};
const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)};
return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord,
spv::ImageOperandsMask::Lod, biased_lod);
};
const auto sample_3d = [&](Id tex_id, bool projection) {
const Id image_type = tex_id.value == tex_cube_id.value ? image_cube_id : image2d_id;
const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)};
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
const Id coord{OpCompositeConstruct(vec_ids.Get(3), OpCompositeExtract(f32_id, texcoord, 0),
OpCompositeExtract(f32_id, texcoord, 1), texcoord0_w)};
if (projection) {
return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
} else {
return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
}
};
Id ret_val{void_id};
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
switch (state.texture0_type) {
case Pica::TexturingRegs::TextureConfig::Texture2D:
ret_val = sample_lod(tex0_id);
break;
case Pica::TexturingRegs::TextureConfig::Projection2D:
ret_val = sample_3d(tex0_id, true);
break;
case Pica::TexturingRegs::TextureConfig::TextureCube:
ret_val = sample_3d(tex_cube_id, false);
break;
case Pica::TexturingRegs::TextureConfig::Shadow2D:
ret_val = SampleShadow();
// case Pica::TexturingRegs::TextureConfig::ShadowCube:
// return "shadowTextureCube(texcoord0, texcoord0_w)";
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
UNIMPLEMENTED();
ret_val = zero_vec;
break;
}
break;
case 1:
ret_val = sample_lod(tex1_id);
break;
case 2:
ret_val = sample_lod(tex2_id);
break;
default:
UNREACHABLE();
break;
}
OpReturnValue(ret_val);
OpFunctionEnd();
}
Id FragmentModule::ProcTexSampler() {
// Define noise tables at the beginning of the function // Define noise tables at the beginning of the function
if (config.state.proctex.noise_enable) { if (config.state.proctex.noise_enable) {
noise1d_table = noise1d_table =
@ -957,24 +1009,11 @@ void FragmentModule::DefineProcTexSampler() {
Id uv{}; Id uv{};
if (config.state.proctex.coord < 3) { if (config.state.proctex.coord < 3) {
Id texcoord_id{}; const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.state.proctex.coord.Value()])};
switch (config.state.proctex.coord.Value()) {
case 0:
texcoord_id = texcoord0_id;
break;
case 1:
texcoord_id = texcoord1_id;
break;
case 2:
texcoord_id = texcoord2_id;
break;
}
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
uv = OpFAbs(vec_ids.Get(2), texcoord); uv = OpFAbs(vec_ids.Get(2), texcoord);
} else { } else {
LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3"); LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3");
uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord0_id)); uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord_id[0]));
} }
// This LOD formula is the same as the LOD upper limit defined in OpenGL. // This LOD formula is the same as the LOD upper limit defined in OpenGL.
@ -1058,8 +1097,7 @@ void FragmentModule::DefineProcTexSampler() {
final_color = OpCompositeInsert(vec_ids.Get(4), final_alpha, final_color, 3); final_color = OpCompositeInsert(vec_ids.Get(4), final_alpha, final_color, 3);
} }
OpReturnValue(final_color); return final_color;
OpFunctionEnd();
} }
Id FragmentModule::Byteround(Id variable_id, u32 size) { Id FragmentModule::Byteround(Id variable_id, u32 size) {
@ -1226,13 +1264,13 @@ Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) {
case Source::SecondaryFragmentColor: case Source::SecondaryFragmentColor:
return secondary_fragment_color; return secondary_fragment_color;
case Source::Texture0: case Source::Texture0:
return SampleTexture(0); return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[0]);
case Source::Texture1: case Source::Texture1:
return SampleTexture(1); return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[1]);
case Source::Texture2: case Source::Texture2:
return SampleTexture(2); return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[2]);
case Source::Texture3: case Source::Texture3:
return SampleTexture(3); return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[3]);
case Source::PreviousBuffer: case Source::PreviousBuffer:
return combiner_buffer; return combiner_buffer;
case Source::Constant: case Source::Constant:
@ -1428,9 +1466,9 @@ void FragmentModule::DefineEntryPoint() {
const Id main_type{TypeFunction(TypeVoid())}; const Id main_type{TypeFunction(TypeVoid())};
const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)}; const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)};
AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, texcoord0_id, AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id,
texcoord1_id, texcoord2_id, texcoord0_w_id, normquat_id, view_id, color_id, texcoord_id[0], texcoord_id[1], texcoord_id[2], texcoord0_w_id, normquat_id,
gl_frag_coord_id, gl_frag_depth_id); view_id, color_id, gl_frag_coord_id, gl_frag_depth_id);
AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft); AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft);
AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing); AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing);
} }
@ -1443,21 +1481,25 @@ void FragmentModule::DefineUniformStructs() {
const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))}; const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))};
const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))}; const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))};
const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))}; const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))};
const Id border_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_NON_PROC_TEX_UNITS))};
const Id shader_data_struct_id{TypeStruct( const Id shader_data_struct_id{
i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id, i32_id, TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id,
i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id, lighting_lut_array_id, i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id,
vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3), lighting_lut_array_id, vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2),
light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(3), vec_ids.Get(4))}; vec_ids.Get(2), vec_ids.Get(3), light_src_array_id, const_color_array_id,
vec_ids.Get(4), vec_ids.Get(3), border_color_array_id, vec_ids.Get(4))};
constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u}; constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u};
constexpr std::array shader_data_offsets{ constexpr std::array shader_data_offsets{0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u,
0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, 32u, 36u, 40u, 44u, 48u, 52u, 56u, 32u, 36u, 40u, 44u, 48u, 52u, 56u, 60u,
60u, 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u, 224u, 240u, 1136u, 1232u, 1248u, 1264u}; 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u,
224u, 240u, 1136u, 1232u, 1248u, 1264u, 1312u};
Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u); Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u);
Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u); Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u);
Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u); Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u);
Decorate(border_color_array_id, spv::Decoration::ArrayStride, 16u);
for (u32 i = 0; i < static_cast<u32>(light_src_offsets.size()); i++) { for (u32 i = 0; i < static_cast<u32>(light_src_offsets.size()); i++) {
MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]); MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]);
} }
@ -1475,9 +1517,9 @@ void FragmentModule::DefineUniformStructs() {
void FragmentModule::DefineInterface() { void FragmentModule::DefineInterface() {
// Define interface block // Define interface block
primary_color_id = DefineInput(vec_ids.Get(4), 1); primary_color_id = DefineInput(vec_ids.Get(4), 1);
texcoord0_id = DefineInput(vec_ids.Get(2), 2); texcoord_id[0] = DefineInput(vec_ids.Get(2), 2);
texcoord1_id = DefineInput(vec_ids.Get(2), 3); texcoord_id[1] = DefineInput(vec_ids.Get(2), 3);
texcoord2_id = DefineInput(vec_ids.Get(2), 4); texcoord_id[2] = DefineInput(vec_ids.Get(2), 4);
texcoord0_w_id = DefineInput(f32_id, 5); texcoord0_w_id = DefineInput(f32_id, 5);
normquat_id = DefineInput(vec_ids.Get(4), 6); normquat_id = DefineInput(vec_ids.Get(4), 6);
view_id = DefineInput(vec_ids.Get(3), 7); view_id = DefineInput(vec_ids.Get(3), 7);

View File

@ -30,6 +30,8 @@ class FragmentModule : public Sirit::Module {
static constexpr u32 NUM_TEV_STAGES = 6; static constexpr u32 NUM_TEV_STAGES = 6;
static constexpr u32 NUM_LIGHTS = 8; static constexpr u32 NUM_LIGHTS = 8;
static constexpr u32 NUM_LIGHTING_SAMPLERS = 24; static constexpr u32 NUM_LIGHTING_SAMPLERS = 24;
static constexpr u32 NUM_TEX_UNITS = 4;
static constexpr u32 NUM_NON_PROC_TEX_UNITS = 3;
public: public:
explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config); explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config);
@ -57,15 +59,15 @@ private:
/// Writes the code to emulate the specified TEV stage /// Writes the code to emulate the specified TEV stage
void WriteTevStage(s32 index); void WriteTevStage(s32 index);
/// Defines the tex3 proctex sampling function /// Defines the basic texture sampling functions for a unit
void DefineProcTexSampler(); void DefineTexSampler(u32 texture_unit);
/// Function for sampling the procedurally generated texture unit.
Id ProcTexSampler();
/// Writes the if-statement condition used to evaluate alpha testing. /// Writes the if-statement condition used to evaluate alpha testing.
void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func); void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
/// Samples the current fragment texel from the provided texture unit
[[nodiscard]] Id SampleTexture(u32 texture_unit);
/// Samples the current fragment texel from shadow plane /// Samples the current fragment texel from shadow plane
[[nodiscard]] Id SampleShadow(); [[nodiscard]] Id SampleShadow();
@ -237,9 +239,7 @@ private:
Id shader_data_id{}; Id shader_data_id{};
Id primary_color_id{}; Id primary_color_id{};
Id texcoord0_id{}; Id texcoord_id[NUM_NON_PROC_TEX_UNITS]{};
Id texcoord1_id{};
Id texcoord2_id{};
Id texcoord0_w_id{}; Id texcoord0_w_id{};
Id normquat_id{}; Id normquat_id{};
Id view_id{}; Id view_id{};
@ -276,7 +276,7 @@ private:
Id alpha_results_2{}; Id alpha_results_2{};
Id alpha_results_3{}; Id alpha_results_3{};
Id proctex_func{}; Id sample_tex_unit_func[NUM_TEX_UNITS]{};
Id noise1d_table{}; Id noise1d_table{};
Id noise2d_table{}; Id noise2d_table{};
Id lut_offsets{}; Id lut_offsets{};

View File

@ -67,6 +67,7 @@ layout ({}std140) uniform shader_data {{
vec4 const_color[NUM_TEV_STAGES]; vec4 const_color[NUM_TEV_STAGES];
vec4 tev_combiner_buffer_color; vec4 tev_combiner_buffer_color;
vec3 tex_lod_bias; vec3 tex_lod_bias;
vec4 tex_border_color[3];
vec4 clip_coef; vec4 clip_coef;
}}; }};
)"; )";

View File

@ -64,10 +64,11 @@ struct UniformData {
alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) Common::Vec4f tev_combiner_buffer_color; alignas(16) Common::Vec4f tev_combiner_buffer_color;
alignas(16) Common::Vec3f tex_lod_bias; alignas(16) Common::Vec3f tex_lod_bias;
alignas(16) Common::Vec4f tex_border_color[3];
alignas(16) Common::Vec4f clip_coef; alignas(16) Common::Vec4f clip_coef;
}; };
static_assert(sizeof(UniformData) == 0x500, static_assert(sizeof(UniformData) == 0x530,
"The size of the UniformData does not match the structure in the shader"); "The size of the UniformData does not match the structure in the shader");
static_assert(sizeof(UniformData) < 16384, static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec"); "UniformData structure must be less than 16kb as per the OpenGL spec");