Merge pull request #1290 from FernandoS27/shader-header
Implemented (Partially) Shader Header
This commit is contained in:
commit
fafc80d72e
|
@ -14,6 +14,7 @@ add_library(video_core STATIC
|
||||||
engines/maxwell_dma.cpp
|
engines/maxwell_dma.cpp
|
||||||
engines/maxwell_dma.h
|
engines/maxwell_dma.h
|
||||||
engines/shader_bytecode.h
|
engines/shader_bytecode.h
|
||||||
|
engines/shader_header.h
|
||||||
gpu.cpp
|
gpu.cpp
|
||||||
gpu.h
|
gpu.h
|
||||||
macro_interpreter.cpp
|
macro_interpreter.cpp
|
||||||
|
|
|
@ -0,0 +1,103 @@
|
||||||
|
// Copyright 2018 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/bit_field.h"
|
||||||
|
#include "common/common_funcs.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Tegra::Shader {
|
||||||
|
|
||||||
|
/// Primitive topologies that can be encoded in the 4-bit `output_topology` field of the
/// shader program header (see Header::common3).
enum class OutputTopology : u32 {
    PointList = 0x1,
    LineStrip = 0x6,
    TriangleStrip = 0x7,
};
|
||||||
|
|
||||||
|
// Documentation in:
|
||||||
|
// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
|
||||||
|
struct Header {
|
||||||
|
union {
|
||||||
|
BitField<0, 5, u32> sph_type;
|
||||||
|
BitField<5, 5, u32> version;
|
||||||
|
BitField<10, 4, u32> shader_type;
|
||||||
|
BitField<14, 1, u32> mrt_enable;
|
||||||
|
BitField<15, 1, u32> kills_pixels;
|
||||||
|
BitField<16, 1, u32> does_global_store;
|
||||||
|
BitField<17, 4, u32> sass_version;
|
||||||
|
BitField<21, 5, u32> reserved;
|
||||||
|
BitField<26, 1, u32> does_load_or_store;
|
||||||
|
BitField<27, 1, u32> does_fp64;
|
||||||
|
BitField<28, 4, u32> stream_out_mask;
|
||||||
|
} common0;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<0, 24, u32> shader_local_memory_low_size;
|
||||||
|
BitField<24, 8, u32> per_patch_attribute_count;
|
||||||
|
} common1;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<0, 24, u32> shader_local_memory_high_size;
|
||||||
|
BitField<24, 8, u32> threads_per_input_primitive;
|
||||||
|
} common2;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<0, 24, u32> shader_local_memory_crs_size;
|
||||||
|
BitField<24, 4, OutputTopology> output_topology;
|
||||||
|
BitField<28, 4, u32> reserved;
|
||||||
|
} common3;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<0, 12, u32> max_output_vertices;
|
||||||
|
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
|
||||||
|
BitField<24, 4, u32> reserved;
|
||||||
|
BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
|
||||||
|
} common4;
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
|
||||||
|
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
|
||||||
|
INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
|
||||||
|
INSERT_PADDING_BYTES(2); // ImapColor
|
||||||
|
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
|
||||||
|
INSERT_PADDING_BYTES(5); // ImapFixedFncTexture[10]
|
||||||
|
INSERT_PADDING_BYTES(1); // ImapReserved
|
||||||
|
INSERT_PADDING_BYTES(3); // OmapSystemValuesA
|
||||||
|
INSERT_PADDING_BYTES(1); // OmapSystemValuesB
|
||||||
|
INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
|
||||||
|
INSERT_PADDING_BYTES(2); // OmapColor
|
||||||
|
INSERT_PADDING_BYTES(2); // OmapSystemValuesC
|
||||||
|
INSERT_PADDING_BYTES(5); // OmapFixedFncTexture[10]
|
||||||
|
INSERT_PADDING_BYTES(1); // OmapReserved
|
||||||
|
} vtg;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
INSERT_PADDING_BYTES(3); // ImapSystemValuesA
|
||||||
|
INSERT_PADDING_BYTES(1); // ImapSystemValuesB
|
||||||
|
INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
|
||||||
|
INSERT_PADDING_BYTES(2); // ImapColor
|
||||||
|
INSERT_PADDING_BYTES(2); // ImapSystemValuesC
|
||||||
|
INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
|
||||||
|
INSERT_PADDING_BYTES(2); // ImapReserved
|
||||||
|
struct {
|
||||||
|
u32 target;
|
||||||
|
union {
|
||||||
|
BitField<0, 1, u32> sample_mask;
|
||||||
|
BitField<1, 1, u32> depth;
|
||||||
|
BitField<2, 30, u32> reserved;
|
||||||
|
};
|
||||||
|
} omap;
|
||||||
|
bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
|
||||||
|
const u32 bit = render_target * 4 + component;
|
||||||
|
return omap.target & (1 << bit);
|
||||||
|
}
|
||||||
|
} ps;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
|
||||||
|
|
||||||
|
} // namespace Tegra::Shader
|
|
@ -12,6 +12,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/shader_bytecode.h"
|
#include "video_core/engines/shader_bytecode.h"
|
||||||
|
#include "video_core/engines/shader_header.h"
|
||||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||||
|
|
||||||
|
@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
|
||||||
using Tegra::Shader::SubOp;
|
using Tegra::Shader::SubOp;
|
||||||
|
|
||||||
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
|
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
|
||||||
constexpr u32 PROGRAM_HEADER_SIZE = 0x50;
|
constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
|
||||||
|
|
||||||
class DecompileFail : public std::runtime_error {
|
class DecompileFail : public std::runtime_error {
|
||||||
public:
|
public:
|
||||||
|
@ -674,7 +675,7 @@ public:
|
||||||
u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
|
u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
|
||||||
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
|
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
|
||||||
stage(stage), suffix(suffix) {
|
stage(stage), suffix(suffix) {
|
||||||
|
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
|
||||||
Generate(suffix);
|
Generate(suffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -688,23 +689,6 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Shader program header for a Fragment Shader.
|
|
||||||
struct FragmentHeader {
|
|
||||||
INSERT_PADDING_WORDS(5);
|
|
||||||
INSERT_PADDING_WORDS(13);
|
|
||||||
u32 enabled_color_outputs;
|
|
||||||
union {
|
|
||||||
BitField<0, 1, u32> writes_samplemask;
|
|
||||||
BitField<1, 1, u32> writes_depth;
|
|
||||||
};
|
|
||||||
|
|
||||||
bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
|
|
||||||
const u32 bit = render_target * 4 + component;
|
|
||||||
return enabled_color_outputs & (1 << bit);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
|
|
||||||
|
|
||||||
/// Gets the Subroutine object corresponding to the specified address.
|
/// Gets the Subroutine object corresponding to the specified address.
|
||||||
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
|
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
|
||||||
const auto iter = subroutines.find(Subroutine{begin, end, suffix});
|
const auto iter = subroutines.find(Subroutine{begin, end, suffix});
|
||||||
|
@ -1010,10 +994,8 @@ private:
|
||||||
/// Writes the output values from a fragment shader to the corresponding GLSL output variables.
|
/// Writes the output values from a fragment shader to the corresponding GLSL output variables.
|
||||||
void EmitFragmentOutputsWrite() {
|
void EmitFragmentOutputsWrite() {
|
||||||
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
|
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
|
||||||
FragmentHeader header;
|
|
||||||
std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
|
|
||||||
|
|
||||||
ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented");
|
ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
|
||||||
|
|
||||||
// Write the color outputs using the data in the shader registers, disabled
|
// Write the color outputs using the data in the shader registers, disabled
|
||||||
// rendertargets/components are skipped in the register assignment.
|
// rendertargets/components are skipped in the register assignment.
|
||||||
|
@ -1022,7 +1004,7 @@ private:
|
||||||
++render_target) {
|
++render_target) {
|
||||||
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
|
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
|
||||||
for (u32 component = 0; component < 4; ++component) {
|
for (u32 component = 0; component < 4; ++component) {
|
||||||
if (header.IsColorComponentOutputEnabled(render_target, component)) {
|
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
|
||||||
shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
|
shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
|
||||||
regs.GetRegisterAsFloat(current_reg)));
|
regs.GetRegisterAsFloat(current_reg)));
|
||||||
++current_reg;
|
++current_reg;
|
||||||
|
@ -1030,7 +1012,7 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (header.writes_depth) {
|
if (header.ps.omap.depth) {
|
||||||
// The depth output is always 2 registers after the last color output, and current_reg
|
// The depth output is always 2 registers after the last color output, and current_reg
|
||||||
// already contains one past the last color register.
|
// already contains one past the last color register.
|
||||||
|
|
||||||
|
@ -2666,6 +2648,7 @@ private:
|
||||||
private:
|
private:
|
||||||
const std::set<Subroutine>& subroutines;
|
const std::set<Subroutine>& subroutines;
|
||||||
const ProgramCode& program_code;
|
const ProgramCode& program_code;
|
||||||
|
Tegra::Shader::Header header;
|
||||||
const u32 main_offset;
|
const u32 main_offset;
|
||||||
Maxwell3D::Regs::ShaderStage stage;
|
Maxwell3D::Regs::ShaderStage stage;
|
||||||
const std::string& suffix;
|
const std::string& suffix;
|
||||||
|
|
Reference in New Issue