Merge pull request #4049 from ReinUsesLisp/separate-samplers
shader/texture: Join separate image and sampler pairs offline
This commit is contained in:
commit
c2ea1e1bcb
|
@ -93,6 +93,7 @@ public:
|
|||
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const = 0;
|
||||
virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
|
||||
virtual u32 GetBoundBuffer() const = 0;
|
||||
|
||||
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||
|
|
|
@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
|||
ASSERT(stage == ShaderType::Compute);
|
||||
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
|
||||
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
|
||||
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||
}
|
||||
|
||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
||||
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||
|
|
|
@ -219,6 +219,8 @@ public:
|
|||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||
|
||||
u32 GetBoundBuffer() const override {
|
||||
return regs.tex_cb_index;
|
||||
}
|
||||
|
|
|
@ -740,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
|
|||
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
|
||||
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
|
||||
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
|
||||
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
|
||||
}
|
||||
|
||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
||||
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||
|
|
|
@ -1404,6 +1404,8 @@ public:
|
|||
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessSampler(u32 handle) const override;
|
||||
|
||||
u32 GetBoundBuffer() const override {
|
||||
return regs.tex_cb_index;
|
||||
}
|
||||
|
|
|
@ -66,10 +66,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16;
|
|||
template <typename Engine, typename Entry>
|
||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||
ShaderType shader_type, std::size_t index = 0) {
|
||||
if (entry.is_bindless) {
|
||||
const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(tex_handle);
|
||||
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||
if (entry.is_separated) {
|
||||
const u32 buffer_1 = entry.buffer;
|
||||
const u32 buffer_2 = entry.secondary_buffer;
|
||||
const u32 offset_1 = entry.offset;
|
||||
const u32 offset_2 = entry.secondary_offset;
|
||||
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
|
||||
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
|
||||
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||
}
|
||||
}
|
||||
if (entry.is_bindless) {
|
||||
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(handle);
|
||||
}
|
||||
|
||||
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
||||
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
||||
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
||||
|
|
|
@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
|
|||
|
||||
namespace {
|
||||
|
||||
using VideoCommon::Shader::SeparateSamplerKey;
|
||||
|
||||
using ShaderCacheVersionHash = std::array<u8, 64>;
|
||||
|
||||
struct ConstBufferKey {
|
||||
|
@ -37,18 +39,26 @@ struct ConstBufferKey {
|
|||
u32 value = 0;
|
||||
};
|
||||
|
||||
struct BoundSamplerKey {
|
||||
struct BoundSamplerEntry {
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerKey {
|
||||
struct SeparateSamplerEntry {
|
||||
u32 cbuf1 = 0;
|
||||
u32 cbuf2 = 0;
|
||||
u32 offset1 = 0;
|
||||
u32 offset2 = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
struct BindlessSamplerEntry {
|
||||
u32 cbuf = 0;
|
||||
u32 offset = 0;
|
||||
Tegra::Engines::SamplerDescriptor sampler;
|
||||
};
|
||||
|
||||
constexpr u32 NativeVersion = 20;
|
||||
constexpr u32 NativeVersion = 21;
|
||||
|
||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||
ShaderCacheVersionHash hash{};
|
||||
|
@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
|||
u32 texture_handler_size_value;
|
||||
u32 num_keys;
|
||||
u32 num_bound_samplers;
|
||||
u32 num_separate_samplers;
|
||||
u32 num_bindless_samplers;
|
||||
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
|
||||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
|
||||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
|
||||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
|
||||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
|
||||
file.ReadArray(&num_separate_samplers, 1) != 1 ||
|
||||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
|
||||
return false;
|
||||
}
|
||||
|
@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
|
|||
}
|
||||
|
||||
std::vector<ConstBufferKey> flat_keys(num_keys);
|
||||
std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
|
||||
std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
|
||||
std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
|
||||
std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
|
||||
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
|
||||
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
|
||||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
|
||||
flat_bound_samplers.size() ||
|
||||
file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
|
||||
flat_separate_samplers.size() ||
|
||||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
|
||||
flat_bindless_samplers.size()) {
|
||||
return false;
|
||||
}
|
||||
for (const auto& key : flat_keys) {
|
||||
keys.insert({{key.cbuf, key.offset}, key.value});
|
||||
for (const auto& entry : flat_keys) {
|
||||
keys.insert({{entry.cbuf, entry.offset}, entry.value});
|
||||
}
|
||||
for (const auto& key : flat_bound_samplers) {
|
||||
bound_samplers.emplace(key.offset, key.sampler);
|
||||
for (const auto& entry : flat_bound_samplers) {
|
||||
bound_samplers.emplace(entry.offset, entry.sampler);
|
||||
}
|
||||
for (const auto& key : flat_bindless_samplers) {
|
||||
bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
|
||||
for (const auto& entry : flat_separate_samplers) {
|
||||
SeparateSamplerKey key;
|
||||
key.buffers = {entry.cbuf1, entry.cbuf2};
|
||||
key.offsets = {entry.offset1, entry.offset2};
|
||||
separate_samplers.emplace(key, entry.sampler);
|
||||
}
|
||||
for (const auto& entry : flat_bindless_samplers) {
|
||||
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
|||
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
|
||||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
|
||||
return false;
|
||||
}
|
||||
|
@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
|
|||
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
|
||||
}
|
||||
|
||||
std::vector<BoundSamplerKey> flat_bound_samplers;
|
||||
std::vector<BoundSamplerEntry> flat_bound_samplers;
|
||||
flat_bound_samplers.reserve(bound_samplers.size());
|
||||
for (const auto& [address, sampler] : bound_samplers) {
|
||||
flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
|
||||
flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
|
||||
}
|
||||
|
||||
std::vector<BindlessSamplerKey> flat_bindless_samplers;
|
||||
std::vector<SeparateSamplerEntry> flat_separate_samplers;
|
||||
flat_separate_samplers.reserve(separate_samplers.size());
|
||||
for (const auto& [key, sampler] : separate_samplers) {
|
||||
SeparateSamplerEntry entry;
|
||||
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
|
||||
std::tie(entry.offset1, entry.offset2) = key.offsets;
|
||||
entry.sampler = sampler;
|
||||
flat_separate_samplers.push_back(entry);
|
||||
}
|
||||
|
||||
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
|
||||
flat_bindless_samplers.reserve(bindless_samplers.size());
|
||||
for (const auto& [address, sampler] : bindless_samplers) {
|
||||
flat_bindless_samplers.push_back(
|
||||
BindlessSamplerKey{address.first, address.second, sampler});
|
||||
BindlessSamplerEntry{address.first, address.second, sampler});
|
||||
}
|
||||
|
||||
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
|
||||
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
|
||||
flat_bound_samplers.size() &&
|
||||
file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
|
||||
flat_separate_samplers.size() &&
|
||||
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
|
||||
flat_bindless_samplers.size();
|
||||
}
|
||||
|
|
|
@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
|
|||
VideoCommon::Shader::ComputeInfo compute_info;
|
||||
VideoCommon::Shader::KeyMap keys;
|
||||
VideoCommon::Shader::BoundSamplerMap bound_samplers;
|
||||
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
|
||||
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
|
||||
};
|
||||
|
||||
|
|
|
@ -118,6 +118,17 @@ template <typename Engine, typename Entry>
|
|||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||
std::size_t stage, std::size_t index = 0) {
|
||||
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
|
||||
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||
if (entry.is_separated) {
|
||||
const u32 buffer_1 = entry.buffer;
|
||||
const u32 buffer_2 = entry.secondary_buffer;
|
||||
const u32 offset_1 = entry.offset;
|
||||
const u32 offset_2 = entry.secondary_offset;
|
||||
const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
|
||||
const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
|
||||
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||
}
|
||||
}
|
||||
if (entry.is_bindless) {
|
||||
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(tex_handle);
|
||||
|
|
|
@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
|||
return pc;
|
||||
}
|
||||
|
||||
ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
|
||||
std::optional<u32> buffer) {
|
||||
ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
|
||||
SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
|
||||
if (info.IsComplete()) {
|
||||
return info;
|
||||
}
|
||||
const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
|
||||
: registry.ObtainBoundSampler(offset);
|
||||
if (!sampler) {
|
||||
LOG_WARNING(HW_GPU, "Unknown sampler info");
|
||||
info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
|
||||
|
@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
|
|||
|
||||
std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
|
||||
SamplerInfo sampler_info) {
|
||||
const auto offset = static_cast<u32>(sampler.index.Value());
|
||||
const auto info = GetSamplerInfo(sampler_info, offset);
|
||||
const u32 offset = static_cast<u32>(sampler.index.Value());
|
||||
const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||
|
@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
|
|||
const Node sampler_register = GetRegister(reg);
|
||||
const auto [base_node, tracked_sampler_info] =
|
||||
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
|
||||
ASSERT(base_node != nullptr);
|
||||
if (base_node == nullptr) {
|
||||
if (!base_node) {
|
||||
UNREACHABLE();
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (const auto bindless_sampler_info =
|
||||
std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
|
||||
const u32 buffer = bindless_sampler_info->GetIndex();
|
||||
const u32 offset = bindless_sampler_info->GetOffset();
|
||||
info = GetSamplerInfo(info, offset, buffer);
|
||||
if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
|
||||
const u32 buffer = sampler_info->index;
|
||||
const u32 offset = sampler_info->offset;
|
||||
info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
|
||||
[buffer = buffer, offset = offset](const Sampler& entry) {
|
||||
[buffer, offset](const Sampler& entry) {
|
||||
return entry.buffer == buffer && entry.offset == offset;
|
||||
});
|
||||
if (it != used_samplers.end()) {
|
||||
|
@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
|
|||
return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
|
||||
*info.is_shadow, *info.is_buffer, false);
|
||||
}
|
||||
if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
|
||||
const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
|
||||
index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
|
||||
info = GetSamplerInfo(info, base_offset);
|
||||
if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
|
||||
const std::pair indices = sampler_info->indices;
|
||||
const std::pair offsets = sampler_info->offsets;
|
||||
info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
|
||||
|
||||
// Try to use an already created sampler if it exists
|
||||
const auto it = std::find_if(
|
||||
used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
|
||||
return offsets == std::pair{entry.offset, entry.secondary_offset} &&
|
||||
indices == std::pair{entry.buffer, entry.secondary_buffer};
|
||||
});
|
||||
if (it != used_samplers.end()) {
|
||||
ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
|
||||
it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
|
||||
return *it;
|
||||
}
|
||||
|
||||
// Otherwise create a new mapping for this sampler
|
||||
const u32 next_index = static_cast<u32>(used_samplers.size());
|
||||
return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
|
||||
*info.is_shadow, *info.is_buffer);
|
||||
}
|
||||
if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
|
||||
const u32 base_offset = sampler_info->base_offset / 4;
|
||||
index_var = GetCustomVariable(sampler_info->bindless_var);
|
||||
info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
|
||||
|
||||
// If this sampler has already been used, return the existing mapping.
|
||||
const auto it = std::find_if(
|
||||
|
|
|
@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
|
|||
using Node4 = std::array<Node, 4>;
|
||||
using NodeBlock = std::vector<Node>;
|
||||
|
||||
class BindlessSamplerNode;
|
||||
class ArraySamplerNode;
|
||||
struct ArraySamplerNode;
|
||||
struct BindlessSamplerNode;
|
||||
struct SeparateSamplerNode;
|
||||
|
||||
using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
|
||||
using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
|
||||
using TrackSampler = std::shared_ptr<TrackSamplerData>;
|
||||
|
||||
struct Sampler {
|
||||
|
@ -288,6 +289,14 @@ struct Sampler {
|
|||
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
|
||||
is_buffer{is_buffer}, is_indexed{is_indexed} {}
|
||||
|
||||
/// Separate sampler constructor
|
||||
constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
|
||||
Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
|
||||
bool is_buffer)
|
||||
: index{index}, offset{offsets.first}, secondary_offset{offsets.second},
|
||||
buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
|
||||
is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
|
||||
|
||||
/// Bindless samplers constructor
|
||||
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
|
||||
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
|
||||
|
@ -296,7 +305,9 @@ struct Sampler {
|
|||
|
||||
u32 index = 0; ///< Emulated index given for the this sampler.
|
||||
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
|
||||
u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
|
||||
u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
|
||||
u32 buffer = 0; ///< Buffer where the bindless sampler is read.
|
||||
u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
|
||||
u32 size = 1; ///< Size of the sampler.
|
||||
|
||||
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
|
||||
|
@ -305,46 +316,24 @@ struct Sampler {
|
|||
bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
|
||||
bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
|
||||
bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
|
||||
bool is_separated = false; ///< Whether the image and sampler is separated or not.
|
||||
};
|
||||
|
||||
/// Represents a tracked bindless sampler into a direct const buffer
|
||||
class ArraySamplerNode final {
|
||||
public:
|
||||
explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
|
||||
: index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
|
||||
|
||||
constexpr u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
constexpr u32 GetBaseOffset() const {
|
||||
return base_offset;
|
||||
}
|
||||
|
||||
constexpr u32 GetIndexVar() const {
|
||||
return bindless_var;
|
||||
}
|
||||
|
||||
private:
|
||||
struct ArraySamplerNode {
|
||||
u32 index;
|
||||
u32 base_offset;
|
||||
u32 bindless_var;
|
||||
};
|
||||
|
||||
/// Represents a tracked separate sampler image pair that was folded statically
|
||||
struct SeparateSamplerNode {
|
||||
std::pair<u32, u32> indices;
|
||||
std::pair<u32, u32> offsets;
|
||||
};
|
||||
|
||||
/// Represents a tracked bindless sampler into a direct const buffer
|
||||
class BindlessSamplerNode final {
|
||||
public:
|
||||
explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
|
||||
|
||||
constexpr u32 GetIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
constexpr u32 GetOffset() const {
|
||||
return offset;
|
||||
}
|
||||
|
||||
private:
|
||||
struct BindlessSamplerNode {
|
||||
u32 index;
|
||||
u32 offset;
|
||||
};
|
||||
|
|
|
@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
|
|||
template <typename T, typename... Args>
|
||||
TrackSampler MakeTrackSampler(Args&&... args) {
|
||||
static_assert(std::is_convertible_v<T, TrackSamplerData>);
|
||||
return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
|
||||
return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
|
|
|
@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
|
|||
return value;
|
||||
}
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
|
||||
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
|
||||
SeparateSamplerKey key;
|
||||
key.buffers = buffers;
|
||||
key.offsets = offsets;
|
||||
const auto iter = separate_samplers.find(key);
|
||||
if (iter != separate_samplers.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
if (!engine) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
|
||||
const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
|
||||
const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
|
||||
separate_samplers.emplace(key, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
|
||||
u32 offset) {
|
||||
const std::pair key = {buffer, offset};
|
||||
|
|
|
@ -19,8 +19,39 @@
|
|||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
struct SeparateSamplerKey {
|
||||
std::pair<u32, u32> buffers;
|
||||
std::pair<u32, u32> offsets;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<VideoCommon::Shader::SeparateSamplerKey> {
|
||||
std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
|
||||
return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
|
||||
key.offsets.second);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
|
||||
bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
|
||||
const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
|
||||
return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
|
||||
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
|
||||
using SeparateSamplerMap =
|
||||
std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
|
||||
using BindlessSamplerMap =
|
||||
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
|
||||
|
||||
|
@ -73,6 +104,9 @@ public:
|
|||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
|
||||
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
|
||||
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
|
||||
|
||||
/// Inserts a key.
|
||||
|
@ -128,6 +162,7 @@ private:
|
|||
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
||||
KeyMap keys;
|
||||
BoundSamplerMap bound_samplers;
|
||||
SeparateSamplerMap separate_samplers;
|
||||
BindlessSamplerMap bindless_samplers;
|
||||
u32 bound_buffer;
|
||||
GraphicsInfo graphics_info;
|
||||
|
|
|
@ -330,8 +330,8 @@ private:
|
|||
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
|
||||
|
||||
/// Queries the missing sampler info from the execution context.
|
||||
SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset,
|
||||
std::optional<u32> buffer = std::nullopt);
|
||||
SamplerInfo GetSamplerInfo(SamplerInfo info,
|
||||
std::optional<Tegra::Engines::SamplerDescriptor> sampler);
|
||||
|
||||
/// Accesses a texture sampler.
|
||||
std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
|
||||
|
@ -409,7 +409,13 @@ private:
|
|||
|
||||
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||
|
||||
std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
s64 cursor);
|
||||
|
||||
std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
|
||||
const OperationNode& operation,
|
||||
Node gpr, Node base_offset,
|
||||
Node tracked, const NodeBlock& code,
|
||||
s64 cursor);
|
||||
|
||||
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
namespace VideoCommon::Shader {
|
||||
|
||||
namespace {
|
||||
|
||||
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
|
||||
OperationCode operation_code) {
|
||||
for (; cursor >= 0; --cursor) {
|
||||
|
@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
|
|||
if (const auto operation = std::get_if<OperationNode>(&*node)) {
|
||||
operation->SetAmendIndex(amend_index);
|
||||
return true;
|
||||
} else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
||||
}
|
||||
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
|
||||
conditional->SetAmendIndex(amend_index);
|
||||
return true;
|
||||
}
|
||||
|
@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
|
|||
|
||||
} // Anonymous namespace
|
||||
|
||||
std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
|
||||
s64 cursor) {
|
||||
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
|
||||
const u32 cbuf_index = cbuf->GetIndex();
|
||||
|
||||
// Constant buffer found, test if it's an immediate
|
||||
const auto& offset = cbuf->GetOffset();
|
||||
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
|
||||
auto track =
|
||||
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
|
||||
auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
|
||||
return {tracked, track};
|
||||
}
|
||||
if (const auto operation = std::get_if<OperationNode>(&*offset)) {
|
||||
const u32 bound_buffer = registry.GetBoundBuffer();
|
||||
if (bound_buffer != cbuf->GetIndex()) {
|
||||
return {};
|
||||
}
|
||||
const auto pair = DecoupleIndirectRead(*operation);
|
||||
if (!pair) {
|
||||
if (bound_buffer != cbuf_index) {
|
||||
return {};
|
||||
}
|
||||
if (const std::optional pair = DecoupleIndirectRead(*operation)) {
|
||||
auto [gpr, base_offset] = *pair;
|
||||
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
|
||||
const auto& gpu_driver = registry.AccessGuestDriverProfile();
|
||||
const u32 bindless_cv = NewCustomVariable();
|
||||
Node op =
|
||||
Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
|
||||
|
||||
const Node cv_node = GetCustomVariable(bindless_cv);
|
||||
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
|
||||
const std::size_t amend_index = DeclareAmend(std::move(amend_op));
|
||||
AmendNodeCv(amend_index, code[cursor]);
|
||||
// TODO Implement Bindless Index custom variable
|
||||
auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
|
||||
offset_inm->GetValue(), bindless_cv);
|
||||
return {tracked, track};
|
||||
return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
|
||||
code, cursor);
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
|
|||
return TrackBindlessSampler(source, code, new_cursor);
|
||||
}
|
||||
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
|
||||
for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
|
||||
if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
|
||||
std::get<0>(found)) {
|
||||
// Cbuf found in operand.
|
||||
const OperationNode& op = *operation;
|
||||
|
||||
const OperationCode opcode = operation->GetCode();
|
||||
if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
|
||||
ASSERT(op.GetOperandsCount() == 2);
|
||||
auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
|
||||
auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
|
||||
if (node_a && node_b) {
|
||||
auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
|
||||
std::pair{offset_a, offset_b});
|
||||
return {tracked, std::move(track)};
|
||||
}
|
||||
}
|
||||
std::size_t i = op.GetOperandsCount();
|
||||
while (i--) {
|
||||
if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
|
||||
// Constant buffer found in operand.
|
||||
return found;
|
||||
}
|
||||
}
|
||||
|
@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
|
|||
return {};
|
||||
}
|
||||
|
||||
std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
|
||||
const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
|
||||
const NodeBlock& code, s64 cursor) {
|
||||
const auto offset_imm = std::get<ImmediateNode>(*base_offset);
|
||||
const auto& gpu_driver = registry.AccessGuestDriverProfile();
|
||||
const u32 bindless_cv = NewCustomVariable();
|
||||
const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
|
||||
Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
|
||||
|
||||
Node cv_node = GetCustomVariable(bindless_cv);
|
||||
Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
|
||||
const std::size_t amend_index = DeclareAmend(std::move(amend_op));
|
||||
AmendNodeCv(amend_index, code[cursor]);
|
||||
|
||||
// TODO: Implement bindless index custom variable
|
||||
auto track =
|
||||
MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
|
||||
return {tracked, track};
|
||||
}
|
||||
|
||||
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
|
||||
s64 cursor) const {
|
||||
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
|
||||
|
|
Reference in New Issue