GPU: Implement guest driver profile and deduce texture handler sizes.
This commit is contained in:
parent
a104b985a8
commit
c921e496eb
|
@ -29,6 +29,8 @@ add_library(video_core STATIC
|
||||||
gpu_synch.h
|
gpu_synch.h
|
||||||
gpu_thread.cpp
|
gpu_thread.cpp
|
||||||
gpu_thread.h
|
gpu_thread.h
|
||||||
|
guest_driver.cpp
|
||||||
|
guest_driver.h
|
||||||
macro_interpreter.cpp
|
macro_interpreter.cpp
|
||||||
macro_interpreter.h
|
macro_interpreter.h
|
||||||
memory_manager.cpp
|
memory_manager.cpp
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/shader_bytecode.h"
|
#include "video_core/engines/shader_bytecode.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
|
#include "video_core/guest_driver.h"
|
||||||
#include "video_core/textures/texture.h"
|
#include "video_core/textures/texture.h"
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
@ -106,6 +107,8 @@ public:
|
||||||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||||
u64 offset) const = 0;
|
u64 offset) const = 0;
|
||||||
virtual u32 GetBoundBuffer() const = 0;
|
virtual u32 GetBoundBuffer() const = 0;
|
||||||
|
|
||||||
|
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Tegra::Engines
|
} // namespace Tegra::Engines
|
||||||
|
|
|
@ -94,6 +94,10 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards to the rasterizer and returns the guest driver profile it exposes.
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
|
||||||
|
return rasterizer.AccessGuestDriverProfile();
|
||||||
|
}
|
||||||
|
|
||||||
void KeplerCompute::ProcessLaunch() {
|
void KeplerCompute::ProcessLaunch() {
|
||||||
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
||||||
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
||||||
|
|
|
@ -218,6 +218,8 @@ public:
|
||||||
return regs.tex_cb_index;
|
return regs.tex_cb_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
|
|
@ -784,4 +784,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards to the rasterizer and returns the guest driver profile it exposes.
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
|
||||||
|
return rasterizer.AccessGuestDriverProfile();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Tegra::Engines
|
} // namespace Tegra::Engines
|
||||||
|
|
|
@ -1306,6 +1306,8 @@ public:
|
||||||
return regs.tex_cb_index;
|
return regs.tex_cb_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
|
||||||
|
|
||||||
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
||||||
/// we've seen used.
|
/// we've seen used.
|
||||||
using MacroMemory = std::array<u32, 0x40000>;
|
using MacroMemory = std::array<u32, 0x40000>;
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
// Copyright 2019 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
#include <limits>

#include "video_core/guest_driver.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
|
||||||
|
if (texture_handler_size_deduced) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::size_t size = bound_offsets.size();
|
||||||
|
if (size < 2) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::sort(bound_offsets.begin(), bound_offsets.end(),
|
||||||
|
[](const u32& a, const u32& b) { return a < b; });
|
||||||
|
u32 min_val = 0xFFFFFFFF; // set to highest possible 32 bit integer;
|
||||||
|
for (std::size_t i = 1; i < size; i++) {
|
||||||
|
if (bound_offsets[i] == bound_offsets[i - 1]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
|
||||||
|
min_val = std::min(min_val, new_min);
|
||||||
|
}
|
||||||
|
if (min_val > 2) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
texture_handler_size_deduced = true;
|
||||||
|
texture_handler_size = sizeof(u32) * min_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -0,0 +1,37 @@
|
||||||
|
// Copyright 2019 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
/**
 * The GuestDriverProfile class is used to learn about the guest GPU driver's behavior and to
 * collect the information required by unavoidable HLE methods such as shader tracking.
 */
|
||||||
|
class GuestDriverProfile {
|
||||||
|
public:
|
||||||
|
/// Returns the current texture handler size. This is the default value until
/// DeduceTextureHandlerSize succeeds in deducing another one.
u32 GetTextureHandlerSize() const {
|
||||||
|
return texture_handler_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true once the texture handler size has been deduced.
bool TextureHandlerSizeKnown() const {
|
||||||
|
return texture_handler_size_deduced;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tries to deduce the texture handler size from the offsets of bound samplers.
/// Has no effect once a size has already been deduced.
void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// The default matches the Vulkan and OpenGL standards, but Nvidia GPUs can easily
|
||||||
|
// use 4 bytes instead. Thus, certain drivers may shrink the handler size.
|
||||||
|
static constexpr u32 default_texture_handler_size = 8;
|
||||||
|
u32 texture_handler_size{default_texture_handler_size};
|
||||||
|
bool texture_handler_size_deduced{};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
|
@ -9,6 +9,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/guest_driver.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
class MemoryManager;
|
class MemoryManager;
|
||||||
|
@ -78,5 +79,12 @@ public:
|
||||||
/// Initialize disk cached resources for the game being emulated
|
/// Initialize disk cached resources for the game being emulated
|
||||||
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
|
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
|
||||||
const DiskResourceLoadCallback& callback = {}) {}
|
const DiskResourceLoadCallback& callback = {}) {}
|
||||||
|
|
||||||
|
/// Returns a mutable reference to the guest driver profile stored in this object.
GuestDriverProfile& AccessGuestDriverProfile() {
|
||||||
|
return guest_driver_profile;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
GuestDriverProfile guest_driver_profile{};
|
||||||
};
|
};
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include "common/hash.h"
|
#include "common/hash.h"
|
||||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
|
#include "video_core/guest_driver.h"
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
@ -71,6 +72,13 @@ public:
|
||||||
return bindless_samplers;
|
return bindless_samplers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a pointer to the engine's guest driver profile, or nullptr when no engine is set.
VideoCore::GuestDriverProfile* AccessGuestDriverProfile() {
    return engine != nullptr ? &engine->AccessGuestDriverProfile() : nullptr;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Tegra::Engines::ShaderType stage;
|
const Tegra::Engines::ShaderType stage;
|
||||||
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
||||||
|
|
|
@ -315,4 +315,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
||||||
return pc + 1;
|
return pc + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ShaderIR::PostDecode() {
|
||||||
|
// Deduce texture handler size if needed
|
||||||
|
auto* gpu_driver = locker.AccessGuestDriverProfile();
|
||||||
|
if (gpu_driver) {
|
||||||
|
if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) {
|
||||||
|
u32 count{};
|
||||||
|
std::vector<u32> bound_offsets;
|
||||||
|
for (const auto& sampler : used_samplers) {
|
||||||
|
if (sampler.IsBindless()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
bound_offsets.emplace_back(sampler.GetOffset());
|
||||||
|
}
|
||||||
|
if (count > 1) {
|
||||||
|
gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace VideoCommon::Shader
|
} // namespace VideoCommon::Shader
|
||||||
|
|
|
@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
|
||||||
ConstBufferLocker& locker)
|
ConstBufferLocker& locker)
|
||||||
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
|
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
|
||||||
Decode();
|
Decode();
|
||||||
|
PostDecode();
|
||||||
}
|
}
|
||||||
|
|
||||||
ShaderIR::~ShaderIR() = default;
|
ShaderIR::~ShaderIR() = default;
|
||||||
|
|
|
@ -191,6 +191,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
void Decode();
|
void Decode();
|
||||||
|
void PostDecode();
|
||||||
|
|
||||||
NodeBlock DecodeRange(u32 begin, u32 end);
|
NodeBlock DecodeRange(u32 begin, u32 end);
|
||||||
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
|
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
|
||||||
|
|
Reference in New Issue