renderer_vulkan: Import host memory for screenshots (#7132)
This commit is contained in:
parent
23ca10472a
commit
831c9c4a38
|
@ -8,12 +8,11 @@
|
|||
#include <sysinfoapi.h>
|
||||
#else
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#if defined(__APPLE__) || defined(__FreeBSD__)
|
||||
#include <sys/sysctl.h>
|
||||
#elif defined(__linux__)
|
||||
#include <sys/sysinfo.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -64,4 +63,14 @@ const MemoryInfo GetMemInfo() {
|
|||
return mem_info;
|
||||
}
|
||||
|
||||
/// Queries the operating system for the size of a memory page, in bytes.
/// Windows reports it through GetSystemInfo; every other platform uses sysconf(_SC_PAGESIZE).
u64 GetPageSize() {
#ifndef _WIN32
    const auto page_size = sysconf(_SC_PAGESIZE);
    return static_cast<u64>(page_size);
#else
    SYSTEM_INFO system_info;
    ::GetSystemInfo(&system_info);
    return static_cast<u64>(system_info.dwPageSize);
#endif
}
|
||||
|
||||
} // namespace Common
|
||||
|
|
|
@ -19,4 +19,10 @@ struct MemoryInfo {
|
|||
*/
|
||||
[[nodiscard]] const MemoryInfo GetMemInfo();
|
||||
|
||||
/**
|
||||
* Gets the page size of the host system
|
||||
* @return Page size in bytes of the host system
|
||||
*/
|
||||
u64 GetPageSize();
|
||||
|
||||
} // namespace Common
|
||||
|
|
|
@ -109,6 +109,8 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_graphics_pipeline.h
|
||||
renderer_vulkan/vk_master_semaphore.cpp
|
||||
renderer_vulkan/vk_master_semaphore.h
|
||||
renderer_vulkan/vk_memory_util.cpp
|
||||
renderer_vulkan/vk_memory_util.h
|
||||
renderer_vulkan/vk_rasterizer.cpp
|
||||
renderer_vulkan/vk_rasterizer.h
|
||||
renderer_vulkan/vk_rasterizer_cache.cpp
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/memory_detect.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/settings.h"
|
||||
#include "common/texture.h"
|
||||
|
@ -13,6 +14,7 @@
|
|||
#include "core/hw/hw.h"
|
||||
#include "core/hw/lcd.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
|
||||
#include "video_core/host_shaders/vulkan_present_anaglyph_frag_spv.h"
|
||||
|
@ -865,6 +867,16 @@ void RendererVulkan::RenderScreenshot() {
|
|||
return;
|
||||
}
|
||||
|
||||
if (!TryRenderScreenshotWithHostMemory()) {
|
||||
RenderScreenshotWithStagingCopy();
|
||||
}
|
||||
|
||||
settings.screenshot_complete_callback(false);
|
||||
}
|
||||
|
||||
void RendererVulkan::RenderScreenshotWithStagingCopy() {
|
||||
const vk::Device device = instance.GetDevice();
|
||||
|
||||
const Layout::FramebufferLayout layout{settings.screenshot_framebuffer_layout};
|
||||
const u32 width = layout.width;
|
||||
const u32 height = layout.height;
|
||||
|
@ -895,6 +907,7 @@ void RendererVulkan::RenderScreenshot() {
|
|||
LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
vk::Buffer staging_buffer{unsafe_buffer};
|
||||
|
||||
Frame frame{};
|
||||
|
@ -969,18 +982,169 @@ void RendererVulkan::RenderScreenshot() {
|
|||
// Ensure the copy is fully completed before saving the screenshot
|
||||
scheduler.Finish();
|
||||
|
||||
const vk::Device device = instance.GetDevice();
|
||||
|
||||
// Copy backing image data to the QImage screenshot buffer
|
||||
std::memcpy(settings.screenshot_bits, alloc_info.pMappedData, staging_buffer_info.size);
|
||||
|
||||
// Destroy allocated resources
|
||||
vmaDestroyBuffer(instance.GetAllocator(), unsafe_buffer, allocation);
|
||||
vmaDestroyBuffer(instance.GetAllocator(), staging_buffer, allocation);
|
||||
vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation);
|
||||
device.destroyFramebuffer(frame.framebuffer);
|
||||
device.destroyImageView(frame.image_view);
|
||||
}
|
||||
|
||||
settings.screenshot_complete_callback(false);
|
||||
bool RendererVulkan::TryRenderScreenshotWithHostMemory() {
|
||||
// If the host-memory import alignment matches the allocation granularity of the platform, then
|
||||
// the entire span of memory can be trivially imported
|
||||
const bool trivial_import =
|
||||
instance.IsExternalMemoryHostSupported() &&
|
||||
instance.GetMinImportedHostPointerAlignment() == Common::GetPageSize();
|
||||
if (!trivial_import) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const vk::Device device = instance.GetDevice();
|
||||
|
||||
const Layout::FramebufferLayout layout{settings.screenshot_framebuffer_layout};
|
||||
const u32 width = layout.width;
|
||||
const u32 height = layout.height;
|
||||
|
||||
// For a span of memory [x, x + s], import [AlignDown(x, alignment), AlignUp(x + s, alignment)]
|
||||
// and maintain an offset to the start of the data
|
||||
const u64 import_alignment = instance.GetMinImportedHostPointerAlignment();
|
||||
const uintptr_t address = reinterpret_cast<uintptr_t>(settings.screenshot_bits);
|
||||
void* aligned_pointer = reinterpret_cast<void*>(Common::AlignDown(address, import_alignment));
|
||||
const u64 offset = address % import_alignment;
|
||||
const u64 aligned_size = Common::AlignUp(offset + width * height * 4ull, import_alignment);
|
||||
|
||||
// Buffer<->Image mapping for the imported imported buffer
|
||||
const vk::BufferImageCopy buffer_image_copy = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = 0,
|
||||
.bufferImageHeight = 0,
|
||||
.imageSubresource =
|
||||
{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
.imageExtent = {width, height, 1},
|
||||
};
|
||||
|
||||
const vk::MemoryHostPointerPropertiesEXT import_properties =
|
||||
device.getMemoryHostPointerPropertiesEXT(
|
||||
vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, aligned_pointer);
|
||||
|
||||
if (!import_properties.memoryTypeBits) {
|
||||
// Could not import memory
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::optional<u32> memory_type_index = FindMemoryType(
|
||||
instance.GetPhysicalDevice().getMemoryProperties(),
|
||||
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
|
||||
import_properties.memoryTypeBits);
|
||||
|
||||
if (!memory_type_index.has_value()) {
|
||||
// Could not find memory type index
|
||||
return false;
|
||||
}
|
||||
|
||||
const vk::StructureChain<vk::MemoryAllocateInfo, vk::ImportMemoryHostPointerInfoEXT>
|
||||
allocation_chain = {
|
||||
vk::MemoryAllocateInfo{
|
||||
.allocationSize = aligned_size,
|
||||
.memoryTypeIndex = memory_type_index.value(),
|
||||
},
|
||||
vk::ImportMemoryHostPointerInfoEXT{
|
||||
.handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
|
||||
.pHostPointer = aligned_pointer,
|
||||
},
|
||||
};
|
||||
|
||||
// Import host memory
|
||||
const vk::UniqueDeviceMemory imported_memory =
|
||||
device.allocateMemoryUnique(allocation_chain.get());
|
||||
|
||||
const vk::StructureChain<vk::BufferCreateInfo, vk::ExternalMemoryBufferCreateInfo> buffer_info =
|
||||
{
|
||||
vk::BufferCreateInfo{
|
||||
.size = aligned_size,
|
||||
.usage = vk::BufferUsageFlagBits::eTransferDst,
|
||||
.sharingMode = vk::SharingMode::eExclusive,
|
||||
},
|
||||
vk::ExternalMemoryBufferCreateInfo{
|
||||
.handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
|
||||
},
|
||||
};
|
||||
|
||||
// Bind imported memory to buffer
|
||||
const vk::UniqueBuffer imported_buffer = device.createBufferUnique(buffer_info.get());
|
||||
device.bindBufferMemory(imported_buffer.get(), imported_memory.get(), 0);
|
||||
|
||||
Frame frame{};
|
||||
main_window.RecreateFrame(&frame, width, height);
|
||||
|
||||
DrawScreens(&frame, layout, false);
|
||||
|
||||
scheduler.Record([buffer_image_copy, source_image = frame.image,
|
||||
imported_buffer = imported_buffer.get()](vk::CommandBuffer cmdbuf) {
|
||||
const vk::ImageMemoryBarrier read_barrier = {
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = source_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
const vk::ImageMemoryBarrier write_barrier = {
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = source_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
};
|
||||
static constexpr vk::MemoryBarrier memory_write_barrier = {
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
||||
};
|
||||
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier);
|
||||
cmdbuf.copyImageToBuffer(source_image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
imported_buffer, buffer_image_copy);
|
||||
cmdbuf.pipelineBarrier(
|
||||
vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, write_barrier);
|
||||
});
|
||||
|
||||
// Ensure the copy is fully completed before saving the screenshot
|
||||
scheduler.Finish();
|
||||
|
||||
// Image data has been copied directly to host memory
|
||||
device.destroyFramebuffer(frame.framebuffer);
|
||||
device.destroyImageView(frame.image_view);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -90,6 +90,8 @@ private:
|
|||
void ConfigureRenderPipeline();
|
||||
void PrepareRendertarget();
|
||||
void RenderScreenshot();
|
||||
void RenderScreenshotWithStagingCopy();
|
||||
bool TryRenderScreenshotWithHostMemory();
|
||||
void PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout);
|
||||
void RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout,
|
||||
bool flipped);
|
||||
|
|
|
@ -407,7 +407,8 @@ bool Instance::CreateDevice() {
|
|||
vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
|
||||
const vk::StructureChain properties_chain =
|
||||
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
|
||||
vk::PhysicalDevicePortabilitySubsetPropertiesKHR>();
|
||||
vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
|
||||
|
||||
features = feature_chain.get().features;
|
||||
if (available_extensions.empty()) {
|
||||
|
@ -415,7 +416,7 @@ bool Instance::CreateDevice() {
|
|||
return false;
|
||||
}
|
||||
|
||||
boost::container::static_vector<const char*, 12> enabled_extensions;
|
||||
boost::container::static_vector<const char*, 13> enabled_extensions;
|
||||
const auto add_extension = [&](std::string_view extension, bool blacklist = false,
|
||||
std::string_view reason = "") -> bool {
|
||||
const auto result =
|
||||
|
@ -445,6 +446,7 @@ bool Instance::CreateDevice() {
|
|||
add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
image_format_list = add_extension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
|
||||
shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
|
||||
external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
|
||||
tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
|
||||
const bool has_timeline_semaphores =
|
||||
add_extension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, is_qualcomm || is_turnip,
|
||||
|
@ -589,6 +591,11 @@ bool Instance::CreateDevice() {
|
|||
device_chain.unlink<vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>();
|
||||
}
|
||||
|
||||
if (external_memory_host) {
|
||||
PROP_GET(vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, minImportedHostPointerAlignment,
|
||||
min_imported_host_pointer_alignment);
|
||||
}
|
||||
|
||||
if (has_fragment_shader_barycentric) {
|
||||
FEAT_SET(vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR, fragmentShaderBarycentric,
|
||||
fragment_shader_barycentric)
|
||||
|
|
|
@ -168,6 +168,11 @@ public:
|
|||
return shader_stencil_export;
|
||||
}
|
||||
|
||||
/// Returns true when VK_EXT_external_memory_host is supported.
/// The flag holds the result of add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME)
/// performed in CreateDevice and stays false until then.
|
||||
bool IsExternalMemoryHostSupported() const {
|
||||
return external_memory_host;
|
||||
}
|
||||
|
||||
/// Returns true when VK_KHR_fragment_shader_barycentric is supported
|
||||
bool IsFragmentShaderBarycentricSupported() const {
|
||||
return fragment_shader_barycentric;
|
||||
|
@ -248,6 +253,11 @@ public:
|
|||
return min_vertex_stride_alignment;
|
||||
}
|
||||
|
||||
/// Returns the minimum imported host pointer alignment, in bytes.
/// The value is filled in CreateDevice from the minImportedHostPointerAlignment property of
/// vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, and only when
/// VK_EXT_external_memory_host was enabled; otherwise the member keeps its value-initialized
/// default (NOTE(review): presumably 0 — confirm PROP_GET has no other side effects).
|
||||
u64 GetMinImportedHostPointerAlignment() const {
|
||||
return min_imported_host_pointer_alignment;
|
||||
}
|
||||
|
||||
/// Returns true if commands should be flushed at the end of each major renderpass
|
||||
bool ShouldFlush() const {
|
||||
return driver_id == vk::DriverIdKHR::eArmProprietary ||
|
||||
|
@ -314,6 +324,8 @@ private:
|
|||
bool pipeline_creation_cache_control{};
|
||||
bool fragment_shader_barycentric{};
|
||||
bool shader_stencil_export{};
|
||||
bool external_memory_host{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
bool tooling_info{};
|
||||
bool debug_utils_supported{};
|
||||
bool has_nsight_graphics{};
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_memory_util.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Scans the device memory types in index order and returns the first one that is enabled in
/// memory_type_mask, carries every flag in wanted and none of the flags in excluded.
/// Returns std::nullopt when no memory type qualifies.
std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
                                  vk::MemoryPropertyFlags wanted, std::bitset<32> memory_type_mask,
                                  vk::MemoryPropertyFlags excluded) {
    for (u32 index = 0; index < properties.memoryTypeCount; ++index) {
        // Only memory types whose bit is set in the mask are candidates
        if (memory_type_mask.test(index)) {
            const auto type_flags = properties.memoryTypes[index].propertyFlags;
            const bool has_all_wanted = (type_flags & wanted) == wanted;
            const bool has_no_excluded = !(type_flags & excluded);
            if (has_all_wanted && has_no_excluded) {
                return index;
            }
        }
    }
    return std::nullopt;
}
|
||||
} // namespace Vulkan
|
|
@ -0,0 +1,20 @@
|
|||
// Copyright 2023 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <optional>
|
||||
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/**
 * Find a memory type with the passed requirements
 * @param properties Memory properties of the physical device whose memory types are searched
 * @param wanted Property flags that the memory type must all have
 * @param memory_type_mask Bit i must be set for memory type i to be considered; the default
 *                         mask considers every memory type
 * @param excluded Property flags that the memory type must not have
 * @return Index of the first matching memory type, or std::nullopt when none matches
 */
|
||||
std::optional<u32> FindMemoryType(
|
||||
const vk::PhysicalDeviceMemoryProperties& properties, vk::MemoryPropertyFlags wanted,
|
||||
std::bitset<32> memory_type_mask = 0xFFFFFFFF,
|
||||
vk::MemoryPropertyFlags excluded = vk::MemoryPropertyFlagBits::eProtected);
|
||||
|
||||
} // namespace Vulkan
|
|
@ -7,6 +7,7 @@
|
|||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
||||
|
@ -43,19 +44,6 @@ vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Find a memory type with the passed requirements
|
||||
std::optional<u32> FindMemoryType(
|
||||
const vk::PhysicalDeviceMemoryProperties& properties, vk::MemoryPropertyFlags wanted,
|
||||
vk::MemoryPropertyFlags excluded = vk::MemoryPropertyFlagBits::eProtected) {
|
||||
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
|
||||
const auto flags = properties.memoryTypes[i].propertyFlags;
|
||||
if (((flags & wanted) == wanted) && (!(flags & excluded))) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Get the preferred host visible memory type.
|
||||
u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) {
|
||||
vk::MemoryPropertyFlags flags = MakePropertyFlags(type);
|
||||
|
|
Reference in New Issue