From ba18047e8d06584de0ce18cdbb303a6d9a8742aa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 19 Jul 2021 04:32:03 +0200 Subject: [PATCH] Texture Cache: Implement Vulkan UpScaling & DownScaling --- .../renderer_opengl/gl_texture_cache.cpp | 10 +- .../renderer_vulkan/vk_texture_cache.cpp | 259 ++++++++++++++++-- .../renderer_vulkan/vk_texture_cache.h | 10 +- src/video_core/texture_cache/image_info.cpp | 3 + src/video_core/texture_cache/texture_cache.h | 84 +++++- .../texture_cache/texture_cache_base.h | 3 + 6 files changed, 327 insertions(+), 42 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 1e594838f..cdd352aef 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -849,20 +849,22 @@ void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t b } } -void Image::ScaleUp() { +bool Image::ScaleUp() { if (True(flags & ImageFlagBits::Rescaled)) { - return; + return false; } flags |= ImageFlagBits::Rescaled; UNIMPLEMENTED(); + return true; } -void Image::ScaleDown() { +bool Image::ScaleDown() { if (False(flags & ImageFlagBits::Rescaled)) { - return; + return false; } flags &= ~ImageFlagBits::Rescaled; UNIMPLEMENTED(); + return true; } ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index be5b1d84d..668554d1e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -137,6 +137,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; } const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples); + const bool is_2d = info.type == ImageType::e2D; return VkImageCreateInfo{ 
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = nullptr, @@ -144,9 +145,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { .imageType = ConvertImageType(info.type), .format = format_info.format, .extent{ - .width = ((info.size.width << up) >> down) >> samples_x, - .height = ((info.size.height << up) >> down) >> samples_y, - .depth = (info.size.depth << up) >> down, + .width = ((info.size.width * up) >> down) >> samples_x, + .height = (is_2d ? ((info.size.height * up) >> down) : info.size.height) >> samples_y, + .depth = info.size.depth, }, .mipLevels = static_cast(info.resources.levels), .arrayLayers = static_cast(info.resources.layers), @@ -160,7 +161,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info, u32 up = 0, +[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info, u32 up = 1, u32 down = 0) { if (info.type == ImageType::Buffer) { return vk::Image{}; @@ -851,7 +852,6 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { void TextureCacheRuntime::TickFrame() { prescaled_images.Tick(); prescaled_commits.Tick(); - prescaled_views.Tick(); } Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, @@ -923,7 +923,7 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); if (is_rescaled) { - ScaleDown(); + ScaleDown(true); } std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -978,38 +978,253 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span& blit_regions, + VkImageAspectFlags aspect_mask) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, aspect_mask, + regions = std::move(blit_regions)](vk::CommandBuffer cmdbuf) { + const 
std::array read_barriers{
+            // Source image: make every earlier write available to the blit's transfer read.
+            // Color attachment writes are included because render targets get rescaled too.
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
+                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = src_image,
+                .subresourceRange{
+                    .aspectMask = aspect_mask,
+                    .baseMipLevel = 0,
+                    .levelCount = VK_REMAINING_MIP_LEVELS,
+                    .baseArrayLayer = 0,
+                    .layerCount = VK_REMAINING_ARRAY_LAYERS,
+                },
+            },
+            // Destination image: the visible callers pass an image they have just created, so
+            // it has never been in GENERAL. Transition from UNDEFINED (contents discarded);
+            // the blit overwrites every level and layer anyway.
+            VkImageMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
+                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                                 VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = dst_image,
+                .subresourceRange{
+                    .aspectMask = aspect_mask,
+                    .baseMipLevel = 0,
+                    .levelCount = VK_REMAINING_MIP_LEVELS,
+                    .baseArrayLayer = 0,
+                    .layerCount = VK_REMAINING_ARRAY_LAYERS,
+                },
+            },
+        };
+        // After the blit: return the destination to GENERAL and make the transfer write visible
+        // to any later use, including use as a color or depth attachment.
+        VkImageMemoryBarrier write_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                             VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = dst_image,
+            .subresourceRange{
+
.aspectMask = aspect_mask,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               0, nullptr, nullptr, read_barriers);
+        const VkFilter vk_filter = VK_FILTER_NEAREST;
+        cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
+                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               0, write_barrier);
+    });
+}
+
+/// Builds the blit region that rescales a single mip level of an image.
+/// @param info        Image description; info.size is the native (unscaled) level-0 extent.
+/// @param resolution  Scale factors, applied as (native * up_scale) >> down_shift.
+/// @param aspect_mask Image aspects to copy.
+/// @param level       Mip level to copy; all info.resources.layers layers are included.
+/// @param up_scaling  True to blit native -> scaled, false to blit scaled -> native.
+/// @return Region whose source and destination extents are those of the requested level.
+[[nodiscard]] static VkImageBlit MakeScaleBlit(const ImageInfo& info,
+                                               const Settings::ResolutionScalingInfo& resolution,
+                                               VkImageAspectFlags aspect_mask, s32 level,
+                                               bool up_scaling) {
+    const bool is_2d = info.type == ImageType::e2D;
+    const auto scale_up = [&](u32 value) {
+        return (value * resolution.up_scale) >> resolution.down_shift;
+    };
+    // Each mip level halves the previous extent and never drops below one texel. Blit offsets
+    // must stay inside the selected subresource, so the level-0 size cannot be reused.
+    const auto mip_size = [level](u32 value) {
+        const s32 size = s32(value >> level);
+        return size > 0 ? size : 1;
+    };
+    const VkOffset3D native_end{
+        .x = mip_size(info.size.width),
+        .y = mip_size(info.size.height),
+        .z = 1,
+    };
+    // Height is only scaled for 2D images, matching how the scaled image extent is computed.
+    const VkOffset3D scaled_end{
+        .x = mip_size(scale_up(info.size.width)),
+        .y = mip_size(is_2d ? scale_up(info.size.height) : info.size.height),
+        .z = 1,
+    };
+    const VkImageSubresourceLayers subresource{
+        .aspectMask = aspect_mask,
+        .mipLevel = u32(level),
+        .baseArrayLayer = 0,
+        .layerCount = u32(info.resources.layers),
+    };
+    return VkImageBlit{
+        .srcSubresource = subresource,
+        .srcOffsets{VkOffset3D{}, up_scaling ? native_end : scaled_end},
+        .dstSubresource = subresource,
+        .dstOffsets{VkOffset3D{}, up_scaling ? scaled_end : native_end},
+    };
+}
+
+/// Replaces the backing image with a copy blitted to the runtime's scaled resolution.
+/// @param save_as_backup When true the previous image and commit are kept so SwapBackup() can
+///                       restore them; otherwise they are pushed to the runtime's prescaled
+///                       rings.
+/// @return False when the image is already flagged as rescaled (nothing is done), else true.
+bool Image::ScaleUp(bool save_as_backup) {
     if (True(flags & ImageFlagBits::Rescaled)) {
-        return;
+        return false;
     }
     ASSERT(info.type != ImageType::Linear);
-    if (!runtime->is_rescaling_on) {
-        flags |= ImageFlagBits::Rescaled;
-        return;
-    }
-    flags |= ImageFlagBits::Rescaled;
     scaling_count++;
     ASSERT(scaling_count < 10);
-    return;
+    flags |= ImageFlagBits::Rescaled;
+    /*if (!runtime->is_rescaling_on) {
+        return;
+    }*/
+    const auto& resolution = runtime->resolution;
+    vk::Image rescaled_image =
+        MakeImage(runtime->device, info, resolution.up_scale, resolution.down_shift);
+    MemoryCommit new_commit(
+        runtime->memory_allocator.Commit(rescaled_image, MemoryUsage::DeviceLocal));
+
+    // Start empty: constructing with a size and then calling push_back would leave
+    // zero-initialized regions in front of the real ones.
+    boost::container::small_vector vkRegions;
+    for (s32 level = 0; level < info.resources.levels; level++) {
+        vkRegions.push_back(MakeScaleBlit(info, resolution, aspect_mask, level, true));
+    }
+    BlitScale(*scheduler, *image, *rescaled_image, vkRegions, aspect_mask);
+    if (save_as_backup) {
+        backup_image = std::move(image);
+        backup_commit = std::move(commit);
+        has_backup = true;
+    } else {
+        runtime->prescaled_images.Push(std::move(image));
+        runtime->prescaled_commits.Push(std::move(commit));
+    }
+    image = std::move(rescaled_image);
+    commit = std::move(new_commit);
+    return true;
 }
 
-void Image::ScaleDown() {
+/// Restores the image saved by a ScaleUp/ScaleDown call made with save_as_backup = true.
+/// The current image and commit are pushed to the runtime's prescaled rings.
+void Image::SwapBackup() {
+    ASSERT(has_backup);
+    runtime->prescaled_images.Push(std::move(image));
+    runtime->prescaled_commits.Push(std::move(commit));
+    image = std::move(backup_image);
+    commit = std::move(backup_commit);
+    has_backup = false;
+}
+
+/// Replaces the backing image with a copy blitted back to the native resolution.
+/// @param save_as_backup When true the previous (scaled) image and commit are kept so
+///                       SwapBackup() can restore them; otherwise they are pushed to the
+///                       runtime's prescaled rings.
+/// @return False when the image is not flagged as rescaled (nothing is done), else true.
+bool Image::ScaleDown(bool save_as_backup) {
     if (False(flags & ImageFlagBits::Rescaled)) {
-        return;
+        return false;
     }
     ASSERT(info.type != ImageType::Linear);
-    if (!runtime->is_rescaling_on) {
-        flags &= ~ImageFlagBits::Rescaled;
-        return;
-    }
     flags &= ~ImageFlagBits::Rescaled;
     scaling_count++;
     ASSERT(scaling_count < 10);
-    return;
+    /*if (!runtime->is_rescaling_on) {
+        return false;
+    }*/
+
+    const auto& resolution = runtime->resolution;
+    // The target is created with MakeImage's default factors (native size). Passing the
+    // up-scale factors here would allocate another scaled-size image.
+    vk::Image downscaled_image = MakeImage(runtime->device, info);
+    MemoryCommit new_commit(
+        runtime->memory_allocator.Commit(downscaled_image, MemoryUsage::DeviceLocal));
+
+    // Start empty: constructing with a size and then calling push_back would leave
+    // zero-initialized regions in front of the real ones.
+    boost::container::small_vector vkRegions;
+    for (s32 level = 0; level < info.resources.levels; level++) {
+        vkRegions.push_back(MakeScaleBlit(info, resolution, aspect_mask, level, false));
+    }
+    BlitScale(*scheduler, *image, *downscaled_image, vkRegions, aspect_mask);
+    if (save_as_backup) {
+        backup_image = std::move(image);
+        backup_commit = std::move(commit);
+        has_backup = true;
+    } else {
+        runtime->prescaled_images.Push(std::move(image));
+        runtime->prescaled_commits.Push(std::move(commit));
+    }
+    image = std::move(downscaled_image);
+    commit = std::move(new_commit);
+    return true;
 }
 
 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f7e782c44..958a64651 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -45,7 +45,6 @@ struct TextureCacheRuntime {
     static constexpr size_t TICKS_TO_DESTROY = 6;
     DelayedDestructionRing prescaled_images;
     DelayedDestructionRing prescaled_commits;
-    DelayedDestructionRing prescaled_views;
     Settings::ResolutionScalingInfo resolution;
     bool is_rescaling_on{};
 
@@ -126,9 +125,11 @@ public:
         return std::exchange(initialized, true);
     }
 
-    void ScaleUp();
+    bool ScaleUp(bool save_as_backup = false);
 
-    void ScaleDown();
+    bool ScaleDown(bool save_as_backup = false);
+
+    void SwapBackup();
 
 private:
     VKScheduler* scheduler;
@@ -140,6 +141,9 @@ private:
     bool initialized = false;
     TextureCacheRuntime* runtime;
     u32 scaling_count{};
+    vk::Image
backup_image{}; + MemoryCommit backup_commit{}; + bool has_backup{}; }; class ImageView : public VideoCommon::ImageViewBase { diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 64fd7010a..022ca9033 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -41,6 +41,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { ASSERT(config.BaseLayer() == 0); type = ImageType::e1D; size.width = config.Width(); + resources.layers = 1; break; case TextureType::Texture1DArray: UNIMPLEMENTED_IF(config.BaseLayer() != 0); @@ -82,10 +83,12 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept { size.width = config.Width(); size.height = config.Height(); size.depth = config.Depth(); + resources.layers = 1; break; case TextureType::Texture1DBuffer: type = ImageType::Buffer; size.width = config.Width(); + resources.layers = 1; break; default: UNREACHABLE_MSG("Invalid texture_type={}", static_cast(config.texture_type.Value())); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 560da4f16..95a9e8fe9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -242,24 +242,36 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { const auto scale_up = [this](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; - image.ScaleUp(); + return ScaleUp(image); } + return false; }; for (size_t index = 0; index < NUM_RT; ++index) { - scale_up(tmp_color_images[index]); + if (scale_up(tmp_color_images[index])) { + BindRenderTarget(&render_targets.color_buffer_ids[index], + FindColorBuffer(index, is_clear)); + } + } + if (scale_up(tmp_depth_image)) { + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); } - scale_up(tmp_depth_image); } else { const auto scale_down = [this](ImageId image_id) { if (image_id != CORRUPT_ID) { Image& image = slot_images[image_id]; - image.ScaleDown(); + return ScaleDown(image); } + return false; }; for (size_t index = 0; index < NUM_RT; ++index) { - scale_down(tmp_color_images[index]); + if (scale_down(tmp_color_images[index])) { + BindRenderTarget(&render_targets.color_buffer_ids[index], + FindColorBuffer(index, is_clear)); + } + } + if (scale_down(tmp_depth_image)) { + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); } - scale_down(tmp_depth_image); } // Rescale End @@ -695,6 +707,47 @@ bool TextureCache

::ImageCanRescale(Image& image) { return true; } +template +void TextureCache

::InvalidateScale(Image& image, bool invalidate_rt) { + const std::span image_view_ids = image.image_view_ids; + if (invalidate_rt) { + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); +} + +template +bool TextureCache

::ScaleUp(Image& image, bool invalidate_rt) { + const bool rescaled = image.ScaleUp(); + if (!rescaled) { + return false; + } + InvalidateScale(image, invalidate_rt); + return true; +} + +template +bool TextureCache

::ScaleDown(Image& image, bool invalidate_rt) { + const bool rescaled = image.ScaleDown(); + if (!rescaled) { + return false; + } + InvalidateScale(image, invalidate_rt); + return true; +} + template ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { @@ -793,33 +846,32 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bool can_rescale = (info.type == ImageType::e1D || info.type == ImageType::e2D) && info.block.depth == 0; + bool any_rescaled = false; for (const ImageId sibling_id : all_siblings) { if (!can_rescale) { break; } Image& sibling = slot_images[sibling_id]; can_rescale &= ImageCanRescale(sibling); + any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); } + can_rescale &= any_rescaled; + if (can_rescale) { for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; - sibling.ScaleUp(); + ScaleUp(sibling, true); } } else { for (const ImageId sibling_id : all_siblings) { Image& sibling = slot_images[sibling_id]; - sibling.ScaleDown(); + ScaleDown(sibling, true); } } const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; - if (can_rescale) { - new_image.ScaleUp(); - } else { - new_image.ScaleDown(); - } if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { new_image.flags |= ImageFlagBits::Sparse; @@ -840,6 +892,12 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA // TODO: Only upload what we need RefreshContents(new_image, new_image_id); + if (can_rescale) { + new_image.ScaleUp(); + } else { + new_image.ScaleDown(); + } + for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; if (overlap.info.num_samples != new_image.info.num_samples) { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index a4a2c0832..042678786 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -327,6 +327,9 @@ private: [[nodiscard]] bool IsFullClear(ImageViewId id); bool ImageCanRescale(Image& image); + void InvalidateScale(Image& image, bool invalidate_rt = false); + bool ScaleUp(Image& image, bool invalidate_rt = false); + bool ScaleDown(Image& image, bool invalidate_rt = false); Runtime& runtime; VideoCore::RasterizerInterface& rasterizer;