From 31b125ef578dd5df4e289d1057154dd34f73cb19 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 19 Jun 2021 00:55:13 -0400 Subject: [PATCH] astc: Various robustness enhancements for the gpu decoder These changes should help in reducing crashes/drivers panics that may occur due to synchronization issues between the shader completion and later access of the decoded texture. --- src/video_core/host_shaders/astc_decoder.comp | 15 +++---- .../renderer_opengl/util_shaders.cpp | 5 +-- .../renderer_vulkan/vk_compute_pass.cpp | 39 ++++--------------- src/video_core/textures/astc.cpp | 2 + src/video_core/textures/astc.h | 2 - 5 files changed, 16 insertions(+), 47 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index eaba1b103..71327e233 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -14,9 +14,8 @@ #define BINDING_6_TO_8_BUFFER 2 #define BINDING_7_TO_8_BUFFER 3 #define BINDING_8_TO_8_BUFFER 4 -#define BINDING_BYTE_TO_16_BUFFER 5 -#define BINDING_SWIZZLE_BUFFER 6 -#define BINDING_OUTPUT_IMAGE 7 +#define BINDING_SWIZZLE_BUFFER 5 +#define BINDING_OUTPUT_IMAGE 6 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv @@ -29,7 +28,6 @@ #define BINDING_6_TO_8_BUFFER 3 #define BINDING_7_TO_8_BUFFER 4 #define BINDING_8_TO_8_BUFFER 5 -#define BINDING_BYTE_TO_16_BUFFER 6 #define BINDING_OUTPUT_IMAGE 0 #endif @@ -86,9 +84,6 @@ layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_ layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { uint REPLICATE_8_BIT_TO_8_TABLE[]; }; -layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 { - uint REPLICATE_BYTE_TO_16_TABLE[]; -}; layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; @@ -207,8 +202,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) { } uvec4 ReplicateByteTo16(uvec4 value) { - return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y], - REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]); + return value * 0x101; } uint ReplicateBitTo7(uint value) { @@ -1327,6 +1321,9 @@ void main() { offset += swizzle; const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); + if (any(greaterThanEqual(coord, imageSize(dest_image)))) { + return; + } uint block_index = pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 47fddcb6e..d57998cdc 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -83,7 +83,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; - static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; @@ -105,9 +104,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, offsetof(AstcBufferData, replicate_8_to_8), sizeof(AstcBufferData::replicate_8_to_8)); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle, - offsetof(AstcBufferData, replicate_byte_to_16), - sizeof(AstcBufferData::replicate_byte_to_16)); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(1, tile_size.width, tile_size.height); @@ -137,6 +133,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } + glMemoryBarrier(GL_ALL_BARRIER_BITS); program_manager.RestoreGuestCompute(); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index e11406e58..123bed794 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -40,9 +40,9 @@ constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; -constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5; -constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6; -constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; +constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5; +constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6; +constexpr size_t ASTC_NUM_BINDINGS = 7; VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { return { @@ -71,7 +71,7 @@ std::array BuildInputOutputDescriptorSetBinding }}; } -std::array BuildASTCDescriptorSetBindings() { +std::array BuildASTCDescriptorSetBindings() { return {{ { .binding = ASTC_BINDING_INPUT_BUFFER, @@ -108,13 +108,6 @@ std::array BuildASTCDescriptorSetBindings() { .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .pImmutableSamplers = nullptr, }, - { - .binding = ASTC_BINDING_BYTE_TO_16_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, { .binding = ASTC_BINDING_SWIZZLE_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, @@ -143,7 +136,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { }; } -std::array BuildASTCPassDescriptorUpdateTemplateEntry() { +std::array +BuildASTCPassDescriptorUpdateTemplateEntry() { return {{ { .dstBinding = ASTC_BINDING_INPUT_BUFFER, @@ -185,14 +179,6 @@ std::array BuildASTCPassDescriptorUpdateT .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), .stride = sizeof(DescriptorUpdateEntry), }, - { - .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry), - .stride = sizeof(DescriptorUpdateEntry), - }, { .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, .dstArrayElement = 0, @@ -222,15 +208,6 @@ struct AstcPushConstants { u32 block_height_mask; }; -struct AstcBufferData { - decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE; - decltype(EncodingsValues) encoding_values = EncodingsValues; - decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; - decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; - decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; - decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; -} constexpr ASTC_BUFFER_DATA; - } // Anonymous namespace VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, @@ -517,9 +494,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, sizeof(AstcBufferData::replicate_7_to_8)); update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), sizeof(AstcBufferData::replicate_8_to_8)); - update_descriptor_queue.AddBuffer(*data_buffer, - offsetof(AstcBufferData, replicate_byte_to_16), - sizeof(AstcBufferData::replicate_byte_to_16)); update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); @@ -569,6 +543,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); }); + scheduler.Finish(); } } // namespace Vulkan diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 9b2177ebd..b6e2022f2 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -551,6 +551,8 @@ static void FillError(std::span outBuf, u32 blockWidth, u32 blockHeight) { } } } + +static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable(); static constexpr u32 ReplicateByteTo16(std::size_t value) { return REPLICATE_BYTE_TO_16_TABLE[value]; } diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index c1c37dfe7..441e8eb04 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -116,7 +116,6 @@ constexpr auto MakeReplicateTable() { return table; } -constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable(); constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable(); constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable(); constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable(); @@ -126,7 +125,6 @@ struct AstcBufferData { decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; - decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; } constexpr ASTC_BUFFER_DATA; void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth,