texture_cache: Add async texture decoding
This commit is contained in:
parent
8f3e2a1b48
commit
090bc588e5
|
@ -23,6 +23,7 @@ public:
|
||||||
buffer{Common::make_unique_for_overwrite<T[]>(initial_capacity)} {}
|
buffer{Common::make_unique_for_overwrite<T[]>(initial_capacity)} {}
|
||||||
|
|
||||||
~ScratchBuffer() = default;
|
~ScratchBuffer() = default;
|
||||||
|
ScratchBuffer(ScratchBuffer&&) = default;
|
||||||
|
|
||||||
/// This will only grow the buffer's capacity if size is greater than the current capacity.
|
/// This will only grow the buffer's capacity if size is greater than the current capacity.
|
||||||
/// The previously held data will remain intact.
|
/// The previously held data will remain intact.
|
||||||
|
|
|
@ -38,6 +38,9 @@ enum class ImageFlagBits : u32 {
|
||||||
Rescaled = 1 << 13,
|
Rescaled = 1 << 13,
|
||||||
CheckingRescalable = 1 << 14,
|
CheckingRescalable = 1 << 14,
|
||||||
IsRescalable = 1 << 15,
|
IsRescalable = 1 << 15,
|
||||||
|
|
||||||
|
AsynchronousDecode = 1 << 16,
|
||||||
|
IsDecoding = 1 << 17, ///< Is currently being decoded asynchronously.
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||||
|
|
||||||
|
|
|
@ -85,6 +85,11 @@ void TextureCache<P>::RunGarbageCollector() {
|
||||||
}
|
}
|
||||||
--num_iterations;
|
--num_iterations;
|
||||||
auto& image = slot_images[image_id];
|
auto& image = slot_images[image_id];
|
||||||
|
if (True(image.flags & ImageFlagBits::IsDecoding)) {
|
||||||
|
// This image is still being decoded, deleting it will invalidate the slot
|
||||||
|
// used by the async decoder thread.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const bool must_download =
|
const bool must_download =
|
||||||
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
|
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
|
||||||
if (!high_priority_mode &&
|
if (!high_priority_mode &&
|
||||||
|
@ -133,6 +138,8 @@ void TextureCache<P>::TickFrame() {
|
||||||
sentenced_images.Tick();
|
sentenced_images.Tick();
|
||||||
sentenced_framebuffers.Tick();
|
sentenced_framebuffers.Tick();
|
||||||
sentenced_image_view.Tick();
|
sentenced_image_view.Tick();
|
||||||
|
TickAsyncDecode();
|
||||||
|
|
||||||
runtime.TickFrame();
|
runtime.TickFrame();
|
||||||
critical_gc = 0;
|
critical_gc = 0;
|
||||||
++frame_tick;
|
++frame_tick;
|
||||||
|
@ -777,6 +784,10 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
|
||||||
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (True(image.flags & ImageFlagBits::AsynchronousDecode)) {
|
||||||
|
QueueAsyncDecode(image, image_id);
|
||||||
|
return;
|
||||||
|
}
|
||||||
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
|
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
|
||||||
UploadImageContents(image, staging);
|
UploadImageContents(image, staging);
|
||||||
runtime.InsertUploadMemoryBarrier();
|
runtime.InsertUploadMemoryBarrier();
|
||||||
|
@ -989,6 +1000,64 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
|
||||||
return fitted_size;
|
return fitted_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
|
||||||
|
UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
|
||||||
|
|
||||||
|
image.flags |= ImageFlagBits::IsDecoding;
|
||||||
|
auto decode = std::make_unique<AsyncDecodeContext>();
|
||||||
|
auto* decode_ptr = decode.get();
|
||||||
|
decode->image_id = image_id;
|
||||||
|
async_decodes.push_back(std::move(decode));
|
||||||
|
|
||||||
|
Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes);
|
||||||
|
const size_t guest_size_bytes = image.guest_size_bytes;
|
||||||
|
swizzle_data_buffer.resize_destructive(guest_size_bytes);
|
||||||
|
gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
|
||||||
|
auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer,
|
||||||
|
local_unswizzle_data_buffer);
|
||||||
|
const size_t out_size = MapSizeBytes(image);
|
||||||
|
|
||||||
|
auto func = [out_size, copies, info = image.info,
|
||||||
|
input = std::move(local_unswizzle_data_buffer),
|
||||||
|
async_decode = decode_ptr]() mutable {
|
||||||
|
async_decode->decoded_data.resize_destructive(out_size);
|
||||||
|
std::span copies_span{copies.data(), copies.size()};
|
||||||
|
ConvertImage(input, info, async_decode->decoded_data, copies_span);
|
||||||
|
|
||||||
|
// TODO: Do we need this lock?
|
||||||
|
std::unique_lock lock{async_decode->mutex};
|
||||||
|
async_decode->copies = std::move(copies);
|
||||||
|
async_decode->complete = true;
|
||||||
|
};
|
||||||
|
texture_decode_worker.QueueWork(std::move(func));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::TickAsyncDecode() {
|
||||||
|
bool has_uploads{};
|
||||||
|
auto i = async_decodes.begin();
|
||||||
|
while (i != async_decodes.end()) {
|
||||||
|
auto* async_decode = i->get();
|
||||||
|
std::unique_lock lock{async_decode->mutex};
|
||||||
|
if (!async_decode->complete) {
|
||||||
|
++i;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Image& image = slot_images[async_decode->image_id];
|
||||||
|
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
|
||||||
|
std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(),
|
||||||
|
async_decode->decoded_data.size());
|
||||||
|
image.UploadMemory(staging, async_decode->copies);
|
||||||
|
image.flags &= ~ImageFlagBits::IsDecoding;
|
||||||
|
has_uploads = true;
|
||||||
|
i = async_decodes.erase(i);
|
||||||
|
}
|
||||||
|
if (has_uploads) {
|
||||||
|
runtime.InsertUploadMemoryBarrier();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
bool TextureCache<P>::ScaleUp(Image& image) {
|
bool TextureCache<P>::ScaleUp(Image& image) {
|
||||||
const bool has_copy = image.HasScaled();
|
const bool has_copy = image.HasScaled();
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
@ -18,6 +19,7 @@
|
||||||
#include "common/lru_cache.h"
|
#include "common/lru_cache.h"
|
||||||
#include "common/polyfill_ranges.h"
|
#include "common/polyfill_ranges.h"
|
||||||
#include "common/scratch_buffer.h"
|
#include "common/scratch_buffer.h"
|
||||||
|
#include "common/thread_worker.h"
|
||||||
#include "video_core/compatible_formats.h"
|
#include "video_core/compatible_formats.h"
|
||||||
#include "video_core/control/channel_state_cache.h"
|
#include "video_core/control/channel_state_cache.h"
|
||||||
#include "video_core/delayed_destruction_ring.h"
|
#include "video_core/delayed_destruction_ring.h"
|
||||||
|
@ -54,6 +56,14 @@ struct ImageViewInOut {
|
||||||
ImageViewId id{};
|
ImageViewId id{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct AsyncDecodeContext {
|
||||||
|
ImageId image_id;
|
||||||
|
Common::ScratchBuffer<u8> decoded_data;
|
||||||
|
std::vector<BufferImageCopy> copies;
|
||||||
|
std::mutex mutex;
|
||||||
|
std::atomic_bool complete;
|
||||||
|
};
|
||||||
|
|
||||||
using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
|
using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
|
||||||
|
|
||||||
class TextureCacheChannelInfo : public ChannelInfo {
|
class TextureCacheChannelInfo : public ChannelInfo {
|
||||||
|
@ -377,6 +387,9 @@ private:
|
||||||
bool ScaleDown(Image& image);
|
bool ScaleDown(Image& image);
|
||||||
u64 GetScaledImageSizeBytes(const ImageBase& image);
|
u64 GetScaledImageSizeBytes(const ImageBase& image);
|
||||||
|
|
||||||
|
void QueueAsyncDecode(Image& image, ImageId image_id);
|
||||||
|
void TickAsyncDecode();
|
||||||
|
|
||||||
Runtime& runtime;
|
Runtime& runtime;
|
||||||
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
@ -430,6 +443,9 @@ private:
|
||||||
|
|
||||||
u64 modification_tick = 0;
|
u64 modification_tick = 0;
|
||||||
u64 frame_tick = 0;
|
u64 frame_tick = 0;
|
||||||
|
|
||||||
|
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
|
||||||
|
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
Reference in New Issue