yuzu-emu
/
yuzu-mainline
Archived
1
0
Fork 0

Merge pull request #10418 from liamwhite/blink-and-youll-miss-it

texture_cache: process aliases and overlaps in the correct order
This commit is contained in:
Matías Locatti 2023-05-26 17:36:09 -03:00 committed by GitHub
commit 919b54848b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 105 additions and 61 deletions

View File

@ -155,7 +155,7 @@ void ImageBase::CheckAliasState() {
flags &= ~ImageFlagBits::Alias; flags &= ~ImageFlagBits::Alias;
} }
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
ASSERT(lhs.info.type == rhs.info.type); ASSERT(lhs.info.type == rhs.info.type);
std::optional<SubresourceBase> base; std::optional<SubresourceBase> base;
@ -169,7 +169,7 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
} }
if (!base) { if (!base) {
LOG_ERROR(HW_GPU, "Image alias should have been flipped"); LOG_ERROR(HW_GPU, "Image alias should have been flipped");
return; return false;
} }
const PixelFormat lhs_format = lhs.info.format; const PixelFormat lhs_format = lhs.info.format;
const PixelFormat rhs_format = rhs.info.format; const PixelFormat rhs_format = rhs.info.format;
@ -248,12 +248,13 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
} }
ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
if (lhs_alias.copies.empty()) { if (lhs_alias.copies.empty()) {
return; return false;
} }
lhs.aliased_images.push_back(std::move(lhs_alias)); lhs.aliased_images.push_back(std::move(lhs_alias));
rhs.aliased_images.push_back(std::move(rhs_alias)); rhs.aliased_images.push_back(std::move(rhs_alias));
lhs.flags &= ~ImageFlagBits::IsRescalable; lhs.flags &= ~ImageFlagBits::IsRescalable;
rhs.flags &= ~ImageFlagBits::IsRescalable; rhs.flags &= ~ImageFlagBits::IsRescalable;
return true;
} }
} // namespace VideoCommon } // namespace VideoCommon

View File

@ -142,6 +142,6 @@ struct ImageAllocBase {
std::vector<ImageId> images; std::vector<ImageId> images;
}; };
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
} // namespace VideoCommon } // namespace VideoCommon

View File

@ -1311,17 +1311,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const size_t size_bytes = CalculateGuestSizeInBytes(new_info); const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr(); const bool native_bgr = runtime.HasNativeBgr();
boost::container::small_vector<ImageId, 4> overlap_ids; join_overlap_ids.clear();
std::unordered_set<ImageId> overlaps_found; join_overlaps_found.clear();
boost::container::small_vector<ImageId, 4> left_aliased_ids; join_left_aliased_ids.clear();
boost::container::small_vector<ImageId, 4> right_aliased_ids; join_right_aliased_ids.clear();
std::unordered_set<ImageId> ignore_textures; join_ignore_textures.clear();
boost::container::small_vector<ImageId, 4> bad_overlap_ids; join_bad_overlap_ids.clear();
boost::container::small_vector<ImageId, 4> all_siblings; join_copies_to_do.clear();
join_alias_indices.clear();
const bool this_is_linear = info.type == ImageType::Linear; const bool this_is_linear = info.type == ImageType::Linear;
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
if (True(overlap.flags & ImageFlagBits::Remapped)) { if (True(overlap.flags & ImageFlagBits::Remapped)) {
ignore_textures.insert(overlap_id); join_ignore_textures.insert(overlap_id);
return; return;
} }
const bool overlap_is_linear = overlap.info.type == ImageType::Linear; const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
@ -1331,11 +1332,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
if (this_is_linear && overlap_is_linear) { if (this_is_linear && overlap_is_linear) {
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
// Alias linear images with the same pitch // Alias linear images with the same pitch
left_aliased_ids.push_back(overlap_id); join_left_aliased_ids.push_back(overlap_id);
} }
return; return;
} }
overlaps_found.insert(overlap_id); join_overlaps_found.insert(overlap_id);
static constexpr bool strict_size = true; static constexpr bool strict_size = true;
const std::optional<OverlapResult> solution = ResolveOverlap( const std::optional<OverlapResult> solution = ResolveOverlap(
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@ -1343,33 +1344,33 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
gpu_addr = solution->gpu_addr; gpu_addr = solution->gpu_addr;
cpu_addr = solution->cpu_addr; cpu_addr = solution->cpu_addr;
new_info.resources = solution->resources; new_info.resources = solution->resources;
overlap_ids.push_back(overlap_id); join_overlap_ids.push_back(overlap_id);
all_siblings.push_back(overlap_id); join_copies_to_do.emplace_back(JoinCopy{false, overlap_id});
return; return;
} }
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
left_aliased_ids.push_back(overlap_id); join_left_aliased_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::Alias; overlap.flags |= ImageFlagBits::Alias;
all_siblings.push_back(overlap_id); join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
broken_views, native_bgr)) { broken_views, native_bgr)) {
right_aliased_ids.push_back(overlap_id); join_right_aliased_ids.push_back(overlap_id);
overlap.flags |= ImageFlagBits::Alias; overlap.flags |= ImageFlagBits::Alias;
all_siblings.push_back(overlap_id); join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
} else { } else {
bad_overlap_ids.push_back(overlap_id); join_bad_overlap_ids.push_back(overlap_id);
} }
}; };
ForEachImageInRegion(cpu_addr, size_bytes, region_check); ForEachImageInRegion(cpu_addr, size_bytes, region_check);
const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
if (!overlaps_found.contains(overlap_id)) { if (!join_overlaps_found.contains(overlap_id)) {
if (True(overlap.flags & ImageFlagBits::Remapped)) { if (True(overlap.flags & ImageFlagBits::Remapped)) {
ignore_textures.insert(overlap_id); join_ignore_textures.insert(overlap_id);
} }
if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
ignore_textures.insert(overlap_id); join_ignore_textures.insert(overlap_id);
} }
} }
}; };
@ -1377,11 +1378,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
bool can_rescale = info.rescaleable; bool can_rescale = info.rescaleable;
bool any_rescaled = false; bool any_rescaled = false;
for (const ImageId sibling_id : all_siblings) { for (const auto& copy : join_copies_to_do) {
if (!can_rescale) { if (!can_rescale) {
break; break;
} }
Image& sibling = slot_images[sibling_id]; Image& sibling = slot_images[copy.id];
can_rescale &= ImageCanRescale(sibling); can_rescale &= ImageCanRescale(sibling);
any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
} }
@ -1389,13 +1390,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
can_rescale &= any_rescaled; can_rescale &= any_rescaled;
if (can_rescale) { if (can_rescale) {
for (const ImageId sibling_id : all_siblings) { for (const auto& copy : join_copies_to_do) {
Image& sibling = slot_images[sibling_id]; Image& sibling = slot_images[copy.id];
ScaleUp(sibling); ScaleUp(sibling);
} }
} else { } else {
for (const ImageId sibling_id : all_siblings) { for (const auto& copy : join_copies_to_do) {
Image& sibling = slot_images[sibling_id]; Image& sibling = slot_images[copy.id];
ScaleDown(sibling); ScaleDown(sibling);
} }
} }
@ -1407,7 +1408,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
new_image.flags |= ImageFlagBits::Sparse; new_image.flags |= ImageFlagBits::Sparse;
} }
for (const ImageId overlap_id : ignore_textures) { for (const ImageId overlap_id : join_ignore_textures) {
Image& overlap = slot_images[overlap_id]; Image& overlap = slot_images[overlap_id];
if (True(overlap.flags & ImageFlagBits::GpuModified)) { if (True(overlap.flags & ImageFlagBits::GpuModified)) {
UNIMPLEMENTED(); UNIMPLEMENTED();
@ -1428,14 +1429,60 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
ScaleDown(new_image); ScaleDown(new_image);
} }
std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) { std::ranges::sort(join_copies_to_do, [this](const JoinCopy& lhs, const JoinCopy& rhs) {
const ImageBase& lhs_image = slot_images[lhs]; const ImageBase& lhs_image = slot_images[lhs.id];
const ImageBase& rhs_image = slot_images[rhs]; const ImageBase& rhs_image = slot_images[rhs.id];
return lhs_image.modification_tick < rhs_image.modification_tick; return lhs_image.modification_tick < rhs_image.modification_tick;
}); });
for (const ImageId overlap_id : overlap_ids) { ImageBase& new_image_base = new_image;
Image& overlap = slot_images[overlap_id]; for (const ImageId aliased_id : join_right_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
size_t alias_index = new_image_base.aliased_images.size();
if (!AddImageAlias(new_image_base, aliased, new_image_id, aliased_id)) {
continue;
}
join_alias_indices.emplace(aliased_id, alias_index);
new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : join_left_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
size_t alias_index = new_image_base.aliased_images.size();
if (!AddImageAlias(aliased, new_image_base, aliased_id, new_image_id)) {
continue;
}
join_alias_indices.emplace(aliased_id, alias_index);
new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : join_bad_overlap_ids) {
ImageBase& aliased = slot_images[aliased_id];
aliased.overlapping_images.push_back(new_image_id);
new_image.overlapping_images.push_back(aliased_id);
if (aliased.info.resources.levels == 1 && aliased.info.block.depth == 0 &&
aliased.overlapping_images.size() > 1) {
aliased.flags |= ImageFlagBits::BadOverlap;
}
if (new_image.info.resources.levels == 1 && new_image.info.block.depth == 0 &&
new_image.overlapping_images.size() > 1) {
new_image.flags |= ImageFlagBits::BadOverlap;
}
}
for (const auto& copy_object : join_copies_to_do) {
Image& overlap = slot_images[copy_object.id];
if (copy_object.is_alias) {
if (!overlap.IsSafeDownload()) {
continue;
}
const auto alias_pointer = join_alias_indices.find(copy_object.id);
if (alias_pointer == join_alias_indices.end()) {
continue;
}
const AliasedImage& aliased = new_image.aliased_images[alias_pointer->second];
CopyImage(new_image_id, aliased.id, aliased.copies);
new_image.modification_tick = overlap.modification_tick;
continue;
}
if (True(overlap.flags & ImageFlagBits::GpuModified)) { if (True(overlap.flags & ImageFlagBits::GpuModified)) {
new_image.flags |= ImageFlagBits::GpuModified; new_image.flags |= ImageFlagBits::GpuModified;
const auto& resolution = Settings::values.resolution_info; const auto& resolution = Settings::values.resolution_info;
@ -1448,35 +1495,15 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
} else { } else {
runtime.CopyImage(new_image, overlap, std::move(copies)); runtime.CopyImage(new_image, overlap, std::move(copies));
} }
new_image.modification_tick = overlap.modification_tick;
} }
if (True(overlap.flags & ImageFlagBits::Tracked)) { if (True(overlap.flags & ImageFlagBits::Tracked)) {
UntrackImage(overlap, overlap_id); UntrackImage(overlap, copy_object.id);
}
UnregisterImage(overlap_id);
DeleteImage(overlap_id);
}
ImageBase& new_image_base = new_image;
for (const ImageId aliased_id : right_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : left_aliased_ids) {
ImageBase& aliased = slot_images[aliased_id];
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
new_image.flags |= ImageFlagBits::Alias;
}
for (const ImageId aliased_id : bad_overlap_ids) {
ImageBase& aliased = slot_images[aliased_id];
aliased.overlapping_images.push_back(new_image_id);
new_image.overlapping_images.push_back(aliased_id);
if (aliased.info.resources.levels == 1 && aliased.overlapping_images.size() > 1) {
aliased.flags |= ImageFlagBits::BadOverlap;
}
if (new_image.info.resources.levels == 1 && new_image.overlapping_images.size() > 1) {
new_image.flags |= ImageFlagBits::BadOverlap;
} }
UnregisterImage(copy_object.id);
DeleteImage(copy_object.id);
} }
RegisterImage(new_image_id); RegisterImage(new_image_id);
return new_image_id; return new_image_id;
} }

View File

@ -10,7 +10,9 @@
#include <span> #include <span>
#include <type_traits> #include <type_traits>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp>
#include <queue> #include <queue>
#include "common/common_types.h" #include "common/common_types.h"
@ -476,6 +478,20 @@ private:
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"}; Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes; std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
// Join caching
// Scratch containers used by JoinImages. They are cleared at the start of every
// JoinImages call and are kept as members rather than locals (presumably so their
// storage is reused between calls -- confirm).

// Overlaps whose ResolveOverlap result was used to update the new image's
// address and resources.
boost::container::small_vector<ImageId, 4> join_overlap_ids;
// Overlaps already visited by the CPU-address region check; the GPU-address
// region check only considers images that are not in this set.
std::unordered_set<ImageId> join_overlaps_found;
// Overlaps passed as the lhs of AddImageAlias, with the new image as rhs:
// linear images sharing pitch and GPU address with the request, or overlaps
// for which IsSubresource(new_info, overlap, ...) holds.
boost::container::small_vector<ImageId, 4> join_left_aliased_ids;
// Overlaps passed as the rhs of AddImageAlias, with the new image as lhs:
// overlaps for which IsSubresource(overlap.info, new_image_base, ...) holds.
boost::container::small_vector<ImageId, 4> join_right_aliased_ids;
// Images flagged Remapped, or found by the GPU-address check with the same GPU
// address and guest size as the request; handled in a separate loop after the
// new image is created.
std::unordered_set<ImageId> join_ignore_textures;
// Overlaps that matched neither alias direction; they are cross-linked with the
// new image through overlapping_images and may receive the BadOverlap flag.
boost::container::small_vector<ImageId, 4> join_bad_overlap_ids;
// One pending copy into the newly joined image.
struct JoinCopy {
// True when the source is an aliased image: it is copied through its alias entry
// and kept. False when it is a resolved overlap: it is unregistered and deleted
// after being processed.
bool is_alias;
// Id of the source image in slot_images.
ImageId id;
};
// Pending copies; JoinImages sorts them by the source image's modification_tick
// before processing so they are applied from oldest to newest.
boost::container::small_vector<JoinCopy, 4> join_copies_to_do;
// Maps an aliased image id to the index of its entry in the new image's
// aliased_images; only filled in when AddImageAlias returned true.
std::unordered_map<ImageId, size_t> join_alias_indices;
}; };
} // namespace VideoCommon } // namespace VideoCommon