Implement Block Linear copies in Kepler Memory.
This commit is contained in:
parent
8a099ac99f
commit
bec28d692d
|
@ -10,7 +10,6 @@
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/textures/convert.h"
|
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
@ -47,13 +46,12 @@ void KeplerMemory::ProcessExec() {
|
||||||
|
|
||||||
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
|
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
|
||||||
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
|
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
|
||||||
std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size);
|
std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
|
||||||
state.write_offset += sub_copy_size;
|
state.write_offset += sub_copy_size;
|
||||||
if (is_last_call) {
|
if (is_last_call) {
|
||||||
UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented");
|
|
||||||
if (regs.exec.linear != 0) {
|
|
||||||
const GPUVAddr address{regs.dest.Address()};
|
const GPUVAddr address{regs.dest.Address()};
|
||||||
const auto host_ptr = memory_manager.GetPointer(address);
|
const auto host_ptr = memory_manager.GetPointer(address);
|
||||||
|
if (regs.exec.linear != 0) {
|
||||||
// We have to invalidate the destination region to evict any outdated surfaces from the
|
// We have to invalidate the destination region to evict any outdated surfaces from the
|
||||||
// cache. We do this before actually writing the new data because the destination
|
// cache. We do this before actually writing the new data because the destination
|
||||||
// address might contain a dirty surface that will have to be written back to memory.
|
// address might contain a dirty surface that will have to be written back to memory.
|
||||||
|
@ -61,6 +59,17 @@ void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
|
||||||
rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size);
|
rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size);
|
||||||
std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size);
|
std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size);
|
||||||
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
|
||||||
|
} else {
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.z != 0);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.depth != 1);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
|
||||||
|
UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
|
||||||
|
const std::size_t dst_size = Tegra::Texture::CalculateSize(
|
||||||
|
true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
|
||||||
|
rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), dst_size);
|
||||||
|
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
|
||||||
|
regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
|
||||||
|
state.inner_buffer.data(), host_ptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -288,6 +288,27 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Scatters bytes from the linear buffer `source_data` into `swizzle_data` using the GOB-based
/// addressing of this file (gob_size_x / gob_size_y / gob_size and legacy_swizzle_table).
///
/// Source bytes are consumed sequentially. Destination positions are visited row by row, from
/// row `dst_y` up to `height`, and within each row from column `dst_x` up to `width` (every row
/// restarts at `dst_x`). Writing stops as soon as `copy_size` bytes have been written or the
/// visited area is exhausted, whichever comes first.
///
/// @param width        Exclusive upper bound of the visited columns; also used to derive the
///                     number of GOBs per row.
/// @param height       Exclusive upper bound of the visited rows.
/// @param dst_x        First column visited in every row.
/// @param dst_y        First row visited.
/// @param block_height Block height in GOBs used by the address computation.
/// @param copy_size    Maximum number of bytes to take from `source_data`.
/// @param source_data  Linear input buffer; read at indices [0, bytes written).
/// @param swizzle_data Output buffer; written at the computed swizzled offsets.
void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
                   std::size_t copy_size, u8* source_data, u8* swizzle_data) {
    const u32 gobs_per_row{(width + gob_size_x - 1) / gob_size_x};
    const auto rows_per_block = gob_size_y * block_height;
    std::size_t bytes_done = 0;

    for (u32 row = dst_y; row < height; ++row) {
        if (bytes_done >= copy_size) {
            return;
        }

        // Base offset for this row: all complete blocks above it, plus the GOB slot the row
        // occupies inside its own block.
        const u32 row_base = (row / rows_per_block) * gob_size * block_height * gobs_per_row +
                             ((row % rows_per_block) / gob_size_y) * gob_size;
        const auto& row_table = legacy_swizzle_table[row % gob_size_y];

        for (u32 column = dst_x; column < width; ++column) {
            if (bytes_done >= copy_size) {
                return;
            }

            // Horizontally adjacent GOBs are gob_size * block_height bytes apart.
            const u32 gob_base = row_base + (column / gob_size_x) * gob_size * block_height;
            const u32 dest_offset = gob_base + row_table[column % gob_size_x];

            swizzle_data[dest_offset] = source_data[bytes_done];
            ++bytes_done;
        }
    }
}
|
||||||
|
|
||||||
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
|
std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
|
||||||
u32 height) {
|
u32 height) {
|
||||||
std::vector<u8> rgba_data;
|
std::vector<u8> rgba_data;
|
||||||
|
|
|
@ -51,4 +51,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
|
||||||
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
|
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
|
||||||
u32 offset_x, u32 offset_y);
|
u32 offset_x, u32 offset_y);
|
||||||
|
|
||||||
|
/// Writes up to `copy_size` bytes from `source_data` into `swizzle_data` at swizzled offsets,
/// visiting rows from `dst_y` to `height` and columns from `dst_x` to `width`.
/// NOTE(review): presumably `swizzle_data` is a block-linear surface and `block_height` is
/// expressed in GOBs - confirm against the definition and its caller.
void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
|
||||||
|
std::size_t copy_size, u8* source_data, u8* swizzle_data);
|
||||||
|
|
||||||
} // namespace Tegra::Texture
|
} // namespace Tegra::Texture
|
||||||
|
|
Reference in New Issue