Implemented Tile Width Spacing
This commit is contained in:
parent
f9a211220c
commit
ddfbe0b58d
|
@ -68,13 +68,13 @@ void Fermi2D::HandleSurfaceCopy() {
|
|||
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
|
||||
src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
|
||||
dst_buffer, true, regs.src.BlockHeight(),
|
||||
regs.src.BlockDepth());
|
||||
regs.src.BlockDepth(), 0);
|
||||
} else {
|
||||
// If the input is linear and the output is tiled, swizzle the input and copy it over.
|
||||
Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
|
||||
src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
|
||||
src_buffer, false, regs.dst.BlockHeight(),
|
||||
regs.dst.BlockDepth());
|
||||
regs.dst.BlockDepth(), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,12 +16,12 @@ namespace VideoCore {
|
|||
using Surface::GetBytesPerPixel;
|
||||
using Surface::PixelFormat;
|
||||
|
||||
using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
|
||||
using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
|
||||
using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
|
||||
|
||||
template <bool morton_to_linear, PixelFormat format>
|
||||
static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
|
||||
u8* buffer, std::size_t buffer_size, VAddr addr) {
|
||||
u32 tile_width_spacing, u8* buffer, std::size_t buffer_size, VAddr addr) {
|
||||
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
|
||||
|
||||
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
|
||||
|
@ -31,12 +31,13 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
|
|||
|
||||
if constexpr (morton_to_linear) {
|
||||
Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
|
||||
stride, height, depth, block_height, block_depth);
|
||||
stride, height, depth, block_height, block_depth,
|
||||
tile_width_spacing);
|
||||
} else {
|
||||
Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
|
||||
(height + tile_size_y - 1) / tile_size_y, depth,
|
||||
bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
|
||||
buffer, false, block_height, block_depth);
|
||||
Tegra::Texture::CopySwizzledData(
|
||||
(stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y,
|
||||
depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false,
|
||||
block_height, block_depth, tile_width_spacing);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -325,11 +326,11 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
|
|||
}
|
||||
|
||||
/// Runs the swizzle routine selected by `mode` and `format`.
/// Every layout parameter, including `tile_width_spacing`, is forwarded unchanged to the routine
/// returned by GetSwizzleFunction.
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
                   u8* buffer, std::size_t buffer_size, VAddr addr) {
    GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
                                     tile_width_spacing, buffer, buffer_size, addr);
}
|
||||
|
||||
void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
|
||||
|
@ -350,4 +351,4 @@ void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_
|
|||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -12,10 +12,10 @@ namespace VideoCore {
|
|||
enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
|
||||
|
||||
// Swizzles or unswizzles (depending on `mode`) between `buffer` and the memory at `addr`.
// `tile_width_spacing` sits between `depth` and `buffer` in the argument list.
void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
                   u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
                   u8* buffer, std::size_t buffer_size, VAddr addr);
|
||||
|
||||
void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
|
||||
u8* morton_data, u8* linear_data, bool morton_to_linear);
|
||||
|
||||
} // namespace VideoCore
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -97,6 +97,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
|
|||
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
|
||||
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
|
||||
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
|
||||
params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
|
||||
params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
|
||||
params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
|
||||
params.srgb_conversion);
|
||||
|
@ -162,6 +163,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
|
|||
params.block_width = 1 << config.memory_layout.block_width;
|
||||
params.block_height = 1 << config.memory_layout.block_height;
|
||||
params.block_depth = 1 << config.memory_layout.block_depth;
|
||||
params.tile_width_spacing = 1;
|
||||
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
|
||||
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
|
||||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
|
||||
|
@ -197,6 +199,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
|
|||
params.block_width = 1 << std::min(block_width, 5U);
|
||||
params.block_height = 1 << std::min(block_height, 5U);
|
||||
params.block_depth = 1 << std::min(block_depth, 5U);
|
||||
params.tile_width_spacing = 1;
|
||||
params.pixel_format = PixelFormatFromDepthFormat(format);
|
||||
params.component_type = ComponentTypeFromDepthFormat(format);
|
||||
params.type = GetFormatType(params.pixel_format);
|
||||
|
@ -223,6 +226,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
|
|||
params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
|
||||
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
|
||||
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
|
||||
params.tile_width_spacing = 1;
|
||||
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
|
||||
params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
|
||||
config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
|
||||
|
@ -387,8 +391,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
|
|||
// One MortonSwizzle call per iteration, each processing a depth of 1.
for (u32 i = 0; i < params.depth; i++) {
    // MortonSwizzle takes (..., block_depth, depth, tile_width_spacing, buffer, ...), so the
    // depth of 1 must come before the tile width spacing.
    MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
                  params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
                  params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
                  gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
    offset += layer_size;
    offset_gl += gl_size;
}
|
||||
|
@ -396,8 +400,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
|
|||
const u64 offset = params.GetMipmapLevelOffset(mip_level);
|
||||
MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
|
||||
params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
|
||||
params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(),
|
||||
params.addr + offset);
|
||||
params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
|
||||
gl_buffer.data(), gl_buffer.size(), params.addr + offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -196,9 +196,15 @@ struct SurfaceParams {
|
|||
|
||||
/// Checks if surfaces are compatible for caching
|
||||
bool IsCompatibleSurface(const SurfaceParams& other) const {
|
||||
return std::tie(pixel_format, type, width, height, target, depth) ==
|
||||
std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
|
||||
other.depth);
|
||||
if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) ==
|
||||
std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
|
||||
other.depth, other.is_tiled)) {
|
||||
if (!is_tiled)
|
||||
return true;
|
||||
return std::tie(block_height, block_depth, tile_width_spacing) ==
|
||||
std::tie(other.block_height, other.block_depth, other.tile_width_spacing);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Initializes parameters for caching, should be called after everything has been initialized
|
||||
|
@ -208,6 +214,7 @@ struct SurfaceParams {
|
|||
u32 block_width;
|
||||
u32 block_height;
|
||||
u32 block_depth;
|
||||
u32 tile_width_spacing;
|
||||
PixelFormat pixel_format;
|
||||
ComponentType component_type;
|
||||
SurfaceType type;
|
||||
|
|
|
@ -127,7 +127,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
|
|||
template <bool fast>
|
||||
void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
|
||||
const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
|
||||
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) {
|
||||
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth,
|
||||
const u32 width_spacing) {
|
||||
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
|
||||
const u32 stride_x = width * out_bytes_per_pixel;
|
||||
const u32 layer_z = height * stride_x;
|
||||
|
@ -137,7 +138,8 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
|
|||
const u32 block_x_elements = gob_elements_x;
|
||||
const u32 block_y_elements = gob_elements_y * block_height;
|
||||
const u32 block_z_elements = gob_elements_z * block_depth;
|
||||
const u32 blocks_on_x = div_ceil(width, block_x_elements);
|
||||
const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing);
|
||||
const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
|
||||
const u32 blocks_on_y = div_ceil(height, block_y_elements);
|
||||
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
|
||||
const u32 xy_block_size = gob_size * block_height;
|
||||
|
@ -169,13 +171,15 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
|
|||
|
||||
/// Copies texture data between `swizzled_data` and `unswizzled_data`; `unswizzle` is forwarded to
/// SwizzledData to select the direction.
/// SwizzledData<true> is used when bytes_per_pixel is not a multiple of 3 and the row size in
/// bytes is a multiple of fast_swizzle_align; otherwise SwizzledData<false> is used.
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
                      u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
                      bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
    // SwizzledData aligns the width to gob_elements_x * width_spacing, so a spacing of 0 would
    // request an alignment of zero elements. Treat 0 as the neutral multiplier 1.
    const u32 spacing = width_spacing == 0 ? 1 : width_spacing;
    const bool use_fast_path =
        bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0;
    if (use_fast_path) {
        SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
                           bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
                           spacing);
    } else {
        SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
                            bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth,
                            spacing);
    }
}
|
||||
|
||||
|
@ -228,19 +232,19 @@ u32 BytesPerPixel(TextureFormat format) {
|
|||
|
||||
/// Unswizzles the texture at `address` into the caller-provided `unswizzled_data` buffer.
/// Width and height are converted to tile counts (rounded up) before being handed to
/// CopySwizzledData; `width_spacing` is forwarded unchanged.
void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
                      u32 block_depth, u32 width_spacing) {
    const u32 tiles_x = (width + tile_size_x - 1) / tile_size_x;
    const u32 tiles_y = (height + tile_size_y - 1) / tile_size_y;
    CopySwizzledData(tiles_x, tiles_y, depth, bytes_per_pixel, bytes_per_pixel,
                     Memory::GetPointer(address), unswizzled_data, true, block_height, block_depth,
                     width_spacing);
}
|
||||
|
||||
/// Unswizzles the texture at `address` into a newly allocated buffer and returns it.
/// The buffer holds width * height * depth * bytes_per_pixel bytes.
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                                 u32 block_height, u32 block_depth, u32 width_spacing) {
    // Widen before multiplying so the size cannot wrap around in 32-bit arithmetic.
    const std::size_t buffer_size =
        static_cast<std::size_t>(width) * height * depth * bytes_per_pixel;
    std::vector<u8> unswizzled_data(buffer_size);
    UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
                     width, height, depth, block_height, block_depth, width_spacing);
    return unswizzled_data;
}
|
||||
|
||||
|
|
|
@ -22,19 +22,20 @@ inline std::size_t GetGOBSize() {
|
|||
// Unswizzles the texture at `address` into the caller-provided `unswizzled_data` buffer.
// width_spacing is used as a multiplier on the width alignment, so the neutral default is 1.
void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                      u32 block_height = TICEntry::DefaultBlockHeight,
                      u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 1);
|
||||
/**
|
||||
* Unswizzles a swizzled texture without changing its format.
|
||||
*/
|
||||
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
|
||||
u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
|
||||
u32 block_height = TICEntry::DefaultBlockHeight,
|
||||
u32 block_depth = TICEntry::DefaultBlockHeight);
|
||||
u32 block_depth = TICEntry::DefaultBlockHeight,
|
||||
u32 width_spacing = 0);
|
||||
|
||||
/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
|
||||
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
|
||||
u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
|
||||
bool unswizzle, u32 block_height, u32 block_depth);
|
||||
bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
|
||||
|
||||
/**
|
||||
* Decodes an unswizzled texture into a A8R8G8B8 texture.
|
||||
|
|
|
@ -166,6 +166,8 @@ struct TICEntry {
|
|||
BitField<3, 3, u32> block_height;
|
||||
BitField<6, 3, u32> block_depth;
|
||||
|
||||
BitField<10, 3, u32> tile_width_spacing;
|
||||
|
||||
// High 16 bits of the pitch value
|
||||
BitField<0, 16, u32> pitch_high;
|
||||
BitField<26, 1, u32> use_header_opt_control;
|
||||
|
|
Reference in New Issue