citra-emu
/
citra
Archived
1
0
Fork 0

gpu: move MemoryFill, TextureCopy and DisplayTransfer into functions

The old code was indented too deeply to read. Split it into functions and do some general cleanup.
This commit is contained in:
wwylele 2016-09-27 18:38:42 +08:00
parent d2419570b9
commit c88cdc9a2b
1 changed files with 248 additions and 246 deletions

View File

@ -80,29 +80,7 @@ static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_
MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100)); MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100));
template <typename T> static void MemoryFill(const Regs::MemoryFillConfig& config) {
inline void Write(u32 addr, const T data) {
addr -= HW::VADDR_GPU;
u32 index = addr / 4;
// Writes other than u32 are untested, so I'd rather have them abort than silently fail
if (index >= Regs::NumIds() || !std::is_same<T, u32>::value) {
LOG_ERROR(HW_GPU, "unknown Write%lu 0x%08X @ 0x%08X", sizeof(data) * 8, (u32)data, addr);
return;
}
g_regs[index] = static_cast<u32>(data);
switch (index) {
// Memory fills are triggered once the fill value is written.
case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): {
const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
auto& config = g_regs.memory_fill_config[is_second_filler];
if (config.trigger) {
if (config.address_start) { // Some games pass invalid values here
u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
@ -115,10 +93,11 @@ inline void Write(u32 addr, const T data) {
// Then fill all completely covered surfaces, and return the // Then fill all completely covered surfaces, and return the
// regions that were between surfaces or within the touching // regions that were between surfaces or within the touching
// ones for cpu to manually fill here. // ones for cpu to manually fill here.
if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) { if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
return;
Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
config.GetEndAddress() - config.GetEndAddress() - config.GetStartAddress());
config.GetStartAddress());
if (config.fill_24bit) { if (config.fill_24bit) {
// fill with 24-bit values // fill with 24-bit values
@ -141,104 +120,26 @@ inline void Write(u32 addr, const T data) {
for (u8* ptr = start; ptr < end; ptr += sizeof(u16)) for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
memcpy(ptr, &value_16bit, sizeof(u16)); memcpy(ptr, &value_16bit, sizeof(u16));
} }
} }
LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), static void DisplayTransfer(const Regs::DisplayTransferConfig& config) {
config.GetEndAddress()); if (VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config))
return;
if (!is_second_filler) {
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
} else {
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
}
}
// Reset "trigger" flag and set the "finish" flag
// NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
config.trigger.Assign(0);
config.finished.Assign(1);
}
break;
}
case GPU_REG_INDEX(display_transfer_config.trigger): {
MICROPROFILE_SCOPE(GPU_DisplayTransfer);
const auto& config = g_regs.display_transfer_config;
if (config.trigger & 1) {
if (Pica::g_debug_context)
Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
nullptr);
if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
if (config.is_texture_copy) {
u32 input_width = config.texture_copy.input_width * 16;
u32 input_gap = config.texture_copy.input_gap * 16;
u32 output_width = config.texture_copy.output_width * 16;
u32 output_gap = config.texture_copy.output_gap * 16;
size_t contiguous_input_size =
config.texture_copy.size / input_width * (input_width + input_gap);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
static_cast<u32>(contiguous_input_size));
size_t contiguous_output_size =
config.texture_copy.size / output_width * (output_width + output_gap);
Memory::RasterizerFlushAndInvalidateRegion(
config.GetPhysicalOutputAddress(),
static_cast<u32>(contiguous_output_size));
u32 remaining_size = config.texture_copy.size;
u32 remaining_input = input_width;
u32 remaining_output = output_width;
while (remaining_size > 0) {
u32 copy_size =
std::min({remaining_input, remaining_output, remaining_size});
std::memcpy(dst_pointer, src_pointer, copy_size);
src_pointer += copy_size;
dst_pointer += copy_size;
remaining_input -= copy_size;
remaining_output -= copy_size;
remaining_size -= copy_size;
if (remaining_input == 0) {
remaining_input = input_width;
src_pointer += input_gap;
}
if (remaining_output == 0) {
remaining_output = output_width;
dst_pointer += output_gap;
}
}
LOG_TRACE(
HW_GPU,
"TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
config.texture_copy.size, config.GetPhysicalInputAddress(), input_width,
input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap,
config.flags);
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
break;
}
if (config.scaling > config.ScaleXY) { if (config.scaling > config.ScaleXY) {
LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u",
config.scaling.Value()); config.scaling.Value());
UNIMPLEMENTED(); UNIMPLEMENTED();
break; return;
} }
if (config.input_linear && config.scaling != config.NoScale) { if (config.input_linear && config.scaling != config.NoScale) {
LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
UNIMPLEMENTED(); UNIMPLEMENTED();
break; return;
} }
int horizontal_scale = config.scaling != config.NoScale ? 1 : 0; int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
@ -247,14 +148,12 @@ inline void Write(u32 addr, const T data) {
u32 output_width = config.output_width >> horizontal_scale; u32 output_width = config.output_width >> horizontal_scale;
u32 output_height = config.output_height >> vertical_scale; u32 output_height = config.output_height >> vertical_scale;
u32 input_size = config.input_width * config.input_height * u32 input_size =
GPU::Regs::BytesPerPixel(config.input_format); config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
u32 output_size = u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
output_size);
for (u32 y = 0; y < output_height; ++y) { for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) { for (u32 x = 0; x < output_width; ++x) {
@ -283,14 +182,12 @@ inline void Write(u32 addr, const T data) {
u32 coarse_y = y & ~7; u32 coarse_y = y & ~7;
u32 stride = output_width * dst_bytes_per_pixel; u32 stride = output_width * dst_bytes_per_pixel;
src_offset = src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
(input_x + input_y * config.input_width) * src_bytes_per_pixel; dst_offset =
dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
coarse_y * stride;
} else { } else {
// Both input and output are linear // Both input and output are linear
src_offset = src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
(input_x + input_y * config.input_width) * src_bytes_per_pixel;
dst_offset = (x + y * output_width) * dst_bytes_per_pixel; dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
} }
} else { } else {
@ -299,8 +196,7 @@ inline void Write(u32 addr, const T data) {
u32 coarse_y = input_y & ~7; u32 coarse_y = input_y & ~7;
u32 stride = config.input_width * src_bytes_per_pixel; u32 stride = config.input_width * src_bytes_per_pixel;
src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) +
src_bytes_per_pixel) +
coarse_y * stride; coarse_y * stride;
dst_offset = (x + y * output_width) * dst_bytes_per_pixel; dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
} else { } else {
@ -311,8 +207,7 @@ inline void Write(u32 addr, const T data) {
u32 in_coarse_y = input_y & ~7; u32 in_coarse_y = input_y & ~7;
u32 in_stride = config.input_width * src_bytes_per_pixel; u32 in_stride = config.input_width * src_bytes_per_pixel;
src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) +
src_bytes_per_pixel) +
in_coarse_y * in_stride; in_coarse_y * in_stride;
dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
out_coarse_y * out_stride; out_coarse_y * out_stride;
@ -326,12 +221,12 @@ inline void Write(u32 addr, const T data) {
DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
src_color = ((src_color + pixel) / 2).Cast<u8>(); src_color = ((src_color + pixel) / 2).Cast<u8>();
} else if (config.scaling == config.ScaleXY) { } else if (config.scaling == config.ScaleXY) {
Math::Vec4<u8> pixel1 = DecodePixel( Math::Vec4<u8> pixel1 =
config.input_format, src_pixel + 1 * src_bytes_per_pixel); DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
Math::Vec4<u8> pixel2 = DecodePixel( Math::Vec4<u8> pixel2 =
config.input_format, src_pixel + 2 * src_bytes_per_pixel); DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
Math::Vec4<u8> pixel3 = DecodePixel( Math::Vec4<u8> pixel3 =
config.input_format, src_pixel + 3 * src_bytes_per_pixel); DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
} }
@ -364,14 +259,121 @@ inline void Write(u32 addr, const T data) {
} }
} }
} }
}
LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> " static void TextureCopy(const Regs::DisplayTransferConfig& config) {
if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config))
return;
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
u32 input_width = config.texture_copy.input_width * 16;
u32 input_gap = config.texture_copy.input_gap * 16;
u32 output_width = config.texture_copy.output_width * 16;
u32 output_gap = config.texture_copy.output_gap * 16;
size_t contiguous_input_size =
config.texture_copy.size / input_width * (input_width + input_gap);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
static_cast<u32>(contiguous_input_size));
size_t contiguous_output_size =
config.texture_copy.size / output_width * (output_width + output_gap);
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
static_cast<u32>(contiguous_output_size));
u32 remaining_size = config.texture_copy.size;
u32 remaining_input = input_width;
u32 remaining_output = output_width;
while (remaining_size > 0) {
u32 copy_size = std::min({remaining_input, remaining_output, remaining_size});
std::memcpy(dst_pointer, src_pointer, copy_size);
src_pointer += copy_size;
dst_pointer += copy_size;
remaining_input -= copy_size;
remaining_output -= copy_size;
remaining_size -= copy_size;
if (remaining_input == 0) {
remaining_input = input_width;
src_pointer += input_gap;
}
if (remaining_output == 0) {
remaining_output = output_width;
dst_pointer += output_gap;
}
}
}
template <typename T>
inline void Write(u32 addr, const T data) {
addr -= HW::VADDR_GPU;
u32 index = addr / 4;
// Writes other than u32 are untested, so I'd rather have them abort than silently fail
if (index >= Regs::NumIds() || !std::is_same<T, u32>::value) {
LOG_ERROR(HW_GPU, "unknown Write%lu 0x%08X @ 0x%08X", sizeof(data) * 8, (u32)data, addr);
return;
}
g_regs[index] = static_cast<u32>(data);
switch (index) {
// Memory fills are triggered once the fill value is written.
case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): {
const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
auto& config = g_regs.memory_fill_config[is_second_filler];
if (config.trigger) {
MemoryFill(config);
LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(),
config.GetEndAddress());
if (!is_second_filler) {
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
} else {
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
}
// Reset "trigger" flag and set the "finish" flag
// NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
config.trigger.Assign(0);
config.finished.Assign(1);
}
break;
}
case GPU_REG_INDEX(display_transfer_config.trigger): {
MICROPROFILE_SCOPE(GPU_DisplayTransfer);
const auto& config = g_regs.display_transfer_config;
if (config.trigger & 1) {
if (Pica::g_debug_context)
Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
nullptr);
if (config.is_texture_copy) {
TextureCopy(config);
LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> "
"0x%08X(%u+%u), flags 0x%08X",
config.texture_copy.size, config.GetPhysicalInputAddress(),
config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16,
config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16,
config.texture_copy.output_gap * 16, config.flags);
} else {
DisplayTransfer(config);
LOG_TRACE(HW_GPU, "DisplayTransfer: 0x%08x(%ux%u)-> "
"0x%08x(%ux%u), dst format %x, flags 0x%08X", "0x%08x(%ux%u), dst format %x, flags 0x%08X",
config.output_height * output_width *
GPU::Regs::BytesPerPixel(config.output_format),
config.GetPhysicalInputAddress(), config.input_width.Value(), config.GetPhysicalInputAddress(), config.input_width.Value(),
config.input_height.Value(), config.GetPhysicalOutputAddress(), config.input_height.Value(), config.GetPhysicalOutputAddress(),
output_width, output_height, config.output_format.Value(), config.flags); config.output_width.Value(), config.output_height.Value(),
config.output_format.Value(), config.flags);
} }
g_regs.display_transfer_config.trigger = 0; g_regs.display_transfer_config.trigger = 0;