citra-emu
/
citra-canary
Archived
1
0
Fork 0

Memory: Remove count of cached pages and add InvalidateRegion

In a future commit, the count of cached pages will be reintroduced in
the actual surface cache. This commit also adds an invalidate-only
operation to the cache, which marks a region as invalid in order to try
to avoid a costly flush to 3DS memory.
This commit is contained in:
James Rowe 2017-11-23 10:43:12 -07:00
parent c821c14908
commit 13606a6d0b
8 changed files with 50 additions and 54 deletions

View File

@ -58,7 +58,6 @@ void VMManager::Reset() {
page_table.pointers.fill(nullptr);
page_table.attributes.fill(Memory::PageType::Unmapped);
page_table.cached_res_count.fill(0);
UpdatePageTableForVMA(initial_vma);
}

View File

@ -423,7 +423,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
command.dma_request.size, Memory::FlushMode::Flush);
Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address,
command.dma_request.size,
Memory::FlushMode::FlushAndInvalidate);
Memory::FlushMode::Invalidate);
// TODO(Subv): These memory accesses should not go through the application's memory mapping.
// They should go through the GSP module's memory mapping.

View File

@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) {
u8* start = Memory::GetPhysicalPointer(start_addr);
u8* end = Memory::GetPhysicalPointer(end_addr);
// TODO: Consider always accelerating and returning vector of
// regions that the accelerated fill did not cover to
// reduce/eliminate the fill that the cpu has to do.
// This would also mean that the flush below is not needed.
// Fill should first flush all surfaces that touch but are
// not completely within the fill range.
// Then fill all completely covered surfaces, and return the
// regions that were between surfaces or within the touching
// ones for cpu to manually fill here.
if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
return;
Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
config.GetEndAddress() - config.GetStartAddress());
Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
config.GetEndAddress() - config.GetStartAddress());
if (config.fill_24bit) {
// fill with 24-bit values
@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) {
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
@ -363,8 +354,10 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
size_t contiguous_output_size =
config.texture_copy.size / output_width * (output_width + output_gap);
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
static_cast<u32>(contiguous_output_size));
// Only need to flush output if it has a gap
const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion
: Memory::RasterizerInvalidateRegion;
FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
u32 remaining_input = input_width;
u32 remaining_output = output_width;
@ -446,16 +439,18 @@ inline void Write(u32 addr, const T data) {
if (config.is_texture_copy) {
TextureCopy(config);
LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> "
"0x%08X(%u+%u), flags 0x%08X",
LOG_TRACE(HW_GPU,
"TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> "
"0x%08X(%u+%u), flags 0x%08X",
config.texture_copy.size, config.GetPhysicalInputAddress(),
config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16,
config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16,
config.texture_copy.output_gap * 16, config.flags);
} else {
DisplayTransfer(config);
LOG_TRACE(HW_GPU, "DisplayTransfer: 0x%08x(%ux%u)-> "
"0x%08x(%ux%u), dst format %x, flags 0x%08X",
LOG_TRACE(HW_GPU,
"DisplayTransfer: 0x%08x(%ux%u)-> "
"0x%08x(%ux%u), dst format %x, flags 0x%08X",
config.GetPhysicalInputAddress(), config.input_width.Value(),
config.input_height.Value(), config.GetPhysicalOutputAddress(),
config.output_width.Value(), config.output_height.Value(),
@ -570,4 +565,4 @@ void Shutdown() {
LOG_DEBUG(HW_GPU, "shutdown OK");
}
} // namespace
} // namespace GPU

View File

@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page
page_table.attributes[base] = type;
page_table.pointers[base] = memory;
page_table.cached_res_count[base] = 0;
base += 1;
if (memory != nullptr)
@ -200,7 +199,7 @@ void Write(const VAddr vaddr, const T data) {
ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
break;
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate);
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
break;
}
@ -208,7 +207,7 @@ void Write(const VAddr vaddr, const T data) {
WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
break;
case PageType::RasterizerCachedSpecial: {
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate);
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
break;
}
@ -334,7 +333,7 @@ u8* GetPhysicalPointer(PAddr address) {
return target_pointer;
}
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) {
if (start == 0) {
return;
}
@ -355,14 +354,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
}
VAddr vaddr = *maybe_vaddr;
u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
ASSERT_MSG(count_delta <= UINT8_MAX - res_count,
"Rasterizer resource cache counter overflow!");
ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
// Switch page type to cached if now cached
if (res_count == 0) {
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
if (cached) {
// Switch page type to cached if now cached
switch (page_type) {
case PageType::Unmapped:
// It is not necessary for a process to have this region mapped into its address
@ -378,13 +373,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
default:
UNREACHABLE();
}
}
res_count += count_delta;
// Switch page type to uncached if now uncached
if (res_count == 0) {
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
} else {
// Switch page type to uncached if now uncached
switch (page_type) {
case PageType::Unmapped:
// It is not necessary for a process to have this region mapped into its address
@ -419,6 +409,12 @@ void RasterizerFlushRegion(PAddr start, u32 size) {
}
}
/// Forwards an invalidation request for the physical region [start, start + size) to the
/// active rasterizer. Does nothing when no renderer exists (g_renderer is null).
void RasterizerInvalidateRegion(PAddr start, u32 size) {
if (VideoCore::g_renderer != nullptr) {
VideoCore::g_renderer->Rasterizer()->InvalidateRegion(start, size);
}
}
void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
// Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
// null here
@ -450,6 +446,9 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) {
case FlushMode::Flush:
rasterizer->FlushRegion(physical_start, overlap_size);
break;
case FlushMode::Invalidate:
rasterizer->InvalidateRegion(physical_start, overlap_size);
break;
case FlushMode::FlushAndInvalidate:
rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size);
break;
@ -588,7 +587,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
}
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::FlushAndInvalidate);
FlushMode::Invalidate);
std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
break;
}
@ -596,7 +595,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
DEBUG_ASSERT(handler);
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::FlushAndInvalidate);
FlushMode::Invalidate);
handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
break;
}
@ -648,7 +647,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size
}
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::FlushAndInvalidate);
FlushMode::Invalidate);
std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
break;
}
@ -656,7 +655,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
DEBUG_ASSERT(handler);
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::FlushAndInvalidate);
FlushMode::Invalidate);
handler->WriteBlock(current_vaddr, zeros.data(), copy_amount);
break;
}

View File

@ -72,12 +72,6 @@ struct PageTable {
* the corresponding entry in `pointers` MUST be set to null.
*/
std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes;
/**
* Indicates the number of externally cached resources touching a page that should be
* flushed before the memory is accessed
*/
std::array<u8, PAGE_TABLE_NUM_ENTRIES> cached_res_count;
};
/// Physical memory regions as seen from the ARM11
@ -245,16 +239,20 @@ boost::optional<VAddr> PhysicalToVirtualAddress(PAddr addr);
u8* GetPhysicalPointer(PAddr address);
/**
* Adds the supplied value to the rasterizer resource cache counter of each
* page touching the region.
* Mark each page touching the region as cached.
*/
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
/**
* Flushes any externally cached rasterizer resources touching the given region.
*/
void RasterizerFlushRegion(PAddr start, u32 size);
/**
* Invalidates any externally cached rasterizer resources touching the given region.
*/
void RasterizerInvalidateRegion(PAddr start, u32 size);
/**
* Flushes and invalidates any externally cached rasterizer resources touching the given region.
*/
@ -263,6 +261,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
enum class FlushMode {
/// Write back modified surfaces to RAM
Flush,
/// Remove region from the cache without writing modified surfaces back to RAM
Invalidate,
/// Write back modified surfaces to RAM, and also remove them from the cache
FlushAndInvalidate,
};

View File

@ -20,7 +20,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
page_table->pointers.fill(nullptr);
page_table->attributes.fill(Memory::PageType::Unmapped);
page_table->cached_res_count.fill(0);
Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);

View File

@ -38,6 +38,9 @@ public:
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
virtual void FlushRegion(PAddr addr, u32 size) = 0;
/// Notify rasterizer that any caches of the specified region should be invalidated
virtual void InvalidateRegion(PAddr addr, u32 size) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
/// and invalidated
virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;

View File

@ -11,7 +11,7 @@ namespace Pica {
namespace Shader {
struct OutputVertex;
}
}
} // namespace Pica
namespace VideoCore {
@ -22,6 +22,7 @@ class SWRasterizer : public RasterizerInterface {
void NotifyPicaRegisterChanged(u32 id) override {}
void FlushAll() override {}
void FlushRegion(PAddr addr, u32 size) override {}
void InvalidateRegion(PAddr addr, u32 size) override {}
void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
};