SMMU: Implement physical memory mirroring
This commit is contained in:
parent
0a2536a0df
commit
34a8d0cc8e
|
@ -10,8 +10,10 @@
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "common/scratch_buffer.h"
|
||||||
#include "common/virtual_buffer.h"
|
#include "common/virtual_buffer.h"
|
||||||
|
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
class DeviceMemory;
|
class DeviceMemory;
|
||||||
|
@ -49,9 +51,25 @@ public:
|
||||||
template <typename T>
|
template <typename T>
|
||||||
const T* GetPointer(DAddr address) const;
|
const T* GetPointer(DAddr address) const;
|
||||||
|
|
||||||
DAddr GetAddressFromPAddr(PAddr address) const {
|
template <typename Func>
|
||||||
|
void ApplyOpOnPAddr(PAddr address, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
|
||||||
DAddr subbits = static_cast<DAddr>(address & page_mask);
|
DAddr subbits = static_cast<DAddr>(address & page_mask);
|
||||||
return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits;
|
const u32 base = compressed_device_addr[(address >> page_bits)];
|
||||||
|
if ((base >> MULTI_FLAG_BITS) == 0) [[likely]] {
|
||||||
|
const DAddr d_address = static_cast<DAddr>(base << page_bits) + subbits;
|
||||||
|
operation(d_address);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
InnerGatherDeviceAddresses(buffer, address);
|
||||||
|
for (u32 value : buffer) {
|
||||||
|
operation(static_cast<DAddr>(value << page_bits) + subbits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
void ApplyOpOnPointer(const u8* p, Common::ScratchBuffer<u32>& buffer, Func&& operation) {
|
||||||
|
PAddr address = GetRawPhysicalAddr<u8>(p);
|
||||||
|
ApplyOpOnPAddr(address, buffer, operation);
|
||||||
}
|
}
|
||||||
|
|
||||||
PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
|
PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const {
|
||||||
|
@ -98,6 +116,9 @@ private:
|
||||||
static constexpr size_t page_size = 1ULL << page_bits;
|
static constexpr size_t page_size = 1ULL << page_bits;
|
||||||
static constexpr size_t page_mask = page_size - 1ULL;
|
static constexpr size_t page_mask = page_size - 1ULL;
|
||||||
static constexpr u32 physical_address_base = 1U << page_bits;
|
static constexpr u32 physical_address_base = 1U << page_bits;
|
||||||
|
static constexpr u32 MULTI_FLAG_BITS = 31;
|
||||||
|
static constexpr u32 MULTI_FLAG = 1U << MULTI_FLAG_BITS;
|
||||||
|
static constexpr u32 MULTI_MASK = ~MULTI_FLAG;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T* GetPointerFromRaw(PAddr addr) {
|
T* GetPointerFromRaw(PAddr addr) {
|
||||||
|
@ -117,6 +138,8 @@ private:
|
||||||
void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
|
void WalkBlock(const DAddr addr, const std::size_t size, auto on_unmapped, auto on_memory,
|
||||||
auto increment);
|
auto increment);
|
||||||
|
|
||||||
|
void InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer, PAddr address);
|
||||||
|
|
||||||
std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
|
std::unique_ptr<DeviceMemoryManagerAllocator<Traits>> impl;
|
||||||
|
|
||||||
const uintptr_t physical_base;
|
const uintptr_t physical_base;
|
||||||
|
|
|
@ -18,10 +18,117 @@
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class PhysicalAddressContainer {
|
||||||
|
public:
|
||||||
|
PhysicalAddressContainer() = default;
|
||||||
|
~PhysicalAddressContainer() = default;
|
||||||
|
|
||||||
|
void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
|
||||||
|
buffer.resize(8);
|
||||||
|
buffer.resize(0);
|
||||||
|
size_t index = 0;
|
||||||
|
const auto add_value = [&](u32 value) {
|
||||||
|
buffer[index] = value;
|
||||||
|
index++;
|
||||||
|
buffer.resize(index);
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 iter_entry = start_entry;
|
||||||
|
Entry* current = &storage[iter_entry - 1];
|
||||||
|
add_value(current->value);
|
||||||
|
while (current->next_entry != 0) {
|
||||||
|
iter_entry = current->next_entry;
|
||||||
|
current = &storage[iter_entry - 1];
|
||||||
|
add_value(current->value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 Register(u32 value) {
|
||||||
|
return RegisterImplementation(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Register(u32 value, u32 start_entry) {
|
||||||
|
auto entry_id = RegisterImplementation(value);
|
||||||
|
u32 iter_entry = start_entry;
|
||||||
|
Entry* current = &storage[iter_entry - 1];
|
||||||
|
while (current->next_entry != 0) {
|
||||||
|
iter_entry = current->next_entry;
|
||||||
|
current = &storage[iter_entry - 1];
|
||||||
|
}
|
||||||
|
current->next_entry = entry_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<bool, u32> Unregister(u32 value, u32 start_entry) {
|
||||||
|
u32 iter_entry = start_entry;
|
||||||
|
Entry* previous{};
|
||||||
|
Entry* current = &storage[iter_entry - 1];
|
||||||
|
Entry* next{};
|
||||||
|
bool more_than_one_remaining = false;
|
||||||
|
u32 result_start{start_entry};
|
||||||
|
size_t count = 0;
|
||||||
|
while (current->value != value) {
|
||||||
|
count++;
|
||||||
|
previous = current;
|
||||||
|
iter_entry = current->next_entry;
|
||||||
|
current = &storage[iter_entry - 1];
|
||||||
|
}
|
||||||
|
// Find next
|
||||||
|
u32 next_entry = current->next_entry;
|
||||||
|
if (next_entry != 0) {
|
||||||
|
next = &storage[next_entry - 1];
|
||||||
|
more_than_one_remaining = next->next_entry != 0;
|
||||||
|
}
|
||||||
|
if (previous) {
|
||||||
|
previous->next_entry = next_entry;
|
||||||
|
} else {
|
||||||
|
result_start = next_entry;
|
||||||
|
}
|
||||||
|
free_entries.emplace_back(iter_entry);
|
||||||
|
return std::make_pair(more_than_one_remaining || count > 1, result_start);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 ReleaseEntry(u32 start_entry) {
|
||||||
|
Entry* current = &storage[start_entry - 1];
|
||||||
|
free_entries.emplace_back(start_entry);
|
||||||
|
return current->value;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
u32 RegisterImplementation(u32 value) {
|
||||||
|
auto entry_id = GetNewEntry();
|
||||||
|
auto& entry = storage[entry_id - 1];
|
||||||
|
entry.next_entry = 0;
|
||||||
|
entry.value = value;
|
||||||
|
return entry_id;
|
||||||
|
}
|
||||||
|
u32 GetNewEntry() {
|
||||||
|
if (!free_entries.empty()) {
|
||||||
|
u32 result = free_entries.front();
|
||||||
|
free_entries.pop_front();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
storage.emplace_back();
|
||||||
|
u32 new_entry = static_cast<u32>(storage.size());
|
||||||
|
return new_entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Entry {
|
||||||
|
u32 next_entry{};
|
||||||
|
u32 value{};
|
||||||
|
};
|
||||||
|
|
||||||
|
std::deque<Entry> storage;
|
||||||
|
std::deque<u32> free_entries;
|
||||||
|
};
|
||||||
|
|
||||||
struct EmptyAllocator {
|
struct EmptyAllocator {
|
||||||
EmptyAllocator([[maybe_unused]] DAddr address) {}
|
EmptyAllocator([[maybe_unused]] DAddr address) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
template <typename DTraits>
|
template <typename DTraits>
|
||||||
struct DeviceMemoryManagerAllocator {
|
struct DeviceMemoryManagerAllocator {
|
||||||
static constexpr bool supports_pinning = DTraits::supports_pinning;
|
static constexpr bool supports_pinning = DTraits::supports_pinning;
|
||||||
|
@ -38,6 +145,7 @@ struct DeviceMemoryManagerAllocator {
|
||||||
std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
|
std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
|
||||||
pin_allocator;
|
pin_allocator;
|
||||||
Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
|
Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
|
||||||
|
PhysicalAddressContainer multi_dev_address;
|
||||||
|
|
||||||
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
|
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
|
||||||
template <bool pin_area>
|
template <bool pin_area>
|
||||||
|
@ -109,6 +217,9 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
|
||||||
cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
|
cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
|
||||||
impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
|
impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
|
||||||
cached_pages = std::make_unique<CachedPages>();
|
cached_pages = std::make_unique<CachedPages>();
|
||||||
|
for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
|
||||||
|
compressed_device_addr[i] = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
|
@ -155,8 +266,19 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
|
||||||
}
|
}
|
||||||
auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
|
auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
|
||||||
compressed_physical_ptr[start_page_d + i] = phys_addr;
|
compressed_physical_ptr[start_page_d + i] = phys_addr;
|
||||||
compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
|
|
||||||
InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
|
InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
|
||||||
|
const u32 base_dev = compressed_device_addr[phys_addr - 1U];
|
||||||
|
const u32 new_dev = static_cast<u32>(start_page_d + i);
|
||||||
|
if (base_dev == 0) [[likely]] {
|
||||||
|
compressed_device_addr[phys_addr - 1U] = new_dev;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
u32 start_id = base_dev & MULTI_MASK;
|
||||||
|
if ((base_dev >> MULTI_FLAG_BITS) == 0) {
|
||||||
|
start_id = impl->multi_dev_address.Register(base_dev);
|
||||||
|
compressed_device_addr[phys_addr - 1U] = MULTI_FLAG | start_id;
|
||||||
|
}
|
||||||
|
impl->multi_dev_address.Register(new_dev, start_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,12 +292,38 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
|
||||||
auto phys_addr = compressed_physical_ptr[start_page_d + i];
|
auto phys_addr = compressed_physical_ptr[start_page_d + i];
|
||||||
compressed_physical_ptr[start_page_d + i] = 0;
|
compressed_physical_ptr[start_page_d + i] = 0;
|
||||||
cpu_backing_address[start_page_d + i] = 0;
|
cpu_backing_address[start_page_d + i] = 0;
|
||||||
if (phys_addr != 0) {
|
if (phys_addr != 0) [[likely]] {
|
||||||
compressed_device_addr[phys_addr - 1] = 0;
|
const u32 base_dev = compressed_device_addr[phys_addr - 1U];
|
||||||
|
if ((base_dev >> MULTI_FLAG_BITS) == 0) [[likely]] {
|
||||||
|
compressed_device_addr[phys_addr - 1] = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto [more_entries, new_start] = impl->multi_dev_address.Unregister(
|
||||||
|
static_cast<u32>(start_page_d + i), base_dev & MULTI_MASK);
|
||||||
|
if (!more_entries) {
|
||||||
|
compressed_device_addr[phys_addr - 1] =
|
||||||
|
impl->multi_dev_address.ReleaseEntry(new_start);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Traits>
|
||||||
|
void DeviceMemoryManager<Traits>::InnerGatherDeviceAddresses(Common::ScratchBuffer<u32>& buffer,
|
||||||
|
PAddr address) {
|
||||||
|
size_t phys_addr = address >> page_bits;
|
||||||
|
std::scoped_lock lk(mapping_guard);
|
||||||
|
u32 backing = compressed_device_addr[phys_addr];
|
||||||
|
if ((backing >> MULTI_FLAG_BITS) != 0) {
|
||||||
|
impl->multi_dev_address.GatherValues(backing & MULTI_MASK, buffer);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
buffer.resize(1);
|
||||||
|
buffer[0] = backing;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
|
T* DeviceMemoryManager<Traits>::GetPointer(DAddr address) {
|
||||||
|
|
|
@ -16,8 +16,8 @@
|
||||||
namespace Service::Nvidia::NvCore {
|
namespace Service::Nvidia::NvCore {
|
||||||
|
|
||||||
struct ContainerImpl {
|
struct ContainerImpl {
|
||||||
explicit ContainerImpl(Tegra::Host1x::Host1x& host1x_)
|
explicit ContainerImpl(Container& core, Tegra::Host1x::Host1x& host1x_)
|
||||||
: host1x{host1x_}, file{host1x_}, manager{host1x_}, device_file_data{} {}
|
: host1x{host1x_}, file{core, host1x_}, manager{host1x_}, device_file_data{} {}
|
||||||
Tegra::Host1x::Host1x& host1x;
|
Tegra::Host1x::Host1x& host1x;
|
||||||
NvMap file;
|
NvMap file;
|
||||||
SyncpointManager manager;
|
SyncpointManager manager;
|
||||||
|
@ -29,7 +29,7 @@ struct ContainerImpl {
|
||||||
};
|
};
|
||||||
|
|
||||||
Container::Container(Tegra::Host1x::Host1x& host1x_) {
|
Container::Container(Tegra::Host1x::Host1x& host1x_) {
|
||||||
impl = std::make_unique<ContainerImpl>(host1x_);
|
impl = std::make_unique<ContainerImpl>(*this, host1x_);
|
||||||
}
|
}
|
||||||
|
|
||||||
Container::~Container() = default;
|
Container::~Container() = default;
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
#include "core/hle/service/nvdrv/core/container.h"
|
||||||
#include "core/hle/service/nvdrv/core/nvmap.h"
|
#include "core/hle/service/nvdrv/core/nvmap.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/host1x/host1x.h"
|
#include "video_core/host1x/host1x.h"
|
||||||
|
@ -64,7 +65,7 @@ NvResult NvMap::Handle::Duplicate(bool internal_session) {
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
NvMap::NvMap(Tegra::Host1x::Host1x& host1x_) : host1x{host1x_} {}
|
NvMap::NvMap(Container& core_, Tegra::Host1x::Host1x& host1x_) : host1x{host1x_}, core{core_} {}
|
||||||
|
|
||||||
void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
|
void NvMap::AddHandle(std::shared_ptr<Handle> handle_description) {
|
||||||
std::scoped_lock lock(handles_lock);
|
std::scoped_lock lock(handles_lock);
|
||||||
|
@ -160,6 +161,8 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
|
||||||
// If not then allocate some space and map it
|
// If not then allocate some space and map it
|
||||||
DAddr address{};
|
DAddr address{};
|
||||||
auto& smmu = host1x.MemoryManager();
|
auto& smmu = host1x.MemoryManager();
|
||||||
|
auto* session = core.GetSession(session_id);
|
||||||
|
|
||||||
auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
|
auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
|
||||||
//: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
|
//: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
|
||||||
while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
|
while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
|
||||||
|
@ -179,7 +182,7 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
|
||||||
handle_description->d_address = address;
|
handle_description->d_address = address;
|
||||||
|
|
||||||
smmu.Map(address, handle_description->address, handle_description->aligned_size,
|
smmu.Map(address, handle_description->address, handle_description->aligned_size,
|
||||||
session_id);
|
session->smmu_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
handle_description->pins++;
|
handle_description->pins++;
|
||||||
|
|
|
@ -25,6 +25,8 @@ class Host1x;
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
||||||
namespace Service::Nvidia::NvCore {
|
namespace Service::Nvidia::NvCore {
|
||||||
|
|
||||||
|
class Container;
|
||||||
/**
|
/**
|
||||||
* @brief The nvmap core class holds the global state for nvmap and provides methods to manage
|
* @brief The nvmap core class holds the global state for nvmap and provides methods to manage
|
||||||
* handles
|
* handles
|
||||||
|
@ -109,7 +111,7 @@ public:
|
||||||
bool can_unlock; //!< If the address region is ready to be unlocked
|
bool can_unlock; //!< If the address region is ready to be unlocked
|
||||||
};
|
};
|
||||||
|
|
||||||
explicit NvMap(Tegra::Host1x::Host1x& host1x);
|
explicit NvMap(Container& core, Tegra::Host1x::Host1x& host1x);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Creates an unallocated handle of the given size
|
* @brief Creates an unallocated handle of the given size
|
||||||
|
@ -173,5 +175,7 @@ private:
|
||||||
* @return If the handle was removed from the map
|
* @return If the handle was removed from the map
|
||||||
*/
|
*/
|
||||||
bool TryRemoveHandle(const Handle& handle_description);
|
bool TryRemoveHandle(const Handle& handle_description);
|
||||||
|
|
||||||
|
Container& core;
|
||||||
};
|
};
|
||||||
} // namespace Service::Nvidia::NvCore
|
} // namespace Service::Nvidia::NvCore
|
||||||
|
|
|
@ -44,7 +44,8 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
|
||||||
// from outside classes. This also allows modification to the internals of the memory
|
// from outside classes. This also allows modification to the internals of the memory
|
||||||
// subsystem without needing to rebuild all files that make use of the memory interface.
|
// subsystem without needing to rebuild all files that make use of the memory interface.
|
||||||
struct Memory::Impl {
|
struct Memory::Impl {
|
||||||
explicit Impl(Core::System& system_) : system{system_} {}
|
explicit Impl(Core::System& system_)
|
||||||
|
: system{system_} {}
|
||||||
|
|
||||||
void SetCurrentPageTable(Kernel::KProcess& process) {
|
void SetCurrentPageTable(Kernel::KProcess& process) {
|
||||||
current_page_table = &process.GetPageTable().GetImpl();
|
current_page_table = &process.GetPageTable().GetImpl();
|
||||||
|
@ -817,26 +818,31 @@ struct Memory::Impl {
|
||||||
/// Handles a CPU read of `size` bytes at `v_address`: for every device address that mirrors
/// the backing memory, calls GPU().OnCPURead unless the range already lies inside the area
/// cached for the current host thread.
void HandleRasterizerDownload(VAddr v_address, size_t size) {
    const auto* p = GetPointerImpl(
        v_address, []() {}, []() {});
    // The device memory manager is looked up on first use and cached afterwards.
    if (!gpu_device_memory) [[unlikely]] {
        gpu_device_memory = &system.Host1x().MemoryManager();
    }
    const size_t core = system.GetCurrentHostThreadID();
    auto& current_area = rasterizer_read_areas[core];
    const auto on_device_address = [&](DAddr address) {
        const DAddr end_address = address + size;
        const bool inside_cached_area =
            current_area.start_address <= address && end_address <= current_area.end_address;
        if (!inside_cached_area) [[unlikely]] {
            // Remember the area reported by the GPU so later reads inside it are skipped.
            current_area = system.GPU().OnCPURead(address, size);
        }
    };
    gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], on_device_address);
}
|
||||||
|
|
||||||
void HandleRasterizerWrite(VAddr v_address, size_t size) {
|
void HandleRasterizerWrite(VAddr v_address, size_t size) {
|
||||||
const auto* p = GetPointerImpl(
|
const auto* p = GetPointerImpl(
|
||||||
v_address, []() {}, []() {});
|
v_address, []() {}, []() {});
|
||||||
PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p);
|
|
||||||
constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
|
constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
|
||||||
const size_t core = std::min(system.GetCurrentHostThreadID(),
|
const size_t core = std::min(system.GetCurrentHostThreadID(),
|
||||||
sys_core); // any other calls threads go to syscore.
|
sys_core); // any other calls threads go to syscore.
|
||||||
|
if (!gpu_device_memory) [[unlikely]] {
|
||||||
|
gpu_device_memory = &system.Host1x().MemoryManager();
|
||||||
|
}
|
||||||
// Guard on sys_core;
|
// Guard on sys_core;
|
||||||
if (core == sys_core) [[unlikely]] {
|
if (core == sys_core) [[unlikely]] {
|
||||||
sys_core_guard.lock();
|
sys_core_guard.lock();
|
||||||
|
@ -846,17 +852,20 @@ struct Memory::Impl {
|
||||||
sys_core_guard.unlock();
|
sys_core_guard.unlock();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
auto& current_area = rasterizer_write_areas[core];
|
gpu_device_memory->ApplyOpOnPointer(
|
||||||
PAddr subaddress = address >> YUZU_PAGEBITS;
|
p, scratch_buffers[core], [&](DAddr address) {
|
||||||
bool do_collection = current_area.last_address == subaddress;
|
auto& current_area = rasterizer_write_areas[core];
|
||||||
if (!do_collection) [[unlikely]] {
|
PAddr subaddress = address >> YUZU_PAGEBITS;
|
||||||
do_collection = system.GPU().OnCPUWrite(address, size);
|
bool do_collection = current_area.last_address == subaddress;
|
||||||
if (!do_collection) {
|
if (!do_collection) [[unlikely]] {
|
||||||
return;
|
do_collection = system.GPU().OnCPUWrite(address, size);
|
||||||
|
if (!do_collection) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
current_area.last_address = subaddress;
|
||||||
}
|
}
|
||||||
current_area.last_address = subaddress;
|
gpu_dirty_managers[core].Collect(address, size);
|
||||||
}
|
});
|
||||||
gpu_dirty_managers[core].Collect(address, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct GPUDirtyState {
|
struct GPUDirtyState {
|
||||||
|
@ -872,10 +881,12 @@ struct Memory::Impl {
|
||||||
}
|
}
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{};
|
||||||
Common::PageTable* current_page_table = nullptr;
|
Common::PageTable* current_page_table = nullptr;
|
||||||
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
|
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
|
||||||
rasterizer_read_areas{};
|
rasterizer_read_areas{};
|
||||||
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
|
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
|
||||||
|
std::array<Common::ScratchBuffer<u32>, Core::Hardware::NUM_CPU_CORES> scratch_buffers{};
|
||||||
std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
|
std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
|
||||||
std::mutex sys_core_guard;
|
std::mutex sys_core_guard;
|
||||||
|
|
||||||
|
|
|
@ -554,9 +554,8 @@ void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::Cache
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) {
|
bool RasterizerOpenGL::OnCPUWrite(DAddr addr, u64 size) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
|
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -577,9 +576,9 @@ bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::OnCacheInvalidation(PAddr p_addr, u64 size) {
|
void RasterizerOpenGL::OnCacheInvalidation(DAddr addr, u64 size) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
|
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -602,8 +602,7 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) {
|
bool RasterizerVulkan::OnCPUWrite(DAddr addr, u64 size) {
|
||||||
const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
|
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -624,8 +623,7 @@ bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::OnCacheInvalidation(PAddr p_addr, u64 size) {
|
void RasterizerVulkan::OnCacheInvalidation(DAddr addr, u64 size) {
|
||||||
const DAddr addr = device_memory.GetAddressFromPAddr(p_addr);
|
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
Reference in New Issue