From 20011dfeb8d1fa00a862e9b31ce10ceca8015fa2 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 17 Nov 2023 20:22:38 +0200 Subject: [PATCH 01/28] common: Add libc sigaction hook --- src/common/CMakeLists.txt | 7 +++++++ src/common/signal_chain.cpp | 42 +++++++++++++++++++++++++++++++++++++ src/common/signal_chain.h | 19 +++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 src/common/signal_chain.cpp create mode 100644 src/common/signal_chain.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e216eb3de..7107f4f78 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -166,6 +166,13 @@ if (WIN32) target_link_libraries(common PRIVATE ntdll) endif() +if (NOT WIN32) + target_sources(common PRIVATE + signal_chain.cpp + signal_chain.h + ) +endif() + if(ANDROID) target_sources(common PRIVATE diff --git a/src/common/signal_chain.cpp b/src/common/signal_chain.cpp new file mode 100644 index 000000000..e0c6b9d4e --- /dev/null +++ b/src/common/signal_chain.cpp @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "common/assert.h" +#include "common/dynamic_library.h" +#include "common/scope_exit.h" +#include "common/signal_chain.h" + +namespace Common { + +template +T* LookupLibcSymbol(const char* name) { +#if defined(__BIONIC__) + Common::DynamicLibrary provider("libc.so"); + if (!provider.IsOpen()) { + UNREACHABLE_MSG("Failed to open libc!"); + } +#else + // For other operating environments, we assume the symbol is not overriden. + const char* base = nullptr; + Common::DynamicLibrary provider(base); +#endif + + void* sym = provider.GetSymbolAddress(name); + if (sym == nullptr) { + sym = dlsym(RTLD_DEFAULT, name); + } + if (sym == nullptr) { + UNREACHABLE_MSG("Unable to find symbol {}!", name); + } + + return reinterpret_cast(sym); +} + +int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact) { + static auto libc_sigaction = LookupLibcSymbol("sigaction"); + return libc_sigaction(signum, act, oldact); +} + +} // namespace Common diff --git a/src/common/signal_chain.h b/src/common/signal_chain.h new file mode 100644 index 000000000..e3bfe6882 --- /dev/null +++ b/src/common/signal_chain.h @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#ifndef _WIN32 + +#include + +namespace Common { + +// Android's ART overrides sigaction with its own wrapper. This is problematic for SIGSEGV +// in particular, because ARTs handler access TPIDR_EL0, so this extracts the libc version +// and calls it directly. 
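+//
+// Handlers installed through this wrapper are registered with libc itself and therefore run
+// without ART's interposition. A minimal, illustrative use (the handler name is hypothetical):
+//
+//   struct sigaction sa{};
+//   sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+//   sa.sa_sigaction = MyFaultHandler;
+//   Common::SigAction(SIGSEGV, &sa, nullptr);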
+int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact); + +} // namespace Common + +#endif From 29e7d79a869090a39127442cb9553bf46a8cd009 Mon Sep 17 00:00:00 2001 From: --author=Liam Date: Fri, 17 Nov 2023 20:25:23 +0200 Subject: [PATCH 02/28] common: Add free region manager * Abstraction for placeholder region tracking in host_memory --- src/common/CMakeLists.txt | 1 + src/common/free_region_manager.h | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 src/common/free_region_manager.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 7107f4f78..98fd5f1e4 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -52,6 +52,7 @@ add_library(common STATIC fiber.cpp fiber.h fixed_point.h + free_region_manager.h fs/file.cpp fs/file.h fs/fs.cpp diff --git a/src/common/free_region_manager.h b/src/common/free_region_manager.h new file mode 100644 index 000000000..2e590d609 --- /dev/null +++ b/src/common/free_region_manager.h @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +namespace Common { + +class FreeRegionManager { +public: + explicit FreeRegionManager() = default; + ~FreeRegionManager() = default; + + void SetAddressSpace(void* start, size_t size) { + this->FreeBlock(start, size); + } + + std::pair FreeBlock(void* block_ptr, size_t size) { + std::scoped_lock lk(m_mutex); + + // Check to see if we are adjacent to any regions. + auto start_address = reinterpret_cast(block_ptr); + auto end_address = start_address + size; + auto it = m_free_regions.find({start_address - 1, end_address + 1}); + + // If we are, join with them, ensuring we stay in bounds. + if (it != m_free_regions.end()) { + start_address = std::min(start_address, it->lower()); + end_address = std::max(end_address, it->upper()); + } + + // Free the relevant region. + m_free_regions.insert({start_address, end_address}); + + // Return the adjusted pointers. 
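+        // For example, freeing [0x3000, 0x4000) while [0x1000, 0x3000) is already free records
+        // and returns the single region [0x1000, 0x4000), so the caller can cover the whole
+        // merged extent with one placeholder mapping.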
+ block_ptr = reinterpret_cast(start_address); + size = end_address - start_address; + return {block_ptr, size}; + } + + void AllocateBlock(void* block_ptr, size_t size) { + std::scoped_lock lk(m_mutex); + + auto address = reinterpret_cast(block_ptr); + m_free_regions.subtract({address, address + size}); + } + +private: + std::mutex m_mutex; + boost::icl::interval_set m_free_regions; +}; + +} // namespace Common From 448d4815decea4b3b29c768f3507c240932d1999 Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 17 Nov 2023 20:33:43 +0200 Subject: [PATCH 03/28] host_memory: ensure map base is between 36 and 39 bits --- src/common/host_memory.cpp | 60 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index ba22595e0..41ca12ab0 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -21,12 +21,14 @@ #include #include #include +#include #include #include "common/scope_exit.h" #endif // ^^^ Linux ^^^ #include +#include #include "common/alignment.h" #include "common/assert.h" @@ -353,6 +355,61 @@ private: #elif defined(__linux__) || defined(__FreeBSD__) // ^^^ Windows ^^^ vvv Linux vvv +#ifdef ARCHITECTURE_arm64 + +uint64_t GetRandomU64() { + uint64_t ret; + ASSERT(getrandom(&ret, sizeof(ret), 0) == 0); + return ret; +} + +void* ChooseVirtualBase(size_t virtual_size) { + constexpr uintptr_t Map39BitSize = (1ULL << 39); + constexpr uintptr_t Map36BitSize = (1ULL << 36); + + // Seed the MT with some initial strong randomness. + // + // This is not a cryptographic application, we just want something more + // random than the current time. + std::mt19937_64 rng(GetRandomU64()); + + // We want to ensure we are allocating at an address aligned to the L2 block size. + // For Qualcomm devices, we must also allocate memory above 36 bits. + const size_t lower = Map36BitSize / HugePageSize; + const size_t upper = (Map39BitSize - virtual_size) / HugePageSize; + const size_t range = upper - lower; + + // Try up to 64 times to allocate memory at random addresses in the range. + for (int i = 0; i < 64; i++) { + // Calculate a possible location. + uintptr_t hint_address = ((rng() % range) + lower) * HugePageSize; + + // Try to map. + // Note: we may be able to take advantage of MAP_FIXED_NOREPLACE here. + void* map_pointer = + mmap(reinterpret_cast(hint_address), virtual_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); + + // If we successfully mapped, we're done. + if (reinterpret_cast(map_pointer) == hint_address) { + return map_pointer; + } + + // Unmap if necessary, and try again. 
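+        // Without MAP_FIXED the hint address is only advisory and the kernel may place the
+        // mapping elsewhere; such a mapping is still ours, so release it before retrying.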
+ if (map_pointer != MAP_FAILED) { + munmap(map_pointer, virtual_size); + } + } + + return MAP_FAILED; +} +#else +void* ChooseVirtualBase(size_t virtual_size) { + return mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); +} +#endif + class HostMemory::Impl { public: explicit Impl(size_t backing_size_, size_t virtual_size_) @@ -415,8 +472,7 @@ public: } } #else - virtual_base = static_cast(mmap(nullptr, virtual_size, PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0)); + virtual_base = static_cast(ChooseVirtualBase(virtual_size)); if (virtual_base == MAP_FAILED) { LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno)); throw std::bad_alloc{}; From 4766baddf3501695b6048ed78f251f4ec28ae0aa Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 17 Nov 2023 20:57:39 +0200 Subject: [PATCH 04/28] host_memory: Switch to FreeRegionManager --- src/common/host_memory.cpp | 94 ++++++++++++++++++++++++++------------ src/common/host_memory.h | 2 + 2 files changed, 67 insertions(+), 29 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 41ca12ab0..a66fc49e2 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -32,6 +32,7 @@ #include "common/alignment.h" #include "common/assert.h" +#include "common/free_region_manager.h" #include "common/host_memory.h" #include "common/logging/log.h" @@ -339,6 +340,11 @@ private: return false; } + void EnableDirectMappedAddress() { + // TODO + UNREACHABLE(); + } + HANDLE process{}; ///< Current process handle HANDLE backing_handle{}; ///< File based backing memory @@ -472,7 +478,7 @@ public: } } #else - virtual_base = static_cast(ChooseVirtualBase(virtual_size)); + virtual_base = virtual_map_base = static_cast(ChooseVirtualBase(virtual_size)); if (virtual_base == MAP_FAILED) { LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno)); throw std::bad_alloc{}; @@ -480,7 +486,7 @@ public: madvise(virtual_base, virtual_size, MADV_HUGEPAGE); #endif - placeholders.add({0, virtual_size}); + free_manager.SetAddressSpace(virtual_base, virtual_size); good = true; } @@ -489,10 +495,11 @@ public: } void Map(size_t virtual_offset, size_t host_offset, size_t length) { - { - std::scoped_lock lock{placeholder_mutex}; - placeholders.subtract({virtual_offset, virtual_offset + length}); - } + // Intersect the range with our address space. + AdjustMap(&virtual_offset, &length); + + // We are removing a placeholder. + free_manager.AllocateBlock(virtual_base + virtual_offset, length); void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, host_offset); @@ -503,26 +510,23 @@ public: // The method name is wrong. We're still talking about the virtual range. // We don't want to unmap, we want to reserve this memory. - { - std::scoped_lock lock{placeholder_mutex}; - auto it = placeholders.find({virtual_offset - 1, virtual_offset + length + 1}); + // Intersect the range with our address space. + AdjustMap(&virtual_offset, &length); - if (it != placeholders.end()) { - size_t prev_upper = virtual_offset + length; - virtual_offset = std::min(virtual_offset, it->lower()); - length = std::max(it->upper(), prev_upper) - virtual_offset; - } + // Merge with any adjacent placeholder mappings. 
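+        // FreeBlock coalesces this range with any adjacent free regions and returns the merged
+        // extent, so the single PROT_NONE reservation below covers the entire free span.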
+ auto [merged_pointer, merged_size] = + free_manager.FreeBlock(virtual_base + virtual_offset, length); - placeholders.add({virtual_offset, virtual_offset + length}); - } - - void* ret = mmap(virtual_base + virtual_offset, length, PROT_NONE, + void* ret = mmap(merged_pointer, merged_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); } void Protect(size_t virtual_offset, size_t length, bool read, bool write) { - int flags = 0; + // Intersect the range with our address space. + AdjustMap(&virtual_offset, &length); + + int flags = PROT_NONE; if (read) { flags |= PROT_READ; } @@ -533,17 +537,22 @@ public: ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno)); } + void EnableDirectMappedAddress() { + virtual_base = nullptr; + } + const size_t backing_size; ///< Size of the backing memory in bytes const size_t virtual_size; ///< Size of the virtual address placeholder in bytes u8* backing_base{reinterpret_cast(MAP_FAILED)}; u8* virtual_base{reinterpret_cast(MAP_FAILED)}; + u8* virtual_map_base{reinterpret_cast(MAP_FAILED)}; private: /// Release all resources in the object void Release() { - if (virtual_base != MAP_FAILED) { - int ret = munmap(virtual_base, virtual_size); + if (virtual_map_base != MAP_FAILED) { + int ret = munmap(virtual_map_base, virtual_size); ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno)); } @@ -558,10 +567,29 @@ private: } } - int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create + void AdjustMap(size_t* virtual_offset, size_t* length) { + if (virtual_base != nullptr) { + return; + } - boost::icl::interval_set placeholders; ///< Mapped placeholders - std::mutex placeholder_mutex; ///< Mutex for placeholders + // If we are direct mapped, we want to make sure we are operating on a region + // that is in range of our virtual mapping. + size_t intended_start = *virtual_offset; + size_t intended_end = intended_start + *length; + size_t address_space_start = reinterpret_cast(virtual_map_base); + size_t address_space_end = address_space_start + virtual_size; + + if (address_space_start > intended_end || intended_start > address_space_end) { + *virtual_offset = 0; + *length = 0; + } else { + *virtual_offset = std::max(intended_start, address_space_start); + *length = std::min(intended_end, address_space_end) - *virtual_offset; + } + } + + int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create + FreeRegionManager free_manager{}; }; #else // ^^^ Linux ^^^ vvv Generic vvv @@ -591,15 +619,16 @@ HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_) try { // Try to allocate a fastmem arena. // The implementation will fail with std::bad_alloc on errors. - impl = std::make_unique(AlignUp(backing_size, PageAlignment), - AlignUp(virtual_size, PageAlignment) + - 3 * HugePageSize); + impl = + std::make_unique(AlignUp(backing_size, PageAlignment), + AlignUp(virtual_size, PageAlignment) + HugePageSize); backing_base = impl->backing_base; virtual_base = impl->virtual_base; if (virtual_base) { - virtual_base += 2 * HugePageSize - 1; - virtual_base -= reinterpret_cast(virtual_base) & (HugePageSize - 1); + // Ensure the virtual base is aligned to the L2 block size. 
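+        // The Impl reserved an extra HugePageSize above, so rounding up to a huge-page
+        // boundary here cannot step past the end of the reservation.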
+ virtual_base = reinterpret_cast( + Common::AlignUp(reinterpret_cast(virtual_base), HugePageSize)); virtual_base_offset = virtual_base - impl->virtual_base; } @@ -650,4 +679,11 @@ void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool w impl->Protect(virtual_offset + virtual_base_offset, length, read, write); } +void HostMemory::EnableDirectMappedAddress() { + if (impl) { + impl->EnableDirectMappedAddress(); + virtual_size += reinterpret_cast(virtual_base); + } +} + } // namespace Common diff --git a/src/common/host_memory.h b/src/common/host_memory.h index 447975ded..4014a1962 100644 --- a/src/common/host_memory.h +++ b/src/common/host_memory.h @@ -37,6 +37,8 @@ public: void Protect(size_t virtual_offset, size_t length, bool read, bool write); + void EnableDirectMappedAddress(); + [[nodiscard]] u8* BackingBasePointer() noexcept { return backing_base; } From 5938a9582a238d7679eb15f9812f8a85bfdb1cc3 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 17 Nov 2023 21:43:15 +0200 Subject: [PATCH 05/28] core: Respect memory permissions in Map --- src/common/host_memory.cpp | 45 ++++++++++---- src/common/host_memory.h | 13 ++++- src/core/hle/kernel/k_page_table_base.cpp | 32 +++++++++- src/core/memory.cpp | 8 +-- src/core/memory.h | 6 +- src/tests/common/host_memory.cpp | 71 ++++++++++++----------- 6 files changed, 117 insertions(+), 58 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index a66fc49e2..3e4b34de6 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -144,7 +144,7 @@ public: Release(); } - void Map(size_t virtual_offset, size_t host_offset, size_t length) { + void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) { std::unique_lock lock{placeholder_mutex}; if (!IsNiechePlaceholder(virtual_offset, length)) { Split(virtual_offset, length); @@ -163,7 +163,7 @@ public: } } - void Protect(size_t virtual_offset, size_t length, bool read, bool write) { + void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) { DWORD new_flags{}; if (read && write) { new_flags = PAGE_READWRITE; @@ -494,15 +494,29 @@ public: Release(); } - void Map(size_t virtual_offset, size_t host_offset, size_t length) { + void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) { // Intersect the range with our address space. AdjustMap(&virtual_offset, &length); // We are removing a placeholder. free_manager.AllocateBlock(virtual_base + virtual_offset, length); - void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, fd, host_offset); + // Deduce mapping protection flags. + int flags = PROT_NONE; + if (True(perms & MemoryPermission::Read)) { + flags |= PROT_READ; + } + if (True(perms & MemoryPermission::Write)) { + flags |= PROT_WRITE; + } +#ifdef ARCHITECTURE_arm64 + if (True(perms & MemoryPermission::Execute)) { + flags |= PROT_EXEC; + } +#endif + + void* ret = mmap(virtual_base + virtual_offset, length, flags, MAP_SHARED | MAP_FIXED, fd, + host_offset); ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); } @@ -522,7 +536,7 @@ public: ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno)); } - void Protect(size_t virtual_offset, size_t length, bool read, bool write) { + void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) { // Intersect the range with our address space. 
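        // In direct-mapped mode the incoming offset is an absolute guest address; AdjustMap
        // clamps it to the reserved span and zeroes the length when there is no overlap.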
AdjustMap(&virtual_offset, &length); @@ -533,6 +547,11 @@ public: if (write) { flags |= PROT_WRITE; } +#ifdef ARCHITECTURE_arm64 + if (execute) { + flags |= PROT_EXEC; + } +#endif int ret = mprotect(virtual_base + virtual_offset, length, flags); ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno)); } @@ -602,11 +621,11 @@ public: throw std::bad_alloc{}; } - void Map(size_t virtual_offset, size_t host_offset, size_t length) {} + void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm) {} void Unmap(size_t virtual_offset, size_t length) {} - void Protect(size_t virtual_offset, size_t length, bool read, bool write) {} + void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {} u8* backing_base{nullptr}; u8* virtual_base{nullptr}; @@ -647,7 +666,8 @@ HostMemory::HostMemory(HostMemory&&) noexcept = default; HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default; -void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) { +void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length, + MemoryPermission perms) { ASSERT(virtual_offset % PageAlignment == 0); ASSERT(host_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); @@ -656,7 +676,7 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) { if (length == 0 || !virtual_base || !impl) { return; } - impl->Map(virtual_offset + virtual_base_offset, host_offset, length); + impl->Map(virtual_offset + virtual_base_offset, host_offset, length, perms); } void HostMemory::Unmap(size_t virtual_offset, size_t length) { @@ -669,14 +689,15 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length) { impl->Unmap(virtual_offset + virtual_base_offset, length); } -void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) { +void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write, + bool execute) { ASSERT(virtual_offset % PageAlignment == 0); ASSERT(length % PageAlignment == 0); ASSERT(virtual_offset + length <= virtual_size); if (length == 0 || !virtual_base || !impl) { return; } - impl->Protect(virtual_offset + virtual_base_offset, length, read, write); + impl->Protect(virtual_offset + virtual_base_offset, length, read, write, execute); } void HostMemory::EnableDirectMappedAddress() { diff --git a/src/common/host_memory.h b/src/common/host_memory.h index 4014a1962..cebfacab2 100644 --- a/src/common/host_memory.h +++ b/src/common/host_memory.h @@ -4,11 +4,20 @@ #pragma once #include +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/virtual_buffer.h" namespace Common { +enum class MemoryPermission : u32 { + Read = 1 << 0, + Write = 1 << 1, + ReadWrite = Read | Write, + Execute = 1 << 2, +}; +DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) + /** * A low level linear memory buffer, which supports multiple mappings * Its purpose is to rebuild a given sparse memory layout, including mirrors. 
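 * Mappings carry explicit MemoryPermission flags; the Execute flag currently takes effect
 * only on arm64 Linux hosts (see host_memory.cpp).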
@@ -31,11 +40,11 @@ public: HostMemory(HostMemory&& other) noexcept; HostMemory& operator=(HostMemory&& other) noexcept; - void Map(size_t virtual_offset, size_t host_offset, size_t length); + void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms); void Unmap(size_t virtual_offset, size_t length); - void Protect(size_t virtual_offset, size_t length, bool read, bool write); + void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute = false); void EnableDirectMappedAddress(); diff --git a/src/core/hle/kernel/k_page_table_base.cpp b/src/core/hle/kernel/k_page_table_base.cpp index 47dc8fd35..dc6524146 100644 --- a/src/core/hle/kernel/k_page_table_base.cpp +++ b/src/core/hle/kernel/k_page_table_base.cpp @@ -88,6 +88,20 @@ Result FlushDataCache(AddressType addr, u64 size) { R_SUCCEED(); } +constexpr Common::MemoryPermission ConvertToMemoryPermission(KMemoryPermission perm) { + Common::MemoryPermission perms{}; + if (True(perm & KMemoryPermission::UserRead)) { + perms |= Common::MemoryPermission::Read; + } + if (True(perm & KMemoryPermission::UserWrite)) { + perms |= Common::MemoryPermission::Write; + } + if (True(perm & KMemoryPermission::UserExecute)) { + perms |= Common::MemoryPermission::Execute; + } + return perms; +} + } // namespace void KPageTableBase::MemoryRange::Open() { @@ -5643,7 +5657,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a case OperationType::Map: { ASSERT(virt_addr != 0); ASSERT(Common::IsAligned(GetInteger(virt_addr), PageSize)); - m_memory->MapMemoryRegion(*m_impl, virt_addr, num_pages * PageSize, phys_addr); + m_memory->MapMemoryRegion(*m_impl, virt_addr, num_pages * PageSize, phys_addr, + ConvertToMemoryPermission(properties.perm)); // Open references to pages, if we should. if (this->IsHeapPhysicalAddress(phys_addr)) { @@ -5658,8 +5673,18 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a } case OperationType::ChangePermissions: case OperationType::ChangePermissionsAndRefresh: - case OperationType::ChangePermissionsAndRefreshAndFlush: + case OperationType::ChangePermissionsAndRefreshAndFlush: { + const bool read = True(properties.perm & Kernel::KMemoryPermission::UserRead); + const bool write = True(properties.perm & Kernel::KMemoryPermission::UserWrite); + // todo: this doesn't really belong here and should go into m_memory to handle rasterizer + // access todo: ignore exec on non-direct-mapped case + const bool exec = True(properties.perm & Kernel::KMemoryPermission::UserExecute); + if (Settings::IsFastmemEnabled()) { + m_system.DeviceMemory().buffer.Protect(GetInteger(virt_addr), num_pages * PageSize, + read, write, exec); + } R_SUCCEED(); + } default: UNREACHABLE(); } @@ -5687,7 +5712,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a const size_t size{node.GetNumPages() * PageSize}; // Map the pages. 
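            // The guest's requested permissions are forwarded so the fastmem mapping is no
            // longer created as unconditionally read-write.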
- m_memory->MapMemoryRegion(*m_impl, virt_addr, size, node.GetAddress()); + m_memory->MapMemoryRegion(*m_impl, virt_addr, size, node.GetAddress(), + ConvertToMemoryPermission(properties.perm)); virt_addr += size; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a3431772a..14db64f9d 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -53,7 +53,7 @@ struct Memory::Impl { } void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size, - Common::PhysicalAddress target) { + Common::PhysicalAddress target, Common::MemoryPermission perms) { ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((base & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", GetInteger(base)); ASSERT_MSG(target >= DramMemoryMap::Base, "Out of bounds target: {:016X}", @@ -63,7 +63,7 @@ struct Memory::Impl { if (Settings::IsFastmemEnabled()) { system.DeviceMemory().buffer.Map(GetInteger(base), - GetInteger(target) - DramMemoryMap::Base, size); + GetInteger(target) - DramMemoryMap::Base, size, perms); } } @@ -831,8 +831,8 @@ void Memory::SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) { } void Memory::MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size, - Common::PhysicalAddress target) { - impl->MapMemoryRegion(page_table, base, size, target); + Common::PhysicalAddress target, Common::MemoryPermission perms) { + impl->MapMemoryRegion(page_table, base, size, target, perms); } void Memory::UnmapRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size) { diff --git a/src/core/memory.h b/src/core/memory.h index 13047a545..73195549f 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -15,8 +15,9 @@ #include "core/hle/result.h" namespace Common { +enum class MemoryPermission : u32; struct PageTable; -} +} // namespace Common namespace Core { class System; @@ -82,9 +83,10 @@ public: * @param size The amount of bytes to map. Must be page-aligned. * @param target Buffer with the memory backing the mapping. Must be of length at least * `size`. + * @param perms The permissions to map the memory with. */ void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size, - Common::PhysicalAddress target); + Common::PhysicalAddress target, Common::MemoryPermission perms); /** * Unmaps a region of the emulated process address space. 
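A minimal usage sketch of the interface after this change (illustrative only; the constants
mirror the updated tests below):

    using namespace Common::Literals;
    Common::HostMemory mem(4_GiB, 1ULL << 39);
    // Back the guest range [0x5000, 0x7000) with host offset 0x3000, read/write.
    mem.Map(0x5000, 0x3000, 0x2000, Common::MemoryPermission::ReadWrite);
    // Later tighten the window to read-only; execute defaults to false.
    mem.Protect(0x5000, 0x2000, true, false);
    mem.Unmap(0x5000, 0x2000);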
diff --git a/src/tests/common/host_memory.cpp b/src/tests/common/host_memory.cpp index 1b014b632..1a28e862b 100644 --- a/src/tests/common/host_memory.cpp +++ b/src/tests/common/host_memory.cpp @@ -11,6 +11,7 @@ using namespace Common::Literals; static constexpr size_t VIRTUAL_SIZE = 1ULL << 39; static constexpr size_t BACKING_SIZE = 4_GiB; +static constexpr auto PERMS = Common::MemoryPermission::ReadWrite; TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") { { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); } @@ -19,7 +20,7 @@ TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") { TEST_CASE("HostMemory: Simple map", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x5000, 0x8000, 0x1000); + mem.Map(0x5000, 0x8000, 0x1000, PERMS); volatile u8* const data = mem.VirtualBasePointer() + 0x5000; data[0] = 50; @@ -28,8 +29,8 @@ TEST_CASE("HostMemory: Simple map", "[common]") { TEST_CASE("HostMemory: Simple mirror map", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x5000, 0x3000, 0x2000); - mem.Map(0x8000, 0x4000, 0x1000); + mem.Map(0x5000, 0x3000, 0x2000, PERMS); + mem.Map(0x8000, 0x4000, 0x1000, PERMS); volatile u8* const mirror_a = mem.VirtualBasePointer() + 0x5000; volatile u8* const mirror_b = mem.VirtualBasePointer() + 0x8000; @@ -39,7 +40,7 @@ TEST_CASE("HostMemory: Simple mirror map", "[common]") { TEST_CASE("HostMemory: Simple unmap", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x5000, 0x3000, 0x2000); + mem.Map(0x5000, 0x3000, 0x2000, PERMS); volatile u8* const data = mem.VirtualBasePointer() + 0x5000; data[75] = 50; @@ -50,7 +51,7 @@ TEST_CASE("HostMemory: Simple unmap", "[common]") { TEST_CASE("HostMemory: Simple unmap and remap", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x5000, 0x3000, 0x2000); + mem.Map(0x5000, 0x3000, 0x2000, PERMS); volatile u8* const data = mem.VirtualBasePointer() + 0x5000; data[0] = 50; @@ -58,79 +59,79 @@ TEST_CASE("HostMemory: Simple unmap and remap", "[common]") { mem.Unmap(0x5000, 0x2000); - mem.Map(0x5000, 0x3000, 0x2000); + mem.Map(0x5000, 0x3000, 0x2000, PERMS); REQUIRE(data[0] == 50); - mem.Map(0x7000, 0x2000, 0x5000); + mem.Map(0x7000, 0x2000, 0x5000, PERMS); REQUIRE(data[0x3000] == 50); } TEST_CASE("HostMemory: Nieche allocation", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x0000, 0, 0x20000); + mem.Map(0x0000, 0, 0x20000, PERMS); mem.Unmap(0x0000, 0x4000); - mem.Map(0x1000, 0, 0x2000); - mem.Map(0x3000, 0, 0x1000); - mem.Map(0, 0, 0x1000); + mem.Map(0x1000, 0, 0x2000, PERMS); + mem.Map(0x3000, 0, 0x1000, PERMS); + mem.Map(0, 0, 0x1000, PERMS); } TEST_CASE("HostMemory: Full unmap", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x8000, 0, 0x4000); + mem.Map(0x8000, 0, 0x4000, PERMS); mem.Unmap(0x8000, 0x4000); - mem.Map(0x6000, 0, 0x16000); + mem.Map(0x6000, 0, 0x16000, PERMS); } TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x0000, 0, 0x4000); + mem.Map(0x0000, 0, 0x4000, PERMS); mem.Unmap(0x2000, 0x4000); - mem.Map(0x2000, 0x80000, 0x4000); + mem.Map(0x2000, 0x80000, 0x4000, PERMS); } TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x8000, 0, 0x4000); + mem.Map(0x8000, 0, 0x4000, PERMS); mem.Unmap(0x6000, 0x4000); - mem.Map(0x8000, 0, 0x2000); + mem.Map(0x8000, 0, 0x2000, PERMS); } TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") { 
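    // Two adjacent mappings are freed by a single Unmap that crosses both of them, and the
    // whole span is then remapped in one call.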
HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x0000, 0, 0x4000); - mem.Map(0x4000, 0, 0x1b000); + mem.Map(0x0000, 0, 0x4000, PERMS); + mem.Map(0x4000, 0, 0x1b000, PERMS); mem.Unmap(0x3000, 0x1c000); - mem.Map(0x3000, 0, 0x20000); + mem.Map(0x3000, 0, 0x20000, PERMS); } TEST_CASE("HostMemory: Unmap between placeholders", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x0000, 0, 0x4000); - mem.Map(0x4000, 0, 0x4000); + mem.Map(0x0000, 0, 0x4000, PERMS); + mem.Map(0x4000, 0, 0x4000, PERMS); mem.Unmap(0x2000, 0x4000); - mem.Map(0x2000, 0, 0x4000); + mem.Map(0x2000, 0, 0x4000, PERMS); } TEST_CASE("HostMemory: Unmap to origin", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x4000, 0, 0x4000); - mem.Map(0x8000, 0, 0x4000); + mem.Map(0x4000, 0, 0x4000, PERMS); + mem.Map(0x8000, 0, 0x4000, PERMS); mem.Unmap(0x4000, 0x4000); - mem.Map(0, 0, 0x4000); - mem.Map(0x4000, 0, 0x4000); + mem.Map(0, 0, 0x4000, PERMS); + mem.Map(0x4000, 0, 0x4000, PERMS); } TEST_CASE("HostMemory: Unmap to right", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x4000, 0, 0x4000); - mem.Map(0x8000, 0, 0x4000); + mem.Map(0x4000, 0, 0x4000, PERMS); + mem.Map(0x8000, 0, 0x4000, PERMS); mem.Unmap(0x8000, 0x4000); - mem.Map(0x8000, 0, 0x4000); + mem.Map(0x8000, 0, 0x4000, PERMS); } TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x4000, 0x10000, 0x4000); + mem.Map(0x4000, 0x10000, 0x4000, PERMS); volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; ptr[0x1000] = 17; @@ -142,7 +143,7 @@ TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") { TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x4000, 0x10000, 0x4000); + mem.Map(0x4000, 0x10000, 0x4000, PERMS); volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; ptr[0x3000] = 19; @@ -156,7 +157,7 @@ TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") { TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x4000, 0x10000, 0x4000); + mem.Map(0x4000, 0x10000, 0x4000, PERMS); volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; ptr[0x0000] = 19; @@ -170,8 +171,8 @@ TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") { TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); - mem.Map(0x4000, 0x10000, 0x2000); - mem.Map(0x6000, 0x20000, 0x2000); + mem.Map(0x4000, 0x10000, 0x2000, PERMS); + mem.Map(0x6000, 0x20000, 0x2000, PERMS); volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000; ptr[0x0000] = 19; From f2a840908380f876a1b5675e9cae281c8db77776 Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 17 Nov 2023 21:58:29 +0200 Subject: [PATCH 06/28] kernel: Manually specify aslr region start --- src/core/hle/kernel/k_page_table_base.cpp | 6 ++++-- src/core/hle/kernel/k_page_table_base.h | 3 ++- src/core/hle/kernel/k_process.cpp | 12 ++++++------ src/core/hle/kernel/k_process.h | 5 +++-- src/core/hle/kernel/k_process_page_table.h | 9 +++++---- src/core/loader/deconstructed_rom_directory.cpp | 2 +- src/core/loader/kip.cpp | 3 ++- src/core/loader/nro.cpp | 3 ++- 8 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/core/hle/kernel/k_page_table_base.cpp b/src/core/hle/kernel/k_page_table_base.cpp index dc6524146..f2ffc39c1 100644 --- 
a/src/core/hle/kernel/k_page_table_base.cpp +++ b/src/core/hle/kernel/k_page_table_base.cpp @@ -184,7 +184,8 @@ Result KPageTableBase::InitializeForProcess(Svc::CreateProcessFlag as_type, bool KMemoryManager::Pool pool, KProcessAddress code_address, size_t code_size, KSystemResource* system_resource, KResourceLimit* resource_limit, - Core::Memory::Memory& memory) { + Core::Memory::Memory& memory, + KProcessAddress aslr_space_start) { // Calculate region extents. const size_t as_width = GetAddressSpaceWidth(as_type); const KProcessAddress start = 0; @@ -225,7 +226,8 @@ Result KPageTableBase::InitializeForProcess(Svc::CreateProcessFlag as_type, bool heap_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Heap); stack_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Stack); kernel_map_region_size = GetSpaceSize(KAddressSpaceInfo::Type::MapSmall); - m_code_region_start = GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit); + m_code_region_start = m_address_space_start + aslr_space_start + + GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit); m_code_region_end = m_code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::Map39Bit); m_alias_code_region_start = m_code_region_start; m_alias_code_region_end = m_code_region_end; diff --git a/src/core/hle/kernel/k_page_table_base.h b/src/core/hle/kernel/k_page_table_base.h index ee2c41e67..556d230b3 100644 --- a/src/core/hle/kernel/k_page_table_base.h +++ b/src/core/hle/kernel/k_page_table_base.h @@ -235,7 +235,8 @@ public: bool enable_device_address_space_merge, bool from_back, KMemoryManager::Pool pool, KProcessAddress code_address, size_t code_size, KSystemResource* system_resource, - KResourceLimit* resource_limit, Core::Memory::Memory& memory); + KResourceLimit* resource_limit, Core::Memory::Memory& memory, + KProcessAddress aslr_space_start); void Finalize(); diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 3cfb414e5..c6a200320 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -300,7 +300,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, params.code_address, params.code_num_pages * PageSize, - m_system_resource, res_limit, this->GetMemory())); + m_system_resource, res_limit, this->GetMemory(), 0)); } ON_RESULT_FAILURE_2 { m_page_table.Finalize(); @@ -332,7 +332,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa Result KProcess::Initialize(const Svc::CreateProcessParameter& params, std::span user_caps, KResourceLimit* res_limit, - KMemoryManager::Pool pool) { + KMemoryManager::Pool pool, KProcessAddress aslr_space_start) { ASSERT(res_limit != nullptr); // Set members. 
@@ -393,7 +393,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, params.code_address, code_size, m_system_resource, res_limit, - this->GetMemory())); + this->GetMemory(), aslr_space_start)); } ON_RESULT_FAILURE_2 { m_page_table.Finalize(); @@ -1128,7 +1128,7 @@ KProcess::KProcess(KernelCore& kernel) KProcess::~KProcess() = default; Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, - bool is_hbl) { + KProcessAddress aslr_space_start, bool is_hbl) { // Create a resource limit for the process. const auto physical_memory_size = m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application); @@ -1179,7 +1179,7 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std: .name = {}, .version = {}, .program_id = metadata.GetTitleID(), - .code_address = code_address, + .code_address = code_address + GetInteger(aslr_space_start), .code_num_pages = static_cast(code_size / PageSize), .flags = flag, .reslimit = Svc::InvalidHandle, @@ -1193,7 +1193,7 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std: // Initialize for application process. R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit, - KMemoryManager::Pool::Application)); + KMemoryManager::Pool::Application, aslr_space_start)); // Assign remaining properties. m_is_hbl = is_hbl; diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 8339465fd..54b8e0a59 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -150,7 +150,8 @@ public: std::span caps, KResourceLimit* res_limit, KMemoryManager::Pool pool, bool immortal); Result Initialize(const Svc::CreateProcessParameter& params, std::span user_caps, - KResourceLimit* res_limit, KMemoryManager::Pool pool); + KResourceLimit* res_limit, KMemoryManager::Pool pool, + KProcessAddress aslr_space_start); void Exit(); const char* GetName() const { @@ -479,7 +480,7 @@ public: public: Result LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, - bool is_hbl); + KProcessAddress aslr_space_start, bool is_hbl); void LoadModule(CodeSet code_set, KProcessAddress base_addr); diff --git a/src/core/hle/kernel/k_process_page_table.h b/src/core/hle/kernel/k_process_page_table.h index b7ae5abd0..9e40f68bc 100644 --- a/src/core/hle/kernel/k_process_page_table.h +++ b/src/core/hle/kernel/k_process_page_table.h @@ -23,10 +23,11 @@ public: Result Initialize(Svc::CreateProcessFlag as_type, bool enable_aslr, bool enable_das_merge, bool from_back, KMemoryManager::Pool pool, KProcessAddress code_address, size_t code_size, KSystemResource* system_resource, - KResourceLimit* resource_limit, Core::Memory::Memory& memory) { - R_RETURN(m_page_table.InitializeForProcess(as_type, enable_aslr, enable_das_merge, - from_back, pool, code_address, code_size, - system_resource, resource_limit, memory)); + KResourceLimit* resource_limit, Core::Memory::Memory& memory, + KProcessAddress aslr_space_start) { + R_RETURN(m_page_table.InitializeForProcess( + as_type, enable_aslr, enable_das_merge, from_back, pool, code_address, code_size, + system_resource, resource_limit, memory, aslr_space_start)); } void Finalize() { diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp 
index 5c36b71e5..48c0edaea 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -147,7 +147,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect } // Setup the process code layout - if (process.LoadFromMetadata(metadata, code_size, is_hbl).IsError()) { + if (process.LoadFromMetadata(metadata, code_size, 0, is_hbl).IsError()) { return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; } diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index bf56a08b4..cd6982921 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp @@ -91,7 +91,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::KProcess& process, // Setup the process code layout if (process - .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), false) + .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), 0, + false) .IsError()) { return {ResultStatus::ErrorNotInitialized, {}}; } diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 69f1a54ed..dfed296a5 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -197,7 +197,8 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector& data) // Setup the process code layout if (process - .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), false) + .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), 0, + false) .IsError()) { return false; } From 15331c2a6007656171275fcbb41f912c1297c358 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 17 Nov 2023 22:03:42 +0200 Subject: [PATCH 07/28] settings: Add cpu backend setting --- src/common/settings.cpp | 5 +++++ src/common/settings.h | 10 ++++++++++ src/common/settings_enums.h | 2 ++ 3 files changed, 17 insertions(+) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index a10131eb2..19dfe08da 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -41,6 +41,7 @@ SWITCHABLE(AspectRatio, true); SWITCHABLE(AstcDecodeMode, true); SWITCHABLE(AstcRecompression, true); SWITCHABLE(AudioMode, true); +SWITCHABLE(CpuBackend, true); SWITCHABLE(CpuAccuracy, true); SWITCHABLE(FullscreenMode, true); SWITCHABLE(GpuAccuracy, true); @@ -155,6 +156,10 @@ bool IsFastmemEnabled() { return true; } +bool IsNceEnabled(bool is_64bit) { + return values.cpu_backend.GetValue() == CpuBackend::Nce && is_64bit; +} + bool IsDockedMode() { return values.use_docked_mode.GetValue() == Settings::ConsoleMode::Docked; } diff --git a/src/common/settings.h b/src/common/settings.h index e75099b89..389c747cb 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -63,6 +63,7 @@ SWITCHABLE(AspectRatio, true); SWITCHABLE(AstcDecodeMode, true); SWITCHABLE(AstcRecompression, true); SWITCHABLE(AudioMode, true); +SWITCHABLE(CpuBackend, true); SWITCHABLE(CpuAccuracy, true); SWITCHABLE(FullscreenMode, true); SWITCHABLE(GpuAccuracy, true); @@ -179,6 +180,14 @@ struct Values { &use_speed_limit}; // Cpu + SwitchableSetting cpu_backend{ + linkage, CpuBackend::Dynarmic, CpuBackend::Dynarmic, +#ifdef ARCHITECTURE_arm64 + CpuBackend::Nce, +#else + CpuBackend::Dynarmic, +#endif + "cpu_backend", Category::Cpu}; SwitchableSetting cpu_accuracy{linkage, CpuAccuracy::Auto, CpuAccuracy::Auto, CpuAccuracy::Paranoid, "cpu_accuracy", Category::Cpu}; @@ -564,6 +573,7 @@ bool IsGPULevelExtreme(); bool IsGPULevelHigh(); bool IsFastmemEnabled(); +bool IsNceEnabled(bool is_64bit = true); bool IsDockedMode(); 
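For reference, a minimal sketch of how a caller is expected to consult the new setting (the
surrounding loader logic and the is_64bit flag are assumptions, not part of this patch):

    // NCE only applies to 64-bit titles; everything else stays on the dynarmic recompiler.
    const bool is_64bit = true; // hypothetical: taken from the program metadata
    if (Settings::IsNceEnabled(is_64bit)) {
        // ... construct the NCE backend ...
    } else {
        // ... construct the dynarmic backend ...
    }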
diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 11429d7a8..d6351e57e 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -129,6 +129,8 @@ ENUM(ShaderBackend, Glsl, Glasm, SpirV); ENUM(GpuAccuracy, Normal, High, Extreme); +ENUM(CpuBackend, Dynarmic, Nce); + ENUM(CpuAccuracy, Auto, Accurate, Unsafe, Paranoid); ENUM(MemoryLayout, Memory_4Gb, Memory_6Gb, Memory_8Gb); From 2e02efbdd047ce125c7d0af418619f4e120df798 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 17 Nov 2023 22:06:37 +0200 Subject: [PATCH 08/28] externals: Add oaknut submodule --- .gitmodules | 3 +++ externals/CMakeLists.txt | 4 ++++ externals/oaknut | 1 + 3 files changed, 8 insertions(+) create mode 160000 externals/oaknut diff --git a/.gitmodules b/.gitmodules index 45dd0d259..e65997afc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,6 @@ [submodule "simpleini"] path = externals/simpleini url = https://github.com/brofield/simpleini.git +[submodule "oaknut"] + path = externals/oaknut + url = https://github.com/merryhime/oaknut diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 515e3f2a4..1d2be1459 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -26,6 +26,10 @@ if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmi add_library(dynarmic::dynarmic ALIAS dynarmic) endif() +if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut) + add_subdirectory(oaknut) +endif() + # getopt if (MSVC) add_subdirectory(getopt) diff --git a/externals/oaknut b/externals/oaknut new file mode 160000 index 000000000..918bd94f0 --- /dev/null +++ b/externals/oaknut @@ -0,0 +1 @@ +Subproject commit 918bd94f025d6a2de13978468351598997ae3909 From 48388376206aaa7d887b41030019035a06203867 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 17 Nov 2023 22:23:48 +0200 Subject: [PATCH 09/28] device_memory: Enable direct mapped addresses for nce --- src/common/settings.cpp | 10 ++++++++-- src/common/settings.h | 3 ++- src/core/core.cpp | 3 ++- src/core/device_memory.cpp | 11 ++++++++--- src/core/device_memory.h | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 19dfe08da..167e984a6 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -156,8 +156,14 @@ bool IsFastmemEnabled() { return true; } -bool IsNceEnabled(bool is_64bit) { - return values.cpu_backend.GetValue() == CpuBackend::Nce && is_64bit; +static bool is_nce_enabled = false; + +void SetNceEnabled(bool is_64bit) { + is_nce_enabled = values.cpu_backend.GetValue() == CpuBackend::Nce && is_64bit; +} + +bool IsNceEnabled() { + return is_nce_enabled; } bool IsDockedMode() { diff --git a/src/common/settings.h b/src/common/settings.h index 389c747cb..fea639ee3 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -573,7 +573,8 @@ bool IsGPULevelExtreme(); bool IsGPULevelHigh(); bool IsFastmemEnabled(); -bool IsNceEnabled(bool is_64bit = true); +void SetNceEnabled(bool is_64bit); +bool IsNceEnabled(); bool IsDockedMode(); diff --git a/src/core/core.cpp b/src/core/core.cpp index 14d6c8c27..1d557fb43 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -136,7 +136,8 @@ struct System::Impl { } void Initialize(System& system) { - device_memory = std::make_unique(); + const bool direct_mapped_address = Settings::IsNceEnabled(); + device_memory = std::make_unique(direct_mapped_address); is_multicore = Settings::values.use_multi_core.GetValue(); extended_memory_layout = diff --git 
a/src/core/device_memory.cpp b/src/core/device_memory.cpp index de3f8ef8f..0528a8e3b 100644 --- a/src/core/device_memory.cpp +++ b/src/core/device_memory.cpp @@ -6,15 +6,20 @@ namespace Core { -#ifdef ANDROID +#ifdef ARCHITECTURE_arm64 constexpr size_t VirtualReserveSize = 1ULL << 38; #else constexpr size_t VirtualReserveSize = 1ULL << 39; #endif -DeviceMemory::DeviceMemory() +DeviceMemory::DeviceMemory(bool direct_mapped_address) : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(), - VirtualReserveSize} {} + VirtualReserveSize} { + if (direct_mapped_address) { + buffer.EnableDirectMappedAddress(); + } +} + DeviceMemory::~DeviceMemory() = default; } // namespace Core diff --git a/src/core/device_memory.h b/src/core/device_memory.h index 13388b73e..368f19e86 100644 --- a/src/core/device_memory.h +++ b/src/core/device_memory.h @@ -18,7 +18,7 @@ enum : u64 { class DeviceMemory { public: - explicit DeviceMemory(); + explicit DeviceMemory(bool direct_mapped_address); ~DeviceMemory(); DeviceMemory& operator=(const DeviceMemory&) = delete; From 9f91ba1f7357c61dd2c7c3b437ea203d467fd400 Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 17 Nov 2023 23:44:53 +0200 Subject: [PATCH 10/28] arm: Implement native code execution backend --- externals/CMakeLists.txt | 8 +- src/common/host_memory.cpp | 10 +- src/common/settings.cpp | 4 +- src/common/settings.h | 2 +- src/core/CMakeLists.txt | 16 + src/core/arm/arm_interface.h | 3 + src/core/arm/nce/arm_nce.cpp | 395 +++++++++++++++ src/core/arm/nce/arm_nce.h | 108 ++++ src/core/arm/nce/arm_nce.s | 222 ++++++++ src/core/arm/nce/arm_nce_asm_definitions.h | 29 ++ src/core/arm/nce/guest_context.h | 50 ++ src/core/arm/nce/instructions.h | 147 ++++++ src/core/arm/nce/patch.cpp | 472 ++++++++++++++++++ src/core/arm/nce/patch.h | 107 ++++ src/core/core.cpp | 4 +- src/core/cpu_manager.cpp | 2 + src/core/device_memory.cpp | 8 +- src/core/device_memory.h | 2 +- src/core/hle/kernel/code_set.h | 9 + src/core/hle/kernel/k_address_space_info.cpp | 4 +- src/core/hle/kernel/k_process.cpp | 11 + src/core/hle/kernel/k_process.h | 9 + src/core/hle/kernel/k_thread.h | 16 + src/core/hle/kernel/physical_core.cpp | 14 +- .../loader/deconstructed_rom_directory.cpp | 61 ++- src/core/loader/nro.cpp | 62 ++- src/core/loader/nro.h | 2 +- src/core/loader/nso.cpp | 56 ++- src/core/loader/nso.h | 7 +- src/core/memory.cpp | 13 + src/core/memory.h | 1 + 31 files changed, 1803 insertions(+), 51 deletions(-) create mode 100644 src/core/arm/nce/arm_nce.cpp create mode 100644 src/core/arm/nce/arm_nce.h create mode 100644 src/core/arm/nce/arm_nce.s create mode 100644 src/core/arm/nce/arm_nce_asm_definitions.h create mode 100644 src/core/arm/nce/guest_context.h create mode 100644 src/core/arm/nce/instructions.h create mode 100644 src/core/arm/nce/patch.cpp create mode 100644 src/core/arm/nce/patch.h diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 1d2be1459..8f83d4991 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -20,16 +20,16 @@ if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak) endif() # Dynarmic +if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut) + add_subdirectory(oaknut) +endif() + if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic) set(DYNARMIC_IGNORE_ASSERTS ON) add_subdirectory(dynarmic) add_library(dynarmic::dynarmic ALIAS dynarmic) endif() -if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut) - add_subdirectory(oaknut) -endif() - # getopt if (MSVC) 
add_subdirectory(getopt) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 3e4b34de6..38d7b29f7 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -189,6 +189,11 @@ public: } } + void EnableDirectMappedAddress() { + // TODO + UNREACHABLE(); + } + const size_t backing_size; ///< Size of the backing memory in bytes const size_t virtual_size; ///< Size of the virtual address placeholder in bytes @@ -340,11 +345,6 @@ private: return false; } - void EnableDirectMappedAddress() { - // TODO - UNREACHABLE(); - } - HANDLE process{}; ///< Current process handle HANDLE backing_handle{}; ///< File based backing memory diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 167e984a6..81a036ef0 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -158,8 +158,8 @@ bool IsFastmemEnabled() { static bool is_nce_enabled = false; -void SetNceEnabled(bool is_64bit) { - is_nce_enabled = values.cpu_backend.GetValue() == CpuBackend::Nce && is_64bit; +void SetNceEnabled(bool is_39bit) { + is_nce_enabled = values.cpu_backend.GetValue() == CpuBackend::Nce && is_39bit; } bool IsNceEnabled() { diff --git a/src/common/settings.h b/src/common/settings.h index fea639ee3..648e0be0d 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -181,7 +181,7 @@ struct Values { // Cpu SwitchableSetting cpu_backend{ - linkage, CpuBackend::Dynarmic, CpuBackend::Dynarmic, + linkage, CpuBackend::Nce, CpuBackend::Dynarmic, #ifdef ARCHITECTURE_arm64 CpuBackend::Nce, #else diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 66c10fc3f..c5805ec61 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -926,6 +926,22 @@ if (ENABLE_WEB_SERVICE) target_link_libraries(core PRIVATE web_service) endif() +if (ARCHITECTURE_arm64) + enable_language(C ASM) + set(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp") + + target_sources(core PRIVATE + arm/nce/arm_nce.cpp + arm/nce/arm_nce.h + arm/nce/arm_nce.s + arm/nce/guest_context.h + arm/nce/patch.cpp + arm/nce/patch.h + arm/nce/instructions.h + ) + target_link_libraries(core PRIVATE merry::oaknut) +endif() + if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) target_sources(core PRIVATE arm/dynarmic/arm_dynarmic.h diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 3d866ff6f..a9d9ac09d 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -81,6 +81,9 @@ public: // thread context to be 800 bytes in size. static_assert(sizeof(ThreadContext64) == 0x320); + /// Perform any backend-specific initialization. + virtual void Initialize() {} + /// Runs the CPU until an event happens void Run(); diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp new file mode 100644 index 000000000..511248a0d --- /dev/null +++ b/src/core/arm/nce/arm_nce.cpp @@ -0,0 +1,395 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include "common/scope_exit.h" +#include "common/signal_chain.h" +#include "core/arm/nce/arm_nce.h" +#include "core/arm/nce/patch.h" +#include "core/core.h" +#include "core/memory.h" + +#include "core/hle/kernel/k_process.h" + +#include +#include +#include + +namespace Core { + +namespace { + +struct sigaction g_orig_action; + +// Verify assembly offsets. 
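+// arm_nce.s reads the thread parameters at fixed offsets from tpidr_el0 (the constants in
+// arm_nce_asm_definitions.h), so the layout checked below must stay in sync with the assembly.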
+using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; +static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0NativeContext); +static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock); +static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic); + +fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { + _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved); + while (header->magic != FPSIMD_MAGIC) { + header = reinterpret_cast<_aarch64_ctx*>((char*)header + header->size); + } + return reinterpret_cast(header); +} + +} // namespace + +void* ARM_NCE::RestoreGuestContext(void* raw_context) { + // Retrieve the host context. + auto& host_ctx = static_cast(raw_context)->uc_mcontext; + + // Thread-local parameters will be located in x9. + auto* tpidr = reinterpret_cast(host_ctx.regs[9]); + auto* guest_ctx = static_cast(tpidr->native_context); + + // Retrieve the host floating point state. + auto* fpctx = GetFloatingPointState(host_ctx); + + // Save host callee-saved registers. + std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8], + sizeof(guest_ctx->host_ctx.host_saved_vregs)); + std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19], + sizeof(guest_ctx->host_ctx.host_saved_regs)); + + // Save stack pointer. + guest_ctx->host_ctx.host_sp = host_ctx.sp; + + // Restore all guest state except tpidr_el0. + host_ctx.sp = guest_ctx->sp; + host_ctx.pc = guest_ctx->pc; + host_ctx.pstate = guest_ctx->pstate; + fpctx->fpcr = guest_ctx->fpcr; + fpctx->fpsr = guest_ctx->fpsr; + std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs)); + std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs)); + + // Return the new thread-local storage pointer. + return tpidr; +} + +void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) { + // Retrieve the host context. + auto& host_ctx = static_cast(raw_context)->uc_mcontext; + + // Retrieve the host floating point state. + auto* fpctx = GetFloatingPointState(host_ctx); + + // Save all guest registers except tpidr_el0. + std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs)); + std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs)); + guest_ctx->fpsr = fpctx->fpsr; + guest_ctx->fpcr = fpctx->fpcr; + guest_ctx->pstate = static_cast(host_ctx.pstate); + guest_ctx->pc = host_ctx.pc; + guest_ctx->sp = host_ctx.sp; + + // Restore stack pointer. + host_ctx.sp = guest_ctx->host_ctx.host_sp; + + // Restore host callee-saved registers. + std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(), + sizeof(guest_ctx->host_ctx.host_saved_regs)); + std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(), + sizeof(guest_ctx->host_ctx.host_saved_vregs)); + + // Return from the call on exit by setting pc to x30. + host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11]; + + // Clear esr_el1 and return it. + host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0); +} + +bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) { + auto& host_ctx = static_cast(raw_context)->uc_mcontext; + auto* info = static_cast(raw_info); + + // Try to handle an invalid access. + // TODO: handle accesses which split a page? 
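+    // The fault address is truncated to its page so the whole page is considered at once. If
+    // InvalidateNCE reports the access as handled (e.g. a page the rasterizer had temporarily
+    // write-protected), returning true resumes the guest and the faulting instruction is
+    // re-executed against the updated mapping.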
+ const Common::ProcessAddress addr = + (reinterpret_cast(info->si_addr) & ~Memory::YUZU_PAGEMASK); + if (guest_ctx->system->ApplicationMemory().InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) { + // We handled the access successfully and are returning to guest code. + return true; + } + + // We can't handle the access, so trigger an exception. + const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast(info->si_addr); + guest_ctx->esr_el1.fetch_or( + static_cast(is_prefetch_abort ? HaltReason::PrefetchAbort : HaltReason::DataAbort)); + + // Forcibly mark the context as locked. We are still running. + // We may race with SignalInterrupt here: + // - If we lose the race, then SignalInterrupt will send us a signal which are masking, + // and it will do nothing when it is unmasked, as we have already left guest code. + // - If we win the race, then SignalInterrupt will wait for us to unlock first. + auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters(); + thread_params.lock.store(SpinLockLocked); + + // Return to host. + SaveGuestContext(guest_ctx, raw_context); + return false; +} + +void ARM_NCE::HandleHostFault(int sig, void* raw_info, void* raw_context) { + return g_orig_action.sa_sigaction(sig, static_cast(raw_info), raw_context); +} + +HaltReason ARM_NCE::RunJit() { + // Get the thread parameters. + // TODO: pass the current thread down from ::Run + auto* thread = Kernel::GetCurrentThreadPointer(system.Kernel()); + auto* thread_params = &thread->GetNativeExecutionParameters(); + + { + // Lock our core context. + std::scoped_lock lk{lock}; + + // We should not be running. + ASSERT(running_thread == nullptr); + + // Check if we need to run. If we have already been halted, we are done. + u64 halt = guest_ctx.esr_el1.exchange(0); + if (halt != 0) { + return static_cast(halt); + } + + // Mark that we are running. + running_thread = thread; + + // Acquire the lock on the thread parameters. + // This allows us to force synchronization with SignalInterrupt. + LockThreadParameters(thread_params); + } + + // Assign current members. + guest_ctx.parent = this; + thread_params->native_context = &guest_ctx; + thread_params->tpidr_el0 = guest_ctx.tpidr_el0; + thread_params->tpidrro_el0 = guest_ctx.tpidrro_el0; + thread_params->is_running = true; + + HaltReason halt{}; + + // TODO: finding and creating the post handler needs to be locked + // to deal with dynamic loading of NROs. + const auto& post_handlers = system.ApplicationProcess()->GetPostHandlers(); + if (auto it = post_handlers.find(guest_ctx.pc); it != post_handlers.end()) { + halt = ReturnToRunCodeByTrampoline(thread_params, &guest_ctx, it->second); + } else { + halt = ReturnToRunCodeByExceptionLevelChange(thread_id, thread_params); + } + + // Unload members. + // The thread does not change, so we can persist the old reference. + guest_ctx.tpidr_el0 = thread_params->tpidr_el0; + thread_params->native_context = nullptr; + thread_params->is_running = false; + + // Unlock the thread parameters. + UnlockThreadParameters(thread_params); + + { + // Lock the core context. + std::scoped_lock lk{lock}; + + // On exit, we no longer have an active thread. + running_thread = nullptr; + } + + // Return the halt reason. 
+    return halt;
+}
+
+HaltReason ARM_NCE::StepJit() {
+    return HaltReason::StepThread;
+}
+
+u32 ARM_NCE::GetSvcNumber() const {
+    return guest_ctx.svc_swi;
+}
+
+ARM_NCE::ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_)
+    : ARM_Interface{system_, uses_wall_clock_}, core_index{core_index_} {
+    guest_ctx.system = &system_;
+}
+
+ARM_NCE::~ARM_NCE() = default;
+
+void ARM_NCE::Initialize() {
+    thread_id = gettid();
+
+    // Set up our signals.
+    static std::once_flag flag;
+    std::call_once(flag, [] {
+        using HandlerType = decltype(sigaction::sa_sigaction);
+
+        sigset_t signal_mask;
+        sigemptyset(&signal_mask);
+        sigaddset(&signal_mask, ReturnToRunCodeByExceptionLevelChangeSignal);
+        sigaddset(&signal_mask, BreakFromRunCodeSignal);
+        sigaddset(&signal_mask, GuestFaultSignal);
+
+        struct sigaction return_to_run_code_action {};
+        return_to_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
+        return_to_run_code_action.sa_sigaction = reinterpret_cast<HandlerType>(
+            &ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
+        return_to_run_code_action.sa_mask = signal_mask;
+        Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
+                          nullptr);
+
+        struct sigaction break_from_run_code_action {};
+        break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
+        break_from_run_code_action.sa_sigaction =
+            reinterpret_cast<HandlerType>(&ARM_NCE::BreakFromRunCodeSignalHandler);
+        break_from_run_code_action.sa_mask = signal_mask;
+        Common::SigAction(BreakFromRunCodeSignal, &break_from_run_code_action, nullptr);
+
+        struct sigaction fault_action {};
+        fault_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
+        fault_action.sa_sigaction =
+            reinterpret_cast<HandlerType>(&ARM_NCE::GuestFaultSignalHandler);
+        fault_action.sa_mask = signal_mask;
+        Common::SigAction(GuestFaultSignal, &fault_action, &g_orig_action);
+
+        // Simplify call for g_orig_action.
+        // These fields occupy the same space in memory, so this should be a no-op in practice.
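+        // If these fields did not alias, chaining to the original handler would have to
+        // branch on SA_SIGINFO explicitly; roughly (illustrative sketch only, unused here):
+        [[maybe_unused]] const auto chain_to_original = [](int sig, siginfo_t* info,
+                                                           void* raw_context) {
+            if (g_orig_action.sa_flags & SA_SIGINFO) {
+                g_orig_action.sa_sigaction(sig, info, raw_context);
+            } else if (g_orig_action.sa_handler != SIG_DFL &&
+                       g_orig_action.sa_handler != SIG_IGN) {
+                g_orig_action.sa_handler(sig);
+            }
+        };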
+ if (!(g_orig_action.sa_flags & SA_SIGINFO)) { + g_orig_action.sa_sigaction = + reinterpret_cast(g_orig_action.sa_handler); + } + }); +} + +void ARM_NCE::SetPC(u64 pc) { + guest_ctx.pc = pc; +} + +u64 ARM_NCE::GetPC() const { + return guest_ctx.pc; +} + +u64 ARM_NCE::GetSP() const { + return guest_ctx.sp; +} + +u64 ARM_NCE::GetReg(int index) const { + return guest_ctx.cpu_registers[index]; +} + +void ARM_NCE::SetReg(int index, u64 value) { + guest_ctx.cpu_registers[index] = value; +} + +u128 ARM_NCE::GetVectorReg(int index) const { + return guest_ctx.vector_registers[index]; +} + +void ARM_NCE::SetVectorReg(int index, u128 value) { + guest_ctx.vector_registers[index] = value; +} + +u32 ARM_NCE::GetPSTATE() const { + return guest_ctx.pstate; +} + +void ARM_NCE::SetPSTATE(u32 pstate) { + guest_ctx.pstate = pstate; +} + +u64 ARM_NCE::GetTlsAddress() const { + return guest_ctx.tpidrro_el0; +} + +void ARM_NCE::SetTlsAddress(u64 address) { + guest_ctx.tpidrro_el0 = address; +} + +u64 ARM_NCE::GetTPIDR_EL0() const { + return guest_ctx.tpidr_el0; +} + +void ARM_NCE::SetTPIDR_EL0(u64 value) { + guest_ctx.tpidr_el0 = value; +} + +void ARM_NCE::SaveContext(ThreadContext64& ctx) const { + ctx.cpu_registers = guest_ctx.cpu_registers; + ctx.sp = guest_ctx.sp; + ctx.pc = guest_ctx.pc; + ctx.pstate = guest_ctx.pstate; + ctx.vector_registers = guest_ctx.vector_registers; + ctx.fpcr = guest_ctx.fpcr; + ctx.fpsr = guest_ctx.fpsr; + ctx.tpidr = guest_ctx.tpidr_el0; +} + +void ARM_NCE::LoadContext(const ThreadContext64& ctx) { + guest_ctx.cpu_registers = ctx.cpu_registers; + guest_ctx.sp = ctx.sp; + guest_ctx.pc = ctx.pc; + guest_ctx.pstate = ctx.pstate; + guest_ctx.vector_registers = ctx.vector_registers; + guest_ctx.fpcr = ctx.fpcr; + guest_ctx.fpsr = ctx.fpsr; + guest_ctx.tpidr_el0 = ctx.tpidr; +} + +void ARM_NCE::SignalInterrupt() { + // Lock core context. + std::scoped_lock lk{lock}; + + // Add break loop condition. + guest_ctx.esr_el1.fetch_or(static_cast(HaltReason::BreakLoop)); + + // If there is no thread running, we are done. + if (running_thread == nullptr) { + return; + } + + // Lock the thread context. + auto* params = &running_thread->GetNativeExecutionParameters(); + LockThreadParameters(params); + + if (params->is_running) { + // We should signal to the running thread. + // The running thread will unlock the thread context. + syscall(SYS_tkill, thread_id, BreakFromRunCodeSignal); + } else { + // If the thread is no longer running, we have nothing to do. + UnlockThreadParameters(params); + } +} + +void ARM_NCE::ClearInterrupt() { + guest_ctx.esr_el1 = {}; +} + +void ARM_NCE::ClearInstructionCache() { + // TODO: This is not possible to implement correctly on Linux because + // we do not have any access to ic iallu. + + // Require accesses to complete. + std::atomic_thread_fence(std::memory_order_seq_cst); +} + +void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) { + // Clean cache. + auto* ptr = reinterpret_cast(addr); + __builtin___clear_cache(ptr, ptr + size); +} + +void ARM_NCE::ClearExclusiveState() { + // No-op. +} + +void ARM_NCE::PageTableChanged(Common::PageTable& page_table, + std::size_t new_address_space_size_in_bits) { + // No-op. Page table is never used. 
+} + +} // namespace Core diff --git a/src/core/arm/nce/arm_nce.h b/src/core/arm/nce/arm_nce.h new file mode 100644 index 000000000..5fbd6dbf3 --- /dev/null +++ b/src/core/arm/nce/arm_nce.h @@ -0,0 +1,108 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include + +#include "core/arm/arm_interface.h" +#include "core/arm/nce/guest_context.h" + +namespace Core::Memory { +class Memory; +} + +namespace Core { + +class System; + +class ARM_NCE final : public ARM_Interface { +public: + ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_); + + ~ARM_NCE() override; + + void Initialize() override; + void SetPC(u64 pc) override; + u64 GetPC() const override; + u64 GetSP() const override; + u64 GetReg(int index) const override; + void SetReg(int index, u64 value) override; + u128 GetVectorReg(int index) const override; + void SetVectorReg(int index, u128 value) override; + + u32 GetPSTATE() const override; + void SetPSTATE(u32 pstate) override; + u64 GetTlsAddress() const override; + void SetTlsAddress(u64 address) override; + void SetTPIDR_EL0(u64 value) override; + u64 GetTPIDR_EL0() const override; + + Architecture GetArchitecture() const override { + return Architecture::Aarch64; + } + + void SaveContext(ThreadContext32& ctx) const override {} + void SaveContext(ThreadContext64& ctx) const override; + void LoadContext(const ThreadContext32& ctx) override {} + void LoadContext(const ThreadContext64& ctx) override; + + void SignalInterrupt() override; + void ClearInterrupt() override; + void ClearExclusiveState() override; + void ClearInstructionCache() override; + void InvalidateCacheRange(u64 addr, std::size_t size) override; + void PageTableChanged(Common::PageTable& new_page_table, + std::size_t new_address_space_size_in_bits) override; + +protected: + HaltReason RunJit() override; + HaltReason StepJit() override; + + u32 GetSvcNumber() const override; + + const Kernel::DebugWatchpoint* HaltedWatchpoint() const override { + return nullptr; + } + + void RewindBreakpointInstruction() override {} + +private: + // Assembly definitions. + static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx, + u64 trampoline_addr); + static HaltReason ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr); + + static void ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, + void* raw_context); + static void BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context); + static void GuestFaultSignalHandler(int sig, void* info, void* raw_context); + + static void LockThreadParameters(void* tpidr); + static void UnlockThreadParameters(void* tpidr); + +private: + // C++ implementation functions for assembly definitions. + static void* RestoreGuestContext(void* raw_context); + static void SaveGuestContext(GuestContext* ctx, void* raw_context); + static bool HandleGuestFault(GuestContext* ctx, void* info, void* raw_context); + static void HandleHostFault(int sig, void* info, void* raw_context); + +public: + // Members set on initialization. + std::size_t core_index{}; + pid_t thread_id{-1}; + + // Core context. + GuestContext guest_ctx; + + // Thread and invalidation info. 
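+    // lock guards running_thread: RunJit publishes the active thread while guest code runs,
+    // and SignalInterrupt reads it to decide whether a break signal must be sent.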
+ std::mutex lock; + Kernel::KThread* running_thread{}; +}; + +} // namespace Core diff --git a/src/core/arm/nce/arm_nce.s b/src/core/arm/nce/arm_nce.s new file mode 100644 index 000000000..b98e09f31 --- /dev/null +++ b/src/core/arm/nce/arm_nce.s @@ -0,0 +1,222 @@ +/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "core/arm/nce/arm_nce_asm_definitions.h" + +#define LOAD_IMMEDIATE_32(reg, val) \ + mov reg, #(((val) >> 0x00) & 0xFFFF); \ + movk reg, #(((val) >> 0x10) & 0xFFFF), lsl #16 + + +/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */ +.section .text._ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits +.global _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm +.type _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function +_ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm: + /* Back up host sp to x3. */ + /* Back up host tpidr_el0 to x4. */ + mov x3, sp + mrs x4, tpidr_el0 + + /* Load guest sp. x5 is used as a scratch register. */ + ldr x5, [x1, #(GuestContextSp)] + mov sp, x5 + + /* Offset GuestContext pointer to the host member. */ + add x5, x1, #(GuestContextHostContext) + + /* Save original host sp and tpidr_el0 (x3, x4) to host context. */ + stp x3, x4, [x5, #(HostContextSpTpidrEl0)] + + /* Save all callee-saved host GPRs. */ + stp x19, x20, [x5, #(HostContextRegs+0x0)] + stp x21, x22, [x5, #(HostContextRegs+0x10)] + stp x23, x24, [x5, #(HostContextRegs+0x20)] + stp x25, x26, [x5, #(HostContextRegs+0x30)] + stp x27, x28, [x5, #(HostContextRegs+0x40)] + stp x29, x30, [x5, #(HostContextRegs+0x50)] + + /* Save all callee-saved host FPRs. */ + stp q8, q9, [x5, #(HostContextVregs+0x0)] + stp q10, q11, [x5, #(HostContextVregs+0x20)] + stp q12, q13, [x5, #(HostContextVregs+0x40)] + stp q14, q15, [x5, #(HostContextVregs+0x60)] + + /* Load guest tpidr_el0 from argument. */ + msr tpidr_el0, x0 + + /* Tail call the trampoline to restore guest state. */ + br x2 + + +/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */ +.section .text._ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits +.global _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv +.type _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, %function +_ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv: + /* This jumps to the signal handler, which will restore the entire context. */ + /* On entry, x0 = thread id, which is already in the right place. */ + + /* Move tpidr to x9 so it is not trampled. */ + mov x9, x1 + + /* Set up arguments. */ + mov x8, #(__NR_tkill) + mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal) + + /* Tail call the signal handler. */ + svc #0 + + /* Block execution from flowing here. */ + brk #1000 + + +/* static void Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */ +.section .text._ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits +.global _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_ +.type _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function +_ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_: + stp x29, x30, [sp, #-0x10]! 
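+    /* (Together with the mov below, this forms a standard frame record before we call into
+       the C++ helpers.) */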
+ mov x29, sp + + /* Call the context restorer with the raw context. */ + mov x0, x2 + bl _ZN4Core7ARM_NCE19RestoreGuestContextEPv + + /* Save the old value of tpidr_el0. */ + mrs x8, tpidr_el0 + ldr x9, [x0, #(TpidrEl0NativeContext)] + str x8, [x9, #(GuestContextHostContext + HostContextTpidrEl0)] + + /* Set our new tpidr_el0. */ + msr tpidr_el0, x0 + + /* Unlock the context. */ + bl _ZN4Core7ARM_NCE22UnlockThreadParametersEPv + + /* Returning from here will enter the guest. */ + ldp x29, x30, [sp], #0x10 + ret + + +/* static void Core::ARM_NCE::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */ +.section .text._ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits +.global _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_ +.type _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, %function +_ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_: + /* Check to see if we have the correct TLS magic. */ + mrs x8, tpidr_el0 + ldr w9, [x8, #(TpidrEl0TlsMagic)] + + LOAD_IMMEDIATE_32(w10, TlsMagic) + + cmp w9, w10 + b.ne 1f + + /* Correct TLS magic, so this is a guest interrupt. */ + /* Restore host tpidr_el0. */ + ldr x0, [x8, #(TpidrEl0NativeContext)] + ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)] + msr tpidr_el0, x3 + + /* Tail call the restorer. */ + mov x1, x2 + b _ZN4Core7ARM_NCE16SaveGuestContextEPNS_12GuestContextEPv + + /* Returning from here will enter host code. */ + +1: + /* Incorrect TLS magic, so this is a spurious signal. */ + ret + + +/* static void Core::ARM_NCE::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */ +.section .text._ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits +.global _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_ +.type _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, %function +_ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_: + /* Check to see if we have the correct TLS magic. */ + mrs x8, tpidr_el0 + ldr w9, [x8, #(TpidrEl0TlsMagic)] + + LOAD_IMMEDIATE_32(w10, TlsMagic) + + cmp w9, w10 + b.eq 1f + + /* Incorrect TLS magic, so this is a host fault. */ + /* Tail call the handler. */ + b _ZN4Core7ARM_NCE15HandleHostFaultEiPvS1_ + +1: + /* Correct TLS magic, so this is a guest fault. */ + stp x29, x30, [sp, #-0x20]! + str x19, [sp, #0x10] + mov x29, sp + + /* Save the old tpidr_el0. */ + mov x19, x8 + + /* Restore host tpidr_el0. */ + ldr x0, [x8, #(TpidrEl0NativeContext)] + ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)] + msr tpidr_el0, x3 + + /* Call the handler. */ + bl _ZN4Core7ARM_NCE16HandleGuestFaultEPNS_12GuestContextEPvS3_ + + /* If the handler returned false, we want to preserve the host tpidr_el0. */ + cbz x0, 2f + + /* Otherwise, restore guest tpidr_el0. */ + msr tpidr_el0, x19 + +2: + ldr x19, [sp, #0x10] + ldp x29, x30, [sp], #0x20 + ret + + +/* static void Core::ARM_NCE::LockThreadParameters(void* tpidr) */ +.section .text._ZN4Core7ARM_NCE20LockThreadParametersEPv, "ax", %progbits +.global _ZN4Core7ARM_NCE20LockThreadParametersEPv +.type _ZN4Core7ARM_NCE20LockThreadParametersEPv, %function +_ZN4Core7ARM_NCE20LockThreadParametersEPv: + /* Offset to lock member. */ + add x0, x0, #(TpidrEl0Lock) + +1: + /* Clear the monitor. */ + clrex + +2: + /* Load-linked with acquire ordering. */ + ldaxr w1, [x0] + + /* If the value was SpinLockLocked, clear monitor and retry. */ + cbz w1, 1b + + /* Store-conditional SpinLockLocked with relaxed ordering. */ + stxr w1, wzr, [x0] + + /* If we failed to store, retry. 
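+       In C++ terms, the 1b/2b sequence above is an acquire spin lock, roughly:
+         while (!lock.compare_exchange_weak(expected = SpinLockUnlocked, SpinLockLocked,
+                                            std::memory_order_acquire)) {}
+       where SpinLockLocked == 0 and SpinLockUnlocked == 1 (see arm_nce_asm_definitions.h).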
*/ + cbnz w1, 2b + + ret + + +/* static void Core::ARM_NCE::UnlockThreadParameters(void* tpidr) */ +.section .text._ZN4Core7ARM_NCE22UnlockThreadParametersEPv, "ax", %progbits +.global _ZN4Core7ARM_NCE22UnlockThreadParametersEPv +.type _ZN4Core7ARM_NCE22UnlockThreadParametersEPv, %function +_ZN4Core7ARM_NCE22UnlockThreadParametersEPv: + /* Offset to lock member. */ + add x0, x0, #(TpidrEl0Lock) + + /* Load SpinLockUnlocked. */ + mov w1, #(SpinLockUnlocked) + + /* Store value with release ordering. */ + stlr w1, [x0] + + ret diff --git a/src/core/arm/nce/arm_nce_asm_definitions.h b/src/core/arm/nce/arm_nce_asm_definitions.h new file mode 100644 index 000000000..8a9b285b5 --- /dev/null +++ b/src/core/arm/nce/arm_nce_asm_definitions.h @@ -0,0 +1,29 @@ +/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#define __ASSEMBLY__ + +#include +#include + +#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2 +#define BreakFromRunCodeSignal SIGURG +#define GuestFaultSignal SIGSEGV + +#define GuestContextSp 0xF8 +#define GuestContextHostContext 0x320 + +#define HostContextSpTpidrEl0 0xE0 +#define HostContextTpidrEl0 0xE8 +#define HostContextRegs 0x0 +#define HostContextVregs 0x60 + +#define TpidrEl0NativeContext 0x10 +#define TpidrEl0Lock 0x18 +#define TpidrEl0TlsMagic 0x20 +#define TlsMagic 0x555a5559 + +#define SpinLockLocked 0 +#define SpinLockUnlocked 1 diff --git a/src/core/arm/nce/guest_context.h b/src/core/arm/nce/guest_context.h new file mode 100644 index 000000000..0767a0337 --- /dev/null +++ b/src/core/arm/nce/guest_context.h @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "core/arm/arm_interface.h" +#include "core/arm/nce/arm_nce_asm_definitions.h" + +namespace Core { + +class ARM_NCE; +class System; + +struct HostContext { + alignas(16) std::array host_saved_regs{}; + alignas(16) std::array host_saved_vregs{}; + u64 host_sp{}; + void* host_tpidr_el0{}; +}; + +struct GuestContext { + std::array cpu_registers{}; + u64 sp{}; + u64 pc{}; + u32 fpcr{}; + u32 fpsr{}; + std::array vector_registers{}; + u32 pstate{}; + alignas(16) HostContext host_ctx{}; + u64 tpidrro_el0{}; + u64 tpidr_el0{}; + std::atomic esr_el1{}; + u32 nzcv{}; + u32 svc_swi{}; + System* system{}; + ARM_NCE* parent{}; +}; + +// Verify assembly offsets. 
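+// These offsets must stay in sync with the #define values in arm_nce_asm_definitions.h;
+// any layout change to GuestContext or HostContext has to update both files.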
+static_assert(offsetof(GuestContext, sp) == GuestContextSp); +static_assert(offsetof(GuestContext, host_ctx) == GuestContextHostContext); +static_assert(offsetof(HostContext, host_sp) == HostContextSpTpidrEl0); +static_assert(offsetof(HostContext, host_tpidr_el0) - 8 == HostContextSpTpidrEl0); +static_assert(offsetof(HostContext, host_tpidr_el0) == HostContextTpidrEl0); +static_assert(offsetof(HostContext, host_saved_regs) == HostContextRegs); +static_assert(offsetof(HostContext, host_saved_vregs) == HostContextVregs); + +} // namespace Core diff --git a/src/core/arm/nce/instructions.h b/src/core/arm/nce/instructions.h new file mode 100644 index 000000000..5b56ff857 --- /dev/null +++ b/src/core/arm/nce/instructions.h @@ -0,0 +1,147 @@ +// SPDX-FileCopyrightText: Copyright © 2020 Skyline Team and Contributors +// SPDX-License-Identifier: MPL-2.0 + +#include "common/bit_field.h" +#include "common/common_types.h" + +namespace Core::NCE { + +enum SystemRegister : u32 { + TpidrEl0 = 0x5E82, + TpidrroEl0 = 0x5E83, + CntfrqEl0 = 0x5F00, + CntpctEl0 = 0x5F01, +}; + +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SVC--Supervisor-Call- +union SVC { + constexpr explicit SVC(u32 raw_) : raw{raw_} {} + + constexpr bool Verify() { + return (this->GetSig0() == 0x1 && this->GetSig1() == 0x6A0); + } + + constexpr u32 GetSig0() { + return decltype(sig0)::ExtractValue(raw); + } + + constexpr u32 GetValue() { + return decltype(value)::ExtractValue(raw); + } + + constexpr u32 GetSig1() { + return decltype(sig1)::ExtractValue(raw); + } + + u32 raw; + +private: + BitField<0, 5, u32> sig0; // 0x1 + BitField<5, 16, u32> value; // 16-bit immediate + BitField<21, 11, u32> sig1; // 0x6A0 +}; +static_assert(sizeof(SVC) == sizeof(u32)); +static_assert(SVC(0xD40000C1).Verify()); +static_assert(SVC(0xD40000C1).GetValue() == 0x6); + +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MRS--Move-System-Register- +union MRS { + constexpr explicit MRS(u32 raw_) : raw{raw_} {} + + constexpr bool Verify() { + return (this->GetSig() == 0xD53); + } + + constexpr u32 GetRt() { + return decltype(rt)::ExtractValue(raw); + } + + constexpr u32 GetSystemReg() { + return decltype(system_reg)::ExtractValue(raw); + } + + constexpr u32 GetSig() { + return decltype(sig)::ExtractValue(raw); + } + + u32 raw; + +private: + BitField<0, 5, u32> rt; // destination register + BitField<5, 15, u32> system_reg; // source system register + BitField<20, 12, u32> sig; // 0xD53 +}; +static_assert(sizeof(MRS) == sizeof(u32)); +static_assert(MRS(0xD53BE020).Verify()); +static_assert(MRS(0xD53BE020).GetSystemReg() == CntpctEl0); +static_assert(MRS(0xD53BE020).GetRt() == 0x0); + +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register- +union MSR { + constexpr explicit MSR(u32 raw_) : raw{raw_} {} + + constexpr bool Verify() { + return this->GetSig() == 0xD51; + } + + constexpr u32 GetRt() { + return decltype(rt)::ExtractValue(raw); + } + + constexpr u32 GetSystemReg() { + return decltype(system_reg)::ExtractValue(raw); + } + + constexpr u32 GetSig() { + return decltype(sig)::ExtractValue(raw); + } + + u32 raw; + +private: + BitField<0, 5, u32> rt; // source register + BitField<5, 15, u32> system_reg; // destination system register + BitField<20, 12, u32> sig; // 0xD51 +}; +static_assert(sizeof(MSR) == sizeof(u32)); +static_assert(MSR(0xD51BD040).Verify()); +static_assert(MSR(0xD51BD040).GetSystemReg() == TpidrEl0); 
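+// A second worked example, MSR TPIDR_EL0, X5 (only the Rt field differs from the above):
+static_assert(MSR(0xD51BD045).Verify());
+static_assert(MSR(0xD51BD045).GetRt() == 0x5);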
+static_assert(MSR(0xD51BD040).GetRt() == 0x0); + +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXR--Load-Exclusive-Register- +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers- +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXR--Store-Exclusive-Register- +// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers- +union Exclusive { + constexpr explicit Exclusive(u32 raw_) : raw{raw_} {} + + constexpr bool Verify() { + return this->GetSig() == 0x10; + } + + constexpr u32 GetSig() { + return decltype(sig)::ExtractValue(raw); + } + + constexpr u32 AsOrdered() { + return raw | decltype(o0)::FormatValue(1); + } + + u32 raw; + +private: + BitField<0, 5, u32> rt; // memory operand + BitField<5, 5, u32> rn; // register operand 1 + BitField<10, 5, u32> rt2; // register operand 2 + BitField<15, 1, u32> o0; // ordered + BitField<16, 5, u32> rs; // status register + BitField<21, 2, u32> l; // operation type + BitField<23, 7, u32> sig; // 0x10 + BitField<30, 2, u32> size; // size +}; +static_assert(Exclusive(0xC85FFC00).Verify()); +static_assert(Exclusive(0xC85FFC00).AsOrdered() == 0xC85FFC00); +static_assert(Exclusive(0xC85F7C00).AsOrdered() == 0xC85FFC00); +static_assert(Exclusive(0xC8200440).AsOrdered() == 0xC8208440); + +} // namespace Core::NCE diff --git a/src/core/arm/nce/patch.cpp b/src/core/arm/nce/patch.cpp new file mode 100644 index 000000000..c79399c2b --- /dev/null +++ b/src/core/arm/nce/patch.cpp @@ -0,0 +1,472 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/arm64/native_clock.h" +#include "common/bit_cast.h" +#include "common/literals.h" +#include "core/arm/nce/arm_nce.h" +#include "core/arm/nce/guest_context.h" +#include "core/arm/nce/instructions.h" +#include "core/arm/nce/patch.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/hle/kernel/svc.h" + +namespace Core::NCE { + +using namespace Common::Literals; +using namespace oaknut::util; + +using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; + +constexpr size_t MaxRelativeBranch = 128_MiB; + +Patcher::Patcher() : c(m_patch_instructions) {} + +Patcher::~Patcher() = default; + +void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, + const Kernel::CodeSet::Segment& code) { + + // Write save context helper function. + c.l(m_save_context); + WriteSaveContext(); + + // Write load context helper function. + c.l(m_load_context); + WriteLoadContext(); + + // Retrieve text segment data. + const auto text = std::span{program_image}.subspan(code.offset, code.size); + const auto text_words = + std::span{reinterpret_cast(text.data()), text.size() / sizeof(u32)}; + + // Loop through instructions, patching as needed. + for (u32 i = 0; i < static_cast(text_words.size()); i++) { + const u32 inst = text_words[i]; + + const auto AddRelocations = [&] { + const uintptr_t this_offset = i * sizeof(u32); + const uintptr_t next_offset = this_offset + sizeof(u32); + + // Relocate from here to patch. + this->BranchToPatch(this_offset); + + // Relocate from patch to next instruction. 
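+            // (The handler emitted below ends with BranchToModule(next_offset), which records
+            // the return relocation; RelocateAndCopy later rewrites it into a branch back to
+            // the following instruction.)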
+ return next_offset; + }; + + // SVC + if (auto svc = SVC{inst}; svc.Verify()) { + WriteSvcTrampoline(AddRelocations(), svc.GetValue()); + continue; + } + + // MRS Xn, TPIDR_EL0 + // MRS Xn, TPIDRRO_EL0 + if (auto mrs = MRS{inst}; + mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) { + const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0 + : oaknut::SystemReg::TPIDR_EL0; + const auto dest_reg = oaknut::XReg{static_cast(mrs.GetRt())}; + WriteMrsHandler(AddRelocations(), dest_reg, src_reg); + continue; + } + + // MRS Xn, CNTPCT_EL0 + if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) { + WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast(mrs.GetRt())}); + continue; + } + + // MRS Xn, CNTFRQ_EL0 + if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) { + UNREACHABLE(); + } + + // MSR TPIDR_EL0, Xn + if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) { + WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast(msr.GetRt())}); + continue; + } + } + + // Determine patching mode for the final relocation step + const size_t image_size = program_image.size(); + this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData; +} + +void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, + const Kernel::CodeSet::Segment& code, + Kernel::PhysicalMemory& program_image, + EntryTrampolines* out_trampolines) { + const size_t patch_size = SectionSize(); + const size_t image_size = program_image.size(); + + // Retrieve text segment data. + const auto text = std::span{program_image}.subspan(code.offset, code.size); + const auto text_words = + std::span{reinterpret_cast(text.data()), text.size() / sizeof(u32)}; + + const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) { + oaknut::CodeGenerator rc{target}; + if (mode == PatchMode::PreText) { + rc.B(rel.patch_offset - patch_size - rel.module_offset); + } else { + rc.B(image_size - rel.module_offset + rel.patch_offset); + } + }; + + const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) { + oaknut::CodeGenerator rc{target}; + if (mode == PatchMode::PreText) { + rc.B(patch_size - rel.patch_offset + rel.module_offset); + } else { + rc.B(rel.module_offset - image_size - rel.patch_offset); + } + }; + + const auto RebasePatch = [&](ptrdiff_t patch_offset) { + if (mode == PatchMode::PreText) { + return GetInteger(load_base) + patch_offset; + } else { + return GetInteger(load_base) + image_size + patch_offset; + } + }; + + const auto RebasePc = [&](uintptr_t module_offset) { + if (mode == PatchMode::PreText) { + return GetInteger(load_base) + patch_size + module_offset; + } else { + return GetInteger(load_base) + module_offset; + } + }; + + // We are now ready to relocate! 
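+    // A worked example of the displacement math below (PostData mode, illustrative numbers):
+    // a patched instruction at module_offset 0x1000 in an image of size 0x80000 reaches a
+    // patch entry at patch_offset 0x40 with a forward branch of
+    // image_size - module_offset + patch_offset:
+    static_assert(0x80000 - 0x1000 + 0x40 == 0x7F040);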
+ for (const Relocation& rel : m_branch_to_patch_relocations) { + ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); + } + for (const Relocation& rel : m_branch_to_module_relocations) { + ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), + rel); + } + + // Rewrite PC constants and record post trampolines + for (const Relocation& rel : m_write_module_pc_relocations) { + oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; + rc.dx(RebasePc(rel.module_offset)); + } + for (const Trampoline& rel : m_trampolines) { + out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)}); + } + + // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. + // Convert to ordered to preserve this assumption + for (u32 i = 0; i < static_cast(text_words.size()); i++) { + const u32 inst = text_words[i]; + if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { + text_words[i] = exclusive.AsOrdered(); + } + } + + // Copy to program image + if (this->mode == PatchMode::PreText) { + std::memcpy(program_image.data(), m_patch_instructions.data(), + m_patch_instructions.size() * sizeof(u32)); + } else { + program_image.resize(image_size + patch_size); + std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), + m_patch_instructions.size() * sizeof(u32)); + } +} + +size_t Patcher::SectionSize() const noexcept { + return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); +} + +void Patcher::WriteLoadContext() { + // This function was called, which modifies X30, so use that as a scratch register. + // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes + // of stack. + c.STR(X30, SP, 8); + c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); + c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); + + // Load system registers. + c.LDR(W0, X30, offsetof(GuestContext, fpsr)); + c.MSR(oaknut::SystemReg::FPSR, X0); + c.LDR(W0, X30, offsetof(GuestContext, fpcr)); + c.MSR(oaknut::SystemReg::FPCR, X0); + c.LDR(W0, X30, offsetof(GuestContext, nzcv)); + c.MSR(oaknut::SystemReg::NZCV, X0); + + // Load all vector registers. + static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); + for (int i = 0; i <= 30; i += 2) { + c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); + } + + // Load all general-purpose registers except X30. + for (int i = 0; i <= 28; i += 2) { + c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); + } + + // Reload our return X30 from the stack and return. + // The patch code will reload the guest X30 for us. + c.LDR(X30, SP, 8); + c.RET(); +} + +void Patcher::WriteSaveContext() { + // This function was called, which modifies X30, so use that as a scratch register. + // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of + // stack. + c.STR(X30, SP, 8); + c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); + c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); + + // Store all general-purpose registers except X30. + for (int i = 0; i <= 28; i += 2) { + c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); + } + + // Store all vector registers. 
+ static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); + for (int i = 0; i <= 30; i += 2) { + c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); + } + + // Store guest system registers, X30 and SP, using X0 as a scratch register. + c.STR(X0, SP, PRE_INDEXED, -16); + c.LDR(X0, SP, 16); + c.STR(X0, X30, 8 * 30); + c.ADD(X0, SP, 32); + c.STR(X0, X30, offsetof(GuestContext, sp)); + c.MRS(X0, oaknut::SystemReg::FPSR); + c.STR(W0, X30, offsetof(GuestContext, fpsr)); + c.MRS(X0, oaknut::SystemReg::FPCR); + c.STR(W0, X30, offsetof(GuestContext, fpcr)); + c.MRS(X0, oaknut::SystemReg::NZCV); + c.STR(W0, X30, offsetof(GuestContext, nzcv)); + c.LDR(X0, SP, POST_INDEXED, 16); + + // Reload our return X30 from the stack, and return. + c.LDR(X30, SP, 8); + c.RET(); +} + +void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { + LOG_ERROR(Core_ARM, "Patching SVC {:#x} at {:#x}", svc_id, module_dest - 4); + // We are about to start saving state, so we need to lock the context. + this->LockContext(); + + // Store guest X30 to the stack. Then, save the context and restore the stack. + // This will save all registers except PC, but we know PC at patch time. + c.STR(X30, SP, PRE_INDEXED, -16); + c.BL(m_save_context); + c.LDR(X30, SP, POST_INDEXED, 16); + + // Now that we've saved all registers, we can use any registers as scratch. + // Store PC + 4 to arm interface, since we know the instruction offset from the entry point. + oaknut::Label pc_after_svc; + c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); + c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); + c.LDR(X2, pc_after_svc); + c.STR(X2, X1, offsetof(GuestContext, pc)); + + // Store SVC number to execute when we return + c.MOV(X2, svc_id); + c.STR(W2, X1, offsetof(GuestContext, svc_swi)); + + // We are calling a SVC. Clear esr_el1 and return it. + static_assert(std::is_same_v, u64>); + oaknut::Label retry; + c.ADD(X2, X1, offsetof(GuestContext, esr_el1)); + c.l(retry); + c.LDAXR(X0, X2); + c.STLXR(W3, XZR, X2); + c.CBNZ(W3, retry); + + // Add "calling SVC" flag. Since this is X0, this is now our return value. + c.ORR(X0, X0, static_cast(HaltReason::SupervisorCall)); + + // Offset the GuestContext pointer to the HostContext member. + // STP has limited range of [-512, 504] which we can't reach otherwise + // NB: Due to this all offsets below are from the start of HostContext. + c.ADD(X1, X1, offsetof(GuestContext, host_ctx)); + + // Reload host TPIDR_EL0 and SP. + static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0)); + c.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); + c.MOV(SP, X2); + c.MSR(oaknut::SystemReg::TPIDR_EL0, X3); + + // Load callee-saved host registers and return to host. 
+ static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs); + static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs); + c.LDP(X19, X20, X1, HOST_REGS_OFF); + c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); + c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); + c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); + c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); + c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); + c.LDP(Q8, Q9, X1, HOST_VREGS_OFF); + c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); + c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); + c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); + c.RET(); + + // Write the post-SVC trampoline address, which will jump back to the guest after restoring its + // state. + m_trampolines.push_back({c.offset(), module_dest}); + + // Host called this location. Save the return address so we can + // unwind the stack properly when jumping back. + c.MRS(X2, oaknut::SystemReg::TPIDR_EL0); + c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); + c.ADD(X0, X2, offsetof(GuestContext, host_ctx)); + c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); + + // Reload all guest registers except X30 and PC. + // The function also expects 16 bytes of stack already allocated. + c.STR(X30, SP, PRE_INDEXED, -16); + c.BL(m_load_context); + c.LDR(X30, SP, POST_INDEXED, 16); + + // Use X1 as a scratch register to restore X30. + c.STR(X1, SP, PRE_INDEXED, -16); + c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); + c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); + c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); + c.LDR(X1, SP, POST_INDEXED, 16); + + // Unlock the context. + this->UnlockContext(); + + // Jump back to the instruction after the emulated SVC. + this->BranchToModule(module_dest); + + // Store PC after call. + c.l(pc_after_svc); + this->WriteModulePc(module_dest); +} + +void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, + oaknut::SystemReg src_reg) { + // Retrieve emulated TLS register from GuestContext. + c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); + if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) { + c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0)); + } else { + c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0)); + } + + // Jump back to the instruction after the emulated MRS. + this->BranchToModule(module_dest); +} + +void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { + const auto scratch_reg = src_reg.index() == 0 ? X1 : X0; + c.STR(scratch_reg, SP, PRE_INDEXED, -16); + + // Save guest value to NativeExecutionParameters::tpidr_el0. + c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0); + c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0)); + + // Restore scratch register. + c.LDR(scratch_reg, SP, POST_INDEXED, 16); + + // Jump back to the instruction after the emulated MSR. + this->BranchToModule(module_dest); +} + +void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { + static Common::Arm64::NativeClock clock{}; + const auto factor = clock.GetGuestCNTFRQFactor(); + const auto raw_factor = Common::BitCast>(factor); + + const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1; + oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0; + oaknut::XReg scratch1 = use_x2_x3 ? 
X3 : X1; + + oaknut::Label factorlo; + oaknut::Label factorhi; + + // Save scratches. + c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16); + + // Load counter value. + c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0); + + // Load scaling factor. + c.LDR(scratch0, factorlo); + c.LDR(scratch1, factorhi); + + // Multiply low bits and get result. + c.UMULH(scratch0, dest_reg, scratch0); + + // Multiply high bits and add low bit result. + c.MADD(dest_reg, dest_reg, scratch1, scratch0); + + // Reload scratches. + c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16); + + // Jump back to the instruction after the emulated MRS. + this->BranchToModule(module_dest); + + // Scaling factor constant values. + c.l(factorlo); + c.dx(raw_factor[0]); + c.l(factorhi); + c.dx(raw_factor[1]); +} + +void Patcher::LockContext() { + oaknut::Label retry; + + // Save scratches. + c.STP(X0, X1, SP, PRE_INDEXED, -16); + + // Reload lock pointer. + c.l(retry); + c.CLREX(); + c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); + c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); + + static_assert(SpinLockLocked == 0); + + // Load-linked with acquire ordering. + c.LDAXR(W1, X0); + + // If the value was SpinLockLocked, clear monitor and retry. + c.CBZ(W1, retry); + + // Store-conditional SpinLockLocked with relaxed ordering. + c.STXR(W1, WZR, X0); + + // If we failed to store, retry. + c.CBNZ(W1, retry); + + // We succeeded! Reload scratches. + c.LDP(X0, X1, SP, POST_INDEXED, 16); +} + +void Patcher::UnlockContext() { + // Save scratches. + c.STP(X0, X1, SP, PRE_INDEXED, -16); + + // Load lock pointer. + c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); + c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); + + // Load SpinLockUnlocked. + c.MOV(W1, SpinLockUnlocked); + + // Store value with release ordering. + c.STLR(W1, X0); + + // Load scratches. 
+ c.LDP(X0, X1, SP, POST_INDEXED, 16); +} + +} // namespace Core::NCE diff --git a/src/core/arm/nce/patch.h b/src/core/arm/nce/patch.h new file mode 100644 index 000000000..b727d4e48 --- /dev/null +++ b/src/core/arm/nce/patch.h @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#include +#include +#pragma clang diagnostic pop + +#include "common/common_types.h" +#include "core/hle/kernel/code_set.h" +#include "core/hle/kernel/k_typed_address.h" +#include "core/hle/kernel/physical_memory.h" + +#include + +namespace Core { +struct GuestContext; +} + +namespace Core::NCE { + +enum class PatchMode : u32 { + None, + PreText, ///< Patch section is inserted before .text + PostData, ///< Patch section is inserted after .data +}; + +using ModuleTextAddress = u64; +using PatchTextAddress = u64; +using EntryTrampolines = std::unordered_map; + +class Patcher { +public: + explicit Patcher(); + ~Patcher(); + + void PatchText(const Kernel::PhysicalMemory& program_image, + const Kernel::CodeSet::Segment& code); + void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, + Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines); + size_t SectionSize() const noexcept; + + [[nodiscard]] PatchMode Mode() const noexcept { + return mode; + } + +private: + using ModuleDestLabel = uintptr_t; + + struct Trampoline { + ptrdiff_t patch_offset; + uintptr_t module_offset; + }; + + void WriteLoadContext(); + void WriteSaveContext(); + void LockContext(); + void UnlockContext(); + void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id); + void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, + oaknut::SystemReg src_reg); + void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg); + void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg); + +private: + void BranchToPatch(uintptr_t module_dest) { + m_branch_to_patch_relocations.push_back({c.offset(), module_dest}); + } + + void BranchToModule(uintptr_t module_dest) { + m_branch_to_module_relocations.push_back({c.offset(), module_dest}); + c.dw(0); + } + + void WriteModulePc(uintptr_t module_dest) { + m_write_module_pc_relocations.push_back({c.offset(), module_dest}); + c.dx(0); + } + +private: + // List of patch instructions we have generated. + std::vector m_patch_instructions{}; + + // Relocation type for relative branch from module to patch. + struct Relocation { + ptrdiff_t patch_offset; ///< Offset in bytes from the start of the patch section. + uintptr_t module_offset; ///< Offset in bytes from the start of the text section. 
+ }; + + oaknut::VectorCodeGenerator c; + std::vector m_trampolines; + std::vector m_branch_to_patch_relocations{}; + std::vector m_branch_to_module_relocations{}; + std::vector m_write_module_pc_relocations{}; + oaknut::Label m_save_context{}; + oaknut::Label m_load_context{}; + PatchMode mode{PatchMode::None}; +}; + +} // namespace Core::NCE diff --git a/src/core/core.cpp b/src/core/core.cpp index 1d557fb43..408479019 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -136,9 +136,7 @@ struct System::Impl { } void Initialize(System& system) { - const bool direct_mapped_address = Settings::IsNceEnabled(); - device_memory = std::make_unique(direct_mapped_address); - + device_memory = std::make_unique(); is_multicore = Settings::values.use_multi_core.GetValue(); extended_memory_layout = Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb; diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 980bb97f9..151eb3870 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -211,6 +211,8 @@ void CpuManager::RunThread(std::stop_token token, std::size_t core) { system.GPU().ObtainContext(); } + system.ArmInterface(core).Initialize(); + auto& kernel = system.Kernel(); auto& scheduler = *kernel.CurrentScheduler(); auto* thread = scheduler.GetSchedulerCurrentThread(); diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp index 0528a8e3b..3a9151646 100644 --- a/src/core/device_memory.cpp +++ b/src/core/device_memory.cpp @@ -12,13 +12,9 @@ constexpr size_t VirtualReserveSize = 1ULL << 38; constexpr size_t VirtualReserveSize = 1ULL << 39; #endif -DeviceMemory::DeviceMemory(bool direct_mapped_address) +DeviceMemory::DeviceMemory() : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(), - VirtualReserveSize} { - if (direct_mapped_address) { - buffer.EnableDirectMappedAddress(); - } -} + VirtualReserveSize} {} DeviceMemory::~DeviceMemory() = default; diff --git a/src/core/device_memory.h b/src/core/device_memory.h index 368f19e86..13388b73e 100644 --- a/src/core/device_memory.h +++ b/src/core/device_memory.h @@ -18,7 +18,7 @@ enum : u64 { class DeviceMemory { public: - explicit DeviceMemory(bool direct_mapped_address); + explicit DeviceMemory(); ~DeviceMemory(); DeviceMemory& operator=(const DeviceMemory&) = delete; diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h index af1af2b78..d53da82f4 100644 --- a/src/core/hle/kernel/code_set.h +++ b/src/core/hle/kernel/code_set.h @@ -75,11 +75,20 @@ struct CodeSet final { return segments[2]; } + Segment& PatchSegment() { + return patch_segment; + } + + const Segment& PatchSegment() const { + return patch_segment; + } + /// The overall data that backs this code set. Kernel::PhysicalMemory memory; /// The segments that comprise this code set. std::array segments; + Segment patch_segment; /// The entry point address for this code set. 
KProcessAddress entrypoint = 0; diff --git a/src/core/hle/kernel/k_address_space_info.cpp b/src/core/hle/kernel/k_address_space_info.cpp index 32173e52b..3235a7a37 100644 --- a/src/core/hle/kernel/k_address_space_info.cpp +++ b/src/core/hle/kernel/k_address_space_info.cpp @@ -25,8 +25,8 @@ constexpr std::array AddressSpaceInfos{{ { .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, }, { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, }, { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, }, -#ifdef ANDROID - // With Android, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region. +#ifdef ARCHITECTURE_arm64 + // With NCE, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region. { .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, #else { .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index c6a200320..d2e9f2a2e 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -1214,6 +1214,17 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) { ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute); ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); + +#ifdef ARCHITECTURE_arm64 + if (Settings::IsNceEnabled()) { + auto& buffer = m_kernel.System().DeviceMemory().buffer; + const auto& code = code_set.CodeSegment(); + const auto& patch = code_set.PatchSegment(); + buffer.Protect(GetInteger(base_addr + code.addr), code.size, true, true, true); + buffer.Protect(GetInteger(base_addr + patch.addr), patch.size, true, true, true); + ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None); + } +#endif } bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 54b8e0a59..7b97d452b 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -112,6 +112,7 @@ private: std::array m_pinned_threads{}; std::array m_watchpoints{}; std::map m_debug_page_refcounts{}; + std::unordered_map m_post_handlers{}; std::atomic m_cpu_time{}; std::atomic m_num_process_switches{}; std::atomic m_num_thread_switches{}; @@ -467,6 +468,14 @@ public: static void Switch(KProcess* cur_process, KProcess* next_process); + std::unordered_map& GetPostHandlers() noexcept { + return m_post_handlers; + } + + KernelCore& GetKernel() noexcept { + return m_kernel; + } + public: // Attempts to insert a watchpoint into a free slot. Returns false if none are available. 
bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index e1f80b04f..e9ca5dfca 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -655,6 +655,21 @@ public: return m_stack_top; } +public: + // TODO: This shouldn't be defined in kernel namespace + struct NativeExecutionParameters { + u64 tpidr_el0{}; + u64 tpidrro_el0{}; + void* native_context{}; + std::atomic lock{1}; + bool is_running{}; + u32 magic{Common::MakeMagic('Y', 'U', 'Z', 'U')}; + }; + + NativeExecutionParameters& GetNativeExecutionParameters() { + return m_native_execution_parameters; + } + private: KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key, bool is_kernel_address_key); @@ -914,6 +929,7 @@ private: ThreadWaitReasonForDebugging m_wait_reason_for_debugging{}; uintptr_t m_argument{}; KProcessAddress m_stack_top{}; + NativeExecutionParameters m_native_execution_parameters{}; public: using ConditionVariableThreadTreeType = ConditionVariableThreadTree; diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index 5ee869fa2..15434212e 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -1,8 +1,12 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/settings.h" #include "core/arm/dynarmic/arm_dynarmic_32.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" +#ifdef ARCHITECTURE_arm64 +#include "core/arm/nce/arm_nce.h" +#endif #include "core/core.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/kernel.h" @@ -14,7 +18,8 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu : m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} { #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) // TODO(bunnei): Initialization relies on a core being available. We may later replace this with - // a 32-bit instance of Dynarmic. This should be abstracted out to a CPU manager. + // an NCE interface or a 32-bit instance of Dynarmic. This should be abstracted out to a CPU + // manager. 
auto& kernel = system.Kernel(); m_arm_interface = std::make_unique( system, kernel.IsMulticore(), @@ -28,6 +33,13 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu PhysicalCore::~PhysicalCore() = default; void PhysicalCore::Initialize(bool is_64_bit) { +#if defined(ARCHITECTURE_arm64) + if (Settings::IsNceEnabled()) { + m_arm_interface = std::make_unique(m_system, m_system.Kernel().IsMulticore(), + m_core_index); + return; + } +#endif #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) auto& kernel = m_system.Kernel(); if (!is_64_bit) { diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 48c0edaea..e7fc8f438 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -3,6 +3,7 @@ #include #include "common/logging/log.h" +#include "common/settings.h" #include "core/core.h" #include "core/file_sys/content_archive.h" #include "core/file_sys/control_metadata.h" @@ -14,6 +15,10 @@ #include "core/loader/deconstructed_rom_directory.h" #include "core/loader/nso.h" +#ifdef ARCHITECTURE_arm64 +#include "core/arm/nce/patch.h" +#endif + namespace Loader { AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_, @@ -124,21 +129,41 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect } metadata.Print(); - const auto static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", - "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", - "subsdk8", "subsdk9", "sdk"}; + // Enable NCE only for 64-bit programs. + Settings::SetNceEnabled(metadata.Is64BitProgram()); + + const std::array static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", + "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", + "subsdk8", "subsdk9", "sdk"}; + + std::size_t code_size{}; + + // Define an nce patch context for each potential module. +#ifdef ARCHITECTURE_arm64 + std::array module_patchers; +#endif + + const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* { +#ifdef ARCHITECTURE_arm64 + if (Settings::IsNceEnabled()) { + return &module_patchers[i]; + } +#endif + return nullptr; + }; // Use the NSO module loader to figure out the code layout - std::size_t code_size{}; - for (const auto& module : static_modules) { + for (size_t i = 0; i < static_modules.size(); i++) { + const auto& module = static_modules[i]; const FileSys::VirtualFile module_file{dir->GetFile(module)}; if (!module_file) { continue; } const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; - const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( - process, system, *module_file, code_size, should_pass_arguments, false); + const auto tentative_next_load_addr = + AppLoader_NSO::LoadModule(process, system, *module_file, code_size, + should_pass_arguments, false, {}, GetPatcher(i)); if (!tentative_next_load_addr) { return {ResultStatus::ErrorLoadingNSO, {}}; } @@ -146,8 +171,18 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect code_size = *tentative_next_load_addr; } + // Enable direct memory mapping in case of NCE. 
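+    // (With NCE the guest executes at host addresses, so the direct-mapped buffer base is
+    // passed to LoadFromMetadata below to line the guest address space up with the host
+    // mapping.)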
+ const u64 fastmem_base = [&]() -> size_t { + if (Settings::IsNceEnabled()) { + auto& buffer = system.DeviceMemory().buffer; + buffer.EnableDirectMappedAddress(); + return reinterpret_cast(buffer.VirtualBasePointer()); + } + return 0; + }(); + // Setup the process code layout - if (process.LoadFromMetadata(metadata, code_size, 0, is_hbl).IsError()) { + if (process.LoadFromMetadata(metadata, code_size, fastmem_base, is_hbl).IsError()) { return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; } @@ -157,7 +192,8 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect VAddr next_load_addr{base_address}; const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(), system.GetContentProvider()}; - for (const auto& module : static_modules) { + for (size_t i = 0; i < static_modules.size(); i++) { + const auto& module = static_modules[i]; const FileSys::VirtualFile module_file{dir->GetFile(module)}; if (!module_file) { continue; @@ -165,15 +201,16 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect const VAddr load_addr{next_load_addr}; const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; - const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( - process, system, *module_file, load_addr, should_pass_arguments, true, pm); + const auto tentative_next_load_addr = + AppLoader_NSO::LoadModule(process, system, *module_file, load_addr, + should_pass_arguments, true, pm, GetPatcher(i)); if (!tentative_next_load_addr) { return {ResultStatus::ErrorLoadingNSO, {}}; } next_load_addr = *tentative_next_load_addr; modules.insert_or_assign(load_addr, module); - LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr); + LOG_DEBUG(Loader, "loaded module {} @ {:#X}", module, load_addr); } // Find the RomFS by searching for a ".romfs" file in this directory diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index dfed296a5..49cf90317 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -22,6 +22,10 @@ #include "core/loader/nso.h" #include "core/memory.h" +#ifdef ARCHITECTURE_arm64 +#include "core/arm/nce/patch.h" +#endif + namespace Loader { struct NroSegmentHeader { @@ -139,7 +143,8 @@ static constexpr u32 PageAlignSize(u32 size) { return static_cast((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK); } -static bool LoadNroImpl(Kernel::KProcess& process, const std::vector& data) { +static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, + const std::vector& data) { if (data.size() < sizeof(NroHeader)) { return {}; } @@ -195,14 +200,60 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector& data) codeset.DataSegment().size += bss_size; program_image.resize(static_cast(program_image.size()) + bss_size); +#ifdef ARCHITECTURE_arm64 + const auto& code = codeset.CodeSegment(); + + // NROs are always 64-bit programs. + Settings::SetNceEnabled(true); + + // Create NCE patcher + Core::NCE::Patcher patch{}; + size_t image_size = program_image.size(); + + if (Settings::IsNceEnabled()) { + // Patch SVCs and MRS calls in the guest code + patch.PatchText(program_image, code); + + // We only support PostData patching for NROs. + ASSERT(patch.Mode() == Core::NCE::PatchMode::PostData); + + // Update patch section. + auto& patch_segment = codeset.PatchSegment(); + patch_segment.addr = image_size; + patch_segment.size = static_cast(patch.SectionSize()); + + // Add patch section size to the module size. 
+ image_size += patch_segment.size; + } +#endif + + // Enable direct memory mapping in case of NCE. + const u64 fastmem_base = [&]() -> size_t { + if (Settings::IsNceEnabled()) { + auto& buffer = system.DeviceMemory().buffer; + buffer.EnableDirectMappedAddress(); + return reinterpret_cast(buffer.VirtualBasePointer()); + } + return 0; + }(); + // Setup the process code layout if (process - .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), 0, + .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), image_size, fastmem_base, false) .IsError()) { return false; } + // Relocate code patch and copy to the program_image if running under NCE. + // This needs to be after LoadFromMetadata so we can use the process entry point. +#ifdef ARCHITECTURE_arm64 + if (Settings::IsNceEnabled()) { + patch.RelocateAndCopy(process.GetEntryPoint(), code, program_image, + &process.GetPostHandlers()); + } +#endif + // Load codeset for current process codeset.memory = std::move(program_image); process.LoadModule(std::move(codeset), process.GetEntryPoint()); @@ -210,8 +261,9 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector& data) return true; } -bool AppLoader_NRO::LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file) { - return LoadNroImpl(process, nro_file.ReadAllBytes()); +bool AppLoader_NRO::LoadNro(Core::System& system, Kernel::KProcess& process, + const FileSys::VfsFile& nro_file) { + return LoadNroImpl(system, process, nro_file.ReadAllBytes()); } AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) { @@ -219,7 +271,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::S return {ResultStatus::ErrorAlreadyLoaded, {}}; } - if (!LoadNro(process, *file)) { + if (!LoadNro(system, process, *file)) { return {ResultStatus::ErrorLoadingNRO, {}}; } diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h index 8de6eebc6..d2928cba0 100644 --- a/src/core/loader/nro.h +++ b/src/core/loader/nro.h @@ -54,7 +54,7 @@ public: bool IsRomFSUpdatable() const override; private: - bool LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file); + bool LoadNro(Core::System& system, Kernel::KProcess& process, const FileSys::VfsFile& nro_file); std::vector icon_data; std::unique_ptr nacp; diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 1350da8dc..34b10ef2e 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -20,6 +20,10 @@ #include "core/loader/nso.h" #include "core/memory.h" +#ifdef ARCHITECTURE_arm64 +#include "core/arm/nce/patch.h" +#endif + namespace Loader { namespace { struct MODHeader { @@ -72,7 +76,8 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& in_file) { std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system, const FileSys::VfsFile& nso_file, VAddr load_base, bool should_pass_arguments, bool load_into_process, - std::optional pm) { + std::optional pm, + Core::NCE::Patcher* patch) { if (nso_file.GetSize() < sizeof(NSOHeader)) { return std::nullopt; } @@ -86,6 +91,16 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: return std::nullopt; } + // Allocate some space at the beginning if we are patching in PreText mode. 
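For contrast with the NRO case above, a minimal sketch of how the two patch placements differ for NSOs: in PreText mode the trampoline section occupies the start of the image and every original segment is shifted by module_start, while in PostData mode it is appended at the end. The enum and helpers are hypothetical, shown only to make the layout explicit.

#include <cstddef>

enum class PatchPlacement { PreText, PostData };

// Offset applied to the text/rodata/data segments.
inline std::size_t ModuleStartFor(PatchPlacement placement, std::size_t patch_size) {
    return placement == PatchPlacement::PreText ? patch_size : 0;
}

// Address assigned to the patch segment itself.
inline std::size_t PatchAddrFor(PatchPlacement placement, std::size_t image_size) {
    return placement == PatchPlacement::PreText ? 0 : image_size;
}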
+ const size_t module_start = [&]() -> size_t { +#ifdef ARCHITECTURE_arm64 + if (patch && patch->Mode() == Core::NCE::PatchMode::PreText) { + return patch->SectionSize(); + } +#endif + return 0; + }(); + // Build program image Kernel::CodeSet codeset; Kernel::PhysicalMemory program_image; @@ -95,11 +110,12 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: if (nso_header.IsSegmentCompressed(i)) { data = DecompressSegment(data, nso_header.segments[i]); } - program_image.resize(nso_header.segments[i].location + static_cast(data.size())); - std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), - data.size()); - codeset.segments[i].addr = nso_header.segments[i].location; - codeset.segments[i].offset = nso_header.segments[i].location; + program_image.resize(module_start + nso_header.segments[i].location + + static_cast(data.size())); + std::memcpy(program_image.data() + module_start + nso_header.segments[i].location, + data.data(), data.size()); + codeset.segments[i].addr = module_start + nso_header.segments[i].location; + codeset.segments[i].offset = module_start + nso_header.segments[i].location; codeset.segments[i].size = nso_header.segments[i].size; } @@ -118,7 +134,7 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: } codeset.DataSegment().size += nso_header.segments[2].bss_size; - const u32 image_size{ + u32 image_size{ PageAlignSize(static_cast(program_image.size()) + nso_header.segments[2].bss_size)}; program_image.resize(image_size); @@ -139,6 +155,32 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); } +#ifdef ARCHITECTURE_arm64 + // If we are computing the process code layout and using nce backend, patch. + const auto& code = codeset.CodeSegment(); + if (patch && patch->Mode() == Core::NCE::PatchMode::None) { + // Patch SVCs and MRS calls in the guest code + patch->PatchText(program_image, code); + + // Add patch section size to the module size. + image_size += patch->SectionSize(); + } else if (patch) { + // Relocate code patch and copy to the program_image. + patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers()); + + // Update patch section. + auto& patch_segment = codeset.PatchSegment(); + patch_segment.addr = patch->Mode() == Core::NCE::PatchMode::PreText ? 0 : image_size; + patch_segment.size = static_cast(patch->SectionSize()); + + // Add patch section size to the module size. In PreText mode image_size + // already contains the patch segment as part of module_start. + if (patch->Mode() == Core::NCE::PatchMode::PostData) { + image_size += patch_segment.size; + } + } +#endif + // If we aren't actually loading (i.e. 
just computing the process code layout), we are done if (!load_into_process) { return load_base + image_size; diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h index 0b53b4ecd..29b86ed4c 100644 --- a/src/core/loader/nso.h +++ b/src/core/loader/nso.h @@ -15,6 +15,10 @@ namespace Core { class System; } +namespace Core::NCE { +class Patcher; +} + namespace Kernel { class KProcess; } @@ -88,7 +92,8 @@ public: static std::optional LoadModule(Kernel::KProcess& process, Core::System& system, const FileSys::VfsFile& nso_file, VAddr load_base, bool should_pass_arguments, bool load_into_process, - std::optional pm = {}); + std::optional pm = {}, + Core::NCE::Patcher* patch = nullptr); LoadResult Load(Kernel::KProcess& process, Core::System& system) override; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 14db64f9d..e5ca78ef4 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -1001,4 +1001,17 @@ void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { impl->FlushRegion(dest_addr, size); } +bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { + bool mapped = true; + u8* const ptr = impl->GetPointerImpl( + GetInteger(vaddr), + [&] { + LOG_ERROR(HW_Memory, "Unmapped InvalidateNCE for {} bytes @ {:#x}", size, + GetInteger(vaddr)); + mapped = false; + }, + [&] { impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); }); + return mapped && ptr != nullptr; +} + } // namespace Core::Memory diff --git a/src/core/memory.h b/src/core/memory.h index 73195549f..e5fbc0025 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -474,6 +474,7 @@ public: void SetGPUDirtyManagers(std::span managers); void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); + bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); void FlushRegion(Common::ProcessAddress dest_addr, size_t size); private: From 8fab363237083a8130a7b2a023cd9c5dd83f8f4f Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sat, 18 Nov 2023 16:10:39 +0200 Subject: [PATCH 11/28] android: Add cpu backend gui toggle --- .codespellrc | 2 +- .../yuzu_emu/features/settings/model/IntSetting.kt | 1 + .../features/settings/model/view/SettingsItem.kt | 9 +++++++++ .../features/settings/ui/SettingsFragmentPresenter.kt | 1 + src/android/app/src/main/res/values/arrays.xml | 10 ++++++++++ src/android/app/src/main/res/values/strings.xml | 5 +++++ src/common/host_memory.cpp | 6 +++--- src/common/settings.cpp | 8 +++++++- src/common/signal_chain.cpp | 2 +- src/core/loader/deconstructed_rom_directory.cpp | 6 ++++-- src/core/loader/nro.cpp | 2 +- 11 files changed, 43 insertions(+), 9 deletions(-) diff --git a/.codespellrc b/.codespellrc index 3336d31fe..9ff49898b 100644 --- a/.codespellrc +++ b/.codespellrc @@ -3,4 +3,4 @@ [codespell] skip = ./.git,./build,./dist,./Doxyfile,./externals,./LICENSES,./src/android/app/src/main/res -ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink +ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink,nce diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt index 151362124..ef10b209f 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt +++
b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt @@ -10,6 +10,7 @@ enum class IntSetting( override val category: Settings.Category, override val androidDefault: Int? = null ) : AbstractIntSetting { + CPU_BACKEND("cpu_backend", Settings.Category.Cpu), CPU_ACCURACY("cpu_accuracy", Settings.Category.Cpu), REGION_INDEX("region_index", Settings.Category.System), LANGUAGE_INDEX("language_index", Settings.Category.System), diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index 6aba69dbe..1f090424b 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -77,6 +77,15 @@ abstract class SettingsItem( "%" ) ) + put( + SingleChoiceSetting( + IntSetting.CPU_BACKEND, + R.string.cpu_backend, + 0, + R.array.cpuBackendNames, + R.array.cpuBackendValues + ) + ) put( SingleChoiceSetting( IntSetting.CPU_ACCURACY, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 8b71e32f3..7425728c6 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -269,6 +269,7 @@ class SettingsFragmentPresenter( add(BooleanSetting.RENDERER_DEBUG.key) add(HeaderSetting(R.string.cpu)) + add(IntSetting.CPU_BACKEND.key) add(IntSetting.CPU_ACCURACY.key) add(BooleanSetting.CPU_DEBUG_MODE.key) add(SettingsItem.FASTMEM_COMBINED) diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index 51bcc49a3..2756e5cc9 100644 --- a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -175,6 +175,16 @@ 2 + + @string/cpu_backend_dynarmic + @string/cpu_backend_nce + + + + 0 + 1 + + @string/auto @string/cpu_accuracy_accurate diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index 471af8795..f07121f6a 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -185,6 +185,7 @@ Limits emulation speed to a specified percentage of normal speed. Limit speed percent Specifies the percentage to limit emulation speed. 100% is the normal speed. Values higher or lower will increase or decrease the speed limit. 
+ CPU Backend CPU accuracy %1$s%2$s @@ -416,6 +417,10 @@ Force 16:10 Stretch to window + + Dynarmic (Slow) + Native code execution (NCE) + Accurate Unsafe diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 38d7b29f7..8a869e558 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -363,13 +363,13 @@ private: #ifdef ARCHITECTURE_arm64 -uint64_t GetRandomU64() { +static uint64_t GetRandomU64() { uint64_t ret; ASSERT(getrandom(&ret, sizeof(ret), 0) == 0); return ret; } -void* ChooseVirtualBase(size_t virtual_size) { +static void* ChooseVirtualBase(size_t virtual_size) { constexpr uintptr_t Map39BitSize = (1ULL << 39); constexpr uintptr_t Map36BitSize = (1ULL << 36); @@ -410,7 +410,7 @@ void* ChooseVirtualBase(size_t virtual_size) { return MAP_FAILED; } #else -void* ChooseVirtualBase(size_t virtual_size) { +static void* ChooseVirtualBase(size_t virtual_size) { return mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); } diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 81a036ef0..90e7475d7 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -159,7 +159,13 @@ bool IsFastmemEnabled() { static bool is_nce_enabled = false; void SetNceEnabled(bool is_39bit) { - is_nce_enabled = values.cpu_backend.GetValue() == CpuBackend::Nce && is_39bit; + const bool is_nce_selected = values.cpu_backend.GetValue() == CpuBackend::Nce; + is_nce_enabled = is_nce_selected && is_39bit; + if (is_nce_selected && !is_nce_enabled) { + LOG_WARNING( + Common, + "Program does not utilize 39-bit address space, unable to natively execute code"); + } } bool IsNceEnabled() { diff --git a/src/common/signal_chain.cpp b/src/common/signal_chain.cpp index e0c6b9d4e..2e4fecc48 100644 --- a/src/common/signal_chain.cpp +++ b/src/common/signal_chain.cpp @@ -18,7 +18,7 @@ T* LookupLibcSymbol(const char* name) { UNREACHABLE_MSG("Failed to open libc!"); } #else - // For other operating environments, we assume the symbol is not overriden. + // For other operating environments, we assume the symbol is not overridden. const char* base = nullptr; Common::DynamicLibrary provider(base); #endif diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index e7fc8f438..31c00f0a3 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -129,8 +129,10 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect } metadata.Print(); - // Enable NCE only for 64-bit programs. - Settings::SetNceEnabled(metadata.Is64BitProgram()); + // Enable NCE only for programs with 39-bit address space. 
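The gate being introduced here, condensed into a standalone sketch: NCE is only viable when the guest program uses the 39-bit address-space layout that the host reservation is built around; anything else falls back to the dynarmic JIT. The enum below is a stand-in for the FileSys type, not its real definition.

#include <cstdint>

enum class AddressSpaceType : std::uint8_t { Is32Bit, Is36Bit, Is32BitNoMap, Is39Bit };

inline bool ShouldEnableNce(bool nce_selected, AddressSpaceType type) {
    return nce_selected && type == AddressSpaceType::Is39Bit;
}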
+ const bool is_39bit = + metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit; + Settings::SetNceEnabled(is_39bit); const std::array static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 49cf90317..76ff38041 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -199,6 +199,7 @@ static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, codeset.DataSegment().size += bss_size; program_image.resize(static_cast(program_image.size()) + bss_size); + size_t image_size = program_image.size(); #ifdef ARCHITECTURE_arm64 const auto& code = codeset.CodeSegment(); @@ -208,7 +209,6 @@ static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, // Create NCE patcher Core::NCE::Patcher patch{}; - size_t image_size = program_image.size(); if (Settings::IsNceEnabled()) { // Patch SVCs and MRS calls in the guest code From 6de2edcca1624982e99a72741d4fa289dc9d7551 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sun, 19 Nov 2023 11:21:53 +0200 Subject: [PATCH 12/28] Address some review comments --- .codespellrc | 2 +- src/common/host_memory.cpp | 3 +++ src/common/settings.h | 2 +- src/common/signal_chain.h | 4 ++-- src/core/arm/nce/arm_nce.cpp | 5 ++--- src/core/arm/nce/patch.cpp | 5 ++--- src/core/arm/nce/patch.h | 16 +++++----------- src/core/core.cpp | 1 + src/core/hle/kernel/k_page_table_base.cpp | 2 ++ src/core/hle/kernel/k_process.h | 4 ---- src/core/loader/nro.cpp | 6 +++--- src/core/loader/nso.cpp | 15 ++++++++------- 12 files changed, 30 insertions(+), 35 deletions(-) diff --git a/.codespellrc b/.codespellrc index 9ff49898b..d1f998449 100644 --- a/.codespellrc +++ b/.codespellrc @@ -3,4 +3,4 @@ [codespell] skip = ./.git,./build,./dist,./Doxyfile,./externals,./LICENSES,./src/android/app/src/main/res -ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink,nce +ignore-words-list = aci,allright,ba,canonicalizations,deques,froms,hda,inout,lod,masia,nam,nax,nce,nd,optin,pullrequests,pullrequest,te,transfered,unstall,uscaled,vas,zink diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 8a869e558..f14077750 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -409,11 +409,14 @@ static void* ChooseVirtualBase(size_t virtual_size) { return MAP_FAILED; } + #else + static void* ChooseVirtualBase(size_t virtual_size) { return mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); } + #endif class HostMemory::Impl { diff --git a/src/common/settings.h b/src/common/settings.h index 648e0be0d..fea639ee3 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -181,7 +181,7 @@ struct Values { // Cpu SwitchableSetting cpu_backend{ - linkage, CpuBackend::Nce, CpuBackend::Dynarmic, + linkage, CpuBackend::Dynarmic, CpuBackend::Dynarmic, #ifdef ARCHITECTURE_arm64 CpuBackend::Nce, #else diff --git a/src/common/signal_chain.h b/src/common/signal_chain.h index e3bfe6882..8d06a1bd1 100644 --- a/src/common/signal_chain.h +++ b/src/common/signal_chain.h @@ -10,8 +10,8 @@ namespace Common { // Android's ART overrides sigaction with its own wrapper. This is problematic for SIGSEGV -// in particular, because ARTs handler access TPIDR_EL0, so this extracts the libc version -// and calls it directly. 
+// in particular, because ART's handler accesses tpidr_el0, which conflicts with NCE. +// This extracts the libc symbol and calls it directly. int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact); } // namespace Common diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 511248a0d..fd82f3b0e 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -4,7 +4,6 @@ #include #include -#include "common/scope_exit.h" #include "common/signal_chain.h" #include "core/arm/nce/arm_nce.h" #include "core/arm/nce/patch.h" @@ -32,7 +31,7 @@ static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic); fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved); while (header->magic != FPSIMD_MAGIC) { - header = reinterpret_cast<_aarch64_ctx*>((char*)header + header->size); + header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast(header) + header->size); } return reinterpret_cast(header); } @@ -124,7 +123,7 @@ bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* ra // Forcibly mark the context as locked. We are still running. // We may race with SignalInterrupt here: - // - If we lose the race, then SignalInterrupt will send us a signal which are masking, + // - If we lose the race, then SignalInterrupt will send us a signal we are masking, // and it will do nothing when it is unmasked, as we have already left guest code. // - If we win the race, then SignalInterrupt will wait for us to unlock first. auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters(); diff --git a/src/core/arm/nce/patch.cpp b/src/core/arm/nce/patch.cpp index c79399c2b..bc4b73634 100644 --- a/src/core/arm/nce/patch.cpp +++ b/src/core/arm/nce/patch.cpp @@ -100,7 +100,7 @@ void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines) { - const size_t patch_size = SectionSize(); + const size_t patch_size = GetSectionSize(); const size_t image_size = program_image.size(); // Retrieve text segment data. @@ -180,7 +180,7 @@ void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, } } -size_t Patcher::SectionSize() const noexcept { +size_t Patcher::GetSectionSize() const noexcept { return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); } @@ -256,7 +256,6 @@ void Patcher::WriteSaveContext() { } void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { - LOG_ERROR(Core_ARM, "Patching SVC {:#x} at {:#x}", svc_id, module_dest - 4); // We are about to start saving state, so we need to lock the context. 
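Background for the trampoline writer above: the call sites it rewrites are AArch64 SVC instructions, which encode as 0xD4000001 | (imm16 << 5). A small, self-contained decoder makes the matching explicit; this helper is illustrative and not part of the patcher's API.

#include <cstdint>
#include <optional>

inline std::optional<std::uint16_t> DecodeSvcNumber(std::uint32_t inst) {
    // Fixed bits of the SVC encoding: the top 11 bits and the bottom 5 bits.
    if ((inst & 0xFFE0001Fu) == 0xD4000001u) {
        return static_cast<std::uint16_t>((inst >> 5) & 0xFFFF);
    }
    return std::nullopt; // not an SVC instruction
}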
this->LockContext(); diff --git a/src/core/arm/nce/patch.h b/src/core/arm/nce/patch.h index b727d4e48..dcce1bfc6 100644 --- a/src/core/arm/nce/patch.h +++ b/src/core/arm/nce/patch.h @@ -7,23 +7,17 @@ #include #include -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshorten-64-to-32" #include #include -#pragma clang diagnostic pop +#pragma GCC diagnostic pop #include "common/common_types.h" #include "core/hle/kernel/code_set.h" #include "core/hle/kernel/k_typed_address.h" #include "core/hle/kernel/physical_memory.h" -#include - -namespace Core { -struct GuestContext; -} - namespace Core::NCE { enum class PatchMode : u32 { @@ -45,9 +39,9 @@ public: const Kernel::CodeSet::Segment& code); void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines); - size_t SectionSize() const noexcept; + size_t GetSectionSize() const noexcept; - [[nodiscard]] PatchMode Mode() const noexcept { + [[nodiscard]] PatchMode GetPatchMode() const noexcept { return mode; } diff --git a/src/core/core.cpp b/src/core/core.cpp index 408479019..14d6c8c27 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -137,6 +137,7 @@ struct System::Impl { void Initialize(System& system) { device_memory = std::make_unique(); + is_multicore = Settings::values.use_multi_core.GetValue(); extended_memory_layout = Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb; diff --git a/src/core/hle/kernel/k_page_table_base.cpp b/src/core/hle/kernel/k_page_table_base.cpp index f2ffc39c1..f7f1e8a3b 100644 --- a/src/core/hle/kernel/k_page_table_base.cpp +++ b/src/core/hle/kernel/k_page_table_base.cpp @@ -96,9 +96,11 @@ constexpr Common::MemoryPermission ConvertToMemoryPermission(KMemoryPermission p if (True(perm & KMemoryPermission::UserWrite)) { perms |= Common::MemoryPermission::Write; } +#ifdef ARCHITECTURE_arm64 if (True(perm & KMemoryPermission::UserExecute)) { perms |= Common::MemoryPermission::Execute; } +#endif return perms; } diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 7b97d452b..e5f796ac7 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -472,10 +472,6 @@ public: return m_post_handlers; } - KernelCore& GetKernel() noexcept { - return m_kernel; - } - public: // Attempts to insert a watchpoint into a free slot. Returns false if none are available. bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 76ff38041..49d4d7e43 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -204,7 +204,7 @@ static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, #ifdef ARCHITECTURE_arm64 const auto& code = codeset.CodeSegment(); - // NROs are always 64-bit programs. + // NROs always have a 39-bit address space. Settings::SetNceEnabled(true); // Create NCE patcher @@ -215,12 +215,12 @@ static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, patch.PatchText(program_image, code); // We only support PostData patching for NROs. - ASSERT(patch.Mode() == Core::NCE::PatchMode::PostData); + ASSERT(patch.GetPatchMode() == Core::NCE::PatchMode::PostData); // Update patch section. 
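The GetSectionSize() value used for the patch segment here is the emitted trampoline code rounded up to a whole page (via Common::AlignUp, shown earlier in this series). The rounding idiom as a standalone sketch, assuming a power-of-two alignment:

#include <cstddef>

constexpr std::size_t AlignUpPow2(std::size_t value, std::size_t align) {
    return (value + align - 1) & ~(align - 1);
}

static_assert(AlignUpPow2(5, 4096) == 4096);
static_assert(AlignUpPow2(4096, 4096) == 4096);
static_assert(AlignUpPow2(4097, 4096) == 8192);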
auto& patch_segment = codeset.PatchSegment(); patch_segment.addr = image_size; - patch_segment.size = static_cast(patch.SectionSize()); + patch_segment.size = static_cast(patch.GetSectionSize()); // Add patch section size to the module size. image_size += patch_segment.size; diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 34b10ef2e..1ad2e917c 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -94,8 +94,8 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: // Allocate some space at the beginning if we are patching in PreText mode. const size_t module_start = [&]() -> size_t { #ifdef ARCHITECTURE_arm64 - if (patch && patch->Mode() == Core::NCE::PatchMode::PreText) { - return patch->SectionSize(); + if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PreText) { + return patch->GetSectionSize(); } #endif return 0; @@ -158,24 +158,25 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: #ifdef ARCHITECTURE_arm64 // If we are computing the process code layout and using nce backend, patch. const auto& code = codeset.CodeSegment(); - if (patch && patch->Mode() == Core::NCE::PatchMode::None) { + if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::None) { // Patch SVCs and MRS calls in the guest code patch->PatchText(program_image, code); // Add patch section size to the module size. - image_size += patch->SectionSize(); + image_size += patch->GetSectionSize(); } else if (patch) { // Relocate code patch and copy to the program_image. patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers()); // Update patch section. auto& patch_segment = codeset.PatchSegment(); - patch_segment.addr = patch->Mode() == Core::NCE::PatchMode::PreText ? 0 : image_size; - patch_segment.size = static_cast(patch->SectionSize()); + patch_segment.addr = + patch->GetPatchMode() == Core::NCE::PatchMode::PreText ? 0 : image_size; + patch_segment.size = static_cast(patch->GetSectionSize()); // Add patch section size to the module size. In PreText mode image_size // already contains the patch segment as part of module_start. - if (patch->Mode() == Core::NCE::PatchMode::PostData) { + if (patch->GetPatchMode() == Core::NCE::PatchMode::PostData) { image_size += patch_segment.size; } } From 263b7a44f9ec1ac78fdb1c14a84c652c1bd63905 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 19 Nov 2023 13:57:23 -0500 Subject: [PATCH 13/28] arm_nce: skip dc cvac on possibly write-protected areas --- src/core/arm/nce/arm_nce.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index fd82f3b0e..bb1f6d2e6 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -377,9 +377,7 @@ void ARM_NCE::ClearInstructionCache() { } void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) { - // Clean cache. 
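For reference, the per-range path being dropped in this hunk relies on the GCC/Clang __builtin___clear_cache builtin to resynchronize the instruction cache after code has been written. A minimal standalone sketch of that call:

#include <cstddef>

inline void FlushInstructionCacheRange(void* start, std::size_t size) {
    char* begin = static_cast<char*>(start);
    // GCC/Clang builtin; effectively a no-op where I- and D-caches are coherent (e.g. x86).
    __builtin___clear_cache(begin, begin + size);
}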
- auto* ptr = reinterpret_cast(addr); - __builtin___clear_cache(ptr, ptr + size); + this->ClearInstructionCache(); } void ARM_NCE::ClearExclusiveState() { From c37b5f431fcbffac173fb70ef0a0d7fb6cde7f2d Mon Sep 17 00:00:00 2001 From: GPUCode Date: Mon, 20 Nov 2023 15:33:44 +0200 Subject: [PATCH 14/28] common: Enforce fastmem for nce usage --- src/common/settings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 90e7475d7..16c2feeab 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -160,7 +160,7 @@ static bool is_nce_enabled = false; void SetNceEnabled(bool is_39bit) { const bool is_nce_selected = values.cpu_backend.GetValue() == CpuBackend::Nce; - is_nce_enabled = is_nce_selected && is_39bit; + is_nce_enabled = IsFastmemEnabled() && is_nce_selected && is_39bit; if (is_nce_selected && !is_nce_enabled) { LOG_WARNING( Common, From 3ec3cca4d8d4e1733cbc337b0499ad3bdcdf52b0 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Mon, 20 Nov 2023 15:52:18 +0200 Subject: [PATCH 15/28] core: Define HAS_NCE macro --- src/core/CMakeLists.txt | 3 ++- src/core/device_memory.cpp | 2 +- src/core/hle/kernel/code_set.h | 5 +++++ src/core/hle/kernel/k_address_space_info.cpp | 2 +- src/core/hle/kernel/k_page_table_base.cpp | 2 +- src/core/hle/kernel/k_process.h | 6 +++++- src/core/hle/kernel/physical_core.cpp | 4 ++-- src/core/loader/deconstructed_rom_directory.cpp | 6 +++--- src/core/loader/nro.cpp | 6 +++--- src/core/loader/nso.cpp | 6 +++--- 10 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index c5805ec61..e0bfb5b95 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -926,7 +926,8 @@ if (ENABLE_WEB_SERVICE) target_link_libraries(core PRIVATE web_service) endif() -if (ARCHITECTURE_arm64) +if (ARCHITECTURE_arm64 AND (ANDROID OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")) + target_compile_definitions(core PRIVATE -DHAS_NCE) enable_language(C ASM) set(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp") diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp index 3a9151646..1aea56a99 100644 --- a/src/core/device_memory.cpp +++ b/src/core/device_memory.cpp @@ -6,7 +6,7 @@ namespace Core { -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE constexpr size_t VirtualReserveSize = 1ULL << 38; #else constexpr size_t VirtualReserveSize = 1ULL << 39; diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h index d53da82f4..4d2d0098e 100644 --- a/src/core/hle/kernel/code_set.h +++ b/src/core/hle/kernel/code_set.h @@ -75,6 +75,7 @@ struct CodeSet final { return segments[2]; } +#ifdef HAS_NCE Segment& PatchSegment() { return patch_segment; } @@ -82,13 +83,17 @@ struct CodeSet final { const Segment& PatchSegment() const { return patch_segment; } +#endif /// The overall data that backs this code set. Kernel::PhysicalMemory memory; /// The segments that comprise this code set. std::array segments; + +#ifdef HAS_NCE Segment patch_segment; +#endif /// The entry point address for this code set. 
KProcessAddress entrypoint = 0; diff --git a/src/core/hle/kernel/k_address_space_info.cpp b/src/core/hle/kernel/k_address_space_info.cpp index 3235a7a37..23258071e 100644 --- a/src/core/hle/kernel/k_address_space_info.cpp +++ b/src/core/hle/kernel/k_address_space_info.cpp @@ -25,7 +25,7 @@ constexpr std::array AddressSpaceInfos{{ { .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, }, { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, }, { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, }, -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE // With NCE, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region. { .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, #else diff --git a/src/core/hle/kernel/k_page_table_base.cpp b/src/core/hle/kernel/k_page_table_base.cpp index f7f1e8a3b..2b5e77ccf 100644 --- a/src/core/hle/kernel/k_page_table_base.cpp +++ b/src/core/hle/kernel/k_page_table_base.cpp @@ -96,7 +96,7 @@ constexpr Common::MemoryPermission ConvertToMemoryPermission(KMemoryPermission p if (True(perm & KMemoryPermission::UserWrite)) { perms |= Common::MemoryPermission::Write; } -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE if (True(perm & KMemoryPermission::UserExecute)) { perms |= Common::MemoryPermission::Execute; } diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index e5f796ac7..d8cd0fdde 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -112,7 +112,6 @@ private: std::array m_pinned_threads{}; std::array m_watchpoints{}; std::map m_debug_page_refcounts{}; - std::unordered_map m_post_handlers{}; std::atomic m_cpu_time{}; std::atomic m_num_process_switches{}; std::atomic m_num_thread_switches{}; @@ -121,6 +120,9 @@ private: std::atomic m_num_ipc_messages{}; std::atomic m_num_ipc_replies{}; std::atomic m_num_ipc_receives{}; +#ifdef HAS_NCE + std::unordered_map m_post_handlers{}; +#endif private: Result StartTermination(); @@ -468,9 +470,11 @@ public: static void Switch(KProcess* cur_process, KProcess* next_process); +#ifdef HAS_NCE std::unordered_map& GetPostHandlers() noexcept { return m_post_handlers; } +#endif public: // Attempts to insert a watchpoint into a free slot. Returns false if none are available. 
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index 15434212e..073039825 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -4,7 +4,7 @@ #include "common/settings.h" #include "core/arm/dynarmic/arm_dynarmic_32.h" #include "core/arm/dynarmic/arm_dynarmic_64.h" -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE #include "core/arm/nce/arm_nce.h" #endif #include "core/core.h" @@ -33,7 +33,7 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu PhysicalCore::~PhysicalCore() = default; void PhysicalCore::Initialize(bool is_64_bit) { -#if defined(ARCHITECTURE_arm64) +#if defined(HAS_NCE) if (Settings::IsNceEnabled()) { m_arm_interface = std::make_unique(m_system, m_system.Kernel().IsMulticore(), m_core_index); diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 31c00f0a3..a1344f1c4 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -15,7 +15,7 @@ #include "core/loader/deconstructed_rom_directory.h" #include "core/loader/nso.h" -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE #include "core/arm/nce/patch.h" #endif @@ -141,12 +141,12 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect std::size_t code_size{}; // Define an nce patch context for each potential module. -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE std::array module_patchers; #endif const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* { -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE if (Settings::IsNceEnabled()) { return &module_patchers[i]; } diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 49d4d7e43..2e7368349 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -22,7 +22,7 @@ #include "core/loader/nso.h" #include "core/memory.h" -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE #include "core/arm/nce/patch.h" #endif @@ -201,7 +201,7 @@ static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, program_image.resize(static_cast(program_image.size()) + bss_size); size_t image_size = program_image.size(); -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE const auto& code = codeset.CodeSegment(); // NROs always have a 39-bit address space. @@ -247,7 +247,7 @@ static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process, // Relocate code patch and copy to the program_image if running under NCE. // This needs to be after LoadFromMetadata so we can use the process entry point. -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE if (Settings::IsNceEnabled()) { patch.RelocateAndCopy(process.GetEntryPoint(), code, program_image, &process.GetPostHandlers()); diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 1ad2e917c..878c1c6cb 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -20,7 +20,7 @@ #include "core/loader/nso.h" #include "core/memory.h" -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE #include "core/arm/nce/patch.h" #endif @@ -93,7 +93,7 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: // Allocate some space at the beginning if we are patching in PreText mode. 
const size_t module_start = [&]() -> size_t { -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PreText) { return patch->GetSectionSize(); } @@ -155,7 +155,7 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); } -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE // If we are computing the process code layout and using nce backend, patch. const auto& code = codeset.CodeSegment(); if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::None) { From f542a3bb7aaca4a7560e9fcaa2e9b4305f9856c0 Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 20 Nov 2023 11:36:35 -0500 Subject: [PATCH 16/28] patch: check offsets from first code word --- src/core/arm/nce/patch.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/arm/nce/patch.cpp b/src/core/arm/nce/patch.cpp index bc4b73634..30c3c6cdd 100644 --- a/src/core/arm/nce/patch.cpp +++ b/src/core/arm/nce/patch.cpp @@ -20,6 +20,7 @@ using namespace oaknut::util; using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; constexpr size_t MaxRelativeBranch = 128_MiB; +constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32); Patcher::Patcher() : c(m_patch_instructions) {} @@ -42,7 +43,7 @@ void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, std::span{reinterpret_cast(text.data()), text.size() / sizeof(u32)}; // Loop through instructions, patching as needed. - for (u32 i = 0; i < static_cast(text_words.size()); i++) { + for (u32 i = ModuleCodeIndex; i < static_cast(text_words.size()); i++) { const u32 inst = text_words[i]; const auto AddRelocations = [&] { @@ -161,8 +162,8 @@ void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, } // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. - // Convert to ordered to preserve this assumption - for (u32 i = 0; i < static_cast(text_words.size()); i++) { + // Convert to ordered to preserve this assumption. 
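What "ordered" means in this pass, restated with portable C++ atomics as an analogy: the rewritten exclusive pairs behave like acquire/release exclusives (LDAXR/STLXR) rather than relaxed ones (LDXR/STXR). Illustrative only; the real transformation rewrites the instruction words themselves.

#include <atomic>
#include <cstdint>

inline bool OrderedCompareExchange(std::atomic<std::uint32_t>& word, std::uint32_t expected,
                                   std::uint32_t desired) {
    // acq_rel on success mirrors the ordered exclusive pair; relaxed would mirror LDXR/STXR.
    return word.compare_exchange_strong(expected, desired, std::memory_order_acq_rel,
                                        std::memory_order_acquire);
}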
+ for (u32 i = ModuleCodeIndex; i < static_cast(text_words.size()); i++) { const u32 inst = text_words[i]; if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { text_words[i] = exclusive.AsOrdered(); From 1cde01c8c8f4c7272f7f2db3846ff7403caa49e1 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Mon, 20 Nov 2023 18:56:54 +0200 Subject: [PATCH 17/28] arm: Print backtrace on data abort --- src/core/arm/arm_interface.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index 558fba5bd..d231bf89c 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -201,6 +201,8 @@ void ARM_Interface::Run() { if (True(hr & HaltReason::DataAbort)) { if (system.DebuggerEnabled()) { system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint()); + } else { + LogBacktrace(); } current_thread->RequestSuspend(SuspendType::Debug); break; From 20de0ddf1f3afce420beebdf69ec675f0dee2b9f Mon Sep 17 00:00:00 2001 From: Liam Date: Mon, 20 Nov 2023 12:12:24 -0500 Subject: [PATCH 18/28] android: show current backend in fps overlay --- .../app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt | 5 +++++ .../java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt | 4 +++- src/android/app/src/main/jni/native.cpp | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt index f2ba2504c..e0f01127c 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt @@ -299,6 +299,11 @@ object NativeLibrary { */ external fun getPerfStats(): DoubleArray + /** + * Returns the current CPU backend. + */ + external fun getCpuBackend(): String + /** * Notifies the core emulation that the orientation has changed. 
*/ diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt index c32fa0d7e..734c1d5ca 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt @@ -414,8 +414,10 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { perfStatsUpdater = { if (emulationViewModel.emulationStarted.value) { val perfStats = NativeLibrary.getPerfStats() + val cpuBackend = NativeLibrary.getCpuBackend() if (_binding != null) { - binding.showFpsText.text = String.format("FPS: %.1f", perfStats[FPS]) + binding.showFpsText.text = + String.format("FPS: %.1f\n%s", perfStats[FPS], cpuBackend) } perfStatsUpdateHandler.postDelayed(perfStatsUpdater!!, 800) } diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index 617288ae4..ed5ce6f8a 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -694,6 +694,14 @@ jdoubleArray Java_org_yuzu_yuzu_1emu_NativeLibrary_getPerfStats(JNIEnv* env, jcl return j_stats; } +jstring Java_org_yuzu_yuzu_1emu_NativeLibrary_getCpuBackend(JNIEnv* env, jclass clazz) { + if (Settings::IsNceEnabled()) { + return ToJString(env, "NCE"); + } + + return ToJString(env, "JIT"); +} + void Java_org_yuzu_yuzu_1emu_utils_DirectoryInitialization_setSysDirectory(JNIEnv* env, jclass clazz, jstring j_path) {} From cf534f514989e0d2e52df208818a39aa5eead7c0 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 21 Nov 2023 23:23:48 -0500 Subject: [PATCH 19/28] arm_nce: skip data aborts for crash handling parity --- src/core/arm/nce/arm_nce.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index bb1f6d2e6..6eb6299cc 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -116,10 +116,18 @@ bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* ra return true; } - // We can't handle the access, so trigger an exception. + // We can't handle the access, so determine why we crashed. const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast(info->si_addr); - guest_ctx->esr_el1.fetch_or( - static_cast(is_prefetch_abort ? HaltReason::PrefetchAbort : HaltReason::DataAbort)); + + // For data aborts, skip the instruction and return to guest code. + // This will allow games to continue in many scenarios where they would otherwise crash. + if (!is_prefetch_abort) { + host_ctx.pc += 4; + return true; + } + + // This is a prefetch abort. + guest_ctx->esr_el1.fetch_or(static_cast(HaltReason::PrefetchAbort)); // Forcibly mark the context as locked. We are still running. 
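A rough illustration of the data-abort skip added above: AArch64 instructions are a fixed 4 bytes, so a signal handler can advance the saved program counter and let the thread resume at the next instruction. Linux-specific and heavily simplified; the real handler also records the fault and decides whether skipping is acceptable.

#include <ucontext.h>

inline void SkipFaultingInstruction(void* raw_context) {
    auto* uctx = static_cast<ucontext_t*>(raw_context);
#if defined(__aarch64__) && defined(__linux__)
    uctx->uc_mcontext.pc += 4; // resume after the faulting instruction
#else
    (void)uctx; // other architectures would need their own register names
#endif
}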
// We may race with SignalInterrupt here: From d040b27a35fdf520b17fe6b1cd792058172c8ccc Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 21 Nov 2023 23:29:40 -0500 Subject: [PATCH 20/28] loader: apply nso patch to offset program image --- src/core/loader/nso.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 878c1c6cb..e310c8f0f 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -145,14 +145,16 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: // Apply patches if necessary const auto name = nso_file.GetName(); if (pm && (pm->HasNSOPatch(nso_header.build_id, name) || Settings::values.dump_nso)) { - std::vector pi_header(sizeof(NSOHeader) + program_image.size()); + std::span patchable_section(program_image.data() + module_start, + program_image.size() - module_start); + std::vector pi_header(sizeof(NSOHeader) + patchable_section.size()); std::memcpy(pi_header.data(), &nso_header, sizeof(NSOHeader)); - std::memcpy(pi_header.data() + sizeof(NSOHeader), program_image.data(), - program_image.size()); + std::memcpy(pi_header.data() + sizeof(NSOHeader), patchable_section.data(), + patchable_section.size()); pi_header = pm->PatchNSO(pi_header, name); - std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); + std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), patchable_section.data()); } #ifdef HAS_NCE From 9ff8d0f3e652fee83d9f4374d9f6e0aa1759dd88 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Thu, 23 Nov 2023 11:26:06 +0200 Subject: [PATCH 21/28] Address more review comments --- .../settings/model/view/SettingsItem.kt | 4 +- .../app/src/main/res/values/arrays.xml | 12 ++++- .../app/src/main/res/values/strings.xml | 2 +- src/core/arm/nce/patch.cpp | 12 +++-- src/core/arm/nce/patch.h | 1 + src/core/hle/kernel/k_page_table_base.cpp | 11 +--- src/core/memory.cpp | 50 +++++++++++++++++++ src/core/memory.h | 11 ++++ 8 files changed, 84 insertions(+), 19 deletions(-) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index 1f090424b..e198b18a0 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -82,8 +82,8 @@ abstract class SettingsItem( IntSetting.CPU_BACKEND, R.string.cpu_backend, 0, - R.array.cpuBackendNames, - R.array.cpuBackendValues + R.array.cpuBackendArm64Names, + R.array.cpuBackendArm64Values ) ) put( diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index 2756e5cc9..ab435dce9 100644 --- a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -175,16 +175,24 @@ 2 - + @string/cpu_backend_dynarmic @string/cpu_backend_nce - + 0 1 + + @string/cpu_backend_dynarmic + + + + 0 + + @string/auto @string/cpu_accuracy_accurate diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index f07121f6a..95b90fd6d 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -185,7 +185,7 @@ Limits emulation speed to a specified percentage of normal speed. Limit speed percent Specifies the percentage to limit emulation speed. 
100% is the normal speed. Values higher or lower will increase or decrease the speed limit. - CPU Backend + CPU backend CPU accuracy %1$s%2$s diff --git a/src/core/arm/nce/patch.cpp b/src/core/arm/nce/patch.cpp index 30c3c6cdd..a08859d0b 100644 --- a/src/core/arm/nce/patch.cpp +++ b/src/core/arm/nce/patch.cpp @@ -90,6 +90,10 @@ void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast(msr.GetRt())}); continue; } + + if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { + m_exclusives.push_back(i); + } } // Determine patching mode for the final relocation step @@ -163,11 +167,9 @@ void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. // Convert to ordered to preserve this assumption. - for (u32 i = ModuleCodeIndex; i < static_cast(text_words.size()); i++) { - const u32 inst = text_words[i]; - if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { - text_words[i] = exclusive.AsOrdered(); - } + for (const ModuleTextAddress i : m_exclusives) { + auto exclusive = Exclusive{text_words[i]}; + text_words[i] = exclusive.AsOrdered(); } // Copy to program image diff --git a/src/core/arm/nce/patch.h b/src/core/arm/nce/patch.h index dcce1bfc6..112f839a4 100644 --- a/src/core/arm/nce/patch.h +++ b/src/core/arm/nce/patch.h @@ -93,6 +93,7 @@ private: std::vector m_branch_to_patch_relocations{}; std::vector m_branch_to_module_relocations{}; std::vector m_write_module_pc_relocations{}; + std::vector m_exclusives{}; oaknut::Label m_save_context{}; oaknut::Label m_load_context{}; PatchMode mode{PatchMode::None}; diff --git a/src/core/hle/kernel/k_page_table_base.cpp b/src/core/hle/kernel/k_page_table_base.cpp index 2b5e77ccf..6691586ed 100644 --- a/src/core/hle/kernel/k_page_table_base.cpp +++ b/src/core/hle/kernel/k_page_table_base.cpp @@ -5678,15 +5678,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a case OperationType::ChangePermissions: case OperationType::ChangePermissionsAndRefresh: case OperationType::ChangePermissionsAndRefreshAndFlush: { - const bool read = True(properties.perm & Kernel::KMemoryPermission::UserRead); - const bool write = True(properties.perm & Kernel::KMemoryPermission::UserWrite); - // todo: this doesn't really belong here and should go into m_memory to handle rasterizer - // access todo: ignore exec on non-direct-mapped case - const bool exec = True(properties.perm & Kernel::KMemoryPermission::UserExecute); - if (Settings::IsFastmemEnabled()) { - m_system.DeviceMemory().buffer.Protect(GetInteger(virt_addr), num_pages * PageSize, - read, write, exec); - } + m_memory->ProtectRegion(*m_impl, virt_addr, num_pages * PageSize, + ConvertToMemoryPermission(properties.perm)); R_SUCCEED(); } default: diff --git a/src/core/memory.cpp b/src/core/memory.cpp index e5ca78ef4..5b376b202 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -78,6 +78,51 @@ struct Memory::Impl { } } + void ProtectRegion(Common::PageTable& page_table, VAddr vaddr, u64 size, + Common::MemoryPermission perms) { + ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size); + ASSERT_MSG((vaddr & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", vaddr); + + if (!Settings::IsFastmemEnabled()) { + return; + } + + const bool is_r = True(perms & Common::MemoryPermission::Read); + const bool is_w = True(perms & Common::MemoryPermission::Write); + const bool is_x = + True(perms & 
Common::MemoryPermission::Execute) && Settings::IsNceEnabled(); + + if (!current_page_table) { + system.DeviceMemory().buffer.Protect(vaddr, size, is_r, is_w, is_x); + return; + } + + u64 protect_bytes{}; + u64 protect_begin{}; + for (u64 addr = vaddr; addr < vaddr + size; addr += YUZU_PAGESIZE) { + const Common::PageType page_type{ + current_page_table->pointers[addr >> YUZU_PAGEBITS].Type()}; + switch (page_type) { + case Common::PageType::RasterizerCachedMemory: + if (protect_bytes > 0) { + system.DeviceMemory().buffer.Protect(protect_begin, protect_bytes, is_r, is_w, + is_x); + protect_bytes = 0; + } + break; + default: + if (protect_bytes == 0) { + protect_begin = addr; + } + protect_bytes += YUZU_PAGESIZE; + } + } + + if (protect_bytes > 0) { + system.DeviceMemory().buffer.Protect(protect_begin, protect_bytes, is_r, is_w, is_x); + } + } + [[nodiscard]] u8* GetPointerFromRasterizerCachedMemory(u64 vaddr) const { const Common::PhysicalAddress paddr{ current_page_table->backing_addr[vaddr >> YUZU_PAGEBITS]}; @@ -839,6 +884,11 @@ void Memory::UnmapRegion(Common::PageTable& page_table, Common::ProcessAddress b impl->UnmapRegion(page_table, base, size); } +void Memory::ProtectRegion(Common::PageTable& page_table, Common::ProcessAddress vaddr, u64 size, + Common::MemoryPermission perms) { + impl->ProtectRegion(page_table, GetInteger(vaddr), size, perms); +} + bool Memory::IsValidVirtualAddress(const Common::ProcessAddress vaddr) const { const Kernel::KProcess& process = *system.ApplicationProcess(); const auto& page_table = process.GetPageTable().GetImpl(); diff --git a/src/core/memory.h b/src/core/memory.h index e5fbc0025..ed8ebb5eb 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -97,6 +97,17 @@ public: */ void UnmapRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size); + /** + * Protects a region of the emulated process address space with the new permissions. + * + * @param page_table The page table of the emulated process. + * @param base The start address to re-protect. Must be page-aligned. + * @param size The amount of bytes to protect. Must be page-aligned. + * @param perms The permissions the address range is mapped. + */ + void ProtectRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size, + Common::MemoryPermission perms); + /** * Checks whether or not the supplied address is a valid virtual * address for the current process. From 5a9ffa81a6452f9f0af6f5b288f302cbaf0475b2 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Thu, 23 Nov 2023 16:55:56 +0200 Subject: [PATCH 22/28] host_memory: Simplify randomness generation --- src/common/host_memory.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index f14077750..4cbc22b85 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -363,21 +363,12 @@ private: #ifdef ARCHITECTURE_arm64 -static uint64_t GetRandomU64() { - uint64_t ret; - ASSERT(getrandom(&ret, sizeof(ret), 0) == 0); - return ret; -} - static void* ChooseVirtualBase(size_t virtual_size) { constexpr uintptr_t Map39BitSize = (1ULL << 39); constexpr uintptr_t Map36BitSize = (1ULL << 36); - // Seed the MT with some initial strong randomness. - // - // This is not a cryptographic application, we just want something more - // random than the current time. - std::mt19937_64 rng(GetRandomU64()); + // This is not a cryptographic application, we just want something random. 
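For context, the randomness here feeds an address-selection step like the sketch below: pick a base inside a candidate range, aligned to the large-page (L2 block) granularity mentioned in the surrounding comments. The helper and its parameters are hypothetical; the real bounds and alignment come from the code that follows.

#include <cstdint>
#include <random>

// Assumes hi > lo, alignment is a power of two, and lo is already aligned.
inline std::uintptr_t PickRandomBase(std::mt19937_64& rng, std::uintptr_t lo, std::uintptr_t hi,
                                     std::uintptr_t alignment) {
    const std::uintptr_t slots = (hi - lo) / alignment;
    return lo + (rng() % slots) * alignment;
}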
+ std::mt19937_64 rng; // We want to ensure we are allocating at an address aligned to the L2 block size. // For Qualcomm devices, we must also allocate memory above 36 bits. From 6432508740383b9894af03086bda4d1b0e254e3e Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sat, 25 Nov 2023 00:13:48 +0200 Subject: [PATCH 23/28] oaknut: Address warnings --- src/core/arm/nce/patch.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core/arm/nce/patch.h b/src/core/arm/nce/patch.h index 112f839a4..c6d1608c1 100644 --- a/src/core/arm/nce/patch.h +++ b/src/core/arm/nce/patch.h @@ -6,12 +6,8 @@ #include #include #include - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshorten-64-to-32" #include #include -#pragma GCC diagnostic pop #include "common/common_types.h" #include "core/hle/kernel/code_set.h" From 7482e03c770332dca12a08467a64192c8179a613 Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 26 Nov 2023 19:34:00 -0500 Subject: [PATCH 24/28] loader: fix gcc compile --- src/core/loader/nso.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index e310c8f0f..ade8f85f8 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -165,7 +165,7 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: patch->PatchText(program_image, code); // Add patch section size to the module size. - image_size += patch->GetSectionSize(); + image_size += static_cast(patch->GetSectionSize()); } else if (patch) { // Relocate code patch and copy to the program_image. patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers()); From 15f35b8657ef863ea3df0d141521b72c61f0069f Mon Sep 17 00:00:00 2001 From: Liam Date: Sun, 26 Nov 2023 19:50:10 -0500 Subject: [PATCH 25/28] general: fix mac compile --- src/common/host_memory.cpp | 2 ++ src/core/hle/kernel/k_process.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 4cbc22b85..413dd3e98 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -621,6 +621,8 @@ public: void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {} + void EnableDirectMappedAddress() {} + u8* backing_base{nullptr}; u8* virtual_base{nullptr}; }; diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index d2e9f2a2e..6c29eb72c 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -1215,7 +1215,7 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) { ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE if (Settings::IsNceEnabled()) { auto& buffer = m_kernel.System().DeviceMemory().buffer; const auto& code = code_set.CodeSegment(); From a76a8fb5fecb4731a9a9231f3e06d5743c34e090 Mon Sep 17 00:00:00 2001 From: amazingfate Date: Sun, 26 Nov 2023 20:25:18 -0500 Subject: [PATCH 26/28] qt: add cpu_backend configuration --- CMakeLists.txt | 5 ++++ src/common/CMakeLists.txt | 2 +- src/common/settings.h | 2 +- src/common/wall_clock.cpp | 4 +-- src/core/CMakeLists.txt | 3 +- src/yuzu/configuration/configure_cpu.cpp | 12 ++++++++ src/yuzu/configuration/configure_cpu.h | 1 + src/yuzu/configuration/configure_cpu.ui | 30 +++++++++++++++++++ src/yuzu/configuration/shared_translation.cpp | 6 ++++ 9 files changed, 59 insertions(+), 6 
deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5cac8fe9..325f39e1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,6 +260,11 @@ if (UNIX) add_definitions(-DYUZU_UNIX=1) endif() +if (ARCHITECTURE_arm64 AND (ANDROID OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")) + set(HAS_NCE 1) + add_definitions(-DHAS_NCE=1) +endif() + # Configure C++ standard # =========================== diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 98fd5f1e4..d38d5c6d1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -199,7 +199,7 @@ if(ARCHITECTURE_x86_64) target_link_libraries(common PRIVATE xbyak::xbyak) endif() -if (ARCHITECTURE_arm64 AND (ANDROID OR LINUX)) +if (HAS_NCE) target_sources(common PRIVATE arm64/native_clock.cpp diff --git a/src/common/settings.h b/src/common/settings.h index fea639ee3..508615011 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -182,7 +182,7 @@ struct Values { // Cpu SwitchableSetting cpu_backend{ linkage, CpuBackend::Dynarmic, CpuBackend::Dynarmic, -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE CpuBackend::Nce, #else CpuBackend::Dynarmic, diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index caca9a123..012fdc1e0 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -10,7 +10,7 @@ #include "common/x64/rdtsc.h" #endif -#if defined(ARCHITECTURE_arm64) && defined(__linux__) +#ifdef HAS_NCE #include "common/arm64/native_clock.h" #endif @@ -68,7 +68,7 @@ std::unique_ptr CreateOptimalClock() { // - Is not more precise than 1 GHz (1ns resolution) return std::make_unique(); } -#elif defined(ARCHITECTURE_arm64) && defined(__linux__) +#elif defined(HAS_NCE) return std::make_unique(); #else return std::make_unique(); diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e0bfb5b95..c3ab5ecf9 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -926,8 +926,7 @@ if (ENABLE_WEB_SERVICE) target_link_libraries(core PRIVATE web_service) endif() -if (ARCHITECTURE_arm64 AND (ANDROID OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")) - target_compile_definitions(core PRIVATE -DHAS_NCE) +if (HAS_NCE) enable_language(C ASM) set(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp") diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index a51359903..7e16cf17d 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -27,6 +27,13 @@ ConfigureCpu::ConfigureCpu(const Core::System& system_, connect(accuracy_combobox, qOverload(&QComboBox::currentIndexChanged), this, &ConfigureCpu::UpdateGroup); + + connect(backend_combobox, qOverload(&QComboBox::currentIndexChanged), this, + &ConfigureCpu::UpdateGroup); + +#ifdef HAS_NCE + ui->backend_group->setVisible(true); +#endif } ConfigureCpu::~ConfigureCpu() = default; @@ -34,6 +41,7 @@ ConfigureCpu::~ConfigureCpu() = default; void ConfigureCpu::SetConfiguration() {} void ConfigureCpu::Setup(const ConfigurationShared::Builder& builder) { auto* accuracy_layout = ui->widget_accuracy->layout(); + auto* backend_layout = ui->widget_backend->layout(); auto* unsafe_layout = ui->unsafe_widget->layout(); std::map unsafe_hold{}; @@ -62,6 +70,9 @@ void ConfigureCpu::Setup(const ConfigurationShared::Builder& builder) { // Keep track of cpu_accuracy combobox to display/hide the unsafe settings accuracy_layout->addWidget(widget); accuracy_combobox = widget->combobox; + } else if (setting->Id() == Settings::values.cpu_backend.Id()) { + 
backend_layout->addWidget(widget); + backend_combobox = widget->combobox; } else { // Presently, all other settings here are unsafe checkboxes unsafe_hold.insert({setting->Id(), widget}); @@ -73,6 +84,7 @@ void ConfigureCpu::Setup(const ConfigurationShared::Builder& builder) { } UpdateGroup(accuracy_combobox->currentIndex()); + UpdateGroup(backend_combobox->currentIndex()); } void ConfigureCpu::UpdateGroup(int index) { diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index 61a6de7aa..a102b4c1f 100644 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h @@ -49,4 +49,5 @@ private: std::vector> apply_funcs{}; QComboBox* accuracy_combobox; + QComboBox* backend_combobox; }; diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index f734e842e..13fd43605 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -59,6 +59,36 @@ + + + + CPU Backend + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + false + + + diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp index a7b5def32..daf0e1663 100644 --- a/src/yuzu/configuration/shared_translation.cpp +++ b/src/yuzu/configuration/shared_translation.cpp @@ -44,6 +44,7 @@ std::unique_ptr InitializeTranslations(QWidget* parent) { // Cpu INSERT(Settings, cpu_accuracy, tr("Accuracy:"), QStringLiteral()); + INSERT(Settings, cpu_backend, tr("Backend:"), QStringLiteral()); // Cpu Debug @@ -240,6 +241,11 @@ std::unique_ptr ComboboxEnumeration(QWidget* parent) { PAIR(CpuAccuracy, Unsafe, tr("Unsafe")), PAIR(CpuAccuracy, Paranoid, tr("Paranoid (disables most optimizations)")), }}); + translations->insert({Settings::EnumMetadata::Index(), + { + PAIR(CpuBackend, Dynarmic, tr("Dynarmic")), + PAIR(CpuBackend, Nce, tr("NCE")), + }}); translations->insert({Settings::EnumMetadata::Index(), { PAIR(FullscreenMode, Borderless, tr("Borderless Windowed")), From 340548aba798ca2d0accf3bc3f0f787d15484946 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Wed, 29 Nov 2023 01:33:47 +0200 Subject: [PATCH 27/28] cmake: Move HAS_NCE to root cmake * So we can use it in common --- src/common/host_memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 413dd3e98..3a9ea6eb4 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -541,7 +541,7 @@ public: if (write) { flags |= PROT_WRITE; } -#ifdef ARCHITECTURE_arm64 +#ifdef HAS_NCE if (execute) { flags |= PROT_EXEC; } From 4a3abba16d9821ed163aab427d4c7bc9ef7acb32 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Wed, 29 Nov 2023 23:49:16 +0200 Subject: [PATCH 28/28] core: Rename patcher file --- src/core/CMakeLists.txt | 4 ++-- src/core/arm/nce/arm_nce.cpp | 2 +- src/core/arm/nce/{patch.cpp => patcher.cpp} | 2 +- src/core/arm/nce/{patch.h => patcher.h} | 0 src/core/loader/deconstructed_rom_directory.cpp | 2 +- src/core/loader/nro.cpp | 2 +- src/core/loader/nso.cpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) rename src/core/arm/nce/{patch.cpp => patcher.cpp} (99%) rename src/core/arm/nce/{patch.h => patcher.h} (100%) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index c3ab5ecf9..85583941c 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -935,8 +935,8 @@ if (HAS_NCE) arm/nce/arm_nce.h arm/nce/arm_nce.s arm/nce/guest_context.h - arm/nce/patch.cpp - arm/nce/patch.h + 
arm/nce/patcher.cpp + arm/nce/patcher.h arm/nce/instructions.h ) target_link_libraries(core PRIVATE merry::oaknut) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 6eb6299cc..f7bdafd39 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -6,7 +6,7 @@ #include "common/signal_chain.h" #include "core/arm/nce/arm_nce.h" -#include "core/arm/nce/patch.h" +#include "core/arm/nce/patcher.h" #include "core/core.h" #include "core/memory.h" diff --git a/src/core/arm/nce/patch.cpp b/src/core/arm/nce/patcher.cpp similarity index 99% rename from src/core/arm/nce/patch.cpp rename to src/core/arm/nce/patcher.cpp index a08859d0b..ec8527224 100644 --- a/src/core/arm/nce/patch.cpp +++ b/src/core/arm/nce/patcher.cpp @@ -7,7 +7,7 @@ #include "core/arm/nce/arm_nce.h" #include "core/arm/nce/guest_context.h" #include "core/arm/nce/instructions.h" -#include "core/arm/nce/patch.h" +#include "core/arm/nce/patcher.h" #include "core/core.h" #include "core/core_timing.h" #include "core/hle/kernel/svc.h" diff --git a/src/core/arm/nce/patch.h b/src/core/arm/nce/patcher.h similarity index 100% rename from src/core/arm/nce/patch.h rename to src/core/arm/nce/patcher.h diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index a1344f1c4..60ee78e89 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -16,7 +16,7 @@ #include "core/loader/nso.h" #ifdef HAS_NCE -#include "core/arm/nce/patch.h" +#include "core/arm/nce/patcher.h" #endif namespace Loader { diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 2e7368349..e74697cda 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -23,7 +23,7 @@ #include "core/memory.h" #ifdef HAS_NCE -#include "core/arm/nce/patch.h" +#include "core/arm/nce/patcher.h" #endif namespace Loader { diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index ade8f85f8..b053a0d14 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -21,7 +21,7 @@ #include "core/memory.h" #ifdef HAS_NCE -#include "core/arm/nce/patch.h" +#include "core/arm/nce/patcher.h" #endif namespace Loader {
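
The patches above repeatedly gate NCE-only code on two things: the compile-time HAS_NCE definition, set in the root CMakeLists.txt for arm64 builds on Android and Linux, and the runtime cpu_backend setting exposed through the new Qt configuration page. The sketch below only illustrates that pattern and is not code from the series: the Example namespace and ShouldUseNativeExecution() function are hypothetical, while HAS_NCE and Settings::IsNceEnabled() are the flag and helper the patches themselves use.

// Minimal sketch, assuming the HAS_NCE definition from the root CMakeLists.txt
// and the Settings::IsNceEnabled() helper called by k_process.cpp and memory.cpp
// in the patches above. Hypothetical illustration only, not part of the series.
#include "common/settings.h"

namespace Example {

// Returns true when guest code should run through the native code execution path.
bool ShouldUseNativeExecution() {
#ifdef HAS_NCE
    // NCE support is compiled in only on arm64 Android/Linux builds; whether it
    // is actually used still depends on the user-selected CPU backend at runtime.
    return Settings::IsNceEnabled();
#else
    // Builds without NCE always fall back to the Dynarmic backend.
    return false;
#endif
}

} // namespace Example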