Merge pull request #436 from bunnei/multi-core

Initial support for multi-core
2018-05-11 12:59:23 -04:00 · 2018-05-11 12:59:23 -04:00 · 1b5c02fc37
parent e07218906d 811dae12f9
commit 1b5c02fc37
24 changed files with 605 additions and 181 deletions
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@ -4,6 +4,8 @@ add_library(core STATIC
    arm/unicorn/arm_unicorn.h
    core.cpp
    core.h
+    core_cpu.cpp
+    core_cpu.h
    core_timing.cpp
    core_timing.h
    file_sys/directory.h
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@ -52,7 +52,7 @@ static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                               void* user_data) {
    ARM_Interface::ThreadContext ctx{};
-    Core::CPU().SaveContext(ctx);
+    Core::CurrentArmInterface().SaveContext(ctx);
    ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
               ctx.pc, ctx.cpu_registers[30]);
    return {};
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@ -5,10 +5,6 @@
 #include <memory>
 #include <utility>
 #include "common/logging/log.h"
-#ifdef ARCHITECTURE_x86_64
-#include "core/arm/dynarmic/arm_dynarmic.h"
-#endif
-#include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/gdbstub/gdbstub.h"
@ -31,11 +27,31 @@ namespace Core {

 System::~System() = default;

+/// Runs a CPU core on the calling host thread for as long as the system is powered on.
+/// @param cpu_state Core to run; taken by value so the caller's thread shares ownership of it.
+static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
+    // Use the static accessor directly. `Core::System().GetInstance()` default-constructed (and
+    // destroyed) a temporary System on every iteration just to reach the same static function.
+    while (System::GetInstance().IsPoweredOn()) {
+        cpu_state->RunLoop(true);
+    }
+}
+
+/// Returns the CPU core associated with the caller: looked up by host thread id when multicore
+/// is enabled, otherwise the core selected by active_core.
+Cpu& System::CurrentCpuCore() {
+    // Single-threaded mode: the core being run is tracked by the active_core variable
+    if (!Settings::values.use_multi_core) {
+        return *cpu_cores[active_core];
+    }
+
+    // Multicore mode: the calling host thread must have been registered in thread_to_cpu
+    const auto entry = thread_to_cpu.find(std::this_thread::get_id());
+    ASSERT(entry != thread_to_cpu.end());
+    ASSERT(entry->second);
+    return *entry->second;
+}
+
 System::ResultStatus System::RunLoop(bool tight_loop) {
    status = ResultStatus::Success;
-    if (!cpu_core) {
-        return ResultStatus::ErrorNotInitialized;
-    }
+
+    // Update thread_to_cpu in case Core 0 is run from a different host thread
+    thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];

    if (GDBStub::IsServerEnabled()) {
        GDBStub::HandlePacket();
@ -52,25 +68,14 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
        }
    }

-    // If we don't have a currently active thread then don't execute instructions,
-    // instead advance to the next event and try to yield to the next thread
-    if (Kernel::GetCurrentThread() == nullptr) {
-        NGLOG_TRACE(Core_ARM, "Idling");
-        CoreTiming::Idle();
-        CoreTiming::Advance();
-        PrepareReschedule();
-    } else {
-        CoreTiming::Advance();
-        if (tight_loop) {
-            cpu_core->Run();
-        } else {
-            cpu_core->Step();
+    for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
+        cpu_cores[active_core]->RunLoop(tight_loop);
+        if (Settings::values.use_multi_core) {
+            // Cores 1-3 are run on other threads in this mode
+            break;
        }
    }

-    HW::Update();
-    Reschedule();
-
    return status;
 }

@ -133,21 +138,26 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file
 }

 void System::PrepareReschedule() {
-    cpu_core->PrepareReschedule();
-    reschedule_pending = true;
+    CurrentCpuCore().PrepareReschedule();
 }

 PerfStats::Results System::GetAndResetPerfStats() {
    return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
 }

-void System::Reschedule() {
-    if (!reschedule_pending) {
-        return;
+const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(size_t core_index) {
+    ASSERT(core_index < NUM_CPU_CORES);
+    return cpu_cores[core_index]->Scheduler();
 }

-    reschedule_pending = false;
-    Core::System::GetInstance().Scheduler().Reschedule();
+ARM_Interface& System::ArmInterface(size_t core_index) {
+    ASSERT(core_index < NUM_CPU_CORES);
+    return cpu_cores[core_index]->ArmInterface();
+}
+
+Cpu& System::CpuCore(size_t core_index) {
+    ASSERT(core_index < NUM_CPU_CORES);
+    return *cpu_cores[core_index];
 }

 System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
@ -157,26 +167,17 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {

    current_process = Kernel::Process::Create("main");

-    if (Settings::values.use_cpu_jit) {
-#ifdef ARCHITECTURE_x86_64
-        cpu_core = std::make_shared<ARM_Dynarmic>();
-#else
-        cpu_core = std::make_shared<ARM_Unicorn>();
-        NGLOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
-#endif
-    } else {
-        cpu_core = std::make_shared<ARM_Unicorn>();
+    cpu_barrier = std::make_shared<CpuBarrier>();
+    for (size_t index = 0; index < cpu_cores.size(); ++index) {
+        cpu_cores[index] = std::make_shared<Cpu>(cpu_barrier, index);
    }

    gpu_core = std::make_unique<Tegra::GPU>();
-
    telemetry_session = std::make_unique<Core::TelemetrySession>();
-
    service_manager = std::make_shared<Service::SM::ServiceManager>();

    HW::Init();
    Kernel::Init(system_mode);
-    scheduler = std::make_unique<Kernel::Scheduler>(cpu_core.get());
    Service::Init(service_manager);
    GDBStub::Init();

@ -184,6 +185,17 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
        return ResultStatus::ErrorVideoCore;
    }

+    // Create threads for CPU cores 1-3, and build thread_to_cpu map
+    // CPU core 0 is run on the main thread
+    thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
+    if (Settings::values.use_multi_core) {
+        for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
+            cpu_core_threads[index] =
+                std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
+            thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
+        }
+    }
+
    NGLOG_DEBUG(Core, "Initialized OK");

    // Reset counters and set time origin to current frame
@ -207,15 +219,30 @@ void System::Shutdown() {
    VideoCore::Shutdown();
    GDBStub::Shutdown();
    Service::Shutdown();
-    scheduler.reset();
    Kernel::Shutdown();
    HW::Shutdown();
    service_manager.reset();
    telemetry_session.reset();
    gpu_core.reset();
+
+    // Close all CPU/threading state
+    cpu_barrier->NotifyEnd();
+    if (Settings::values.use_multi_core) {
+        for (auto& thread : cpu_core_threads) {
+            thread->join();
+            thread.reset();
+        }
+    }
+    thread_to_cpu.clear();
+    for (auto& cpu_core : cpu_cores) {
        cpu_core.reset();
+    }
+    cpu_barrier.reset();
+
+    // Close core timing
    CoreTiming::Shutdown();

+    // Close app loader
    app_loader.reset();

    NGLOG_DEBUG(Core, "Shutdown OK");
--- a/src/core/core.h
+++ b/src/core/core.h
@ -4,9 +4,12 @@

 #pragma once

+#include <array>
 #include <memory>
 #include <string>
+#include <thread>
 #include "common/common_types.h"
+#include "core/core_cpu.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/loader/loader.h"
@ -89,7 +92,7 @@ public:
     * @returns True if the emulated system is powered on, otherwise false.
     */
    bool IsPoweredOn() const {
-        return cpu_core != nullptr;
+        return cpu_barrier && cpu_barrier->IsAlive();
    }

    /**
@ -103,24 +106,34 @@ public:
    /// Prepare the core emulation for a reschedule
    void PrepareReschedule();

+    /// Gets and resets core performance statistics
    PerfStats::Results GetAndResetPerfStats();

-    /**
-     * Gets a reference to the emulated CPU.
-     * @returns A reference to the emulated CPU.
-     */
-    ARM_Interface& CPU() {
-        return *cpu_core;
+    /// Gets an ARM interface to the CPU core that is currently running
+    ARM_Interface& CurrentArmInterface() {
+        return CurrentCpuCore().ArmInterface();
    }

+    /// Gets an ARM interface to the CPU core with the specified index
+    ARM_Interface& ArmInterface(size_t core_index);
+
+    /// Gets a CPU interface to the CPU core with the specified index
+    Cpu& CpuCore(size_t core_index);
+
+    /// Gets the GPU interface
    Tegra::GPU& GPU() {
        return *gpu_core;
    }

-    Kernel::Scheduler& Scheduler() {
-        return *scheduler;
+    /// Gets the scheduler for the CPU core that is currently running
+    Kernel::Scheduler& CurrentScheduler() {
+        return *CurrentCpuCore().Scheduler();
    }

+    /// Gets the scheduler for the CPU core with the specified index
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index);
+
+    /// Gets the current process
    Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
        return current_process;
    }
@ -155,6 +168,9 @@ public:
    }

 private:
+    /// Returns the currently running CPU core
+    Cpu& CurrentCpuCore();
+
    /**
     * Initialize the emulated system.
     * @param emu_window Pointer to the host-system window used for video output and keyboard input.
@ -163,22 +179,15 @@ private:
     */
    ResultStatus Init(EmuWindow* emu_window, u32 system_mode);

-    /// Reschedule the core emulation
-    void Reschedule();
-
    /// AppLoader used to load the current executing application
    std::unique_ptr<Loader::AppLoader> app_loader;
-
-    std::shared_ptr<ARM_Interface> cpu_core;
-    std::unique_ptr<Kernel::Scheduler> scheduler;
    std::unique_ptr<Tegra::GPU> gpu_core;
-
    std::shared_ptr<Tegra::DebugContext> debug_context;
-
    Kernel::SharedPtr<Kernel::Process> current_process;
-
-    /// When true, signals that a reschedule should happen
-    bool reschedule_pending{};
+    std::shared_ptr<CpuBarrier> cpu_barrier;
+    std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
+    std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
+    size_t active_core{}; ///< Active core, only used in single thread mode

    /// Service manager
    std::shared_ptr<Service::SM::ServiceManager> service_manager;
@ -190,10 +199,13 @@ private:

    ResultStatus status = ResultStatus::Success;
    std::string status_details = "";
+
+    /// Map of host threads to CPU cores
+    std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu;
 };

-inline ARM_Interface& CPU() {
-    return System::GetInstance().CPU();
+inline ARM_Interface& CurrentArmInterface() {
+    return System::GetInstance().CurrentArmInterface();
 }

 inline TelemetrySession& Telemetry() {
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@ -0,0 +1,119 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <condition_variable>
+#include <mutex>
+
+#include "common/logging/log.h"
+#ifdef ARCHITECTURE_x86_64
+#include "core/arm/dynarmic/arm_dynarmic.h"
+#endif
+#include "core/arm/unicorn/arm_unicorn.h"
+#include "core/core_cpu.h"
+#include "core/core_timing.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/thread.h"
+#include "core/settings.h"
+
+namespace Core {
+
+/// Flags the barrier as ended and wakes every core currently blocked on the condition variable.
+void CpuBarrier::NotifyEnd() {
+    // The flag is set and the waiters are notified while the barrier mutex is held
+    std::lock_guard<std::mutex> guard{mutex};
+    end = true;
+    condition.notify_all();
+}
+
+/// Blocks the calling core until all NUM_CPU_CORES cores have arrived (multi-core mode only).
+/// @returns true if the caller may run its next slice, false if NotifyEnd() has been called.
+bool CpuBarrier::Rendezvous() {
+    if (!Settings::values.use_multi_core) {
+        // Meaningless when running in single-core mode
+        return true;
+    }
+
+    std::unique_lock<std::mutex> lock(mutex);
+
+    // Test the end flag while holding the lock. NotifyEnd() sets it under the same lock, so a core
+    // can no longer see "still alive", miss the final notify_all() and then block forever.
+    if (end) {
+        return false;
+    }
+
+    --cores_waiting;
+    if (!cores_waiting) {
+        // Last core to arrive: re-arm the counter for the next round and release the others
+        cores_waiting = NUM_CPU_CORES;
+        condition.notify_all();
+        return true;
+    }
+
+    // NOTE(review): this wait has no predicate, so a spurious wake-up releases the core early.
+    // Guarding against that needs a per-round generation counter stored in CpuBarrier.
+    condition.wait(lock);
+
+    // The wake-up may have come from NotifyEnd() instead of the last core arriving; report it so
+    // the caller does not run another slice after the session has been killed.
+    return !end;
+}
+
+/// Builds the state of one emulated core: picks the ARM backend and creates its scheduler.
+/// @param cpu_barrier Barrier shared with the other cores.
+/// @param core_index Index of this core; index 0 is treated as the main core (see IsMainCore).
+Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
+    : cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} {
+
+    if (Settings::values.use_cpu_jit) {
+#ifdef ARCHITECTURE_x86_64
+        arm_interface = std::make_shared<ARM_Dynarmic>();
+#else
+        // Fall back to Unicorn. This must assign arm_interface: Cpu has no cpu_core member, so the
+        // previous assignment to cpu_core did not compile on non-x86_64 builds.
+        arm_interface = std::make_shared<ARM_Unicorn>();
+        NGLOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
+#endif
+    } else {
+        arm_interface = std::make_shared<ARM_Unicorn>();
+    }
+
+    // The scheduler is handed a non-owning pointer to this core's ARM interface
+    scheduler = std::make_shared<Kernel::Scheduler>(arm_interface.get());
+}
+
+/// Runs one slice on this core: rendezvous with the other cores, then either execute guest code
+/// or idle, and finally perform any pending reschedule.
+/// @param tight_loop When true the ARM interface is Run(), otherwise it is Step()ped.
+void Cpu::RunLoop(bool tight_loop) {
+    // All cores meet here first so that they advance in lock-step; a failed rendezvous means the
+    // session has been killed and there is nothing left to run
+    const bool session_alive = cpu_barrier->Rendezvous();
+    if (!session_alive) {
+        return;
+    }
+
+    if (Kernel::GetCurrentThread() != nullptr) {
+        // Only the main core drives core timing
+        if (IsMainCore()) {
+            CoreTiming::Advance();
+        }
+
+        if (tight_loop) {
+            arm_interface->Run();
+        } else {
+            arm_interface->Step();
+        }
+    } else {
+        // No active thread: execute no instructions, advance to the next event instead and try
+        // to yield to the next thread
+        NGLOG_TRACE(Core, "Core-{} idling", core_index);
+
+        if (IsMainCore()) {
+            CoreTiming::Idle();
+            CoreTiming::Advance();
+        }
+
+        PrepareReschedule();
+    }
+
+    Reschedule();
+}
+
+/// Runs one slice of this core in stepping mode (RunLoop with tight_loop disabled).
+void Cpu::SingleStep() {
+    RunLoop(false);
+}
+
+/// Requests a reschedule on this core: notifies the ARM interface and records the request so
+/// that the next Reschedule() call acts on it.
+void Cpu::PrepareReschedule() {
+    // Notify the ARM interface first, then mark the request as pending
+    arm_interface->PrepareReschedule();
+    reschedule_pending = true;
+}
+
+/// Performs the reschedule requested through PrepareReschedule(), if one is pending.
+void Cpu::Reschedule() {
+    if (reschedule_pending) {
+        // Clear the request before handing control to the scheduler
+        reschedule_pending = false;
+        scheduler->Reschedule();
+    }
+}
+
+} // namespace Core
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@ -0,0 +1,78 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <string>
+#include "common/common_types.h"
+
+class ARM_Interface;
+
+namespace Kernel {
+class Scheduler;
+}
+
+namespace Core {
+
+constexpr unsigned NUM_CPU_CORES{4};
+
+/// Rendezvous point shared by the CPU cores; also carries the "emulation has ended" flag.
+class CpuBarrier {
+public:
+    /// Returns true until NotifyEnd() has been called.
+    bool IsAlive() const {
+        return !end.load();
+    }
+
+    /// Marks the barrier as ended and wakes every core blocked in Rendezvous().
+    void NotifyEnd();
+
+    /// Waits for the other cores in multi-core mode.
+    /// @returns false once the barrier has been ended, true otherwise.
+    bool Rendezvous();
+
+private:
+    unsigned cores_waiting = NUM_CPU_CORES; ///< Cores still expected in the current round
+    std::mutex mutex;                       ///< Held while cores_waiting is updated
+    std::condition_variable condition;      ///< Signalled by the last arriving core and on end
+    std::atomic<bool> end{false};           ///< Set by NotifyEnd()
+};
+
+/// One emulated CPU core: owns its ARM interface and scheduler and shares a CpuBarrier with the
+/// other cores.
+class Cpu {
+public:
+    /// @param cpu_barrier Barrier shared between all cores.
+    /// @param core_index Index of this core; index 0 is the main core.
+    Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index);
+
+    /// Runs one slice of this core. With tight_loop the ARM interface is run, otherwise stepped.
+    void RunLoop(bool tight_loop = true);
+
+    /// Equivalent to RunLoop(false).
+    void SingleStep();
+
+    /// Flags that this core should reschedule at the end of its current slice.
+    void PrepareReschedule();
+
+    /// Gets the ARM interface of this core
+    ARM_Interface& ArmInterface() {
+        return *arm_interface;
+    }
+
+    /// Gets the ARM interface of this core (read-only)
+    const ARM_Interface& ArmInterface() const {
+        return *arm_interface;
+    }
+
+    /// Gets the scheduler of this core
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler() const {
+        return scheduler;
+    }
+
+    /// Returns true for the core with index 0
+    bool IsMainCore() const {
+        return core_index == 0;
+    }
+
+private:
+    /// Runs the scheduler if a reschedule was requested via PrepareReschedule()
+    void Reschedule();
+
+    std::shared_ptr<ARM_Interface> arm_interface;
+    std::shared_ptr<CpuBarrier> cpu_barrier;
+    std::shared_ptr<Kernel::Scheduler> scheduler;
+
+    bool reschedule_pending = false; ///< Set by PrepareReschedule(), cleared by Reschedule()
+    size_t core_index;               ///< Index given at construction
+};
+
+} // namespace Core
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@ -598,11 +598,11 @@ static void ReadRegister() {
    }

    if (id <= SP_REGISTER) {
-        LongToGdbHex(reply, Core::CPU().GetReg(static_cast<int>(id)));
+        LongToGdbHex(reply, Core::CurrentArmInterface().GetReg(static_cast<int>(id)));
    } else if (id == PC_REGISTER) {
-        LongToGdbHex(reply, Core::CPU().GetPC());
+        LongToGdbHex(reply, Core::CurrentArmInterface().GetPC());
    } else if (id == CPSR_REGISTER) {
-        IntToGdbHex(reply, Core::CPU().GetCPSR());
+        IntToGdbHex(reply, Core::CurrentArmInterface().GetCPSR());
    } else {
        return SendReply("E01");
    }
@ -618,16 +618,16 @@ static void ReadRegisters() {
    u8* bufptr = buffer;

    for (int reg = 0; reg <= SP_REGISTER; reg++) {
-        LongToGdbHex(bufptr + reg * 16, Core::CPU().GetReg(reg));
+        LongToGdbHex(bufptr + reg * 16, Core::CurrentArmInterface().GetReg(reg));
    }

    bufptr += (32 * 16);

-    LongToGdbHex(bufptr, Core::CPU().GetPC());
+    LongToGdbHex(bufptr, Core::CurrentArmInterface().GetPC());

    bufptr += 16;

-    IntToGdbHex(bufptr, Core::CPU().GetCPSR());
+    IntToGdbHex(bufptr, Core::CurrentArmInterface().GetCPSR());

    bufptr += 8;

@ -646,11 +646,11 @@ static void WriteRegister() {
    }

    if (id <= SP_REGISTER) {
-        Core::CPU().SetReg(id, GdbHexToLong(buffer_ptr));
+        Core::CurrentArmInterface().SetReg(id, GdbHexToLong(buffer_ptr));
    } else if (id == PC_REGISTER) {
-        Core::CPU().SetPC(GdbHexToLong(buffer_ptr));
+        Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr));
    } else if (id == CPSR_REGISTER) {
-        Core::CPU().SetCPSR(GdbHexToInt(buffer_ptr));
+        Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr));
    } else {
        return SendReply("E01");
    }
@ -667,11 +667,11 @@ static void WriteRegisters() {

    for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
        if (reg <= SP_REGISTER) {
-            Core::CPU().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
        } else if (reg == PC_REGISTER) {
-            Core::CPU().SetPC(GdbHexToLong(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr + i * 16));
        } else if (reg == CPSR_REGISTER) {
-            Core::CPU().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
        } else {
            UNIMPLEMENTED();
        }
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@ -9,6 +9,8 @@

 namespace Kernel {

+std::mutex Scheduler::scheduler_mutex;
+
 Scheduler::Scheduler(ARM_Interface* cpu_core) : cpu_core(cpu_core) {}

 Scheduler::~Scheduler() {
@ -18,6 +20,7 @@ Scheduler::~Scheduler() {
 }

 bool Scheduler::HaveReadyThreads() {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
    return ready_queue.get_first() != nullptr;
 }

@ -90,6 +93,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 }

 void Scheduler::Reschedule() {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
    Thread* cur = GetCurrentThread();
    Thread* next = PopNextReadyThread();

@ -105,26 +110,36 @@ void Scheduler::Reschedule() {
 }

 void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
    thread_list.push_back(thread);
    ready_queue.prepare(priority);
 }

 void Scheduler::RemoveThread(Thread* thread) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
    thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
                      thread_list.end());
 }

 void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
    ASSERT(thread->status == THREADSTATUS_READY);
    ready_queue.push_back(priority, thread);
 }

 void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
    ASSERT(thread->status == THREADSTATUS_READY);
    ready_queue.remove(priority, thread);
 }

 void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
    // If thread was ready, adjust queues
    if (thread->status == THREADSTATUS_READY)
        ready_queue.move(thread, thread->current_priority, priority);
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@ -4,6 +4,7 @@

 #pragma once

+#include <mutex>
 #include <vector>
 #include "common/common_types.h"
 #include "common/thread_queue_list.h"
@ -68,6 +69,8 @@ private:
    SharedPtr<Thread> current_thread = nullptr;

    ARM_Interface* cpu_core;
+
+    static std::mutex scheduler_mutex;
 };

 } // namespace Kernel
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@ -401,8 +401,8 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {

 /// Get which CPU core is executing the current thread
 static u32 GetCurrentProcessorNumber() {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, defaulting to processor 0");
-    return 0;
+    NGLOG_TRACE(Kernel_SVC, "called");
+    return GetCurrentThread()->processor_id;
 }

 static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size,
@ -485,8 +485,7 @@ static void ExitProcess() {

    Core::CurrentProcess()->status = ProcessStatus::Exited;

-    // Stop all the process threads that are currently waiting for objects.
-    auto& thread_list = Core::System::GetInstance().Scheduler().GetThreadList();
+    auto stop_threads = [](const std::vector<SharedPtr<Thread>>& thread_list) {
        for (auto& thread : thread_list) {
            if (thread->owner_process != Core::CurrentProcess())
                continue;
@ -501,6 +500,13 @@ static void ExitProcess() {

            thread->Stop();
        }
+    };
+
+    auto& system = Core::System::GetInstance();
+    stop_threads(system.Scheduler(0)->GetThreadList());
+    stop_threads(system.Scheduler(1)->GetThreadList());
+    stop_threads(system.Scheduler(2)->GetThreadList());
+    stop_threads(system.Scheduler(3)->GetThreadList());

    // Kill the current thread
    GetCurrentThread()->Stop();
@ -530,14 +536,9 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V

    switch (processor_id) {
    case THREADPROCESSORID_0:
-        break;
    case THREADPROCESSORID_1:
    case THREADPROCESSORID_2:
    case THREADPROCESSORID_3:
-        // TODO(bunnei): Implement support for other processor IDs
-        NGLOG_ERROR(Kernel_SVC,
-                    "Newly created thread must run in another thread ({}), unimplemented.",
-                    processor_id);
        break;
    default:
        ASSERT_MSG(false, "Unsupported thread processor ID: {}", processor_id);
@ -576,7 +577,7 @@ static ResultCode StartThread(Handle thread_handle) {

 /// Called when a thread exits
 static void ExitThread() {
-    NGLOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CPU().GetPC());
+    NGLOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());

    ExitCurrentThread();
    Core::System::GetInstance().PrepareReschedule();
@ -588,7 +589,7 @@ static void SleepThread(s64 nanoseconds) {

    // Don't attempt to yield execution if there are no available threads to run,
    // this way we avoid a useless reschedule to the idle thread.
-    if (nanoseconds == 0 && !Core::System::GetInstance().Scheduler().HaveReadyThreads())
+    if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
        return;

    // Sleep current thread and check for next thread to schedule
@ -624,7 +625,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var

    // Note: Deliberately don't attempt to inherit the lock owner's priority.

-    Core::System::GetInstance().PrepareReschedule();
+    Core::System::GetInstance().CpuCore(current_thread->processor_id).PrepareReschedule();
    return RESULT_SUCCESS;
 }

@ -634,9 +635,10 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
                condition_variable_addr, target);

    u32 processed = 0;
-    auto& thread_list = Core::System::GetInstance().Scheduler().GetThreadList();

-    for (auto& thread : thread_list) {
+    auto signal_process_wide_key = [&](size_t core_index) {
+        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+        for (auto& thread : scheduler->GetThreadList()) {
            if (thread->condvar_wait_address != condition_variable_addr)
                continue;

@ -676,11 +678,17 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target

                owner->AddMutexWaiter(thread);

-            Core::System::GetInstance().PrepareReschedule();
+                Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
            }

            ++processed;
        }
+    };
+
+    signal_process_wide_key(0);
+    signal_process_wide_key(1);
+    signal_process_wide_key(2);
+    signal_process_wide_key(3);

    return RESULT_SUCCESS;
 }
@ -718,16 +726,31 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
    return RESULT_SUCCESS;
 }

-static ResultCode GetThreadCoreMask(Handle handle, u32* mask, u64* unknown) {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}", handle);
-    *mask = 0x0;
-    *unknown = 0xf;
+static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
+    NGLOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
+
+    const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
+    if (!thread) {
+        return ERR_INVALID_HANDLE;
+    }
+
+    *core = thread->ideal_core;
+    *mask = thread->affinity_mask;
+
    return RESULT_SUCCESS;
 }

-static ResultCode SetThreadCoreMask(Handle handle, u32 mask, u64 unknown) {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}, mask=0x{:08X}, unknown=0x{:X}",
-                  handle, mask, unknown);
+static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
+    NGLOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:08X}, core=0x{:X}", thread_handle,
+                mask, core);
+
+    const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
+    if (!thread) {
+        return ERR_INVALID_HANDLE;
+    }
+
+    thread->ChangeCore(core, mask);
+
    return RESULT_SUCCESS;
 }

--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@ -13,14 +13,14 @@

 namespace Kernel {

-#define PARAM(n) Core::CPU().GetReg(n)
+#define PARAM(n) Core::CurrentArmInterface().GetReg(n)

 /**
 * HLE a function return from the current ARM userland process
 * @param res Result to return
 */
 static inline void FuncReturn(u64 res) {
-    Core::CPU().SetReg(0, res);
+    Core::CurrentArmInterface().SetReg(0, res);
 }

 ////////////////////////////////////////////////////////////////////////////////////////////////////
@ -45,7 +45,7 @@ template <ResultCode func(u32*, u32)>
 void SvcWrap() {
    u32 param_1 = 0;
    u32 retval = func(&param_1, (u32)PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }

@ -53,7 +53,7 @@ template <ResultCode func(u32*, u64)>
 void SvcWrap() {
    u32 param_1 = 0;
    u32 retval = func(&param_1, PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }

@ -66,7 +66,7 @@ template <ResultCode func(u64*, u64)>
 void SvcWrap() {
    u64 param_1 = 0;
    u32 retval = func(&param_1, PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }

@ -85,8 +85,8 @@ void SvcWrap() {
    u32 param_1 = 0;
    u64 param_2 = 0;
    ResultCode retval = func((u32)(PARAM(2) & 0xFFFFFFFF), &param_1, &param_2);
-    Core::CPU().SetReg(1, param_1);
-    Core::CPU().SetReg(2, param_2);
+    Core::CurrentArmInterface().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(2, param_2);
    FuncReturn(retval.raw);
 }

@ -120,7 +120,7 @@ template <ResultCode func(u32*, u64, u64, s64)>
 void SvcWrap() {
    u32 param_1 = 0;
    ResultCode retval = func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3));
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval.raw);
 }

@ -133,7 +133,7 @@ template <ResultCode func(u64*, u64, u64, u64)>
 void SvcWrap() {
    u64 param_1 = 0;
    u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }

@ -143,7 +143,7 @@ void SvcWrap() {
    u32 retval =
        func(&param_1, PARAM(1), PARAM(2), PARAM(3), (u32)PARAM(4), (s32)(PARAM(5) & 0xFFFFFFFF))
            .raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }

@ -166,7 +166,7 @@ template <ResultCode func(u32*, u64, u64, u32)>
 void SvcWrap() {
    u32 param_1 = 0;
    u32 retval = func(&param_1, PARAM(1), PARAM(2), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }

@ -175,7 +175,7 @@ void SvcWrap() {
    u32 param_1 = 0;
    u32 retval =
        func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
    FuncReturn(retval);
 }

--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@ -64,7 +64,7 @@ void Thread::Stop() {
    // Clean up thread from ready queue
    // This is only needed when the thread is terminated forcefully (SVC TerminateProcess)
    if (status == THREADSTATUS_READY) {
-        Core::System::GetInstance().Scheduler().UnscheduleThread(this, current_priority);
+        scheduler->UnscheduleThread(this, current_priority);
    }

    status = THREADSTATUS_DEAD;
@ -92,7 +92,7 @@ void WaitCurrentThread_Sleep() {
 void ExitCurrentThread() {
    Thread* thread = GetCurrentThread();
    thread->Stop();
-    Core::System::GetInstance().Scheduler().RemoveThread(thread);
+    Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
 }

 /**
@ -154,6 +154,18 @@ void Thread::CancelWakeupTimer() {
    CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle);
 }

+/// Finds the lowest-numbered core that is enabled in the mask and has no current thread.
+/// @param mask Bit mask of candidate cores; bit N set means core N may be used.
+/// @returns The index of the first such core, or an empty optional when there is none.
+static boost::optional<s32> GetNextProcessorId(u64 mask) {
+    // Use the static accessor directly instead of constructing a temporary System per iteration
+    auto& system = Core::System::GetInstance();
+
+    // NUM_CPU_CORES is unsigned; cast it so the loop comparison is not mixed signed/unsigned
+    for (s32 index = 0; index < static_cast<s32>(Core::NUM_CPU_CORES); ++index) {
+        const bool core_enabled = (mask & (1ULL << index)) != 0;
+        if (core_enabled && !system.Scheduler(index)->GetCurrentThread()) {
+            // Core is enabled and not running any threads, use this one
+            return index;
+        }
+    }
+    return {};
+}
+
 void Thread::ResumeFromWait() {
    ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects");

@ -188,8 +200,37 @@ void Thread::ResumeFromWait() {
    wakeup_callback = nullptr;

    status = THREADSTATUS_READY;
-    Core::System::GetInstance().Scheduler().ScheduleThread(this, current_priority);
-    Core::System::GetInstance().PrepareReschedule();
+
+    boost::optional<s32> new_processor_id = GetNextProcessorId(affinity_mask);
+    if (!new_processor_id) {
+        new_processor_id = processor_id;
+    }
+    if (ideal_core != -1 &&
+        Core::System().GetInstance().Scheduler(ideal_core)->GetCurrentThread() == nullptr) {
+        new_processor_id = ideal_core;
+    }
+
+    ASSERT(*new_processor_id < 4);
+
+    // Add thread to new core's scheduler
+    auto& next_scheduler = Core::System().GetInstance().Scheduler(*new_processor_id);
+
+    if (*new_processor_id != processor_id) {
+        // Remove thread from previous core's scheduler
+        scheduler->RemoveThread(this);
+        next_scheduler->AddThread(this, current_priority);
+    }
+
+    processor_id = *new_processor_id;
+
+    // If the thread was ready, unschedule from the previous core and schedule on the new core
+    scheduler->UnscheduleThread(this, current_priority);
+    next_scheduler->ScheduleThread(this, current_priority);
+
+    // Change thread's scheduler
+    scheduler = next_scheduler;
+
+    Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
 }

 /**
@ -259,8 +300,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,

    SharedPtr<Thread> thread(new Thread);

-    Core::System::GetInstance().Scheduler().AddThread(thread, priority);
-
    thread->thread_id = NewThreadId();
    thread->status = THREADSTATUS_DORMANT;
    thread->entry_point = entry_point;
@ -268,6 +307,8 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
    thread->nominal_priority = thread->current_priority = priority;
    thread->last_running_ticks = CoreTiming::GetTicks();
    thread->processor_id = processor_id;
+    thread->ideal_core = processor_id;
+    thread->affinity_mask = 1ULL << processor_id;
    thread->wait_objects.clear();
    thread->mutex_wait_address = 0;
    thread->condvar_wait_address = 0;
@ -275,6 +316,8 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
    thread->name = std::move(name);
    thread->callback_handle = wakeup_callback_handle_table.Create(thread).Unwrap();
    thread->owner_process = owner_process;
+    thread->scheduler = Core::System().GetInstance().Scheduler(processor_id);
+    thread->scheduler->AddThread(thread, priority);

    // Find the next available TLS index, and mark it as used
    auto& tls_slots = owner_process->tls_slots;
@ -337,7 +380,7 @@ void Thread::SetPriority(u32 priority) {
 }

 void Thread::BoostPriority(u32 priority) {
-    Core::System::GetInstance().Scheduler().SetThreadPriority(this, priority);
+    scheduler->SetThreadPriority(this, priority); // uses this thread's own `scheduler` member
     current_priority = priority;
 }

@ -406,7 +449,7 @@ void Thread::UpdatePriority() {
    if (new_priority == current_priority)
        return;

-    Core::System::GetInstance().Scheduler().SetThreadPriority(this, new_priority);
+    scheduler->SetThreadPriority(this, new_priority);

    current_priority = new_priority;

@ -415,13 +458,54 @@ void Thread::UpdatePriority() {
        lock_owner->UpdatePriority();
 }

+/**
+ * Updates the thread's ideal core and affinity mask and, if the thread is currently ready,
+ * moves it to the scheduler of the newly selected core.
+ *
+ * Core selection: the first idle core allowed by the mask is preferred, falling back to the
+ * thread's current core; an idle ideal core overrides both.
+ *
+ * @param core New ideal core index; the ideal-core override is skipped when this equals -1
+ *             (0xFFFFFFFF).
+ * @param mask Bitmask of cores the thread may run on (bit N set means core N is allowed).
+ */
+void Thread::ChangeCore(u32 core, u64 mask) {
+    ideal_core = core;
+    // Persist the new mask on the thread; the parameter must not be assigned to itself
+    affinity_mask = mask;
+
+    // Only threads that are ready to run are migrated between schedulers here
+    if (status != THREADSTATUS_READY) {
+        return;
+    }
+
+    // Call the static accessor directly rather than through a temporary System object
+    auto& system = Core::System::GetInstance();
+
+    boost::optional<s32> new_processor_id{GetNextProcessorId(affinity_mask)};
+
+    if (!new_processor_id) {
+        new_processor_id = processor_id;
+    }
+    if (ideal_core != -1 && system.Scheduler(ideal_core)->GetCurrentThread() == nullptr) {
+        new_processor_id = ideal_core;
+    }
+
+    // Check the contained core index, not the optional wrapper itself
+    ASSERT(*new_processor_id < 4);
+
+    // Add thread to new core's scheduler
+    auto& next_scheduler = system.Scheduler(*new_processor_id);
+
+    if (*new_processor_id != processor_id) {
+        // Remove thread from previous core's scheduler
+        scheduler->RemoveThread(this);
+        next_scheduler->AddThread(this, current_priority);
+    }
+
+    processor_id = *new_processor_id;
+
+    // If the thread was ready, unschedule from the previous core and schedule on the new core
+    scheduler->UnscheduleThread(this, current_priority);
+    next_scheduler->ScheduleThread(this, current_priority);
+
+    // Change thread's scheduler
+    scheduler = next_scheduler;
+
+    system.CpuCore(processor_id).PrepareReschedule();
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////

 /**
 * Gets the current thread
 */
 Thread* GetCurrentThread() {
-    return Core::System::GetInstance().Scheduler().GetCurrentThread();
+    return Core::System::GetInstance().CurrentScheduler().GetCurrentThread(); // NOTE(review): presumably the calling core's scheduler - confirm
 }

 void ThreadingInit() {
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@ -4,6 +4,7 @@

 #pragma once

+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
@ -56,6 +57,7 @@ enum class ThreadWakeupReason {
 namespace Kernel {

 class Process;
+class Scheduler;

 class Thread final : public WaitObject {
 public:
@ -118,6 +120,9 @@ public:
    /// Recalculates the current priority taking into account priority inheritance.
    void UpdatePriority();

+    /// Changes the core that the thread is running or scheduled to run on.
+    void ChangeCore(u32 core, u64 mask);
+
    /**
     * Gets the thread's thread ID
     * @return The thread's ID
@ -240,6 +245,11 @@ public:
    // available. In case of a timeout, the object will be nullptr.
    std::function<WakeupCallback> wakeup_callback;

+    std::shared_ptr<Scheduler> scheduler;
+
+    u32 ideal_core{0xFFFFFFFF};
+    u64 affinity_mask{0x1};
+
 private:
    Thread();
    ~Thread() override;
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@ -104,7 +104,14 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
    VirtualMemoryArea& final_vma = vma_handle->second;
    ASSERT(final_vma.size == size);

-    Core::CPU().MapBackingMemory(target, size, block->data() + offset,
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset,
                                            VMAPermission::ReadWriteExecute);

    final_vma.type = VMAType::AllocatedMemoryBlock;
@ -126,7 +133,11 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
    VirtualMemoryArea& final_vma = vma_handle->second;
    ASSERT(final_vma.size == size);

-    Core::CPU().MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);

    final_vma.type = VMAType::BackingMemory;
    final_vma.permissions = VMAPermission::ReadWrite;
@ -184,7 +195,11 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {

    ASSERT(FindVMA(target)->second.size >= size);

-    Core::CPU().UnmapMemory(target, size);
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).UnmapMemory(target, size);
+    system.ArmInterface(1).UnmapMemory(target, size);
+    system.ArmInterface(2).UnmapMemory(target, size);
+    system.ArmInterface(3).UnmapMemory(target, size);

    return RESULT_SUCCESS;
 }
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@ -28,8 +28,13 @@ static PageTable* current_page_table = nullptr;

 void SetCurrentPageTable(PageTable* page_table) {
    current_page_table = page_table;
-    if (Core::System::GetInstance().IsPoweredOn()) {
-        Core::CPU().PageTableChanged();
+
+    auto& system = Core::System::GetInstance();
+    if (system.IsPoweredOn()) {
+        system.ArmInterface(0).PageTableChanged();
+        system.ArmInterface(1).PageTableChanged();
+        system.ArmInterface(2).PageTableChanged();
+        system.ArmInterface(3).PageTableChanged();
    }
 }

--- a/src/core/settings.h
+++ b/src/core/settings.h
@ -121,6 +121,7 @@ struct Values {

    // Core
    bool use_cpu_jit;
+    bool use_multi_core;

    // Data Storage
    bool use_virtual_sd;
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@ -155,6 +155,8 @@ TelemetrySession::TelemetrySession() {

    // Log user configuration information
    AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit);
+    AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
+             Settings::values.use_multi_core);
    AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
             Settings::values.resolution_factor);
    AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit",
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@ -78,6 +78,7 @@ void Config::ReadValues() {

    qt_config->beginGroup("Core");
    Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool();
+    Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool();
    qt_config->endGroup();

    qt_config->beginGroup("Renderer");
@ -177,6 +178,7 @@ void Config::SaveValues() {

    qt_config->beginGroup("Core");
    qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit);
+    qt_config->setValue("use_multi_core", Settings::values.use_multi_core);
    qt_config->endGroup();

    qt_config->beginGroup("Renderer");
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@ -20,6 +20,7 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
    this->setConfiguration();

    ui->use_cpu_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->use_multi_core->setEnabled(!Core::System::GetInstance().IsPoweredOn());
    ui->use_docked_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
 }

@ -30,6 +31,7 @@ void ConfigureGeneral::setConfiguration() {
    ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
    ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
    ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
+    ui->use_multi_core->setChecked(Settings::values.use_multi_core);
    ui->use_docked_mode->setChecked(Settings::values.use_docked_mode);
 }

@ -40,6 +42,7 @@ void ConfigureGeneral::applyConfiguration() {
        ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();

    Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
+    Settings::values.use_multi_core = ui->use_multi_core->isChecked();
    Settings::values.use_docked_mode = ui->use_docked_mode->isChecked();
    Settings::Apply();
 }
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@ -58,6 +58,13 @@
            </property>
           </widget>
          </item>
+          <item>
+           <widget class="QCheckBox" name="use_multi_core">
+            <property name="text">
+             <string>Enable multi-core</string>
+            </property>
+           </widget>
+          </item>
         </layout>
        </item>
       </layout>
--- a/src/yuzu/debugger/registers.cpp
+++ b/src/yuzu/debugger/registers.cpp
@ -63,7 +63,7 @@ void RegistersWidget::OnDebugModeEntered() {

    for (int i = 0; i < core_registers->childCount(); ++i)
        core_registers->child(i)->setText(
-            1, QString("0x%1").arg(Core::CPU().GetReg(i), 8, 16, QLatin1Char('0')));
+            1, QString("0x%1").arg(Core::CurrentArmInterface().GetReg(i), 8, 16, QLatin1Char('0')));

    UpdateCPSRValues();
 }
@ -122,7 +122,7 @@ void RegistersWidget::CreateCPSRChildren() {
 }

 void RegistersWidget::UpdateCPSRValues() {
-    const u32 cpsr_val = Core::CPU().GetCPSR();
+    const u32 cpsr_val = Core::CurrentArmInterface().GetCPSR();

    cpsr->setText(1, QString("0x%1").arg(cpsr_val, 8, 16, QLatin1Char('0')));
    cpsr->child(0)->setText(
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@ -51,13 +51,21 @@ std::size_t WaitTreeItem::Row() const {
 }

 std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList() {
-    const auto& threads = Core::System::GetInstance().Scheduler().GetThreadList();
    std::vector<std::unique_ptr<WaitTreeThread>> item_list;
-    item_list.reserve(threads.size());
+    std::size_t row = 0;
+    auto add_threads = [&](const std::vector<Kernel::SharedPtr<Kernel::Thread>>& threads) {
        for (std::size_t i = 0; i < threads.size(); ++i) {
            item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
-        item_list.back()->row = i;
+            item_list.back()->row = row;
+            ++row;
        }
+    };
+
+    add_threads(Core::System::GetInstance().Scheduler(0)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(1)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(2)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(3)->GetThreadList());
+
    return item_list;
 }

@ -240,6 +248,9 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
    }

    list.push_back(std::make_unique<WaitTreeText>(tr("processor = %1").arg(processor)));
+    list.push_back(std::make_unique<WaitTreeText>(tr("ideal core = %1").arg(thread.ideal_core)));
+    list.push_back(
+        std::make_unique<WaitTreeText>(tr("affinity mask = %1").arg(thread.affinity_mask)));
    list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadId())));
    list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)")
                                                      .arg(thread.current_priority)
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@ -91,6 +91,7 @@ void Config::ReadValues() {

    // Core
    Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true);
+    Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);

    // Renderer
    Settings::values.resolution_factor =
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@ -80,6 +80,10 @@ touch_device=
 # 0: Interpreter (slow), 1 (default): JIT (fast)
 use_cpu_jit =

+# Whether to use multi-core for CPU emulation
+# 0 (default): Disabled, 1: Enabled
+use_multi_core=
+
 [Renderer]
 # Whether to use software or hardware rendering.
 # 0: Software, 1 (default): Hardware