kernel: Implement host thread register methods without locking
Locks on GetCurrentHostThreadID were causing performance issues according to Visual Studio's profiler. It was consuming twice as much time as arm_interface.Run(). The cost was not in the function itself but in the locking it required. Reimplement these functions using atomics and static storage instead of an unordered_map. The static storage is a side effect of avoiding locks and linked lists on reads. Replace the unordered_map lookup with a linear search.
This commit is contained in:
parent
d291fc1a51
commit
b9a9b83bee
|
@ -7,7 +7,6 @@
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
@ -107,7 +106,11 @@ struct KernelCore::Impl {
|
||||||
cores.clear();
|
cores.clear();
|
||||||
|
|
||||||
exclusive_monitor.reset();
|
exclusive_monitor.reset();
|
||||||
host_thread_ids.clear();
|
|
||||||
|
num_host_threads = 0;
|
||||||
|
std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(),
|
||||||
|
std::thread::id{});
|
||||||
|
std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void InitializePhysicalCores() {
|
void InitializePhysicalCores() {
|
||||||
|
@ -177,54 +180,56 @@ struct KernelCore::Impl {
|
||||||
|
|
||||||
void MakeCurrentProcess(Process* process) {
|
void MakeCurrentProcess(Process* process) {
|
||||||
current_process = process;
|
current_process = process;
|
||||||
|
|
||||||
if (process == nullptr) {
|
if (process == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const u32 core_id = GetCurrentHostThreadID();
|
||||||
u32 core_id = GetCurrentHostThreadID();
|
|
||||||
if (core_id < Core::Hardware::NUM_CPU_CORES) {
|
if (core_id < Core::Hardware::NUM_CPU_CORES) {
|
||||||
system.Memory().SetCurrentPageTable(*process, core_id);
|
system.Memory().SetCurrentPageTable(*process, core_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegisterCoreThread(std::size_t core_id) {
|
void RegisterCoreThread(std::size_t core_id) {
|
||||||
std::unique_lock lock{register_thread_mutex};
|
|
||||||
if (!is_multicore) {
|
|
||||||
single_core_thread_id = std::this_thread::get_id();
|
|
||||||
}
|
|
||||||
const std::thread::id this_id = std::this_thread::get_id();
|
const std::thread::id this_id = std::this_thread::get_id();
|
||||||
const auto it = host_thread_ids.find(this_id);
|
if (!is_multicore) {
|
||||||
|
single_core_thread_id = this_id;
|
||||||
|
}
|
||||||
|
const auto end = register_host_thread_keys.begin() + num_host_threads;
|
||||||
|
const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
|
||||||
ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
|
ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
|
||||||
ASSERT(it == host_thread_ids.end());
|
ASSERT(it == end);
|
||||||
ASSERT(!registered_core_threads[core_id]);
|
ASSERT(!registered_core_threads[core_id]);
|
||||||
host_thread_ids[this_id] = static_cast<u32>(core_id);
|
InsertHostThread(static_cast<u32>(core_id));
|
||||||
registered_core_threads.set(core_id);
|
registered_core_threads.set(core_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegisterHostThread() {
|
void RegisterHostThread() {
|
||||||
std::unique_lock lock{register_thread_mutex};
|
|
||||||
const std::thread::id this_id = std::this_thread::get_id();
|
const std::thread::id this_id = std::this_thread::get_id();
|
||||||
const auto it = host_thread_ids.find(this_id);
|
const auto end = register_host_thread_keys.begin() + num_host_threads;
|
||||||
if (it != host_thread_ids.end()) {
|
const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
|
||||||
return;
|
if (it == end) {
|
||||||
|
InsertHostThread(registered_thread_ids++);
|
||||||
}
|
}
|
||||||
host_thread_ids[this_id] = registered_thread_ids++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 GetCurrentHostThreadID() const {
|
void InsertHostThread(u32 value) {
|
||||||
|
const size_t index = num_host_threads++;
|
||||||
|
ASSERT_MSG(index < NUM_REGISTRABLE_HOST_THREADS, "Too many host threads");
|
||||||
|
register_host_thread_values[index] = value;
|
||||||
|
register_host_thread_keys[index] = std::this_thread::get_id();
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] u32 GetCurrentHostThreadID() const {
|
||||||
const std::thread::id this_id = std::this_thread::get_id();
|
const std::thread::id this_id = std::this_thread::get_id();
|
||||||
if (!is_multicore) {
|
if (!is_multicore && single_core_thread_id == this_id) {
|
||||||
if (single_core_thread_id == this_id) {
|
return static_cast<u32>(system.GetCpuManager().CurrentCore());
|
||||||
return static_cast<u32>(system.GetCpuManager().CurrentCore());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
std::unique_lock lock{register_thread_mutex};
|
const auto end = register_host_thread_keys.begin() + num_host_threads;
|
||||||
const auto it = host_thread_ids.find(this_id);
|
const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
|
||||||
if (it == host_thread_ids.end()) {
|
if (it == end) {
|
||||||
return Core::INVALID_HOST_THREAD_ID;
|
return Core::INVALID_HOST_THREAD_ID;
|
||||||
}
|
}
|
||||||
return it->second;
|
return register_host_thread_values[std::distance(register_host_thread_keys.begin(), it)];
|
||||||
}
|
}
|
||||||
|
|
||||||
Core::EmuThreadHandle GetCurrentEmuThreadID() const {
|
Core::EmuThreadHandle GetCurrentEmuThreadID() const {
|
||||||
|
@ -322,10 +327,15 @@ struct KernelCore::Impl {
|
||||||
std::vector<Kernel::PhysicalCore> cores;
|
std::vector<Kernel::PhysicalCore> cores;
|
||||||
|
|
||||||
// 0-3 IDs represent core threads, >3 represent others
|
// 0-3 IDs represent core threads, >3 represent others
|
||||||
std::unordered_map<std::thread::id, u32> host_thread_ids;
|
std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
|
||||||
u32 registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
|
|
||||||
std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads;
|
std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads;
|
||||||
mutable std::mutex register_thread_mutex;
|
|
||||||
|
// Number of host threads is a relatively high number to avoid overflowing
|
||||||
|
static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64;
|
||||||
|
std::atomic<size_t> num_host_threads{0};
|
||||||
|
std::array<std::atomic<std::thread::id>, NUM_REGISTRABLE_HOST_THREADS>
|
||||||
|
register_host_thread_keys{};
|
||||||
|
std::array<std::atomic<u32>, NUM_REGISTRABLE_HOST_THREADS> register_host_thread_values{};
|
||||||
|
|
||||||
// Kernel memory management
|
// Kernel memory management
|
||||||
std::unique_ptr<Memory::MemoryManager> memory_manager;
|
std::unique_ptr<Memory::MemoryManager> memory_manager;
|
||||||
|
|
Reference in New Issue