x64: Deduplicate RDTSC usage
This commit is contained in:
parent
9c6fc44a59
commit
dd12dd4c67
|
@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64)
|
||||||
x64/cpu_wait.h
|
x64/cpu_wait.h
|
||||||
x64/native_clock.cpp
|
x64/native_clock.cpp
|
||||||
x64/native_clock.h
|
x64/native_clock.h
|
||||||
|
x64/rdtsc.cpp
|
||||||
|
x64/rdtsc.h
|
||||||
x64/xbyak_abi.h
|
x64/xbyak_abi.h
|
||||||
x64/xbyak_util.h
|
x64/xbyak_util.h
|
||||||
)
|
)
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/x64/cpu_detect.h"
|
#include "common/x64/cpu_detect.h"
|
||||||
|
#include "common/x64/rdtsc.h"
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
@ -187,6 +188,8 @@ static CPUCaps Detect() {
|
||||||
caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) *
|
caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) *
|
||||||
caps.tsc_crystal_ratio_numerator /
|
caps.tsc_crystal_ratio_numerator /
|
||||||
caps.tsc_crystal_ratio_denominator;
|
caps.tsc_crystal_ratio_denominator;
|
||||||
|
} else {
|
||||||
|
caps.tsc_frequency = X64::EstimateRDTSCFrequency();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,19 +9,11 @@
|
||||||
|
|
||||||
#include "common/x64/cpu_detect.h"
|
#include "common/x64/cpu_detect.h"
|
||||||
#include "common/x64/cpu_wait.h"
|
#include "common/x64/cpu_wait.h"
|
||||||
|
#include "common/x64/rdtsc.h"
|
||||||
|
|
||||||
namespace Common::X64 {
|
namespace Common::X64 {
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
/// Reads the processor time-stamp counter with a load fence on each side of the read
/// (MSVC intrinsic flavour).
///
/// Every _mm_lfence() is followed by _ReadWriteBarrier(), which MSVC documents as limiting
/// compiler reordering of memory accesses across that point.
///
/// @return The raw counter value returned by __rdtsc().
__forceinline static u64 FencedRDTSC() {
    // Fence before the read.
    _mm_lfence();
    _ReadWriteBarrier();

    const u64 tsc = __rdtsc();

    // Fence after the read.
    _mm_lfence();
    _ReadWriteBarrier();

    return tsc;
}
|
|
||||||
|
|
||||||
__forceinline static void TPAUSE() {
|
__forceinline static void TPAUSE() {
|
||||||
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
||||||
// For reference:
|
// For reference:
|
||||||
|
@ -32,16 +24,6 @@ __forceinline static void TPAUSE() {
|
||||||
_tpause(0, FencedRDTSC() + PauseCycles);
|
_tpause(0, FencedRDTSC() + PauseCycles);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
/// Reads the processor time-stamp counter with an LFENCE on each side of the read
/// (GCC/Clang inline-assembly flavour).
///
/// RDTSC leaves the low half of the counter in EAX and the high half in EDX; the two halves
/// are recombined into a single 64-bit value.
///
/// @return The 64-bit counter value.
static u64 FencedRDTSC() {
    u64 eax;
    u64 edx;
    // The "memory" clobber makes this statement a compiler-level barrier as well, matching the
    // _ReadWriteBarrier() calls of the MSVC variant: without it the compiler is free to move
    // surrounding memory accesses across the timestamp read.
    asm volatile("lfence\n\t"
                 "rdtsc\n\t"
                 "lfence\n\t"
                 : "=a"(eax), "=d"(edx)
                 :
                 : "memory");
    return (edx << 32) | eax;
}
|
|
||||||
|
|
||||||
static void TPAUSE() {
|
static void TPAUSE() {
|
||||||
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
||||||
// For reference:
|
// For reference:
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#include "common/steady_clock.h"
|
||||||
|
#include "common/uint128.h"
|
||||||
|
#include "common/x64/rdtsc.h"
|
||||||
|
|
||||||
|
namespace Common::X64 {
|
||||||
|
|
||||||
|
template <u64 Nearest>
|
||||||
|
static u64 RoundToNearest(u64 value) {
|
||||||
|
const auto mod = value % Nearest;
|
||||||
|
return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Estimates how many time-stamp-counter ticks elapse per second by comparing FencedRDTSC()
/// against RealTimeClock across a 100 ms sleep.
///
/// @return The estimated tick rate, rounded to the nearest 100,000.
u64 EstimateRDTSCFrequency() {
    using std::chrono::milliseconds;
    using std::chrono::nanoseconds;

    constexpr auto ns_per_second = 1000000000ULL;

    // Throw away two readings taken 1 ms apart before measuring.
    FencedRDTSC();
    std::this_thread::sleep_for(milliseconds{1});
    FencedRDTSC();

    // Sample the clock first and the counter second, in the same order at both ends of the
    // measurement window.
    const auto wall_begin = RealTimeClock::Now();
    const u64 ticks_begin = FencedRDTSC();

    // The 100 ms sleep is the measurement window.
    std::this_thread::sleep_for(milliseconds{100});

    const auto wall_end = RealTimeClock::Now();
    const u64 ticks_end = FencedRDTSC();

    // Elapsed wall time in nanoseconds and elapsed ticks over the same window.
    const auto elapsed = std::chrono::duration_cast<nanoseconds>(wall_end - wall_begin);
    const u64 elapsed_ns = static_cast<u64>(elapsed.count());
    const u64 elapsed_ticks = ticks_end - ticks_begin;

    // ticks / nanoseconds * 1e9 == ticks per second.
    const u64 ticks_per_second = MultiplyAndDivide64(elapsed_ticks, ns_per_second, elapsed_ns);
    return RoundToNearest<100'000>(ticks_per_second);
}
|
||||||
|
|
||||||
|
} // namespace Common::X64
|
|
@ -0,0 +1,37 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Common::X64 {
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
/// Reads the processor time-stamp counter with a load fence on each side of the read
/// (MSVC intrinsic flavour).
///
/// Every _mm_lfence() is followed by _ReadWriteBarrier(), which MSVC documents as limiting
/// compiler reordering of memory accesses across that point.
///
/// @return The raw counter value returned by __rdtsc().
__forceinline static u64 FencedRDTSC() {
    // Fence before the read.
    _mm_lfence();
    _ReadWriteBarrier();

    const u64 tsc = __rdtsc();

    // Fence after the read.
    _mm_lfence();
    _ReadWriteBarrier();

    return tsc;
}
|
||||||
|
#else
|
||||||
|
/// Reads the processor time-stamp counter with an LFENCE on each side of the read
/// (GCC/Clang inline-assembly flavour).
///
/// RDTSC leaves the low half of the counter in EAX and the high half in EDX; the two halves
/// are recombined into a single 64-bit value.
///
/// @return The 64-bit counter value.
static inline u64 FencedRDTSC() {
    u64 eax;
    u64 edx;
    // The "memory" clobber makes this statement a compiler-level barrier as well, matching the
    // _ReadWriteBarrier() calls of the MSVC variant: without it the compiler is free to move
    // surrounding memory accesses across the timestamp read.
    asm volatile("lfence\n\t"
                 "rdtsc\n\t"
                 "lfence\n\t"
                 : "=a"(eax), "=d"(edx)
                 :
                 : "memory");
    return (edx << 32) | eax;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/// Estimates the time-stamp-counter tick rate by comparing FencedRDTSC() readings against
/// RealTimeClock across a sleep.
///
/// Blocks the calling thread for roughly 101 ms (a 1 ms warm-up sleep plus a 100 ms
/// measurement sleep).
///
/// @return Estimated ticks per second, rounded to the nearest 100,000.
u64 EstimateRDTSCFrequency();
|
||||||
|
|
||||||
|
} // namespace Common::X64
|
Reference in New Issue