Improve core timing accuracy (#5257)
* Improve core timing accuracy * remove wrong global_ticks, use biggest ticks over all cores for GetGlobalTicks * merge max slice length change
This commit is contained in:
parent
d11d600b61
commit
57aa18f52e
|
@ -148,7 +148,11 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
|
||||||
for (auto& cpu_core : cpu_cores) {
|
for (auto& cpu_core : cpu_cores) {
|
||||||
if (cpu_core->GetTimer().GetTicks() < global_ticks) {
|
if (cpu_core->GetTimer().GetTicks() < global_ticks) {
|
||||||
s64 delay = global_ticks - cpu_core->GetTimer().GetTicks();
|
s64 delay = global_ticks - cpu_core->GetTimer().GetTicks();
|
||||||
cpu_core->GetTimer().Advance(delay);
|
kernel->SetRunningCPU(cpu_core.get());
|
||||||
|
cpu_core->GetTimer().Advance();
|
||||||
|
cpu_core->PrepareReschedule();
|
||||||
|
kernel->GetThreadManager(cpu_core->GetID()).Reschedule();
|
||||||
|
cpu_core->GetTimer().SetNextSlice(delay);
|
||||||
if (max_delay < delay) {
|
if (max_delay < delay) {
|
||||||
max_delay = delay;
|
max_delay = delay;
|
||||||
current_core_to_execute = cpu_core.get();
|
current_core_to_execute = cpu_core.get();
|
||||||
|
@ -156,7 +160,11 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_delay > 0) {
|
// jit sometimes overshoot by a few ticks which might lead to a minimal desync in the cores.
|
||||||
|
// This small difference shouldn't make it necessary to sync the cores and would only cost
|
||||||
|
// performance. Thus we don't sync delays below min_delay
|
||||||
|
static constexpr s64 min_delay = 100;
|
||||||
|
if (max_delay > min_delay) {
|
||||||
LOG_TRACE(Core_ARM11, "Core {} running (delayed) for {} ticks",
|
LOG_TRACE(Core_ARM11, "Core {} running (delayed) for {} ticks",
|
||||||
current_core_to_execute->GetID(),
|
current_core_to_execute->GetID(),
|
||||||
current_core_to_execute->GetTimer().GetDowncount());
|
current_core_to_execute->GetTimer().GetDowncount());
|
||||||
|
@ -181,12 +189,15 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
|
||||||
// TODO: Make special check for idle since we can easily revert the time of idle cores
|
// TODO: Make special check for idle since we can easily revert the time of idle cores
|
||||||
s64 max_slice = Timing::MAX_SLICE_LENGTH;
|
s64 max_slice = Timing::MAX_SLICE_LENGTH;
|
||||||
for (const auto& cpu_core : cpu_cores) {
|
for (const auto& cpu_core : cpu_cores) {
|
||||||
|
kernel->SetRunningCPU(cpu_core.get());
|
||||||
|
cpu_core->GetTimer().Advance();
|
||||||
|
cpu_core->PrepareReschedule();
|
||||||
|
kernel->GetThreadManager(cpu_core->GetID()).Reschedule();
|
||||||
max_slice = std::min(max_slice, cpu_core->GetTimer().GetMaxSliceLength());
|
max_slice = std::min(max_slice, cpu_core->GetTimer().GetMaxSliceLength());
|
||||||
}
|
}
|
||||||
for (auto& cpu_core : cpu_cores) {
|
for (auto& cpu_core : cpu_cores) {
|
||||||
cpu_core->GetTimer().Advance(max_slice);
|
cpu_core->GetTimer().SetNextSlice(max_slice);
|
||||||
}
|
auto start_ticks = cpu_core->GetTimer().GetTicks();
|
||||||
for (auto& cpu_core : cpu_cores) {
|
|
||||||
LOG_TRACE(Core_ARM11, "Core {} running for {} ticks", cpu_core->GetID(),
|
LOG_TRACE(Core_ARM11, "Core {} running for {} ticks", cpu_core->GetID(),
|
||||||
cpu_core->GetTimer().GetDowncount());
|
cpu_core->GetTimer().GetDowncount());
|
||||||
running_core = cpu_core.get();
|
running_core = cpu_core.get();
|
||||||
|
@ -204,8 +215,8 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
|
||||||
cpu_core->Step();
|
cpu_core->Step();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
max_slice = cpu_core->GetTimer().GetTicks() - start_ticks;
|
||||||
}
|
}
|
||||||
timing->AddToGlobalTicks(max_slice);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (GDBStub::IsServerEnabled()) {
|
if (GDBStub::IsServerEnabled()) {
|
||||||
|
|
|
@ -111,11 +111,15 @@ s64 Timing::GetTicks() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
s64 Timing::GetGlobalTicks() const {
|
s64 Timing::GetGlobalTicks() const {
|
||||||
return global_timer;
|
const auto& timer =
|
||||||
|
std::max_element(timers.cbegin(), timers.cend(), [](const auto& a, const auto& b) {
|
||||||
|
return a->GetTicks() < b->GetTicks();
|
||||||
|
});
|
||||||
|
return (*timer)->GetTicks();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::microseconds Timing::GetGlobalTimeUs() const {
|
std::chrono::microseconds Timing::GetGlobalTimeUs() const {
|
||||||
return std::chrono::microseconds{GetTicks() * 1000000 / BASE_CLOCK_RATE_ARM11};
|
return std::chrono::microseconds{GetGlobalTicks() * 1000000 / BASE_CLOCK_RATE_ARM11};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Timing::Timer> Timing::GetTimer(std::size_t cpu_id) {
|
std::shared_ptr<Timing::Timer> Timing::GetTimer(std::size_t cpu_id) {
|
||||||
|
@ -161,21 +165,22 @@ void Timing::Timer::MoveEvents() {
|
||||||
}
|
}
|
||||||
|
|
||||||
s64 Timing::Timer::GetMaxSliceLength() const {
|
s64 Timing::Timer::GetMaxSliceLength() const {
|
||||||
auto next_event = std::find_if(event_queue.begin(), event_queue.end(),
|
const auto& next_event = event_queue.begin();
|
||||||
[&](const Event& e) { return e.time - executed_ticks > 0; });
|
|
||||||
if (next_event != event_queue.end()) {
|
if (next_event != event_queue.end()) {
|
||||||
|
ASSERT(next_event->time - executed_ticks > 0);
|
||||||
return next_event->time - executed_ticks;
|
return next_event->time - executed_ticks;
|
||||||
}
|
}
|
||||||
return MAX_SLICE_LENGTH;
|
return MAX_SLICE_LENGTH;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Timing::Timer::Advance(s64 max_slice_length) {
|
void Timing::Timer::Advance() {
|
||||||
MoveEvents();
|
MoveEvents();
|
||||||
|
|
||||||
s64 cycles_executed = slice_length - downcount;
|
s64 cycles_executed = slice_length - downcount;
|
||||||
idled_cycles = 0;
|
idled_cycles = 0;
|
||||||
executed_ticks += cycles_executed;
|
executed_ticks += cycles_executed;
|
||||||
slice_length = max_slice_length;
|
slice_length = 0;
|
||||||
|
downcount = 0;
|
||||||
|
|
||||||
is_timer_sane = true;
|
is_timer_sane = true;
|
||||||
|
|
||||||
|
@ -191,6 +196,10 @@ void Timing::Timer::Advance(s64 max_slice_length) {
|
||||||
}
|
}
|
||||||
|
|
||||||
is_timer_sane = false;
|
is_timer_sane = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Timing::Timer::SetNextSlice(s64 max_slice_length) {
|
||||||
|
slice_length = max_slice_length;
|
||||||
|
|
||||||
// Still events left (scheduled in the future)
|
// Still events left (scheduled in the future)
|
||||||
if (!event_queue.empty()) {
|
if (!event_queue.empty()) {
|
||||||
|
|
|
@ -171,7 +171,13 @@ public:
|
||||||
BOOST_SERIALIZATION_SPLIT_MEMBER()
|
BOOST_SERIALIZATION_SPLIT_MEMBER()
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr int MAX_SLICE_LENGTH = 20000;
|
// currently Service::HID::pad_update_ticks is the smallest interval for an event that gets
|
||||||
|
// always scheduled. Therfore we use this as orientation for the MAX_SLICE_LENGTH
|
||||||
|
// For performance bigger slice length are desired, though this will lead to cores desync
|
||||||
|
// But we never want to schedule events into the current slice, because then cores might to
|
||||||
|
// run small slices to sync up again. This is especially important for events that are always
|
||||||
|
// scheduled and repated.
|
||||||
|
static constexpr int MAX_SLICE_LENGTH = BASE_CLOCK_RATE_ARM11 / 234;
|
||||||
|
|
||||||
class Timer {
|
class Timer {
|
||||||
public:
|
public:
|
||||||
|
@ -180,7 +186,9 @@ public:
|
||||||
|
|
||||||
s64 GetMaxSliceLength() const;
|
s64 GetMaxSliceLength() const;
|
||||||
|
|
||||||
void Advance(s64 max_slice_length = MAX_SLICE_LENGTH);
|
void Advance();
|
||||||
|
|
||||||
|
void SetNextSlice(s64 max_slice_length = MAX_SLICE_LENGTH);
|
||||||
|
|
||||||
void Idle();
|
void Idle();
|
||||||
|
|
||||||
|
@ -227,6 +235,9 @@ public:
|
||||||
void serialize(Archive& ar, const unsigned int) {
|
void serialize(Archive& ar, const unsigned int) {
|
||||||
MoveEvents();
|
MoveEvents();
|
||||||
// NOTE: ts_queue should be empty now
|
// NOTE: ts_queue should be empty now
|
||||||
|
// TODO(SaveState): Remove the next two lines when we break compatibility
|
||||||
|
s64 x;
|
||||||
|
ar& x; // to keep compatibility with old save states that stored global_timer
|
||||||
ar& event_queue;
|
ar& event_queue;
|
||||||
ar& event_fifo_id;
|
ar& event_fifo_id;
|
||||||
ar& slice_length;
|
ar& slice_length;
|
||||||
|
@ -260,10 +271,6 @@ public:
|
||||||
|
|
||||||
s64 GetGlobalTicks() const;
|
s64 GetGlobalTicks() const;
|
||||||
|
|
||||||
void AddToGlobalTicks(s64 ticks) {
|
|
||||||
global_timer += ticks;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates the value of the cpu clock scaling to the new percentage.
|
* Updates the value of the cpu clock scaling to the new percentage.
|
||||||
*/
|
*/
|
||||||
|
@ -274,8 +281,6 @@ public:
|
||||||
std::shared_ptr<Timer> GetTimer(std::size_t cpu_id);
|
std::shared_ptr<Timer> GetTimer(std::size_t cpu_id);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
s64 global_timer = 0;
|
|
||||||
|
|
||||||
// unordered_map stores each element separately as a linked list node so pointers to
|
// unordered_map stores each element separately as a linked list node so pointers to
|
||||||
// elements remain stable regardless of rehashes/resizing.
|
// elements remain stable regardless of rehashes/resizing.
|
||||||
std::unordered_map<std::string, TimingEventType> event_types = {};
|
std::unordered_map<std::string, TimingEventType> event_types = {};
|
||||||
|
@ -290,7 +295,6 @@ private:
|
||||||
template <class Archive>
|
template <class Archive>
|
||||||
void serialize(Archive& ar, const unsigned int file_version) {
|
void serialize(Archive& ar, const unsigned int file_version) {
|
||||||
// event_types set during initialization of other things
|
// event_types set during initialization of other things
|
||||||
ar& global_timer;
|
|
||||||
ar& timers;
|
ar& timers;
|
||||||
if (file_version == 0) {
|
if (file_version == 0) {
|
||||||
std::shared_ptr<Timer> x;
|
std::shared_ptr<Timer> x;
|
||||||
|
|
|
@ -111,7 +111,7 @@ void ThreadManager::SwitchContext(Thread* new_thread) {
|
||||||
// Save context for previous thread
|
// Save context for previous thread
|
||||||
if (previous_thread) {
|
if (previous_thread) {
|
||||||
previous_process = previous_thread->owner_process;
|
previous_process = previous_thread->owner_process;
|
||||||
previous_thread->last_running_ticks = timing.GetGlobalTicks();
|
previous_thread->last_running_ticks = cpu->GetTimer().GetTicks();
|
||||||
cpu->SaveContext(previous_thread->context);
|
cpu->SaveContext(previous_thread->context);
|
||||||
|
|
||||||
if (previous_thread->status == ThreadStatus::Running) {
|
if (previous_thread->status == ThreadStatus::Running) {
|
||||||
|
@ -344,7 +344,7 @@ ResultVal<std::shared_ptr<Thread>> KernelSystem::CreateThread(
|
||||||
thread->entry_point = entry_point;
|
thread->entry_point = entry_point;
|
||||||
thread->stack_top = stack_top;
|
thread->stack_top = stack_top;
|
||||||
thread->nominal_priority = thread->current_priority = priority;
|
thread->nominal_priority = thread->current_priority = priority;
|
||||||
thread->last_running_ticks = timing.GetGlobalTicks();
|
thread->last_running_ticks = timing.GetTimer(processor_id)->GetTicks();
|
||||||
thread->processor_id = processor_id;
|
thread->processor_id = processor_id;
|
||||||
thread->wait_objects.clear();
|
thread->wait_objects.clear();
|
||||||
thread->wait_address = 0;
|
thread->wait_address = 0;
|
||||||
|
|
|
@ -13,7 +13,8 @@
|
||||||
|
|
||||||
// Numbers are chosen randomly to make sure the correct one is given.
|
// Numbers are chosen randomly to make sure the correct one is given.
|
||||||
static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
|
static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
|
||||||
static constexpr int MAX_SLICE_LENGTH = 20000; // Copied from CoreTiming internals
|
static constexpr int MAX_SLICE_LENGTH =
|
||||||
|
BASE_CLOCK_RATE_ARM11 / 234; // Copied from CoreTiming internals
|
||||||
|
|
||||||
static std::bitset<CB_IDS.size()> callbacks_ran_flags;
|
static std::bitset<CB_IDS.size()> callbacks_ran_flags;
|
||||||
static u64 expected_callback = 0;
|
static u64 expected_callback = 0;
|
||||||
|
@ -36,7 +37,9 @@ static void AdvanceAndCheck(Core::Timing& timing, u32 idx, int downcount, int ex
|
||||||
|
|
||||||
timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount() -
|
timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount() -
|
||||||
cpu_downcount); // Pretend we executed X cycles of instructions.
|
cpu_downcount); // Pretend we executed X cycles of instructions.
|
||||||
|
|
||||||
timing.GetTimer(0)->Advance();
|
timing.GetTimer(0)->Advance();
|
||||||
|
timing.GetTimer(0)->SetNextSlice();
|
||||||
|
|
||||||
REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
|
REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
|
||||||
REQUIRE(downcount == timing.GetTimer(0)->GetDowncount());
|
REQUIRE(downcount == timing.GetTimer(0)->GetDowncount());
|
||||||
|
@ -53,6 +56,7 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
|
||||||
|
|
||||||
// Enter slice 0
|
// Enter slice 0
|
||||||
timing.GetTimer(0)->Advance();
|
timing.GetTimer(0)->Advance();
|
||||||
|
timing.GetTimer(0)->SetNextSlice();
|
||||||
|
|
||||||
// D -> B -> C -> A -> E
|
// D -> B -> C -> A -> E
|
||||||
timing.ScheduleEvent(1000, cb_a, CB_IDS[0], 0);
|
timing.ScheduleEvent(1000, cb_a, CB_IDS[0], 0);
|
||||||
|
@ -106,6 +110,7 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
|
||||||
|
|
||||||
// Enter slice 0
|
// Enter slice 0
|
||||||
timing.GetTimer(0)->Advance();
|
timing.GetTimer(0)->Advance();
|
||||||
|
timing.GetTimer(0)->SetNextSlice();
|
||||||
REQUIRE(1000 == timing.GetTimer(0)->GetDowncount());
|
REQUIRE(1000 == timing.GetTimer(0)->GetDowncount());
|
||||||
|
|
||||||
callbacks_ran_flags = 0;
|
callbacks_ran_flags = 0;
|
||||||
|
@ -113,6 +118,7 @@ TEST_CASE("CoreTiming[SharedSlot]", "[core]") {
|
||||||
lateness = 0;
|
lateness = 0;
|
||||||
timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
|
timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
|
||||||
timing.GetTimer(0)->Advance();
|
timing.GetTimer(0)->Advance();
|
||||||
|
timing.GetTimer(0)->SetNextSlice();
|
||||||
REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount());
|
REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount());
|
||||||
REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
|
REQUIRE(0x1FULL == callbacks_ran_flags.to_ullong());
|
||||||
}
|
}
|
||||||
|
@ -125,6 +131,7 @@ TEST_CASE("CoreTiming[PredictableLateness]", "[core]") {
|
||||||
|
|
||||||
// Enter slice 0
|
// Enter slice 0
|
||||||
timing.GetTimer(0)->Advance();
|
timing.GetTimer(0)->Advance();
|
||||||
|
timing.GetTimer(0)->SetNextSlice();
|
||||||
|
|
||||||
timing.ScheduleEvent(100, cb_a, CB_IDS[0], 0);
|
timing.ScheduleEvent(100, cb_a, CB_IDS[0], 0);
|
||||||
timing.ScheduleEvent(200, cb_b, CB_IDS[1], 0);
|
timing.ScheduleEvent(200, cb_b, CB_IDS[1], 0);
|
||||||
|
@ -161,6 +168,7 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
|
||||||
|
|
||||||
// Enter slice 0
|
// Enter slice 0
|
||||||
timing.GetTimer(0)->Advance();
|
timing.GetTimer(0)->Advance();
|
||||||
|
timing.GetTimer(0)->SetNextSlice();
|
||||||
|
|
||||||
timing.ScheduleEvent(800, cb_a, CB_IDS[0], 0);
|
timing.ScheduleEvent(800, cb_a, CB_IDS[0], 0);
|
||||||
timing.ScheduleEvent(1000, cb_b, CB_IDS[1], 0);
|
timing.ScheduleEvent(1000, cb_b, CB_IDS[1], 0);
|
||||||
|
@ -174,14 +182,16 @@ TEST_CASE("CoreTiming[ChainScheduling]", "[core]") {
|
||||||
REQUIRE(2 == reschedules);
|
REQUIRE(2 == reschedules);
|
||||||
|
|
||||||
timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
|
timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
|
||||||
timing.GetTimer(0)->Advance(); // cb_rs
|
timing.GetTimer(0)->Advance();
|
||||||
|
timing.GetTimer(0)->SetNextSlice(); // cb_rs
|
||||||
REQUIRE(1 == reschedules);
|
REQUIRE(1 == reschedules);
|
||||||
REQUIRE(200 == timing.GetTimer(0)->GetDowncount());
|
REQUIRE(200 == timing.GetTimer(0)->GetDowncount());
|
||||||
|
|
||||||
AdvanceAndCheck(timing, 2, 800); // cb_c
|
AdvanceAndCheck(timing, 2, 800); // cb_c
|
||||||
|
|
||||||
timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
|
timing.GetTimer(0)->AddTicks(timing.GetTimer(0)->GetDowncount());
|
||||||
timing.GetTimer(0)->Advance(); // cb_rs
|
timing.GetTimer(0)->Advance();
|
||||||
|
timing.GetTimer(0)->SetNextSlice(); // cb_rs
|
||||||
REQUIRE(0 == reschedules);
|
REQUIRE(0 == reschedules);
|
||||||
REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount());
|
REQUIRE(MAX_SLICE_LENGTH == timing.GetTimer(0)->GetDowncount());
|
||||||
}
|
}
|
||||||
|
|
Reference in New Issue