yuzu-emu
/
yuzu
Archived
1
0
Fork 0

Common: Remove section measurement from profiler (#1731)

This has been entirely superseded by MicroProfile. The rest of the code
can go when a simpler frametime/FPS meter is added to the GUI.
This commit is contained in:
Yuri Kunde Schlesner 2016-04-29 00:07:10 -07:00
parent 90501f20e6
commit e3a8292495
13 changed files with 8 additions and 306 deletions

View File

@ -9,6 +9,7 @@
#include "citra_qt/debugger/profiler.h"
#include "citra_qt/util/util.h"
#include "common/common_types.h"
#include "common/microprofile.h"
#include "common/profiler_reporting.h"
@ -36,21 +37,9 @@ static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
}
}
static const TimingCategoryInfo* GetCategoryInfo(int id)
{
const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
if ((size_t)id >= categories.size()) {
return nullptr;
} else {
return &categories[id];
}
}
ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
{
updateProfilingInfo();
const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
results.time_per_category.resize(categories.size());
}
QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
@ -87,7 +76,7 @@ int ProfilerModel::rowCount(const QModelIndex& parent) const
if (parent.isValid()) {
return 0;
} else {
return static_cast<int>(results.time_per_category.size() + 2);
return 2;
}
}
@ -106,17 +95,6 @@ QVariant ProfilerModel::data(const QModelIndex& index, int role) const
} else {
return GetDataForColumn(index.column(), results.interframe_time);
}
} else {
if (index.column() == 0) {
const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2);
return info != nullptr ? QString(info->name) : QVariant();
} else {
if (index.row() - 2 < (int)results.time_per_category.size()) {
return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]);
} else {
return QVariant();
}
}
}
}

View File

@ -47,7 +47,6 @@ set(HEADERS
microprofile.h
microprofileui.h
platform.h
profiler.h
profiler_reporting.h
scm_rev.h
scope_exit.h

View File

@ -13,4 +13,7 @@
#define MICROPROFILE_HELP_ALT "Right-Click"
#define MICROPROFILE_HELP_MOD "Ctrl"
// This isn't included by microprofileui.h :(
#include <cstdlib> // For std::abs
#include <microprofileui.h>

View File

@ -7,71 +7,16 @@
#include <vector>
#include "common/assert.h"
#include "common/profiler.h"
#include "common/profiler_reporting.h"
#include "common/synchronized_wrapper.h"
#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
#define WIN32_LEAN_AND_MEAN
#include <Windows.h> // For QueryPerformanceCounter/Frequency
#endif
namespace Common {
namespace Profiling {
#if ENABLE_PROFILING
thread_local Timer* Timer::current_timer = nullptr;
#endif
#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
QPCClock::time_point QPCClock::now() {
static LARGE_INTEGER freq;
// Use this dummy local static to ensure this gets initialized once.
static BOOL dummy = QueryPerformanceFrequency(&freq);
LARGE_INTEGER ticks;
QueryPerformanceCounter(&ticks);
// This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
// correct way to approach this would be to just return ticks as a time_point and then subtract
// and do this conversion when creating a duration from two time_points, however, as far as I
// could tell the C++ requirements for these types are incompatible with this approach.
return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
}
#endif
TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
: accumulated_duration(0) {
ProfilingManager& manager = GetProfilingManager();
category_id = manager.RegisterTimingCategory(this, name);
if (parent != nullptr)
manager.SetTimingCategoryParent(category_id, parent->category_id);
}
ProfilingManager::ProfilingManager()
: last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
}
unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
TimingCategoryInfo info;
info.category = category;
info.name = name;
info.parent = TimingCategoryInfo::NO_PARENT;
unsigned int id = (unsigned int)timing_categories.size();
timing_categories.push_back(std::move(info));
return id;
}
void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
ASSERT(category < timing_categories.size());
ASSERT(parent < timing_categories.size());
timing_categories[category].parent = parent;
}
void ProfilingManager::BeginFrame() {
this_frame_start = Clock::now();
}
@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() {
results.interframe_time = now - last_frame_end;
results.frame_time = now - this_frame_start;
results.time_per_category.resize(timing_categories.size());
for (size_t i = 0; i < timing_categories.size(); ++i) {
results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
}
last_frame_end = now;
}
@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() {
window_size = cursor = 0;
}
void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
size_t old_size = times_per_category.size();
if (n == old_size)
return;
times_per_category.resize(n);
for (size_t i = old_size; i < n; ++i) {
times_per_category[i].resize(max_window_size, Duration::zero());
}
}
void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
SetNumberOfCategories(frame_result.time_per_category.size());
interframe_times[cursor] = frame_result.interframe_time;
frame_times[cursor] = frame_result.frame_time;
for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
times_per_category[i][cursor] = frame_result.time_per_category[i];
}
++cursor;
if (cursor == max_window_size)
@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
result.fps = 0.0f;
}
result.time_per_category.resize(times_per_category.size());
for (size_t i = 0; i < times_per_category.size(); ++i) {
result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
}
return result;
}

View File

@ -1,152 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <chrono>
#include "common/assert.h"
#include "common/thread.h"
namespace Common {
namespace Profiling {
// If this is defined to 0, it turns all Timers into no-ops.
#ifndef ENABLE_PROFILING
#define ENABLE_PROFILING 1
#endif
#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
// precision. We manually implement a clock based on QPC to get good results.
struct QPCClock {
using duration = std::chrono::microseconds;
using time_point = std::chrono::time_point<QPCClock>;
using rep = duration::rep;
using period = duration::period;
static const bool is_steady = false;
static time_point now();
};
using Clock = QPCClock;
#else
using Clock = std::chrono::high_resolution_clock;
#endif
using Duration = Clock::duration;
/**
* Represents a timing category that measured time can be accounted towards. Should be declared as a
* global variable and passed to Timers.
*/
class TimingCategory final {
public:
TimingCategory(const char* name, TimingCategory* parent = nullptr);
unsigned int GetCategoryId() const {
return category_id;
}
/// Adds some time to this category. Can safely be called from multiple threads at the same time.
void AddTime(Duration amount) {
std::atomic_fetch_add_explicit(
&accumulated_duration, amount.count(),
std::memory_order_relaxed);
}
/**
* Atomically retrieves the accumulated measured time for this category and resets the counter
* to zero. Can be safely called concurrently with AddTime.
*/
Duration GetAccumulatedTime() {
return Duration(std::atomic_exchange_explicit(
&accumulated_duration, (Duration::rep)0,
std::memory_order_relaxed));
}
private:
unsigned int category_id;
std::atomic<Duration::rep> accumulated_duration;
};
/**
* Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
* TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
* appropriately paired.
*
* When a Timer is started, it automatically pauses a previously running timer on the same thread,
* which is resumed when it is stopped. As such, no special action needs to be taken to avoid
* double-accounting of time on two categories.
*/
class Timer {
public:
Timer(TimingCategory& category) : category(category) {
}
void Start() {
#if ENABLE_PROFILING
ASSERT(!running);
previous_timer = current_timer;
current_timer = this;
if (previous_timer != nullptr)
previous_timer->StopTiming();
StartTiming();
#endif
}
void Stop() {
#if ENABLE_PROFILING
ASSERT(running);
StopTiming();
if (previous_timer != nullptr)
previous_timer->StartTiming();
current_timer = previous_timer;
#endif
}
private:
#if ENABLE_PROFILING
void StartTiming() {
start = Clock::now();
running = true;
}
void StopTiming() {
auto duration = Clock::now() - start;
running = false;
category.AddTime(std::chrono::duration_cast<Duration>(duration));
}
Clock::time_point start;
bool running = false;
Timer* previous_timer;
static thread_local Timer* current_timer;
#endif
TimingCategory& category;
};
/**
* A Timer that automatically starts timing when created and stops at the end of the scope. Should
* be used in the majority of cases.
*/
class ScopeTimer : public Timer {
public:
ScopeTimer(TimingCategory& category) : Timer(category) {
Start();
}
~ScopeTimer() {
Stop();
}
};
} // namespace Profiling
} // namespace Common

View File

@ -4,22 +4,17 @@
#pragma once
#include <chrono>
#include <cstddef>
#include <vector>
#include "common/profiler.h"
#include "common/synchronized_wrapper.h"
namespace Common {
namespace Profiling {
struct TimingCategoryInfo {
static const unsigned int NO_PARENT = -1;
TimingCategory* category;
const char* name;
unsigned int parent;
};
using Clock = std::chrono::high_resolution_clock;
using Duration = Clock::duration;
struct ProfilingFrameResult {
/// Time since the last delivered frame
@ -27,22 +22,12 @@ struct ProfilingFrameResult {
/// Time spent processing a frame, excluding VSync
Duration frame_time;
/// Total amount of time spent inside each category in this frame. Indexed by the category id
std::vector<Duration> time_per_category;
};
class ProfilingManager final {
public:
ProfilingManager();
unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
void SetTimingCategoryParent(unsigned int category, unsigned int parent);
const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
return timing_categories;
}
/// This should be called after swapping screen buffers.
void BeginFrame();
/// This should be called before swapping screen buffers.
@ -54,7 +39,6 @@ public:
}
private:
std::vector<TimingCategoryInfo> timing_categories;
Clock::time_point last_frame_end;
Clock::time_point this_frame_start;
@ -73,9 +57,6 @@ struct AggregatedFrameResult {
AggregatedDuration frame_time;
float fps;
/// Total amount of time spent inside each category in this frame. Indexed by the category id
std::vector<AggregatedDuration> time_per_category;
};
class TimingResultsAggregator final {
@ -83,7 +64,6 @@ public:
TimingResultsAggregator(size_t window_size);
void Clear();
void SetNumberOfCategories(size_t n);
void AddFrame(const ProfilingFrameResult& frame_result);
@ -95,7 +75,6 @@ public:
std::vector<Duration> interframe_times;
std::vector<Duration> frame_times;
std::vector<std::vector<Duration>> times_per_category;
};
ProfilingManager& GetProfilingManager();

View File

@ -10,7 +10,6 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/memory.h"
#include "core/hle/svc.h"
@ -25,9 +24,6 @@
#include "core/gdbstub/gdbstub.h"
Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
enum {
COND = (1 << 0),
NON_BRANCH = (1 << 1),
@ -3496,7 +3492,6 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons
}
static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) {
Common::Profiling::ScopeTimer timer_decode(profile_decode);
MICROPROFILE_SCOPE(DynCom_Decode);
// Decode instruction, get index
@ -3530,7 +3525,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
}
static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) {
Common::Profiling::ScopeTimer timer_decode(profile_decode);
MICROPROFILE_SCOPE(DynCom_Decode);
ARM_INST_PTR inst_base = nullptr;
@ -3565,7 +3559,6 @@ static int clz(unsigned int x) {
MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
unsigned InterpreterMainLoop(ARMul_State* cpu) {
Common::Profiling::ScopeTimer timer_execute(profile_execute);
MICROPROFILE_SCOPE(DynCom_Execute);
GDBStub::BreakpointAddress breakpoint_data;

View File

@ -4,7 +4,6 @@
#include "common/bit_field.h"
#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/memory.h"
#include "core/hle/kernel/event.h"

View File

@ -6,7 +6,6 @@
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/profiler.h"
#include "common/string_util.h"
#include "common/symbols.h"
@ -1031,8 +1030,6 @@ static const FunctionDef SVC_Table[] = {
{0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"},
};
Common::Profiling::TimingCategory profiler_svc("SVC Calls");
static const FunctionDef* GetSVCInfo(u32 func_num) {
if (func_num >= ARRAY_SIZE(SVC_Table)) {
LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num);
@ -1044,7 +1041,6 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
void CallSVC(u32 immediate) {
Common::Profiling::ScopeTimer timer_svc(profiler_svc);
MICROPROFILE_SCOPE(Kernel_SVC);
const FunctionDef* info = GetSVCInfo(immediate);

View File

@ -7,7 +7,6 @@
#include "common/alignment.h"
#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/settings.h"
#include "core/hle/service/gsp_gpu.h"
@ -35,8 +34,6 @@ static int default_attr_counter = 0;
static u32 default_attr_write_buffer[3];
Common::Profiling::TimingCategory category_drawing("Drawing");
// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
static const u32 expand_bits_to_bytes[] = {
0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
@ -186,7 +183,6 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(trigger_draw):
case PICA_REG_INDEX(trigger_draw_indexed):
{
Common::Profiling::ScopeTimer scope_timer(category_drawing);
MICROPROFILE_SCOPE(GPU_Drawing);
#if PICA_LOG_TEV

View File

@ -9,7 +9,6 @@
#include "common/common_types.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/memory.h"
#include "core/hw/gpu.h"
@ -287,7 +286,6 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
return Math::Cross(vec1, vec2).z;
};
static Common::Profiling::TimingCategory rasterization_category("Rasterization");
MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
/**
@ -300,7 +298,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
bool reversed = false)
{
const auto& regs = g_state.regs;
Common::Profiling::ScopeTimer timer(rasterization_category);
MICROPROFILE_SCOPE(GPU_Rasterization);
// vertex positions in rasterizer coordinates

View File

@ -11,7 +11,6 @@
#include "common/file_util.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/memory.h"
#include "core/settings.h"

View File

@ -9,7 +9,6 @@
#include "common/hash.h"
#include "common/microprofile.h"
#include "common/profiler.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/pica.h"
@ -57,13 +56,11 @@ void Shutdown() {
#endif // ARCHITECTURE_x86_64
}
static Common::Profiling::TimingCategory shader_category("Vertex Shader");
MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
auto& config = g_state.regs.vs;
Common::Profiling::ScopeTimer timer(shader_category);
MICROPROFILE_SCOPE(GPU_VertexShader);
state.program_counter = config.main_offset;