From eed55a813eaad6126b9211d1c91e8cae68c8c0d0 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 8 Sep 2018 21:28:19 +0100 Subject: [PATCH] time_stretch: Simplify audio stretcher --- src/audio_core/dsp_interface.cpp | 23 +++-- src/audio_core/dsp_interface.h | 3 +- src/audio_core/time_stretch.cpp | 172 ++++++++++--------------------- src/audio_core/time_stretch.h | 52 +++------- 4 files changed, 88 insertions(+), 162 deletions(-) diff --git a/src/audio_core/dsp_interface.cpp b/src/audio_core/dsp_interface.cpp index 70f2c4340..ce17414a9 100644 --- a/src/audio_core/dsp_interface.cpp +++ b/src/audio_core/dsp_interface.cpp @@ -15,6 +15,7 @@ DspInterface::DspInterface() = default; DspInterface::~DspInterface() = default; void DspInterface::SetSink(const std::string& sink_id, const std::string& audio_device) { + sink.reset(); const SinkDetails& sink_details = GetSinkDetails(sink_id); sink = sink_details.factory(audio_device); sink->SetCallback( @@ -32,7 +33,7 @@ void DspInterface::EnableStretching(bool enable) { return; if (!enable) { - FlushResidualStretcherAudio(); + flushing_time_stretcher = true; } perform_time_stretching = enable; } @@ -51,17 +52,27 @@ void DspInterface::OutputFrame(StereoFrame16& frame) { fifo.Push(frame.data(), frame.size()); } -void DspInterface::FlushResidualStretcherAudio() {} - -void DspInterface::OutputCallback(s16* buffer, size_t num_frames) { - const size_t frames_written = fifo.Pop(buffer, num_frames); +void DspInterface::OutputCallback(s16* buffer, std::size_t num_frames) { + std::size_t frames_written; + if (perform_time_stretching) { + const std::vector in{fifo.Pop()}; + const std::size_t num_in{in.size() / 2}; + frames_written = time_stretcher.Process(in.data(), num_in, buffer, num_frames); + } else if (flushing_time_stretcher) { + time_stretcher.Flush(); + frames_written = time_stretcher.Process(nullptr, 0, buffer, num_frames); + frames_written += fifo.Pop(buffer, num_frames - frames_written); + flushing_time_stretcher = false; + } else { + frames_written = fifo.Pop(buffer, num_frames); + } if (frames_written > 0) { std::memcpy(&last_frame[0], buffer + 2 * (frames_written - 1), 2 * sizeof(s16)); } // Hold last emitted frame; this prevents popping. - for (size_t i = frames_written; i < num_frames; i++) { + for (std::size_t i = frames_written; i < num_frames; i++) { std::memcpy(buffer + 2 * i, &last_frame[0], 2 * sizeof(s16)); } } diff --git a/src/audio_core/dsp_interface.h b/src/audio_core/dsp_interface.h index f10bf9f7e..aef57db87 100644 --- a/src/audio_core/dsp_interface.h +++ b/src/audio_core/dsp_interface.h @@ -85,7 +85,8 @@ private: void OutputCallback(s16* buffer, std::size_t num_frames); std::unique_ptr sink; - bool perform_time_stretching = false; + std::atomic perform_time_stretching = false; + std::atomic flushing_time_stretcher = false; Common::RingBuffer fifo; std::array last_frame{}; TimeStretcher time_stretcher; diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp index df116d233..2f8c34e13 100644 --- a/src/audio_core/time_stretch.cpp +++ b/src/audio_core/time_stretch.cpp @@ -3,143 +3,75 @@ // Refer to the license.txt file included. #include -#include #include -#include +#include +#include #include #include "audio_core/audio_types.h" #include "audio_core/time_stretch.h" -#include "common/common_types.h" #include "common/logging/log.h" -using steady_clock = std::chrono::steady_clock; - namespace AudioCore { -constexpr double MIN_RATIO = 0.1; -constexpr double MAX_RATIO = 100.0; - -static double ClampRatio(double ratio) { - return std::clamp(ratio, MIN_RATIO, MAX_RATIO); +TimeStretcher::TimeStretcher() + : sample_rate(native_sample_rate), sound_touch(std::make_unique()) { + sound_touch->setChannels(2); + sound_touch->setSampleRate(native_sample_rate); + sound_touch->setPitch(1.0); + sound_touch->setTempo(1.0); } -constexpr double MIN_DELAY_TIME = 0.05; // Units: seconds -constexpr double MAX_DELAY_TIME = 0.25; // Units: seconds -constexpr std::size_t DROP_FRAMES_SAMPLE_DELAY = 16000; // Units: samples - -constexpr double SMOOTHING_FACTOR = 0.007; - -struct TimeStretcher::Impl { - soundtouch::SoundTouch soundtouch; - - steady_clock::time_point frame_timer = steady_clock::now(); - std::size_t samples_queued = 0; - - double smoothed_ratio = 1.0; - - double sample_rate = static_cast(native_sample_rate); -}; - -std::vector TimeStretcher::Process(std::size_t samples_in_queue) { - // This is a very simple algorithm without any fancy control theory. It works and is stable. - - double ratio = CalculateCurrentRatio(); - ratio = CorrectForUnderAndOverflow(ratio, samples_in_queue); - impl->smoothed_ratio = - (1.0 - SMOOTHING_FACTOR) * impl->smoothed_ratio + SMOOTHING_FACTOR * ratio; - impl->smoothed_ratio = ClampRatio(impl->smoothed_ratio); - - // SoundTouch's tempo definition the inverse of our ratio definition. - impl->soundtouch.setTempo(1.0 / impl->smoothed_ratio); - - std::vector samples = GetSamples(); - if (samples_in_queue >= DROP_FRAMES_SAMPLE_DELAY) { - samples.clear(); - LOG_DEBUG(Audio, "Dropping frames!"); - } - return samples; -} - -TimeStretcher::TimeStretcher() : impl(std::make_unique()) { - impl->soundtouch.setPitch(1.0); - impl->soundtouch.setChannels(2); - impl->soundtouch.setSampleRate(native_sample_rate); - Reset(); -} - -TimeStretcher::~TimeStretcher() { - impl->soundtouch.clear(); -} +TimeStretcher::~TimeStretcher() = default; void TimeStretcher::SetOutputSampleRate(unsigned int sample_rate) { - impl->sample_rate = static_cast(sample_rate); - impl->soundtouch.setRate(static_cast(native_sample_rate) / impl->sample_rate); + sound_touch->setSampleRate(sample_rate); + sample_rate = native_sample_rate; } -void TimeStretcher::AddSamples(const s16* buffer, std::size_t num_samples) { - impl->soundtouch.putSamples(buffer, static_cast(num_samples)); - impl->samples_queued += num_samples; +std::size_t TimeStretcher::Process(const s16* in, std::size_t num_in, s16* out, + std::size_t num_out) { + const double time_delta = static_cast(num_out) / sample_rate; // seconds + double current_ratio = static_cast(num_in) / static_cast(num_out); + + const double max_latency = 0.25; // seconds + const double max_backlog = sample_rate * max_latency; + const double backlog_fullness = sound_touch->numSamples() / max_backlog; + if (backlog_fullness > 4.0) { + // Too many samples in backlog: Don't push anymore on + num_in = 0; + } + + // We ideally want the backlog to be about 50% full. + // This gives some headroom both ways to prevent underflow and overflow. + // We tweak current_ratio to encourage this. + constexpr double tweak_time_scale = 0.050; // seconds + const double tweak_correction = (backlog_fullness - 0.5) * (time_delta / tweak_time_scale); + current_ratio *= std::pow(1.0 + 2.0 * tweak_correction, tweak_correction < 0 ? 3.0 : 1.0); + + // This low-pass filter smoothes out variance in the calculated stretch ratio. + // The time-scale determines how responsive this filter is. + constexpr double lpf_time_scale = 0.712; // seconds + const double lpf_gain = 1.0 - std::exp(-time_delta / lpf_time_scale); + stretch_ratio += lpf_gain * (current_ratio - stretch_ratio); + + // Place a lower limit of 5% speed. When a game boots up, there will be + // many silence samples. These do not need to be timestretched. + stretch_ratio = std::max(stretch_ratio, 0.05); + sound_touch->setTempo(stretch_ratio); + + LOG_DEBUG(Audio, "{:5}/{:5} ratio:{:0.6f} backlog:{:0.6f}", num_in, num_out, stretch_ratio, + backlog_fullness); + + sound_touch->putSamples(in, num_in); + return sound_touch->receiveSamples(out, num_out); +} + +void TimeStretcher::Clear() { + sound_touch->clear(); } void TimeStretcher::Flush() { - impl->soundtouch.flush(); -} - -void TimeStretcher::Reset() { - impl->soundtouch.setTempo(1.0); - impl->soundtouch.clear(); - impl->smoothed_ratio = 1.0; - impl->frame_timer = steady_clock::now(); - impl->samples_queued = 0; - SetOutputSampleRate(native_sample_rate); -} - -double TimeStretcher::CalculateCurrentRatio() { - const steady_clock::time_point now = steady_clock::now(); - const std::chrono::duration duration = now - impl->frame_timer; - - const double expected_time = - static_cast(impl->samples_queued) / static_cast(native_sample_rate); - const double actual_time = duration.count(); - - double ratio; - if (expected_time != 0) { - ratio = ClampRatio(actual_time / expected_time); - } else { - ratio = impl->smoothed_ratio; - } - - impl->frame_timer = now; - impl->samples_queued = 0; - - return ratio; -} - -double TimeStretcher::CorrectForUnderAndOverflow(double ratio, std::size_t sample_delay) const { - const std::size_t min_sample_delay = - static_cast(MIN_DELAY_TIME * impl->sample_rate); - const std::size_t max_sample_delay = - static_cast(MAX_DELAY_TIME * impl->sample_rate); - - if (sample_delay < min_sample_delay) { - // Make the ratio bigger. - ratio = ratio > 1.0 ? ratio * ratio : sqrt(ratio); - } else if (sample_delay > max_sample_delay) { - // Make the ratio smaller. - ratio = ratio > 1.0 ? sqrt(ratio) : ratio * ratio; - } - - return ClampRatio(ratio); -} - -std::vector TimeStretcher::GetSamples() { - uint available = impl->soundtouch.numSamples(); - - std::vector output(static_cast(available) * 2); - - impl->soundtouch.receiveSamples(output.data(), available); - - return output; + sound_touch->flush(); } } // namespace AudioCore diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h index ef0852ff5..85495fa27 100644 --- a/src/audio_core/time_stretch.h +++ b/src/audio_core/time_stretch.h @@ -4,57 +4,39 @@ #pragma once +#include #include #include -#include #include "common/common_types.h" +namespace soundtouch { +class SoundTouch; +} + namespace AudioCore { -class TimeStretcher final { +class TimeStretcher { public: TimeStretcher(); ~TimeStretcher(); - /** - * Set sample rate for the samples that Process returns. - * @param sample_rate The sample rate. - */ void SetOutputSampleRate(unsigned int sample_rate); - /** - * Add samples to be processed. - * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format. - * @param num_samples Number of samples. - */ - void AddSamples(const s16* sample_buffer, std::size_t num_samples); + /// @param in Input sample buffer + /// @param num_in Number of input frames in `in` + /// @param out Output sample buffer + /// @param num_out Desired number of output frames in `out` + /// @returns Actual number of frames written to `out` + std::size_t Process(const s16* in, std::size_t num_in, s16* out, std::size_t num_out); + + void Clear(); - /// Flush audio remaining in internal buffers. void Flush(); - /// Resets internal state and clears buffers. - void Reset(); - - /** - * Does audio stretching and produces the time-stretched samples. - * Timer calculations use sample_delay to determine how much of a margin we have. - * @param sample_delay How many samples are buffered downstream of this module and haven't been - * played yet. - * @return Samples to play in interleaved stereo PCM16 format. - */ - std::vector Process(std::size_t sample_delay); - private: - struct Impl; - std::unique_ptr impl; - - /// INTERNAL: ratio = wallclock time / emulated time - double CalculateCurrentRatio(); - /// INTERNAL: If we have too many or too few samples downstream, nudge ratio in the appropriate - /// direction. - double CorrectForUnderAndOverflow(double ratio, std::size_t sample_delay) const; - /// INTERNAL: Gets the time-stretched samples from SoundTouch. - std::vector GetSamples(); + unsigned int sample_rate; + std::unique_ptr sound_touch; + double stretch_ratio = 1.0; }; } // namespace AudioCore