From 2b5166addf9956f0617e6007bc02387cde9927dd Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Thu, 25 May 2023 23:06:50 +0200 Subject: avfilter/af_silenceremove: add real peak detector Rename the old peak detector to a more accurate name. --- doc/filters.texi | 3 +- libavfilter/af_silenceremove.c | 42 ++++++++++++++++----- libavfilter/silenceremove_template.c | 71 ++++++++++++++++++++++++++++++++---- tests/fate/filter-audio.mak | 2 +- 4 files changed, 98 insertions(+), 20 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index 47b26fe92f..6f15b54d10 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -6461,8 +6461,7 @@ With @var{all}, only if all channels are detected as non-silence will cause stopped trimming of silence. @item detection -Set how is silence detected. Can be @code{rms} or @code{peak}. Second is faster -and works better with digital silence which is exactly 0. +Set how is silence detected. Can be @code{avg}, @code{rms} or @code{peak}. Default value is @code{rms}. 
@item window diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c index e0592c2368..28c972f86f 100644 --- a/libavfilter/af_silenceremove.c +++ b/libavfilter/af_silenceremove.c @@ -33,8 +33,10 @@ #include "internal.h" enum SilenceDetect { - D_PEAK, + D_AVG, D_RMS, + D_PEAK, + D_NB }; enum ThresholdMode { @@ -75,6 +77,12 @@ typedef struct SilenceRemoveContext { AVFrame *start_window; AVFrame *stop_window; + int *start_front; + int *start_back; + + int *stop_front; + int *stop_back; + int64_t window_duration; int start_window_pos; @@ -100,8 +108,8 @@ typedef struct SilenceRemoveContext { int detection; - float (*compute_flt)(float *c, float s, float ws, int size); - double (*compute_dbl)(double *c, double s, double ws, int size); + float (*compute_flt)(float *c, float s, float ws, int size, int *front, int *back); + double (*compute_dbl)(double *c, double s, double ws, int size, int *front, int *back); } SilenceRemoveContext; #define OFFSET(x) offsetof(SilenceRemoveContext, x) @@ -120,9 +128,10 @@ static const AVOption silenceremove_options[] = { { "stop_threshold", "set threshold for stop silence detection", OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, AF }, { "stop_silence", "set stop duration of silence part to keep", OFFSET(stop_silence_opt), AV_OPT_TYPE_DURATION, {.i64=0}, 0, INT32_MAX, AF }, { "stop_mode", "set which channel will trigger trimming from end", OFFSET(stop_mode), AV_OPT_TYPE_INT, {.i64=T_ANY}, T_ANY, T_ALL, AF, "mode" }, - { "detection", "set how silence is detected", OFFSET(detection), AV_OPT_TYPE_INT, {.i64=D_RMS}, D_PEAK,D_RMS, AF, "detection" }, - { "peak", "use absolute values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_PEAK},0, 0, AF, "detection" }, - { "rms", "use squared values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_RMS}, 0, 0, AF, "detection" }, + { "detection", "set how silence is detected", OFFSET(detection), AV_OPT_TYPE_INT, {.i64=D_RMS}, 0, D_NB-1, AF, "detection" }, + { "avg", "use mean 
absolute values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_AVG}, 0, 0, AF, "detection" }, + { "rms", "use root mean squared values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_RMS}, 0, 0, AF, "detection" }, + { "peak", "use max absolute values of samples", 0, AV_OPT_TYPE_CONST, {.i64=D_PEAK},0, 0, AF, "detection" }, { "window", "set duration of window for silence detection", OFFSET(window_duration_opt), AV_OPT_TYPE_DURATION, {.i64=20000}, 0, 100000000, AF }, { NULL } }; @@ -201,7 +210,9 @@ static int config_output(AVFilterLink *outlink) s->start_window = ff_get_audio_buffer(outlink, s->window_duration); s->stop_window = ff_get_audio_buffer(outlink, s->window_duration); - if (!s->start_window || !s->stop_window) + s->start_cache = av_calloc(outlink->ch_layout.nb_channels, s->window_duration * sizeof(*s->start_cache)); + s->stop_cache = av_calloc(outlink->ch_layout.nb_channels, s->window_duration * sizeof(*s->stop_cache)); + if (!s->start_window || !s->stop_window || !s->start_cache || !s->stop_cache) return AVERROR(ENOMEM); s->start_queuef = ff_get_audio_buffer(outlink, s->start_silence + 1); @@ -209,14 +220,20 @@ static int config_output(AVFilterLink *outlink) if (!s->start_queuef || !s->stop_queuef) return AVERROR(ENOMEM); - s->start_cache = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->start_cache)); - s->stop_cache = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->stop_cache)); - if (!s->start_cache || !s->stop_cache) + s->start_front = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->start_front)); + s->start_back = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->start_back)); + s->stop_front = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->stop_front)); + s->stop_back = av_calloc(outlink->ch_layout.nb_channels, sizeof(*s->stop_back)); + if (!s->start_front || !s->start_back || !s->stop_front || !s->stop_back) return AVERROR(ENOMEM); clear_windows(s); switch (s->detection) { + case D_AVG: + s->compute_flt = compute_avg_flt; + 
s->compute_dbl = compute_avg_dbl; + break; case D_PEAK: s->compute_flt = compute_peak_flt; s->compute_dbl = compute_peak_dbl; @@ -374,8 +391,13 @@ static av_cold void uninit(AVFilterContext *ctx) av_frame_free(&s->stop_window); av_frame_free(&s->start_queuef); av_frame_free(&s->stop_queuef); + av_freep(&s->start_cache); av_freep(&s->stop_cache); + av_freep(&s->start_front); + av_freep(&s->start_back); + av_freep(&s->stop_front); + av_freep(&s->stop_back); } static const AVFilterPad silenceremove_inputs[] = { diff --git a/libavfilter/silenceremove_template.c b/libavfilter/silenceremove_template.c index 1a12435ee6..ef63ea1e7e 100644 --- a/libavfilter/silenceremove_template.c +++ b/libavfilter/silenceremove_template.c @@ -99,8 +99,8 @@ static void fn(queue_sample)(AVFilterContext *ctx, *window_pos = 0; } -static ftype fn(compute_peak)(ftype *cache, ftype sample, ftype wsample, - int window_size) +static ftype fn(compute_avg)(ftype *cache, ftype sample, ftype wsample, + int window_size, int *unused, int *unused2) { ftype r; @@ -111,8 +111,49 @@ static ftype fn(compute_peak)(ftype *cache, ftype sample, ftype wsample, return r / window_size; } +static ftype fn(compute_peak)(ftype *peak, ftype sample, ftype wsample, + int size, int *ffront, int *bback) +{ + ftype r, abs_sample = FABS(sample); + int front = *ffront; + int back = *bback; + + if (front != back && abs_sample > peak[front]) { + while (front != back) { + front--; + if (front < 0) + front = size - 1; + } + } + + while (front != back && abs_sample > peak[back]) { + back++; + if (back >= size) + back = 0; + } + + if (front != back && FABS(wsample) == peak[front]) { + front--; + if (front < 0) + front = size - 1; + } + + back--; + if (back < 0) + back = size - 1; + av_assert2(back != front); + peak[back] = abs_sample; + + r = peak[front]; + + *ffront = front; + *bback = back; + + return r; +} + static ftype fn(compute_rms)(ftype *cache, ftype sample, ftype wsample, - int window_size) + int window_size, int *unused, 
int *unused2) { ftype r; @@ -143,6 +184,9 @@ static void fn(filter_start)(AVFilterContext *ctx, const int start_duration = s->start_duration; ftype *start_cache = (ftype *)s->start_cache; const int start_silence = s->start_silence; + int window_size = start_window_nb_samples; + int *front = s->start_front; + int *back = s->start_back; fn(queue_sample)(ctx, src, start, &s->start_queue_pos, @@ -153,15 +197,20 @@ static void fn(filter_start)(AVFilterContext *ctx, start_nb_samples, start_window_nb_samples); + if (s->detection != D_PEAK) + window_size = s->start_window_size; + for (int ch = 0; ch < nb_channels; ch++) { ftype start_sample = start[start_pos + ch]; ftype start_ow = startw[start_wpos + ch]; ftype tstart; - tstart = fn(s->compute)(start_cache + ch, + tstart = fn(s->compute)(start_cache + ch * start_window_nb_samples, start_sample, start_ow, - s->start_window_size); + window_size, + front + ch, + back + ch); startw[start_wpos + ch] = start_sample; @@ -226,6 +275,9 @@ static void fn(filter_stop)(AVFilterContext *ctx, ftype *stop_cache = (ftype *)s->stop_cache; const int stop_silence = s->stop_silence; const int restart = s->restart; + int window_size = stop_window_nb_samples; + int *front = s->stop_front; + int *back = s->stop_back; fn(queue_sample)(ctx, src, stop, &s->stop_queue_pos, @@ -236,15 +288,20 @@ static void fn(filter_stop)(AVFilterContext *ctx, stop_nb_samples, stop_window_nb_samples); + if (s->detection != D_PEAK) + window_size = s->stop_window_size; + for (int ch = 0; ch < nb_channels; ch++) { ftype stop_sample = stop[stop_pos + ch]; ftype stop_ow = stopw[stop_wpos + ch]; ftype tstop; - tstop = fn(s->compute)(stop_cache + ch, + tstop = fn(s->compute)(stop_cache + ch * stop_window_nb_samples, stop_sample, stop_ow, - s->stop_window_size); + window_size, + front + ch, + back + ch); stopw[stop_wpos + ch] = stop_sample; diff --git a/tests/fate/filter-audio.mak b/tests/fate/filter-audio.mak index eff32b9f81..445c0f9217 100644 --- 
a/tests/fate/filter-audio.mak +++ b/tests/fate/filter-audio.mak @@ -184,7 +184,7 @@ fate-filter-pan-downmix2: SRC = $(TARGET_PATH)/tests/data/asynth-44100-11.wav fate-filter-pan-downmix2: CMD = framecrc -ss 3.14 -i $(SRC) -frames:a 20 -filter:a "pan=5C|c0=0.7*c0+0.7*c10|c1=c9|c2=c8|c3=c7|c4=c6" FATE_AFILTER-$(call ALLYES, LAVFI_INDEV, AEVALSRC_FILTER SILENCEREMOVE_FILTER) += fate-filter-silenceremove -fate-filter-silenceremove: CMD = framecrc -auto_conversion_filters -f lavfi -i "aevalsrc=between(t\,1\,2)+between(t\,4\,5)+between(t\,7\,9):d=10:n=8192,silenceremove=start_periods=0:start_duration=0:start_threshold=0:stop_periods=-1:stop_duration=0:stop_threshold=-90dB:window=0:detection=peak" +fate-filter-silenceremove: CMD = framecrc -auto_conversion_filters -f lavfi -i "aevalsrc=between(t\,1\,2)+between(t\,4\,5)+between(t\,7\,9):d=10:n=8192,silenceremove=start_periods=0:start_duration=0:start_threshold=0:stop_periods=-1:stop_duration=0:stop_threshold=-90dB:window=0:detection=avg" FATE_AFILTER_SAMPLES-$(call FILTERDEMDECENCMUX, STEREOTOOLS, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-stereotools fate-filter-stereotools: SRC = $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -- cgit v1.2.3