diff options
author | Thomas Mundt <tmundt75@gmail.com> | 2017-09-19 22:23:23 +0200 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-09-23 16:19:58 -0300 |
commit | 40bfaa190c61b6eeff1b76b767c12edd6609967d (patch) | |
tree | 533340612ea536e60bd9189fb110772e4513a49a /libavfilter | |
parent | 58ca446672fec10e851b820ce7df64bd2d1f3a70 (diff) |
avfilter/interlace: add support for 10 and 12 bit
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- | libavfilter/interlace.h | 5 | ||||
-rw-r--r-- | libavfilter/tinterlace.h | 5 | ||||
-rw-r--r-- | libavfilter/vf_interlace.c | 97 | ||||
-rw-r--r-- | libavfilter/vf_tinterlace.c | 78 | ||||
-rw-r--r-- | libavfilter/x86/vf_interlace.asm | 80 | ||||
-rw-r--r-- | libavfilter/x86/vf_interlace_init.c | 51 | ||||
-rw-r--r-- | libavfilter/x86/vf_tinterlace_init.c | 51 |
7 files changed, 311 insertions, 56 deletions
```diff
diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
index 2101b79939..90a0198bdc 100644
--- a/libavfilter/interlace.h
+++ b/libavfilter/interlace.h
@@ -25,9 +25,11 @@
 #ifndef AVFILTER_INTERLACE_H
 #define AVFILTER_INTERLACE_H
 
+#include "libavutil/bswap.h"
 #include "libavutil/common.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 
 #include "avfilter.h"
 #include "formats.h"
@@ -55,8 +57,9 @@ typedef struct InterlaceContext {
     enum ScanMode scan; // top or bottom field first scanning
     int lowpass; // enable or disable low pass filtering
     AVFrame *cur, *next; // the two frames from which the new one is obtained
+    const AVPixFmtDescriptor *csp;
     void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
-                         ptrdiff_t mref, ptrdiff_t pref);
+                         ptrdiff_t mref, ptrdiff_t pref, int clip_max);
 } InterlaceContext;
 
 void ff_interlace_init_x86(InterlaceContext *interlace);
diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
index cc13a6cc50..b5c39aac52 100644
--- a/libavfilter/tinterlace.h
+++ b/libavfilter/tinterlace.h
@@ -27,7 +27,9 @@
 #ifndef AVFILTER_TINTERLACE_H
 #define AVFILTER_TINTERLACE_H
 
+#include "libavutil/bswap.h"
 #include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 #include "drawutils.h"
 #include "avfilter.h"
 
@@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
     int black_linesize[4];
     FFDrawContext draw;
     FFDrawColor color;
+    const AVPixFmtDescriptor *csp;
     void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
-                         ptrdiff_t mref, ptrdiff_t pref);
+                         ptrdiff_t mref, ptrdiff_t pref, int clip_max);
 } TInterlaceContext;
 
 void ff_tinterlace_init_x86(TInterlaceContext *interlace);
diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
index 55bf782af8..0fdfe70f4c 100644
--- a/libavfilter/vf_interlace.c
+++ b/libavfilter/vf_interlace.c
@@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
 AVFILTER_DEFINE_CLASS(interlace);
 
 static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
-                           const uint8_t *srcp,
-                           ptrdiff_t mref, ptrdiff_t pref)
+                           const uint8_t *srcp, ptrdiff_t mref,
+                           ptrdiff_t pref, int clip_max)
 {
     const uint8_t *srcp_above = srcp + mref;
     const uint8_t *srcp_below = srcp + pref;
@@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
     }
 }
 
+static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
+                              const uint8_t *src8, ptrdiff_t mref,
+                              ptrdiff_t pref, int clip_max)
+{
+    uint16_t *dstp = (uint16_t *)dst8;
+    const uint16_t *srcp = (const uint16_t *)src8;
+    const uint16_t *srcp_above = srcp + mref / 2;
+    const uint16_t *srcp_below = srcp + pref / 2;
+    int i, src_x;
+    for (i = 0; i < linesize; i++) {
+        // this calculation is an integer representation of
+        // '0.5 * current + 0.25 * above + 0.25 * below'
+        // '1 +' is for rounding.
+        src_x = av_le2ne16(srcp[i]) << 1;
+        dstp[i] = av_le2ne16((1 + src_x + av_le2ne16(srcp_above[i])
+                             + av_le2ne16(srcp_below[i])) >> 2);
+    }
+}
+
 static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
-                                   const uint8_t *srcp,
-                                   ptrdiff_t mref, ptrdiff_t pref)
+                                   const uint8_t *srcp, ptrdiff_t mref,
+                                   ptrdiff_t pref, int clip_max)
 {
     const uint8_t *srcp_above = srcp + mref;
     const uint8_t *srcp_below = srcp + pref;
@@ -103,11 +122,51 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
     }
 }
 
+static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t linesize,
+                                      const uint8_t *src8, ptrdiff_t mref,
+                                      ptrdiff_t pref, int clip_max)
+{
+    uint16_t *dstp = (uint16_t *)dst8;
+    const uint16_t *srcp = (const uint16_t *)src8;
+    const uint16_t *srcp_above = srcp + mref / 2;
+    const uint16_t *srcp_below = srcp + pref / 2;
+    const uint16_t *srcp_above2 = srcp + mref;
+    const uint16_t *srcp_below2 = srcp + pref;
+    int i, dst_le, src_le, src_x, src_ab;
+    for (i = 0; i < linesize; i++) {
+        // this calculation is an integer representation of
+        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
+        // '4 +' is for rounding.
+        src_le = av_le2ne16(srcp[i]);
+        src_x = src_le << 1;
+        src_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
+        dst_le = av_clip((4 + ((src_le + src_x + src_ab) << 1)
+                          - av_le2ne16(srcp_above2[i])
+                          - av_le2ne16(srcp_below2[i])) >> 3, 0, clip_max);
+        // Prevent over-sharpening:
+        // dst must not exceed src when the average of above and below
+        // is less than src. And the other way around.
+        if (src_ab > src_x) {
+            if (dst_le < src_le)
+                dstp[i] = av_le2ne16(src_le);
+            else
+                dstp[i] = av_le2ne16(dst_le);
+        } else if (dst_le > src_le) {
+            dstp[i] = av_le2ne16(src_le);
+        } else
+            dstp[i] = av_le2ne16(dst_le);
+    }
+}
+
 static const enum AVPixelFormat formats_supported[] = {
-    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
-    AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P,
-    AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
-    AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE
+    AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV444P10LE,
+    AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV422P12LE, AV_PIX_FMT_YUV444P12LE,
+    AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+    AV_PIX_FMT_YUVA420P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA444P10LE,
+    AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE
 };
 
 static int query_formats(AVFilterContext *ctx)
@@ -150,12 +209,19 @@ static int config_out_props(AVFilterLink *outlink)
     outlink->time_base.num *= 2;
     outlink->frame_rate.den *= 2;
 
-
+    s->csp = av_pix_fmt_desc_get(outlink->format);
     if (s->lowpass) {
-        if (s->lowpass == VLPF_LIN)
-            s->lowpass_line = lowpass_line_c;
-        else if (s->lowpass == VLPF_CMP)
-            s->lowpass_line = lowpass_line_complex_c;
+        if (s->lowpass == VLPF_LIN) {
+            if (s->csp->comp[0].depth > 8)
+                s->lowpass_line = lowpass_line_c_16;
+            else
+                s->lowpass_line = lowpass_line_c;
+        } else if (s->lowpass == VLPF_CMP) {
+            if (s->csp->comp[0].depth > 8)
+                s->lowpass_line = lowpass_line_complex_c_16;
+            else
+                s->lowpass_line = lowpass_line_complex_c;
+        }
         if (ARCH_X86)
             ff_interlace_init_x86(s);
     }
@@ -183,6 +249,7 @@ static void copy_picture_field(InterlaceContext *s,
         const uint8_t *srcp = src_frame->data[plane];
         int srcp_linesize = src_frame->linesize[plane] * 2;
         int dstp_linesize = dst_frame->linesize[plane] * 2;
+        int clip_max = (1 << s->csp->comp[plane].depth) - 1;
 
         av_assert0(cols >= 0 || lines >= 0);
 
@@ -202,11 +269,13 @@ static void copy_picture_field(InterlaceContext *s,
                     mref = 0;
                 else if (j <= (1 + x))
                     pref = 0;
-                s->lowpass_line(dstp, cols, srcp, mref, pref);
+                s->lowpass_line(dstp, cols, srcp, mref, pref, clip_max);
                 dstp += dstp_linesize;
                 srcp += srcp_linesize;
             }
         } else {
+            if (s->csp->comp[plane].depth > 8)
+                cols *= 2;
             av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines);
         }
     }
diff --git a/libavfilter/vf_tinterlace.c b/libavfilter/vf_tinterlace.c
index f934a06b69..163ab7c184 100644
--- a/libavfilter/vf_tinterlace.c
+++ b/libavfilter/vf_tinterlace.c
@@ -78,7 +78,12 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE,
+        AV_PIX_FMT_YUV440P10LE, AV_PIX_FMT_YUV444P10LE,
+        AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV422P12LE,
+        AV_PIX_FMT_YUV440P12LE, AV_PIX_FMT_YUV444P12LE,
         AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_YUVA420P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA444P10LE,
         AV_PIX_FMT_GRAY8, FULL_SCALE_YUVJ_FORMATS,
         AV_PIX_FMT_NONE
     };
@@ -90,7 +95,7 @@ static int query_formats(AVFilterContext *ctx)
 }
 
 static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
-                           ptrdiff_t mref, ptrdiff_t pref)
+                           ptrdiff_t mref, ptrdiff_t pref, int clip_max)
 {
     const uint8_t *srcp_above = srcp + mref;
     const uint8_t *srcp_below = srcp + pref;
@@ -103,8 +108,26 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
     }
 }
 
+static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t width, const uint8_t *src8,
+                              ptrdiff_t mref, ptrdiff_t pref, int clip_max)
+{
+    uint16_t *dstp = (uint16_t *)dst8;
+    const uint16_t *srcp = (const uint16_t *)src8;
+    const uint16_t *srcp_above = srcp + mref / 2;
+    const uint16_t *srcp_below = srcp + pref / 2;
+    int i, src_x;
+    for (i = 0; i < width; i++) {
+        // this calculation is an integer representation of
+        // '0.5 * current + 0.25 * above + 0.25 * below'
+        // '1 +' is for rounding.
+        src_x = av_le2ne16(srcp[i]) << 1;
+        dstp[i] = av_le2ne16((1 + src_x + av_le2ne16(srcp_above[i])
+                             + av_le2ne16(srcp_below[i])) >> 2);
+    }
+}
+
 static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
-                                   ptrdiff_t mref, ptrdiff_t pref)
+                                   ptrdiff_t mref, ptrdiff_t pref, int clip_max)
 {
     const uint8_t *srcp_above = srcp + mref;
     const uint8_t *srcp_below = srcp + pref;
@@ -130,6 +153,41 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t
     }
 }
 
+static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t width, const uint8_t *src8,
+                                      ptrdiff_t mref, ptrdiff_t pref, int clip_max)
+{
+    uint16_t *dstp = (uint16_t *)dst8;
+    const uint16_t *srcp = (const uint16_t *)src8;
+    const uint16_t *srcp_above = srcp + mref / 2;
+    const uint16_t *srcp_below = srcp + pref / 2;
+    const uint16_t *srcp_above2 = srcp + mref;
+    const uint16_t *srcp_below2 = srcp + pref;
+    int i, dst_le, src_le, src_x, src_ab;
+    for (i = 0; i < width; i++) {
+        // this calculation is an integer representation of
+        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
+        // '4 +' is for rounding.
+        src_le = av_le2ne16(srcp[i]);
+        src_x = src_le << 1;
+        src_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]);
+        dst_le = av_clip((4 + ((src_le + src_x + src_ab) << 1)
+                          - av_le2ne16(srcp_above2[i])
+                          - av_le2ne16(srcp_below2[i])) >> 3, 0, clip_max);
+        // Prevent over-sharpening:
+        // dst must not exceed src when the average of above and below
+        // is less than src. And the other way around.
+        if (src_ab > src_x) {
+            if (dst_le < src_le)
+                dstp[i] = av_le2ne16(src_le);
+            else
+                dstp[i] = av_le2ne16(dst_le);
+        } else if (dst_le > src_le) {
+            dstp[i] = av_le2ne16(src_le);
+        } else
+            dstp[i] = av_le2ne16(dst_le);
+    }
+}
+
 static av_cold void uninit(AVFilterContext *ctx)
 {
     TInterlaceContext *tinterlace = ctx->priv;
@@ -198,12 +256,19 @@ static int config_out_props(AVFilterLink *outlink)
         (tinterlace->flags & TINTERLACE_FLAG_EXACT_TB))
         outlink->time_base = tinterlace->preout_time_base;
 
+    tinterlace->csp = av_pix_fmt_desc_get(outlink->format);
     if (tinterlace->flags & TINTERLACE_FLAG_CVLPF) {
-        tinterlace->lowpass_line = lowpass_line_complex_c;
+        if (tinterlace->csp->comp[0].depth > 8)
+            tinterlace->lowpass_line = lowpass_line_complex_c_16;
+        else
+            tinterlace->lowpass_line = lowpass_line_complex_c;
         if (ARCH_X86)
             ff_tinterlace_init_x86(tinterlace);
     } else if (tinterlace->flags & TINTERLACE_FLAG_VLPF) {
-        tinterlace->lowpass_line = lowpass_line_c;
+        if (tinterlace->csp->comp[0].depth > 8)
+            tinterlace->lowpass_line = lowpass_line_c_16;
+        else
+            tinterlace->lowpass_line = lowpass_line_c;
         if (ARCH_X86)
             ff_tinterlace_init_x86(tinterlace);
     }
@@ -250,6 +315,7 @@ void copy_picture_field(TInterlaceContext *tinterlace,
         const uint8_t *srcp = src[plane];
         int srcp_linesize = src_linesize[plane] * k;
         int dstp_linesize = dst_linesize[plane] * (interleave ? 2 : 1);
+        int clip_max = (1 << tinterlace->csp->comp[plane].depth) - 1;
 
         lines = (lines + (src_field == FIELD_UPPER)) / k;
         if (src_field == FIELD_LOWER)
@@ -267,11 +333,13 @@ void copy_picture_field(TInterlaceContext *tinterlace,
                 if (h >= (lines - x)) mref = 0; // there is no line above
                 else if (h <= (1 + x)) pref = 0; // there is no line below
 
-                tinterlace->lowpass_line(dstp, cols, srcp, mref, pref);
+                tinterlace->lowpass_line(dstp, cols, srcp, mref, pref, clip_max);
                 dstp += dstp_linesize;
                 srcp += srcp_linesize;
             }
         } else {
+            if (tinterlace->csp->comp[plane].depth > 8)
+                cols *= 2;
             av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines);
         }
     }
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index d0fffd293b..7c0065d4d9 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -30,27 +30,26 @@ pw_4: times 8 dw 4
 
 SECTION .text
 
-%macro LOWPASS_LINE 0
-cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
+%macro LOWPASS 1
     add dstq, hq
     add srcq, hq
     add mrefq, srcq
     add prefq, srcq
     neg hq
 
-    pcmpeqb m6, m6
+    pcmpeq%1 m6, m6
 
 .loop:
     mova m0, [mrefq+hq]
     mova m1, [mrefq+hq+mmsize]
-    pavgb m0, [prefq+hq]
-    pavgb m1, [prefq+hq+mmsize]
+    pavg%1 m0, [prefq+hq]
+    pavg%1 m1, [prefq+hq+mmsize]
     pxor m0, m6
     pxor m1, m6
     pxor m2, m6, [srcq+hq]
     pxor m3, m6, [srcq+hq+mmsize]
-    pavgb m0, m2
-    pavgb m1, m3
+    pavg%1 m0, m2
+    pavg%1 m1, m3
     pxor m0, m6
     pxor m1, m6
     mova [dstq+hq], m0
@@ -59,7 +58,15 @@ cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
     add hq, 2*mmsize
     jl .loop
 REP_RET
+%endmacro
+
+%macro LOWPASS_LINE 0
+cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
+    LOWPASS b
 
+cglobal lowpass_line_16, 5, 5, 7, dst, h, src, mref, pref
+    shl hq, 1
+    LOWPASS w
 %endmacro
 
 %macro LOWPASS_LINE_COMPLEX 0
@@ -124,6 +131,65 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
     jg .loop
 REP_RET
 
+cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
+    movd m7, DWORD clip_maxm
+    SPLATW m7, m7, 0
+    mova [rsp], m7
+.loop:
+    mova m0, [srcq+mrefq]
+    mova m1, [srcq+mrefq+mmsize]
+    mova m2, [srcq+prefq]
+    mova m3, [srcq+prefq+mmsize]
+    paddw m0, m2
+    paddw m1, m3
+    mova m6, m0
+    mova m7, m1
+    mova m2, [srcq]
+    mova m3, [srcq+mmsize]
+    paddw m0, m2
+    paddw m1, m3
+    psllw m2, 1
+    psllw m3, 1
+    paddw m0, m2
+    paddw m1, m3
+    psllw m0, 1
+    psllw m1, 1
+    pcmpgtw m6, m2
+    pcmpgtw m7, m3
+    mova m2, [srcq+2*mrefq]
+    mova m3, [srcq+2*mrefq+mmsize]
+    mova m4, [srcq+2*prefq]
+    mova m5, [srcq+2*prefq+mmsize]
+    paddw m2, m4
+    paddw m3, m5
+    paddw m0, [pw_4]
+    paddw m1, [pw_4]
+    psubusw m0, m2
+    psubusw m1, m3
+    psrlw m0, 3
+    psrlw m1, 3
+    pminsw m0, [rsp]
+    pminsw m1, [rsp]
+    mova m2, m0
+    mova m3, m1
+    pmaxsw m0, [srcq]
+    pmaxsw m1, [srcq+mmsize]
+    pminsw m2, [srcq]
+    pminsw m3, [srcq+mmsize]
+    pand m0, m6
+    pand m1, m7
+    pandn m6, m2
+    pandn m7, m3
+    por m0, m6
+    por m1, m7
+    mova [dstq], m0
+    mova [dstq+mmsize], m1
+
+    add dstq, 2*mmsize
+    add srcq, 2*mmsize
+    sub hd, mmsize
+    jg .loop
+REP_RET
 %endmacro
 
 INIT_XMM sse2
diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
index c0f04dcd97..70fe86ccff 100644
--- a/libavfilter/x86/vf_interlace_init.c
+++ b/libavfilter/x86/vf_interlace_init.c
@@ -27,27 +27,50 @@
 #include "libavfilter/interlace.h"
 
 void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
-                          const uint8_t *srcp,
-                          ptrdiff_t mref, ptrdiff_t pref);
+                          const uint8_t *srcp, ptrdiff_t mref,
+                          ptrdiff_t pref, int clip_max);
 void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
-                          const uint8_t *srcp,
-                          ptrdiff_t mref, ptrdiff_t pref);
+                          const uint8_t *srcp, ptrdiff_t mref,
+                          ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
+                             const uint8_t *srcp, ptrdiff_t mref,
+                             ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
+                             const uint8_t *srcp, ptrdiff_t mref,
+                             ptrdiff_t pref, int clip_max);
 
 void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
-                                  const uint8_t *srcp,
-                                  ptrdiff_t mref, ptrdiff_t pref);
+                                  const uint8_t *srcp, ptrdiff_t mref,
+                                  ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize,
+                                     const uint8_t *srcp, ptrdiff_t mref,
+                                     ptrdiff_t pref, int clip_max);
 
 av_cold void ff_interlace_init_x86(InterlaceContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_SSE2(cpu_flags)) {
-        if (s->lowpass == VLPF_LIN)
-            s->lowpass_line = ff_lowpass_line_sse2;
-        else if (s->lowpass == VLPF_CMP)
-            s->lowpass_line = ff_lowpass_line_complex_sse2;
+    if (s->csp->comp[0].depth > 8) {
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            if (s->lowpass == VLPF_LIN)
+                s->lowpass_line = ff_lowpass_line_16_sse2;
+            else if (s->lowpass == VLPF_CMP)
+                s->lowpass_line = ff_lowpass_line_complex_12_sse2;
+        }
+        if (EXTERNAL_AVX(cpu_flags))
+            if (s->lowpass == VLPF_LIN)
+                s->lowpass_line = ff_lowpass_line_16_avx;
+    } else {
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            if (s->lowpass == VLPF_LIN)
+                s->lowpass_line = ff_lowpass_line_sse2;
+            else if (s->lowpass == VLPF_CMP)
+                s->lowpass_line = ff_lowpass_line_complex_sse2;
+        }
+        if (EXTERNAL_AVX(cpu_flags))
+            if (s->lowpass == VLPF_LIN)
+                s->lowpass_line = ff_lowpass_line_avx;
     }
-    if (EXTERNAL_AVX(cpu_flags))
-        if (s->lowpass == VLPF_LIN)
-            s->lowpass_line = ff_lowpass_line_avx;
 }
diff --git a/libavfilter/x86/vf_tinterlace_init.c b/libavfilter/x86/vf_tinterlace_init.c
index 2b10e1b74c..209812964d 100644
--- a/libavfilter/x86/vf_tinterlace_init.c
+++ b/libavfilter/x86/vf_tinterlace_init.c
@@ -28,27 +28,50 @@
 #include "libavfilter/tinterlace.h"
 
 void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
-                          const uint8_t *srcp,
-                          ptrdiff_t mref, ptrdiff_t pref);
+                          const uint8_t *srcp, ptrdiff_t mref,
+                          ptrdiff_t pref, int clip_max);
 void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
-                          const uint8_t *srcp,
-                          ptrdiff_t mref, ptrdiff_t pref);
+                          const uint8_t *srcp, ptrdiff_t mref,
+                          ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
+                             const uint8_t *srcp, ptrdiff_t mref,
+                             ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
+                             const uint8_t *srcp, ptrdiff_t mref,
+                             ptrdiff_t pref, int clip_max);
 
 void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
-                                  const uint8_t *srcp,
-                                  ptrdiff_t mref, ptrdiff_t pref);
+                                  const uint8_t *srcp, ptrdiff_t mref,
+                                  ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize,
+                                     const uint8_t *srcp, ptrdiff_t mref,
+                                     ptrdiff_t pref, int clip_max);
 
 av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_SSE2(cpu_flags)) {
-        if (!(s->flags & TINTERLACE_FLAG_CVLPF))
-            s->lowpass_line = ff_lowpass_line_sse2;
-        else
-            s->lowpass_line = ff_lowpass_line_complex_sse2;
+    if (s->csp->comp[0].depth > 8) {
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+                s->lowpass_line = ff_lowpass_line_16_sse2;
+            else
+                s->lowpass_line = ff_lowpass_line_complex_12_sse2;
+        }
+        if (EXTERNAL_AVX(cpu_flags))
+            if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+                s->lowpass_line = ff_lowpass_line_16_avx;
+    } else {
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+                s->lowpass_line = ff_lowpass_line_sse2;
+            else
+                s->lowpass_line = ff_lowpass_line_complex_sse2;
+        }
+        if (EXTERNAL_AVX(cpu_flags))
+            if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+                s->lowpass_line = ff_lowpass_line_avx;
     }
-    if (EXTERNAL_AVX(cpu_flags))
-        if (!(s->flags & TINTERLACE_FLAG_CVLPF))
-            s->lowpass_line = ff_lowpass_line_avx;
 }
```