summary | refs | log | tree | commit | diff
path: root/libavfilter
diff options
context:
space:
mode:
author Thomas Mundt <tmundt75@gmail.com> 2017-09-19 22:23:23 +0200
committer James Almer <jamrial@gmail.com> 2017-09-23 16:19:58 -0300
commit 40bfaa190c61b6eeff1b76b767c12edd6609967d (patch)
tree 533340612ea536e60bd9189fb110772e4513a49a /libavfilter
parent 58ca446672fec10e851b820ce7df64bd2d1f3a70 (diff)
avfilter/interlace: add support for 10 and 12 bit
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- libavfilter/interlace.h 5
-rw-r--r-- libavfilter/tinterlace.h 5
-rw-r--r-- libavfilter/vf_interlace.c 97
-rw-r--r-- libavfilter/vf_tinterlace.c 78
-rw-r--r-- libavfilter/x86/vf_interlace.asm 80
-rw-r--r-- libavfilter/x86/vf_interlace_init.c 51
-rw-r--r-- libavfilter/x86/vf_tinterlace_init.c 51
7 files changed, 311 insertions, 56 deletions
diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
index 2101b79939..90a0198bdc 100644
--- a/libavfilter/interlace.h
+++ b/libavfilter/interlace.h
@@ -25,9 +25,11 @@
#ifndef AVFILTER_INTERLACE_H
#define AVFILTER_INTERLACE_H
+#include "libavutil/bswap.h"
#include "libavutil/common.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "formats.h"
@@ -55,8 +57,9 @@ typedef struct InterlaceContext {
enum ScanMode scan; // top or bottom field first scanning
int lowpass; // enable or disable low pass filtering
AVFrame *cur, *next; // the two frames from which the new one is obtained
+ const AVPixFmtDescriptor *csp;
void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ ptrdiff_t mref, ptrdiff_t pref, int clip_max);
} InterlaceContext;
void ff_interlace_init_x86(InterlaceContext *interlace);
diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
index cc13a6cc50..b5c39aac52 100644
--- a/libavfilter/tinterlace.h
+++ b/libavfilter/tinterlace.h
@@ -27,7 +27,9 @@
#ifndef AVFILTER_TINTERLACE_H
#define AVFILTER_TINTERLACE_H
+#include "libavutil/bswap.h"
#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
#include "drawutils.h"
#include "avfilter.h"
@@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
int black_linesize[4];
FFDrawContext draw;
FFDrawColor color;
+ const AVPixFmtDescriptor *csp;
void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ ptrdiff_t mref, ptrdiff_t pref, int clip_max);
} TInterlaceContext;
void ff_tinterlace_init_x86(TInterlaceContext *interlace);
diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
index 55bf782af8..0fdfe70f4c 100644
--- a/libavfilter/vf_interlace.c
+++ b/libavfilter/vf_interlace.c
@@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
AVFILTER_DEFINE_CLASS(interlace);
static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref)
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max)
{
const uint8_t *srcp_above = srcp + mref;
const uint8_t *srcp_below = srcp + pref;
@@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
}
}
+static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
+ const uint8_t *src8, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max) // clip_max is never read in this linear variant
+{ // Linear vertical low-pass of one row of 16-bit samples: dst = (1 + 2*cur + above + below) >> 2
+ uint16_t *dstp = (uint16_t *)dst8; // the byte pointers are reinterpreted as 16-bit samples
+ const uint16_t *srcp = (const uint16_t *)src8;
+ const uint16_t *srcp_above = srcp + mref / 2; // mref is halved before indexing uint16_t; presumably a byte offset - confirm against caller
+ const uint16_t *srcp_below = srcp + pref / 2; // same halving for pref
+ int i, src_x;
+ for (i = 0; i < linesize; i++) { // linesize is used as a count of 16-bit samples here
+ // this calculation is an integer representation of
+ // '0.5 * current + 0.25 * above + 0.25 * below'
+ // '1 +' is for rounding.
+ src_x = av_le2ne16(srcp[i]) << 1; // 2 * current sample; av_le2ne16 maps little-endian storage to native order
+ dstp[i] = av_le2ne16((1 + src_x + av_le2ne16(srcp_above[i])
+ + av_le2ne16(srcp_below[i])) >> 2); // the result goes through av_le2ne16 again before being stored
+ }
+}
+
static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref)
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max)
{
const uint8_t *srcp_above = srcp + mref;
const uint8_t *srcp_below = srcp + pref;
@@ -103,11 +122,51 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
}
}
+static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t linesize,
+ const uint8_t *src8, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max) // clip_max: upper bound handed to av_clip below
+{ // Five-tap vertical low-pass of one row of 16-bit samples, followed by an over-sharpening guard
+ uint16_t *dstp = (uint16_t *)dst8; // the byte pointers are reinterpreted as 16-bit samples
+ const uint16_t *srcp = (const uint16_t *)src8;
+ const uint16_t *srcp_above = srcp + mref / 2; // mref is halved before indexing uint16_t; presumably a byte offset - confirm against caller
+ const uint16_t *srcp_below = srcp + pref / 2; // same halving for pref
+ const uint16_t *srcp_above2 = srcp + mref; // un-halved element offset: twice the distance of srcp_above
+ const uint16_t *srcp_below2 = srcp + pref; // twice the distance of srcp_below
+ int i, dst_le, src_le, src_x, src_ab;
+ for (i = 0; i < linesize; i++) { // linesize is used as a count of 16-bit samples here
+ // this calculation is an integer representation of
+ // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
+ // '4 +' is for rounding.
+ src_le = av_le2ne16(srcp[i]); // current sample after av_le2ne16, i.e. usable for arithmetic
+ src_x = src_le << 1; // 2 * current, compared against above + below further down
+ src_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]); // above + below
+ dst_le = av_clip((4 + ((src_le + src_x + src_ab) << 1) // 2 * (3*cur + above + below) + 4
+ - av_le2ne16(srcp_above2[i])
+ - av_le2ne16(srcp_below2[i])) >> 3, 0, clip_max); // divide by 8, then clamp to [0, clip_max]
+ // Prevent over-sharpening:
+ // dst must not exceed src when the average of above and below
+ // is less than src. And the other way around.
+ if (src_ab > src_x) { // above + below > 2 * current
+ if (dst_le < src_le) // filtered value fell below the source: keep the source value
+ dstp[i] = av_le2ne16(src_le);
+ else
+ dstp[i] = av_le2ne16(dst_le);
+ } else if (dst_le > src_le) { // above + below <= 2 * current and filtered value rose above the source
+ dstp[i] = av_le2ne16(src_le);
+ } else
+ dstp[i] = av_le2ne16(dst_le);
+ }
+}
+
static const enum AVPixelFormat formats_supported[] = {
- AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
- AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P,
- AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
- AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE
+ AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+ AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+ AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV444P10LE, // 10-bit entries: only the LE variants are listed
+ AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV422P12LE, AV_PIX_FMT_YUV444P12LE, // 12-bit entries: only the LE variants are listed
+ AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+ AV_PIX_FMT_YUVA420P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA444P10LE, // alpha formats are listed at 10 bits but not at 12
+ AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+ AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE // AV_PIX_FMT_NONE is the last entry (list terminator)
};
static int query_formats(AVFilterContext *ctx)
@@ -150,12 +209,19 @@ static int config_out_props(AVFilterLink *outlink)
outlink->time_base.num *= 2;
outlink->frame_rate.den *= 2;
-
+ s->csp = av_pix_fmt_desc_get(outlink->format);
if (s->lowpass) {
- if (s->lowpass == VLPF_LIN)
- s->lowpass_line = lowpass_line_c;
- else if (s->lowpass == VLPF_CMP)
- s->lowpass_line = lowpass_line_complex_c;
+ if (s->lowpass == VLPF_LIN) {
+ if (s->csp->comp[0].depth > 8)
+ s->lowpass_line = lowpass_line_c_16;
+ else
+ s->lowpass_line = lowpass_line_c;
+ } else if (s->lowpass == VLPF_CMP) {
+ if (s->csp->comp[0].depth > 8)
+ s->lowpass_line = lowpass_line_complex_c_16;
+ else
+ s->lowpass_line = lowpass_line_complex_c;
+ }
if (ARCH_X86)
ff_interlace_init_x86(s);
}
@@ -183,6 +249,7 @@ static void copy_picture_field(InterlaceContext *s,
const uint8_t *srcp = src_frame->data[plane];
int srcp_linesize = src_frame->linesize[plane] * 2;
int dstp_linesize = dst_frame->linesize[plane] * 2;
+ int clip_max = (1 << s->csp->comp[plane].depth) - 1;
av_assert0(cols >= 0 || lines >= 0);
@@ -202,11 +269,13 @@ static void copy_picture_field(InterlaceContext *s,
mref = 0;
else if (j <= (1 + x))
pref = 0;
- s->lowpass_line(dstp, cols, srcp, mref, pref);
+ s->lowpass_line(dstp, cols, srcp, mref, pref, clip_max);
dstp += dstp_linesize;
srcp += srcp_linesize;
}
} else {
+ if (s->csp->comp[plane].depth > 8)
+ cols *= 2;
av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines);
}
}
diff --git a/libavfilter/vf_tinterlace.c b/libavfilter/vf_tinterlace.c
index f934a06b69..163ab7c184 100644
--- a/libavfilter/vf_tinterlace.c
+++ b/libavfilter/vf_tinterlace.c
@@ -78,7 +78,12 @@ static int query_formats(AVFilterContext *ctx)
AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+ AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE,
+ AV_PIX_FMT_YUV440P10LE, AV_PIX_FMT_YUV444P10LE,
+ AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV422P12LE,
+ AV_PIX_FMT_YUV440P12LE, AV_PIX_FMT_YUV444P12LE,
AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+ AV_PIX_FMT_YUVA420P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA444P10LE,
AV_PIX_FMT_GRAY8, FULL_SCALE_YUVJ_FORMATS,
AV_PIX_FMT_NONE
};
@@ -90,7 +95,7 @@ static int query_formats(AVFilterContext *ctx)
}
static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref)
+ ptrdiff_t mref, ptrdiff_t pref, int clip_max)
{
const uint8_t *srcp_above = srcp + mref;
const uint8_t *srcp_below = srcp + pref;
@@ -103,8 +108,26 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
}
}
+static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t width, const uint8_t *src8,
+ ptrdiff_t mref, ptrdiff_t pref, int clip_max) // clip_max is never read in this linear variant
+{ // Linear vertical low-pass of one row of 16-bit samples: dst = (1 + 2*cur + above + below) >> 2
+ uint16_t *dstp = (uint16_t *)dst8; // the byte pointers are reinterpreted as 16-bit samples
+ const uint16_t *srcp = (const uint16_t *)src8;
+ const uint16_t *srcp_above = srcp + mref / 2; // mref is halved before indexing uint16_t; presumably a byte offset - confirm against caller
+ const uint16_t *srcp_below = srcp + pref / 2; // same halving for pref
+ int i, src_x;
+ for (i = 0; i < width; i++) { // width is used as a count of 16-bit samples here
+ // this calculation is an integer representation of
+ // '0.5 * current + 0.25 * above + 0.25 * below'
+ // '1 +' is for rounding.
+ src_x = av_le2ne16(srcp[i]) << 1; // 2 * current sample; av_le2ne16 maps little-endian storage to native order
+ dstp[i] = av_le2ne16((1 + src_x + av_le2ne16(srcp_above[i])
+ + av_le2ne16(srcp_below[i])) >> 2); // the result goes through av_le2ne16 again before being stored
+ }
+}
+
static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref)
+ ptrdiff_t mref, ptrdiff_t pref, int clip_max)
{
const uint8_t *srcp_above = srcp + mref;
const uint8_t *srcp_below = srcp + pref;
@@ -130,6 +153,41 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t
}
}
+static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t width, const uint8_t *src8,
+ ptrdiff_t mref, ptrdiff_t pref, int clip_max) // clip_max: upper bound handed to av_clip below
+{ // Five-tap vertical low-pass of one row of 16-bit samples, followed by an over-sharpening guard
+ uint16_t *dstp = (uint16_t *)dst8; // the byte pointers are reinterpreted as 16-bit samples
+ const uint16_t *srcp = (const uint16_t *)src8;
+ const uint16_t *srcp_above = srcp + mref / 2; // mref is halved before indexing uint16_t; presumably a byte offset - confirm against caller
+ const uint16_t *srcp_below = srcp + pref / 2; // same halving for pref
+ const uint16_t *srcp_above2 = srcp + mref; // un-halved element offset: twice the distance of srcp_above
+ const uint16_t *srcp_below2 = srcp + pref; // twice the distance of srcp_below
+ int i, dst_le, src_le, src_x, src_ab;
+ for (i = 0; i < width; i++) { // width is used as a count of 16-bit samples here
+ // this calculation is an integer representation of
+ // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
+ // '4 +' is for rounding.
+ src_le = av_le2ne16(srcp[i]); // current sample after av_le2ne16, i.e. usable for arithmetic
+ src_x = src_le << 1; // 2 * current, compared against above + below further down
+ src_ab = av_le2ne16(srcp_above[i]) + av_le2ne16(srcp_below[i]); // above + below
+ dst_le = av_clip((4 + ((src_le + src_x + src_ab) << 1) // 2 * (3*cur + above + below) + 4
+ - av_le2ne16(srcp_above2[i])
+ - av_le2ne16(srcp_below2[i])) >> 3, 0, clip_max); // divide by 8, then clamp to [0, clip_max]
+ // Prevent over-sharpening:
+ // dst must not exceed src when the average of above and below
+ // is less than src. And the other way around.
+ if (src_ab > src_x) { // above + below > 2 * current
+ if (dst_le < src_le) // filtered value fell below the source: keep the source value
+ dstp[i] = av_le2ne16(src_le);
+ else
+ dstp[i] = av_le2ne16(dst_le);
+ } else if (dst_le > src_le) { // above + below <= 2 * current and filtered value rose above the source
+ dstp[i] = av_le2ne16(src_le);
+ } else
+ dstp[i] = av_le2ne16(dst_le);
+ }
+}
+
static av_cold void uninit(AVFilterContext *ctx)
{
TInterlaceContext *tinterlace = ctx->priv;
@@ -198,12 +256,19 @@ static int config_out_props(AVFilterLink *outlink)
(tinterlace->flags & TINTERLACE_FLAG_EXACT_TB))
outlink->time_base = tinterlace->preout_time_base;
+ tinterlace->csp = av_pix_fmt_desc_get(outlink->format);
if (tinterlace->flags & TINTERLACE_FLAG_CVLPF) {
- tinterlace->lowpass_line = lowpass_line_complex_c;
+ if (tinterlace->csp->comp[0].depth > 8)
+ tinterlace->lowpass_line = lowpass_line_complex_c_16;
+ else
+ tinterlace->lowpass_line = lowpass_line_complex_c;
if (ARCH_X86)
ff_tinterlace_init_x86(tinterlace);
} else if (tinterlace->flags & TINTERLACE_FLAG_VLPF) {
- tinterlace->lowpass_line = lowpass_line_c;
+ if (tinterlace->csp->comp[0].depth > 8)
+ tinterlace->lowpass_line = lowpass_line_c_16;
+ else
+ tinterlace->lowpass_line = lowpass_line_c;
if (ARCH_X86)
ff_tinterlace_init_x86(tinterlace);
}
@@ -250,6 +315,7 @@ void copy_picture_field(TInterlaceContext *tinterlace,
const uint8_t *srcp = src[plane];
int srcp_linesize = src_linesize[plane] * k;
int dstp_linesize = dst_linesize[plane] * (interleave ? 2 : 1);
+ int clip_max = (1 << tinterlace->csp->comp[plane].depth) - 1;
lines = (lines + (src_field == FIELD_UPPER)) / k;
if (src_field == FIELD_LOWER)
@@ -267,11 +333,13 @@ void copy_picture_field(TInterlaceContext *tinterlace,
if (h >= (lines - x)) mref = 0; // there is no line above
else if (h <= (1 + x)) pref = 0; // there is no line below
- tinterlace->lowpass_line(dstp, cols, srcp, mref, pref);
+ tinterlace->lowpass_line(dstp, cols, srcp, mref, pref, clip_max);
dstp += dstp_linesize;
srcp += srcp_linesize;
}
} else {
+ if (tinterlace->csp->comp[plane].depth > 8)
+ cols *= 2;
av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines);
}
}
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index d0fffd293b..7c0065d4d9 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -30,27 +30,26 @@ pw_4: times 8 dw 4
SECTION .text
-%macro LOWPASS_LINE 0
-cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
+%macro LOWPASS 1
add dstq, hq
add srcq, hq
add mrefq, srcq
add prefq, srcq
neg hq
- pcmpeqb m6, m6
+ pcmpeq%1 m6, m6
.loop:
mova m0, [mrefq+hq]
mova m1, [mrefq+hq+mmsize]
- pavgb m0, [prefq+hq]
- pavgb m1, [prefq+hq+mmsize]
+ pavg%1 m0, [prefq+hq]
+ pavg%1 m1, [prefq+hq+mmsize]
pxor m0, m6
pxor m1, m6
pxor m2, m6, [srcq+hq]
pxor m3, m6, [srcq+hq+mmsize]
- pavgb m0, m2
- pavgb m1, m3
+ pavg%1 m0, m2
+ pavg%1 m1, m3
pxor m0, m6
pxor m1, m6
mova [dstq+hq], m0
@@ -59,7 +58,15 @@ cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
add hq, 2*mmsize
jl .loop
REP_RET
+%endmacro
+
+%macro LOWPASS_LINE 0 ; emits both the byte and the word flavour of the linear low-pass
+cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
+ LOWPASS b ; byte flavour: LOWPASS expands to pcmpeqb / pavgb
+cglobal lowpass_line_16, 5, 5, 7, dst, h, src, mref, pref
+ shl hq, 1 ; double h first, since LOWPASS adds hq directly to the dst/src pointers
+ LOWPASS w ; word flavour: LOWPASS expands to pcmpeqw / pavgw
%endmacro
%macro LOWPASS_LINE_COMPLEX 0
@@ -124,6 +131,65 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
jg .loop
REP_RET
+cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
+ movd m7, DWORD clip_maxm
+ SPLATW m7, m7, 0
+ mova [rsp], m7
+.loop:
+ mova m0, [srcq+mrefq]
+ mova m1, [srcq+mrefq+mmsize]
+ mova m2, [srcq+prefq]
+ mova m3, [srcq+prefq+mmsize]
+ paddw m0, m2
+ paddw m1, m3
+ mova m6, m0
+ mova m7, m1
+ mova m2, [srcq]
+ mova m3, [srcq+mmsize]
+ paddw m0, m2
+ paddw m1, m3
+ psllw m2, 1
+ psllw m3, 1
+ paddw m0, m2
+ paddw m1, m3
+ psllw m0, 1
+ psllw m1, 1
+ pcmpgtw m6, m2
+ pcmpgtw m7, m3
+ mova m2, [srcq+2*mrefq]
+ mova m3, [srcq+2*mrefq+mmsize]
+ mova m4, [srcq+2*prefq]
+ mova m5, [srcq+2*prefq+mmsize]
+ paddw m2, m4
+ paddw m3, m5
+ paddw m0, [pw_4]
+ paddw m1, [pw_4]
+ psubusw m0, m2
+ psubusw m1, m3
+ psrlw m0, 3
+ psrlw m1, 3
+ pminsw m0, [rsp]
+ pminsw m1, [rsp]
+ mova m2, m0
+ mova m3, m1
+ pmaxsw m0, [srcq]
+ pmaxsw m1, [srcq+mmsize]
+ pminsw m2, [srcq]
+ pminsw m3, [srcq+mmsize]
+ pand m0, m6
+ pand m1, m7
+ pandn m6, m2
+ pandn m7, m3
+ por m0, m6
+ por m1, m7
+ mova [dstq], m0
+ mova [dstq+mmsize], m1
+
+ add dstq, 2*mmsize
+ add srcq, 2*mmsize
+ sub hd, mmsize
+ jg .loop
+REP_RET
%endmacro
INIT_XMM sse2
diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
index c0f04dcd97..70fe86ccff 100644
--- a/libavfilter/x86/vf_interlace_init.c
+++ b/libavfilter/x86/vf_interlace_init.c
@@ -27,27 +27,50 @@
#include "libavfilter/interlace.h"
void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
av_cold void ff_interlace_init_x86(InterlaceContext *s)
{
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_SSE2(cpu_flags)) {
- if (s->lowpass == VLPF_LIN)
- s->lowpass_line = ff_lowpass_line_sse2;
- else if (s->lowpass == VLPF_CMP)
- s->lowpass_line = ff_lowpass_line_complex_sse2;
+ if (s->csp->comp[0].depth > 8) { // component 0 deeper than 8 bits: install the 16-bit kernels
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_16_sse2;
+ else if (s->lowpass == VLPF_CMP)
+ s->lowpass_line = ff_lowpass_line_complex_12_sse2; // only SSE2 is wired up for the complex filter in this branch
+ }
+ if (EXTERNAL_AVX(cpu_flags)) // tested after SSE2, so the AVX pointer replaces the SSE2 one for the linear filter
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_16_avx;
+ } else { // depth of 8 or less: same selection with the 8-bit kernels
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_sse2;
+ else if (s->lowpass == VLPF_CMP)
+ s->lowpass_line = ff_lowpass_line_complex_sse2;
+ }
+ if (EXTERNAL_AVX(cpu_flags)) // tested after SSE2, so the AVX pointer replaces the SSE2 one for the linear filter
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_avx;
}
- if (EXTERNAL_AVX(cpu_flags))
- if (s->lowpass == VLPF_LIN)
- s->lowpass_line = ff_lowpass_line_avx;
}
diff --git a/libavfilter/x86/vf_tinterlace_init.c b/libavfilter/x86/vf_tinterlace_init.c
index 2b10e1b74c..209812964d 100644
--- a/libavfilter/x86/vf_tinterlace_init.c
+++ b/libavfilter/x86/vf_tinterlace_init.c
@@ -28,27 +28,50 @@
#include "libavfilter/tinterlace.h"
void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
- const uint8_t *srcp,
- ptrdiff_t mref, ptrdiff_t pref);
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
+
+void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
{
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_SSE2(cpu_flags)) {
- if (!(s->flags & TINTERLACE_FLAG_CVLPF))
- s->lowpass_line = ff_lowpass_line_sse2;
- else
- s->lowpass_line = ff_lowpass_line_complex_sse2;
+ if (s->csp->comp[0].depth > 8) { // component 0 deeper than 8 bits: install the 16-bit kernels
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF)) // CVLPF flag clear: linear filter
+ s->lowpass_line = ff_lowpass_line_16_sse2;
+ else
+ s->lowpass_line = ff_lowpass_line_complex_12_sse2; // only SSE2 is wired up for the complex filter in this branch
+ }
+ if (EXTERNAL_AVX(cpu_flags)) // tested after SSE2, so the AVX pointer replaces the SSE2 one for the linear filter
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+ s->lowpass_line = ff_lowpass_line_16_avx;
+ } else { // depth of 8 or less: same selection with the 8-bit kernels
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF)) // CVLPF flag clear: linear filter
+ s->lowpass_line = ff_lowpass_line_sse2;
+ else
+ s->lowpass_line = ff_lowpass_line_complex_sse2;
+ }
+ if (EXTERNAL_AVX(cpu_flags)) // tested after SSE2, so the AVX pointer replaces the SSE2 one for the linear filter
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF))
+ s->lowpass_line = ff_lowpass_line_avx;
}
- if (EXTERNAL_AVX(cpu_flags))
- if (!(s->flags & TINTERLACE_FLAG_CVLPF))
- s->lowpass_line = ff_lowpass_line_avx;
}