diff options
Diffstat (limited to 'libavfilter/libmpcodecs')
-rw-r--r-- | libavfilter/libmpcodecs/av_helpers.h | 27 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/cpudetect.h | 60 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/img_format.c | 244 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/img_format.h | 309 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/libvo/fastmemcpy.h | 99 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/libvo/video_out.h | 300 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/mp_image.c | 257 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/mp_image.h | 159 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/mp_msg.h | 166 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/mpc_info.h | 43 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vf.h | 169 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vf_eq.c | 240 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vf_eq2.c | 519 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vf_fspp.c | 2125 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vf_ilpack.c | 458 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vf_pp7.c | 491 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vf_softpulldown.c | 163 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vf_uspp.c | 394 | ||||
-rw-r--r-- | libavfilter/libmpcodecs/vfcap.h | 56 |
19 files changed, 6279 insertions, 0 deletions
diff --git a/libavfilter/libmpcodecs/av_helpers.h b/libavfilter/libmpcodecs/av_helpers.h new file mode 100644 index 0000000000..90b67d5a0f --- /dev/null +++ b/libavfilter/libmpcodecs/av_helpers.h @@ -0,0 +1,27 @@ +/* + * Generic libav* helpers + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_AV_HELPERS_H +#define MPLAYER_AV_HELPERS_H + +void ff_init_avcodec(void); +void ff_init_avformat(void); + +#endif /* MPLAYER_AV_HELPERS_H */ diff --git a/libavfilter/libmpcodecs/cpudetect.h b/libavfilter/libmpcodecs/cpudetect.h new file mode 100644 index 0000000000..710f6e6513 --- /dev/null +++ b/libavfilter/libmpcodecs/cpudetect.h @@ -0,0 +1,60 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_CPUDETECT_H +#define MPLAYER_CPUDETECT_H + +#define CPUTYPE_I386 3 +#define CPUTYPE_I486 4 +#define CPUTYPE_I586 5 +#define CPUTYPE_I686 6 + +#include "libavutil/x86_cpu.h" + +typedef struct cpucaps_s { + int cpuType; + int cpuModel; + int cpuStepping; + int hasMMX; + int hasMMX2; + int has3DNow; + int has3DNowExt; + int hasSSE; + int hasSSE2; + int hasSSE3; + int hasSSSE3; + int hasSSE4; + int hasSSE42; + int hasSSE4a; + int hasAVX; + int isX86; + unsigned cl_size; /* size of cache line */ + int hasAltiVec; + int hasTSC; +} CpuCaps; + +extern CpuCaps ff_gCpuCaps; + +void ff_do_cpuid(unsigned int ax, unsigned int *p); + +void ff_GetCpuCaps(CpuCaps *caps); + +/* returned value is malloc()'ed so free() it after use */ +char *ff_GetCpuFriendlyName(unsigned int regs[], unsigned int regs2[]); + +#endif /* MPLAYER_CPUDETECT_H */ diff --git a/libavfilter/libmpcodecs/img_format.c b/libavfilter/libmpcodecs/img_format.c new file mode 100644 index 0000000000..dd07f00a0a --- /dev/null +++ b/libavfilter/libmpcodecs/img_format.c @@ -0,0 +1,244 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "config.h" +#include "img_format.h" +#include "stdio.h" +#include "libavutil/bswap.h" + +const char *ff_vo_format_name(int format) +{ + static char unknown_format[20]; + switch(format) + { + case IMGFMT_RGB1: return "RGB 1-bit"; + case IMGFMT_RGB4: return "RGB 4-bit"; + case IMGFMT_RG4B: return "RGB 4-bit per byte"; + case IMGFMT_RGB8: return "RGB 8-bit"; + case IMGFMT_RGB12: return "RGB 12-bit"; + case IMGFMT_RGB15: return "RGB 15-bit"; + case IMGFMT_RGB16: return "RGB 16-bit"; + case IMGFMT_RGB24: return "RGB 24-bit"; +// case IMGFMT_RGB32: return "RGB 32-bit"; + case IMGFMT_RGB48LE: return "RGB 48-bit LE"; + case IMGFMT_RGB48BE: return "RGB 48-bit BE"; + case IMGFMT_RGB64LE: return "RGB 64-bit LE"; + case IMGFMT_RGB64BE: return "RGB 64-bit BE"; + case IMGFMT_BGR1: return "BGR 1-bit"; + case IMGFMT_BGR4: return "BGR 4-bit"; + case IMGFMT_BG4B: return "BGR 4-bit per byte"; + case IMGFMT_BGR8: return "BGR 8-bit"; + case IMGFMT_BGR12: return "BGR 12-bit"; + case IMGFMT_BGR15: return "BGR 15-bit"; + case IMGFMT_BGR16: return "BGR 16-bit"; + case IMGFMT_BGR24: return "BGR 24-bit"; +// case IMGFMT_BGR32: return "BGR 32-bit"; + case IMGFMT_ABGR: return "ABGR"; + case IMGFMT_BGRA: return "BGRA"; + case IMGFMT_ARGB: return "ARGB"; + case IMGFMT_RGBA: return "RGBA"; + case IMGFMT_XYZ12LE: return "XYZ 36-bit LE"; + case IMGFMT_XYZ12BE: return "XYZ 36-bit BE"; + case IMGFMT_GBR24P: return "Planar GBR 24-bit"; + case IMGFMT_GBR12P: return "Planar GBR 36-bit"; + case IMGFMT_GBR14P: return "Planar GBR 42-bit"; + case IMGFMT_YVU9: return "Planar YVU9"; + case IMGFMT_IF09: return "Planar IF09"; + case IMGFMT_YV12: return "Planar YV12"; + case IMGFMT_I420: return "Planar I420"; + case IMGFMT_IYUV: return "Planar IYUV"; + case IMGFMT_CLPL: return "Planar CLPL"; + case IMGFMT_Y800: return "Planar Y800"; + case IMGFMT_Y8: return "Planar Y8"; + case IMGFMT_Y8A: return "Planar Y8 with alpha"; + case IMGFMT_Y16_LE: return "Planar Y16 little-endian"; + case IMGFMT_Y16_BE: return "Planar Y16 big-endian"; + case IMGFMT_420P16_LE: return "Planar 420P 16-bit little-endian"; + case IMGFMT_420P16_BE: return "Planar 420P 16-bit big-endian"; + case IMGFMT_420P14_LE: return "Planar 420P 14-bit little-endian"; + case IMGFMT_420P14_BE: return "Planar 420P 14-bit big-endian"; + case IMGFMT_420P12_LE: return "Planar 420P 12-bit little-endian"; + case IMGFMT_420P12_BE: return "Planar 420P 12-bit big-endian"; + case IMGFMT_420P10_LE: return "Planar 420P 10-bit little-endian"; + case IMGFMT_420P10_BE: return "Planar 420P 10-bit big-endian"; + case IMGFMT_420P9_LE: return "Planar 420P 9-bit little-endian"; + case IMGFMT_420P9_BE: return "Planar 420P 9-bit big-endian"; + case IMGFMT_422P16_LE: return "Planar 422P 16-bit little-endian"; + case IMGFMT_422P16_BE: return "Planar 422P 16-bit big-endian"; + case IMGFMT_422P14_LE: return "Planar 422P 14-bit little-endian"; + case IMGFMT_422P14_BE: return "Planar 422P 14-bit big-endian"; + case IMGFMT_422P12_LE: return "Planar 422P 12-bit little-endian"; + case IMGFMT_422P12_BE: return "Planar 422P 12-bit big-endian"; + case IMGFMT_422P10_LE: return "Planar 422P 10-bit little-endian"; + case IMGFMT_422P10_BE: return "Planar 422P 10-bit big-endian"; + case IMGFMT_422P9_LE: return "Planar 422P 9-bit little-endian"; + case IMGFMT_422P9_BE: return "Planar 422P 9-bit big-endian"; + case IMGFMT_444P16_LE: return "Planar 444P 16-bit little-endian"; + case IMGFMT_444P16_BE: return "Planar 444P 16-bit big-endian"; + case IMGFMT_444P14_LE: return "Planar 444P 14-bit little-endian"; + case IMGFMT_444P14_BE: return "Planar 444P 14-bit big-endian"; + case IMGFMT_444P12_LE: return "Planar 444P 12-bit little-endian"; + case IMGFMT_444P12_BE: return "Planar 444P 12-bit big-endian"; + case IMGFMT_444P10_LE: return "Planar 444P 10-bit little-endian"; + case IMGFMT_444P10_BE: return "Planar 444P 10-bit big-endian"; + case IMGFMT_444P9_LE: return "Planar 444P 9-bit little-endian"; + case IMGFMT_444P9_BE: return "Planar 444P 9-bit big-endian"; + case IMGFMT_420A: return "Planar 420P with alpha"; + case IMGFMT_444P: return "Planar 444P"; + case IMGFMT_444A: return "Planar 444P with alpha"; + case IMGFMT_422P: return "Planar 422P"; + case IMGFMT_422A: return "Planar 422P with alpha"; + case IMGFMT_411P: return "Planar 411P"; + case IMGFMT_440P: return "Planar 440P"; + case IMGFMT_NV12: return "Planar NV12"; + case IMGFMT_NV21: return "Planar NV21"; + case IMGFMT_HM12: return "Planar NV12 Macroblock"; + case IMGFMT_IUYV: return "Packed IUYV"; + case IMGFMT_IY41: return "Packed IY41"; + case IMGFMT_IYU1: return "Packed IYU1"; + case IMGFMT_IYU2: return "Packed IYU2"; + case IMGFMT_UYVY: return "Packed UYVY"; + case IMGFMT_UYNV: return "Packed UYNV"; + case IMGFMT_cyuv: return "Packed CYUV"; + case IMGFMT_Y422: return "Packed Y422"; + case IMGFMT_YUY2: return "Packed YUY2"; + case IMGFMT_YUNV: return "Packed YUNV"; + case IMGFMT_YVYU: return "Packed YVYU"; + case IMGFMT_Y41P: return "Packed Y41P"; + case IMGFMT_Y211: return "Packed Y211"; + case IMGFMT_Y41T: return "Packed Y41T"; + case IMGFMT_Y42T: return "Packed Y42T"; + case IMGFMT_V422: return "Packed V422"; + case IMGFMT_V655: return "Packed V655"; + case IMGFMT_CLJR: return "Packed CLJR"; + case IMGFMT_YUVP: return "Packed YUVP"; + case IMGFMT_UYVP: return "Packed UYVP"; + case IMGFMT_MPEGPES: return "Mpeg PES"; + case IMGFMT_ZRMJPEGNI: return "Zoran MJPEG non-interlaced"; + case IMGFMT_ZRMJPEGIT: return "Zoran MJPEG top field first"; + case IMGFMT_ZRMJPEGIB: return "Zoran MJPEG bottom field first"; + case IMGFMT_XVMC_MOCO_MPEG2: return "MPEG1/2 Motion Compensation"; + case IMGFMT_XVMC_IDCT_MPEG2: return "MPEG1/2 Motion Compensation and IDCT"; + case IMGFMT_VDPAU_MPEG1: return "MPEG1 VDPAU acceleration"; + case IMGFMT_VDPAU_MPEG2: return "MPEG2 VDPAU acceleration"; + case IMGFMT_VDPAU_H264: return "H.264 VDPAU acceleration"; + case IMGFMT_VDPAU_MPEG4: return "MPEG-4 Part 2 VDPAU acceleration"; + case IMGFMT_VDPAU_WMV3: return "WMV3 VDPAU acceleration"; + case IMGFMT_VDPAU_VC1: return "VC1 VDPAU acceleration"; + } + snprintf(unknown_format,20,"Unknown 0x%04x",format); + return unknown_format; +} + +int ff_mp_get_chroma_shift(int format, int *x_shift, int *y_shift, int *component_bits) +{ + int xs = 0, ys = 0; + int bpp; + int err = 0; + int bits = 8; + if ((format & 0xff0000f0) == 0x34000050) + format = av_bswap32(format); + if ((format & 0xf00000ff) == 0x50000034) { + switch (format >> 24) { + case 0x50: + break; + case 0x51: + bits = 16; + break; + case 0x52: + bits = 10; + break; + case 0x53: + bits = 9; + break; + default: + err = 1; + break; + } + switch (format & 0x00ffffff) { + case 0x00343434: // 444 + xs = 0; + ys = 0; + break; + case 0x00323234: // 422 + xs = 1; + ys = 0; + break; + case 0x00303234: // 420 + xs = 1; + ys = 1; + break; + case 0x00313134: // 411 + xs = 2; + ys = 0; + break; + case 0x00303434: // 440 + xs = 0; + ys = 1; + break; + default: + err = 1; + break; + } + } else switch (format) { + case IMGFMT_444A: + xs = 0; + ys = 0; + break; + case IMGFMT_422A: + xs = 1; + ys = 0; + break; + case IMGFMT_420A: + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_YV12: + xs = 1; + ys = 1; + break; + case IMGFMT_IF09: + case IMGFMT_YVU9: + xs = 2; + ys = 2; + break; + case IMGFMT_Y8: + case IMGFMT_Y800: + xs = 31; + ys = 31; + break; + case IMGFMT_NV12: + case IMGFMT_NV21: + xs = 1; + ys = 1; + // TODO: allowing this though currently breaks + // things all over the place. + err = 1; + break; + default: + err = 1; + break; + } + if (x_shift) *x_shift = xs; + if (y_shift) *y_shift = ys; + if (component_bits) *component_bits = bits; + bpp = 8 + ((16 >> xs) >> ys); + if (format == IMGFMT_420A || format == IMGFMT_422A || format == IMGFMT_444A) + bpp += 8; + bpp *= (bits + 7) >> 3; + return err ? 0 : bpp; +} diff --git a/libavfilter/libmpcodecs/img_format.h b/libavfilter/libmpcodecs/img_format.h new file mode 100644 index 0000000000..b5c0b9007f --- /dev/null +++ b/libavfilter/libmpcodecs/img_format.h @@ -0,0 +1,309 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_IMG_FORMAT_H +#define MPLAYER_IMG_FORMAT_H + +#include "config.h" + +/* RGB/BGR Formats */ + +#define IMGFMT_RGB_MASK 0xFFFFFF00 +#define IMGFMT_RGB (('R'<<24)|('G'<<16)|('B'<<8)) +#define IMGFMT_RGB1 (IMGFMT_RGB|1) +#define IMGFMT_RGB4 (IMGFMT_RGB|4) +#define IMGFMT_RGB4_CHAR (IMGFMT_RGB|4|128) // RGB4 with 1 pixel per byte +#define IMGFMT_RGB8 (IMGFMT_RGB|8) +#define IMGFMT_RGB12 (IMGFMT_RGB|12) +#define IMGFMT_RGB15 (IMGFMT_RGB|15) +#define IMGFMT_RGB16 (IMGFMT_RGB|16) +#define IMGFMT_RGB24 (IMGFMT_RGB|24) +#define IMGFMT_RGB32 (IMGFMT_RGB|32) +#define IMGFMT_RGB48LE (IMGFMT_RGB|48) +#define IMGFMT_RGB48BE (IMGFMT_RGB|48|128) +#define IMGFMT_RGB64LE (IMGFMT_RGB|64) +#define IMGFMT_RGB64BE (IMGFMT_RGB|64|128) + +#define IMGFMT_BGR_MASK 0xFFFFFF00 +#define IMGFMT_BGR (('B'<<24)|('G'<<16)|('R'<<8)) +#define IMGFMT_BGR1 (IMGFMT_BGR|1) +#define IMGFMT_BGR4 (IMGFMT_BGR|4) +#define IMGFMT_BGR4_CHAR (IMGFMT_BGR|4|128) // BGR4 with 1 pixel per byte +#define IMGFMT_BGR8 (IMGFMT_BGR|8) +#define IMGFMT_BGR12 (IMGFMT_BGR|12) +#define IMGFMT_BGR15 (IMGFMT_BGR|15) +#define IMGFMT_BGR16 (IMGFMT_BGR|16) +#define IMGFMT_BGR24 (IMGFMT_BGR|24) +#define IMGFMT_BGR32 (IMGFMT_BGR|32) + +#define IMGFMT_XYZ_MASK 0xFFFFFF00 +#define IMGFMT_XYZ (('X'<<24)|('Y'<<16)|('Z'<<8)) +#define IMGFMT_XYZ12LE (IMGFMT_XYZ|12) +#define IMGFMT_XYZ12BE (IMGFMT_XYZ|12|128) + +#define IMGFMT_GBR24P (('G'<<24)|('B'<<16)|('R'<<8)|24) +#define IMGFMT_GBR12PLE (('G'<<24)|('B'<<16)|('R'<<8)|36) +#define IMGFMT_GBR12PBE (('G'<<24)|('B'<<16)|('R'<<8)|36|128) +#define IMGFMT_GBR14PLE (('G'<<24)|('B'<<16)|('R'<<8)|42) +#define IMGFMT_GBR14PBE (('G'<<24)|('B'<<16)|('R'<<8)|42|128) + +#if HAVE_BIGENDIAN +#define IMGFMT_ABGR IMGFMT_RGB32 +#define IMGFMT_BGRA (IMGFMT_RGB32|128) +#define IMGFMT_ARGB IMGFMT_BGR32 +#define IMGFMT_RGBA (IMGFMT_BGR32|128) +#define IMGFMT_RGB64NE IMGFMT_RGB64BE +#define IMGFMT_RGB48NE IMGFMT_RGB48BE +#define IMGFMT_RGB12BE IMGFMT_RGB12 +#define IMGFMT_RGB12LE (IMGFMT_RGB12|128) +#define IMGFMT_RGB15BE IMGFMT_RGB15 +#define IMGFMT_RGB15LE (IMGFMT_RGB15|128) +#define IMGFMT_RGB16BE IMGFMT_RGB16 +#define IMGFMT_RGB16LE (IMGFMT_RGB16|128) +#define IMGFMT_BGR12BE IMGFMT_BGR12 +#define IMGFMT_BGR12LE (IMGFMT_BGR12|128) +#define IMGFMT_BGR15BE IMGFMT_BGR15 +#define IMGFMT_BGR15LE (IMGFMT_BGR15|128) +#define IMGFMT_BGR16BE IMGFMT_BGR16 +#define IMGFMT_BGR16LE (IMGFMT_BGR16|128) +#define IMGFMT_XYZ12 IMGFMT_XYZ12BE +#define IMGFMT_GBR12P IMGFMT_GBR12PBE +#define IMGFMT_GBR14P IMGFMT_GBR14PBE +#else +#define IMGFMT_ABGR (IMGFMT_BGR32|128) +#define IMGFMT_BGRA IMGFMT_BGR32 +#define IMGFMT_ARGB (IMGFMT_RGB32|128) +#define IMGFMT_RGBA IMGFMT_RGB32 +#define IMGFMT_RGB64NE IMGFMT_RGB64LE +#define IMGFMT_RGB48NE IMGFMT_RGB48LE +#define IMGFMT_RGB12BE (IMGFMT_RGB12|128) +#define IMGFMT_RGB12LE IMGFMT_RGB12 +#define IMGFMT_RGB15BE (IMGFMT_RGB15|128) +#define IMGFMT_RGB15LE IMGFMT_RGB15 +#define IMGFMT_RGB16BE (IMGFMT_RGB16|128) +#define IMGFMT_RGB16LE IMGFMT_RGB16 +#define IMGFMT_BGR12BE (IMGFMT_BGR12|128) +#define IMGFMT_BGR12LE IMGFMT_BGR12 +#define IMGFMT_BGR15BE (IMGFMT_BGR15|128) +#define IMGFMT_BGR15LE IMGFMT_BGR15 +#define IMGFMT_BGR16BE (IMGFMT_BGR16|128) +#define IMGFMT_BGR16LE IMGFMT_BGR16 +#define IMGFMT_XYZ12 IMGFMT_XYZ12LE +#define IMGFMT_GBR12P IMGFMT_GBR12PLE +#define IMGFMT_GBR14P IMGFMT_GBR14PLE +#endif + +/* old names for compatibility */ +#define IMGFMT_RG4B IMGFMT_RGB4_CHAR +#define IMGFMT_BG4B IMGFMT_BGR4_CHAR + +#define IMGFMT_IS_RGB(fmt) (((fmt)&IMGFMT_RGB_MASK)==IMGFMT_RGB) +#define IMGFMT_IS_BGR(fmt) (((fmt)&IMGFMT_BGR_MASK)==IMGFMT_BGR) +#define IMGFMT_IS_XYZ(fmt) (((fmt)&IMGFMT_XYZ_MASK)==IMGFMT_XYZ) + +#define IMGFMT_RGB_DEPTH(fmt) ((fmt)&0x7F) +#define IMGFMT_BGR_DEPTH(fmt) ((fmt)&0x7F) +#define IMGFMT_XYZ_DEPTH(fmt) ((fmt)&0x7F) + + +/* Planar YUV Formats */ + +#define IMGFMT_YVU9 0x39555659 +#define IMGFMT_IF09 0x39304649 +#define IMGFMT_YV12 0x32315659 +#define IMGFMT_I420 0x30323449 +#define IMGFMT_IYUV 0x56555949 +#define IMGFMT_CLPL 0x4C504C43 +#define IMGFMT_Y800 0x30303859 +#define IMGFMT_Y8 0x20203859 +#define IMGFMT_NV12 0x3231564E +#define IMGFMT_NV21 0x3132564E +#define IMGFMT_Y16_LE 0x20363159 + +/* unofficial Planar Formats, FIXME if official 4CC exists */ +#define IMGFMT_444P 0x50343434 +#define IMGFMT_422P 0x50323234 +#define IMGFMT_411P 0x50313134 +#define IMGFMT_440P 0x50303434 +#define IMGFMT_HM12 0x32314D48 +#define IMGFMT_Y16_BE 0x59313620 + +// Gray with alpha +#define IMGFMT_Y8A 0x59320008 +// 4:2:0 planar with alpha +#define IMGFMT_420A 0x41303234 +// 4:2:2 planar with alpha +#define IMGFMT_422A 0x41323234 +// 4:4:4 planar with alpha +#define IMGFMT_444A 0x41343434 + +#define IMGFMT_444P16_LE 0x51343434 +#define IMGFMT_444P16_BE 0x34343451 +#define IMGFMT_444P14_LE 0x54343434 +#define IMGFMT_444P14_BE 0x34343454 +#define IMGFMT_444P12_LE 0x55343434 +#define IMGFMT_444P12_BE 0x34343455 +#define IMGFMT_444P10_LE 0x52343434 +#define IMGFMT_444P10_BE 0x34343452 +#define IMGFMT_444P9_LE 0x53343434 +#define IMGFMT_444P9_BE 0x34343453 +#define IMGFMT_422P16_LE 0x51323234 +#define IMGFMT_422P16_BE 0x34323251 +#define IMGFMT_422P14_LE 0x54323234 +#define IMGFMT_422P14_BE 0x34323254 +#define IMGFMT_422P12_LE 0x55323234 +#define IMGFMT_422P12_BE 0x34323255 +#define IMGFMT_422P10_LE 0x52323234 +#define IMGFMT_422P10_BE 0x34323252 +#define IMGFMT_422P9_LE 0x53323234 +#define IMGFMT_422P9_BE 0x34323253 +#define IMGFMT_420P16_LE 0x51303234 +#define IMGFMT_420P16_BE 0x34323051 +#define IMGFMT_420P14_LE 0x54303234 +#define IMGFMT_420P14_BE 0x34323054 +#define IMGFMT_420P12_LE 0x55303234 +#define IMGFMT_420P12_BE 0x34323055 +#define IMGFMT_420P10_LE 0x52303234 +#define IMGFMT_420P10_BE 0x34323052 +#define IMGFMT_420P9_LE 0x53303234 +#define IMGFMT_420P9_BE 0x34323053 +#if HAVE_BIGENDIAN +#define IMGFMT_444P16 IMGFMT_444P16_BE +#define IMGFMT_444P14 IMGFMT_444P14_BE +#define IMGFMT_444P12 IMGFMT_444P12_BE +#define IMGFMT_444P10 IMGFMT_444P10_BE +#define IMGFMT_444P9 IMGFMT_444P9_BE +#define IMGFMT_422P16 IMGFMT_422P16_BE +#define IMGFMT_422P14 IMGFMT_422P14_BE +#define IMGFMT_422P12 IMGFMT_422P12_BE +#define IMGFMT_422P10 IMGFMT_422P10_BE +#define IMGFMT_422P9 IMGFMT_422P9_BE +#define IMGFMT_420P16 IMGFMT_420P16_BE +#define IMGFMT_420P14 IMGFMT_420P14_BE +#define IMGFMT_420P12 IMGFMT_420P12_BE +#define IMGFMT_420P10 IMGFMT_420P10_BE +#define IMGFMT_420P9 IMGFMT_420P9_BE +#define IMGFMT_Y16 IMGFMT_Y16_BE +#define IMGFMT_IS_YUVP16_NE(fmt) IMGFMT_IS_YUVP16_BE(fmt) +#else +#define IMGFMT_444P16 IMGFMT_444P16_LE +#define IMGFMT_444P14 IMGFMT_444P14_LE +#define IMGFMT_444P12 IMGFMT_444P12_LE +#define IMGFMT_444P10 IMGFMT_444P10_LE +#define IMGFMT_444P9 IMGFMT_444P9_LE +#define IMGFMT_422P16 IMGFMT_422P16_LE +#define IMGFMT_422P14 IMGFMT_422P14_LE +#define IMGFMT_422P12 IMGFMT_422P12_LE +#define IMGFMT_422P10 IMGFMT_422P10_LE +#define IMGFMT_422P9 IMGFMT_422P9_LE +#define IMGFMT_420P16 IMGFMT_420P16_LE +#define IMGFMT_420P14 IMGFMT_420P14_LE +#define IMGFMT_420P12 IMGFMT_420P12_LE +#define IMGFMT_420P10 IMGFMT_420P10_LE +#define IMGFMT_420P9 IMGFMT_420P9_LE +#define IMGFMT_Y16 IMGFMT_Y16_LE +#define IMGFMT_IS_YUVP16_NE(fmt) IMGFMT_IS_YUVP16_LE(fmt) +#endif + +#define IMGFMT_IS_YUVP16_LE(fmt) (((fmt - 0x51000034) & 0xfc0000ff) == 0) +#define IMGFMT_IS_YUVP16_BE(fmt) (((fmt - 0x34000051) & 0xff0000fc) == 0) +#define IMGFMT_IS_YUVP16(fmt) (IMGFMT_IS_YUVP16_LE(fmt) || IMGFMT_IS_YUVP16_BE(fmt)) + +/** + * \brief Find the corresponding full 16 bit format, i.e. IMGFMT_420P10_LE -> IMGFMT_420P16_LE + * \return normalized format ID or 0 if none exists. + */ +static inline int normalize_yuvp16(int fmt) { + if (IMGFMT_IS_YUVP16_LE(fmt)) + return (fmt & 0x00ffffff) | 0x51000000; + if (IMGFMT_IS_YUVP16_BE(fmt)) + return (fmt & 0xffffff00) | 0x00000051; + return 0; +} + +/* Packed YUV Formats */ + +#define IMGFMT_IUYV 0x56595549 // Interlaced UYVY +#define IMGFMT_IY41 0x31435949 // Interlaced Y41P +#define IMGFMT_IYU1 0x31555949 +#define IMGFMT_IYU2 0x32555949 +#define IMGFMT_UYVY 0x59565955 +#define IMGFMT_UYNV 0x564E5955 // Exactly same as UYVY +#define IMGFMT_cyuv 0x76757963 // upside-down UYVY +#define IMGFMT_Y422 0x32323459 // Exactly same as UYVY +#define IMGFMT_YUY2 0x32595559 +#define IMGFMT_YUNV 0x564E5559 // Exactly same as YUY2 +#define IMGFMT_YVYU 0x55595659 +#define IMGFMT_Y41P 0x50313459 +#define IMGFMT_Y211 0x31313259 +#define IMGFMT_Y41T 0x54313459 // Y41P, Y lsb = transparency +#define IMGFMT_Y42T 0x54323459 // UYVY, Y lsb = transparency +#define IMGFMT_V422 0x32323456 // upside-down UYVY? +#define IMGFMT_V655 0x35353656 +#define IMGFMT_CLJR 0x524A4C43 +#define IMGFMT_YUVP 0x50565559 // 10-bit YUYV +#define IMGFMT_UYVP 0x50565955 // 10-bit UYVY + +/* Compressed Formats */ +#define IMGFMT_MPEGPES (('M'<<24)|('P'<<16)|('E'<<8)|('S')) +#define IMGFMT_MJPEG (('M')|('J'<<8)|('P'<<16)|('G'<<24)) +/* Formats that are understood by zoran chips, we include + * non-interlaced, interlaced top-first, interlaced bottom-first */ +#define IMGFMT_ZRMJPEGNI (('Z'<<24)|('R'<<16)|('N'<<8)|('I')) +#define IMGFMT_ZRMJPEGIT (('Z'<<24)|('R'<<16)|('I'<<8)|('T')) +#define IMGFMT_ZRMJPEGIB (('Z'<<24)|('R'<<16)|('I'<<8)|('B')) + +// I think that this code could not be used by any other codec/format +#define IMGFMT_XVMC 0x1DC70000 +#define IMGFMT_XVMC_MASK 0xFFFF0000 +#define IMGFMT_IS_XVMC(fmt) (((fmt)&IMGFMT_XVMC_MASK)==IMGFMT_XVMC) +//these are chroma420 +#define IMGFMT_XVMC_MOCO_MPEG2 (IMGFMT_XVMC|0x02) +#define IMGFMT_XVMC_IDCT_MPEG2 (IMGFMT_XVMC|0x82) + +// VDPAU specific format. +#define IMGFMT_VDPAU 0x1DC80000 +#define IMGFMT_VDPAU_MASK 0xFFFF0000 +#define IMGFMT_IS_VDPAU(fmt) (((fmt)&IMGFMT_VDPAU_MASK)==IMGFMT_VDPAU) +#define IMGFMT_VDPAU_MPEG1 (IMGFMT_VDPAU|0x01) +#define IMGFMT_VDPAU_MPEG2 (IMGFMT_VDPAU|0x02) +#define IMGFMT_VDPAU_H264 (IMGFMT_VDPAU|0x03) +#define IMGFMT_VDPAU_WMV3 (IMGFMT_VDPAU|0x04) +#define IMGFMT_VDPAU_VC1 (IMGFMT_VDPAU|0x05) +#define IMGFMT_VDPAU_MPEG4 (IMGFMT_VDPAU|0x06) + +#define IMGFMT_IS_HWACCEL(fmt) (IMGFMT_IS_VDPAU(fmt) || IMGFMT_IS_XVMC(fmt)) + +typedef struct { + void* data; + int size; + int id; // stream id. usually 0x1E0 + int timestamp; // pts, 90000 Hz counter based +} vo_mpegpes_t; + +const char *ff_vo_format_name(int format); + +/** + * Calculates the scale shifts for the chroma planes for planar YUV + * + * \param component_bits bits per component + * \return bits-per-pixel for format if successful (i.e. format is 3 or 4-planes planar YUV), 0 otherwise + */ +int ff_mp_get_chroma_shift(int format, int *x_shift, int *y_shift, int *component_bits); + +#endif /* MPLAYER_IMG_FORMAT_H */ diff --git a/libavfilter/libmpcodecs/libvo/fastmemcpy.h b/libavfilter/libmpcodecs/libvo/fastmemcpy.h new file mode 100644 index 0000000000..5a17d0192a --- /dev/null +++ b/libavfilter/libmpcodecs/libvo/fastmemcpy.h @@ -0,0 +1,99 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with MPlayer; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef MPLAYER_FASTMEMCPY_H +#define MPLAYER_FASTMEMCPY_H + +#include <inttypes.h> +#include <string.h> +#include <stddef.h> + +void * fast_memcpy(void * to, const void * from, size_t len); +void * mem2agpcpy(void * to, const void * from, size_t len); + +#if ! defined(CONFIG_FASTMEMCPY) || ! (HAVE_MMX || HAVE_MMX2 || HAVE_AMD3DNOW /* || HAVE_SSE || HAVE_SSE2 */) +#define mem2agpcpy(a,b,c) memcpy(a,b,c) +#define fast_memcpy(a,b,c) memcpy(a,b,c) +#endif + +static inline void * mem2agpcpy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride) +{ + int i; + void *retval=dst; + + if(dstStride == srcStride) + { + if (srcStride < 0) { + src = (const uint8_t*)src + (height-1)*srcStride; + dst = (uint8_t*)dst + (height-1)*dstStride; + srcStride = -srcStride; + } + + mem2agpcpy(dst, src, srcStride*height); + } + else + { + for(i=0; i<height; i++) + { + mem2agpcpy(dst, src, bytesPerLine); + src = (const uint8_t*)src + srcStride; + dst = (uint8_t*)dst + dstStride; + } + } + + return retval; +} + +#define memcpy_pic(d, s, b, h, ds, ss) memcpy_pic2(d, s, b, h, ds, ss, 0) +#define my_memcpy_pic(d, s, b, h, ds, ss) memcpy_pic2(d, s, b, h, ds, ss, 1) + +/** + * \param limit2width always skip data between end of line and start of next + * instead of copying the full block when strides are the same + */ +static inline void * memcpy_pic2(void * dst, const void * src, + int bytesPerLine, int height, + int dstStride, int srcStride, int limit2width) +{ + int i; + void *retval=dst; + + if(!limit2width && dstStride == srcStride) + { + if (srcStride < 0) { + src = (const uint8_t*)src + (height-1)*srcStride; + dst = (uint8_t*)dst + (height-1)*dstStride; + srcStride = -srcStride; + } + + fast_memcpy(dst, src, srcStride*height); + } + else + { + for(i=0; i<height; i++) + { + fast_memcpy(dst, src, bytesPerLine); + src = (const uint8_t*)src + srcStride; + dst = (uint8_t*)dst + dstStride; + } + } + + return retval; +} + +#endif /* MPLAYER_FASTMEMCPY_H */ diff --git a/libavfilter/libmpcodecs/libvo/video_out.h b/libavfilter/libmpcodecs/libvo/video_out.h new file mode 100644 index 0000000000..49d30987ff --- /dev/null +++ b/libavfilter/libmpcodecs/libvo/video_out.h @@ -0,0 +1,300 @@ +/* + * Copyright (C) Aaron Holtzman - Aug 1999 + * Strongly modified, most parts rewritten: A'rpi/ESP-team - 2000-2001 + * (C) MPlayer developers + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_VIDEO_OUT_H +#define MPLAYER_VIDEO_OUT_H + +#include <inttypes.h> +#include <stdarg.h> + +//#include "sub/font_load.h" +#include "../img_format.h" +//#include "vidix/vidix.h" + + +#define ROTATE(t, x, y) do { \ + t rot_tmp = x; \ + x = y; \ + y = -rot_tmp; \ +} while(0) + +#define VO_EVENT_EXPOSE 1 +#define VO_EVENT_RESIZE 2 +#define VO_EVENT_KEYPRESS 4 +#define VO_EVENT_REINIT 8 +#define VO_EVENT_MOVE 16 +#define VO_EVENT_MOUSE 32 + +/* Obsolete: VOCTRL_QUERY_VAA 1 */ +/* does the device support the required format */ +#define VOCTRL_QUERY_FORMAT 2 +/* signal a device reset seek */ +#define VOCTRL_RESET 3 +/* true if vo driver can use GUI created windows */ +#define VOCTRL_GUISUPPORT 4 +/* used to switch to fullscreen */ +#define VOCTRL_FULLSCREEN 5 +/* signal a device pause */ +#define VOCTRL_PAUSE 7 +/* start/resume playback */ +#define VOCTRL_RESUME 8 +/* libmpcodecs direct rendering: */ +#define VOCTRL_GET_IMAGE 9 +#define VOCTRL_DRAW_IMAGE 13 +#define VOCTRL_SET_SPU_PALETTE 14 +/* decoding ahead: */ +#define VOCTRL_GET_NUM_FRAMES 10 +#define VOCTRL_GET_FRAME_NUM 11 +#define VOCTRL_SET_FRAME_NUM 12 +#define VOCTRL_GET_PANSCAN 15 +#define VOCTRL_SET_PANSCAN 16 +/* equalizer controls */ +#define VOCTRL_SET_EQUALIZER 17 +#define VOCTRL_GET_EQUALIZER 18 +/* Frame duplication */ +#define VOCTRL_DUPLICATE_FRAME 20 +// ... 21 +#define VOCTRL_START_SLICE 21 + +#define VOCTRL_ONTOP 25 +#define VOCTRL_ROOTWIN 26 +#define VOCTRL_BORDER 27 +#define VOCTRL_DRAW_EOSD 28 +#define VOCTRL_GET_EOSD_RES 29 + +#define VOCTRL_SET_DEINTERLACE 30 +#define VOCTRL_GET_DEINTERLACE 31 + +#define VOCTRL_UPDATE_SCREENINFO 32 + +// Vo can be used by xover +#define VOCTRL_XOVERLAY_SUPPORT 22 + +#define VOCTRL_XOVERLAY_SET_COLORKEY 24 +typedef struct { + uint32_t x11; // The raw x11 color + uint16_t r,g,b; +} mp_colorkey_t; + +#define VOCTRL_XOVERLAY_SET_WIN 23 +typedef struct { + int x,y; + int w,h; +} mp_win_t; + +#define VO_TRUE 1 +#define VO_FALSE 0 +#define VO_ERROR -1 +#define VO_NOTAVAIL -2 +#define VO_NOTIMPL -3 + +#define VOFLAG_FULLSCREEN 0x01 +#define VOFLAG_MODESWITCHING 0x02 +#define VOFLAG_SWSCALE 0x04 +#define VOFLAG_FLIPPING 0x08 +#define VOFLAG_HIDDEN 0x10 //< Use to create a hidden window +#define VOFLAG_STEREO 0x20 //< Use to create a stereo-capable window +#define VOFLAG_DEPTH 0x40 //< Request a depth buffer +#define VOFLAG_XOVERLAY_SUB_VO 0x10000 + +typedef struct vo_info_s +{ + /* driver name ("Matrox Millennium G200/G400" */ + const char *name; + /* short name (for config strings) ("mga") */ + const char *short_name; + /* author ("Aaron Holtzman <aholtzma@ess.engr.uvic.ca>") */ + const char *author; + /* any additional comments */ + const char *comment; +} vo_info_t; + +typedef struct vo_functions_s +{ + const vo_info_t *info; + /* + * Preinitializes driver (real INITIALIZATION) + * arg - currently it's vo_subdevice + * returns: zero on successful initialization, non-zero on error. + */ + int (*preinit)(const char *arg); + /* + * Initialize (means CONFIGURE) the display driver. + * params: + * width,height: image source size + * d_width,d_height: size of the requested window size, just a hint + * fullscreen: flag, 0=windowd 1=fullscreen, just a hint + * title: window title, if available + * format: fourcc of pixel format + * returns : zero on successful initialization, non-zero on error. + */ + int (*config)(uint32_t width, uint32_t height, uint32_t d_width, + uint32_t d_height, uint32_t fullscreen, char *title, + uint32_t format); + + /* + * Control interface + */ + int (*control)(uint32_t request, void *data, ...); + + /* + * Display a new RGB/BGR frame of the video to the screen. + * params: + * src[0] - pointer to the image + */ + int (*draw_frame)(uint8_t *src[]); + + /* + * Draw a planar YUV slice to the buffer: + * params: + * src[3] = source image planes (Y,U,V) + * stride[3] = source image planes line widths (in bytes) + * w,h = width*height of area to be copied (in Y pixels) + * x,y = position at the destination image (in Y pixels) + */ + int (*draw_slice)(uint8_t *src[], int stride[], int w,int h, int x,int y); + + /* + * Draws OSD to the screen buffer + */ + void (*draw_osd)(void); + + /* + * Blit/Flip buffer to the screen. Must be called after each frame! + */ + void (*flip_page)(void); + + /* + * This func is called after every frames to handle keyboard and + * other events. It's called in PAUSE mode too! + */ + void (*check_events)(void); + + /* + * Closes driver. Should restore the original state of the system. + */ + void (*uninit)(void); +} vo_functions_t; + +const vo_functions_t* init_best_video_out(char** vo_list); +int config_video_out(const vo_functions_t *vo, uint32_t width, uint32_t height, + uint32_t d_width, uint32_t d_height, uint32_t flags, + char *title, uint32_t format); +void list_video_out(void); + +// NULL terminated array of all drivers +extern const vo_functions_t* const video_out_drivers[]; + +extern int vo_flags; + +extern int vo_config_count; + +extern int xinerama_screen; +extern int xinerama_x; +extern int xinerama_y; + +// correct resolution/bpp on screen: (should be autodetected by vo_init()) +extern int vo_depthonscreen; +extern int vo_screenwidth; +extern int vo_screenheight; + +// requested resolution/bpp: (-x -y -bpp options) +extern int vo_dx; +extern int vo_dy; +extern int vo_dwidth; +extern int vo_dheight; +extern int vo_dbpp; + +extern int vo_grabpointer; +extern int vo_doublebuffering; +extern int vo_directrendering; +extern int vo_vsync; +extern int vo_fsmode; +extern float vo_panscan; +extern float vo_border_pos_x; +extern float vo_border_pos_y; +extern int vo_rotate; +extern int vo_adapter_num; +extern int vo_refresh_rate; +extern int vo_keepaspect; +extern int vo_rootwin; +extern int vo_ontop; +extern int vo_border; + +extern int vo_gamma_gamma; +extern int vo_gamma_brightness; +extern int vo_gamma_saturation; +extern int vo_gamma_contrast; +extern int vo_gamma_hue; +extern int vo_gamma_red_intensity; +extern int vo_gamma_green_intensity; +extern int vo_gamma_blue_intensity; + +extern int vo_nomouse_input; +extern int enable_mouse_movements; + +extern int vo_pts; +extern float vo_fps; + +extern char *vo_subdevice; + +extern int vo_colorkey; + +extern char *vo_winname; +extern char *vo_wintitle; + +extern int64_t WinID; + +typedef struct { + float min; + float max; + } range_t; + +float range_max(range_t *r); +int in_range(range_t *r, float f); +range_t *str2range(char *s); +extern char *monitor_hfreq_str; +extern char *monitor_vfreq_str; +extern char *monitor_dotclock_str; + +struct mp_keymap { + int from; + int to; +}; +int lookup_keymap_table(const struct mp_keymap *map, int key); +struct vo_rect { + int left, right, top, bottom, width, height; +}; +void calc_src_dst_rects(int src_width, int src_height, struct vo_rect *src, struct vo_rect *dst, + struct vo_rect *borders, const struct vo_rect *crop); +void vo_mouse_movement(int posx, int posy); + +static inline int apply_border_pos(int full, int part, float pos) { + if (pos >= 0.0 && pos <= 1.0) { + return pos*(full - part); + } + if (pos < 0) + return pos * part; + return full - part + (pos - 1) * part; +} + +#endif /* MPLAYER_VIDEO_OUT_H */ diff --git a/libavfilter/libmpcodecs/mp_image.c b/libavfilter/libmpcodecs/mp_image.c new file mode 100644 index 0000000000..0e4d6d7591 --- /dev/null +++ b/libavfilter/libmpcodecs/mp_image.c @@ -0,0 +1,257 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "config.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if HAVE_MALLOC_H +#include <malloc.h> +#endif + +#include "img_format.h" +#include "mp_image.h" + +#include "libvo/fastmemcpy.h" +//#include "libavutil/mem.h" +#include "libavutil/imgutils.h" + +void ff_mp_image_alloc_planes(mp_image_t *mpi) { + uint32_t temp[256]; + if (avpriv_set_systematic_pal2(temp, ff_mp2ff_pix_fmt(mpi->imgfmt)) >= 0) + mpi->flags |= MP_IMGFLAG_RGB_PALETTE; + + // IF09 - allocate space for 4. plane delta info - unused + if (mpi->imgfmt == IMGFMT_IF09) { + mpi->planes[0]=av_malloc(mpi->bpp*mpi->width*(mpi->height+2)/8+ + mpi->chroma_width*mpi->chroma_height); + } else + mpi->planes[0]=av_malloc(mpi->bpp*mpi->width*(mpi->height+2)/8); + if (mpi->flags&MP_IMGFLAG_PLANAR) { + int bpp = IMGFMT_IS_YUVP16(mpi->imgfmt)? 2 : 1; + // YV12/I420/YVU9/IF09. feel free to add other planar formats here... + mpi->stride[0]=mpi->stride[3]=bpp*mpi->width; + if(mpi->num_planes > 2){ + mpi->stride[1]=mpi->stride[2]=bpp*mpi->chroma_width; + if(mpi->flags&MP_IMGFLAG_SWAPPED){ + // I420/IYUV (Y,U,V) + mpi->planes[1]=mpi->planes[0]+mpi->stride[0]*mpi->height; + mpi->planes[2]=mpi->planes[1]+mpi->stride[1]*mpi->chroma_height; + if (mpi->num_planes > 3) + mpi->planes[3]=mpi->planes[2]+mpi->stride[2]*mpi->chroma_height; + } else { + // YV12,YVU9,IF09 (Y,V,U) + mpi->planes[2]=mpi->planes[0]+mpi->stride[0]*mpi->height; + mpi->planes[1]=mpi->planes[2]+mpi->stride[1]*mpi->chroma_height; + if (mpi->num_planes > 3) + mpi->planes[3]=mpi->planes[1]+mpi->stride[1]*mpi->chroma_height; + } + } else { + // NV12/NV21 + mpi->stride[1]=mpi->chroma_width; + mpi->planes[1]=mpi->planes[0]+mpi->stride[0]*mpi->height; + } + } else { + mpi->stride[0]=mpi->width*mpi->bpp/8; + if (mpi->flags & MP_IMGFLAG_RGB_PALETTE) { + mpi->planes[1] = av_malloc(1024); + memcpy(mpi->planes[1], temp, 1024); + } + } + mpi->flags|=MP_IMGFLAG_ALLOCATED; +} + +mp_image_t* ff_alloc_mpi(int w, int h, unsigned long int fmt) { + mp_image_t* mpi = ff_new_mp_image(w,h); + + ff_mp_image_setfmt(mpi,fmt); + ff_mp_image_alloc_planes(mpi); + + return mpi; +} + +void ff_copy_mpi(mp_image_t *dmpi, mp_image_t *mpi) { + if(mpi->flags&MP_IMGFLAG_PLANAR){ + memcpy_pic(dmpi->planes[0],mpi->planes[0], mpi->w, mpi->h, + dmpi->stride[0],mpi->stride[0]); + memcpy_pic(dmpi->planes[1],mpi->planes[1], mpi->chroma_width, mpi->chroma_height, + dmpi->stride[1],mpi->stride[1]); + memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->chroma_width, mpi->chroma_height, + dmpi->stride[2],mpi->stride[2]); + } else { + memcpy_pic(dmpi->planes[0],mpi->planes[0], + mpi->w*(dmpi->bpp/8), mpi->h, + dmpi->stride[0],mpi->stride[0]); + } +} + +void ff_mp_image_setfmt(mp_image_t* mpi,unsigned int out_fmt){ + mpi->flags&=~(MP_IMGFLAG_PLANAR|MP_IMGFLAG_YUV|MP_IMGFLAG_SWAPPED); + mpi->imgfmt=out_fmt; + // compressed formats + if(out_fmt == IMGFMT_MPEGPES || + out_fmt == IMGFMT_ZRMJPEGNI || out_fmt == IMGFMT_ZRMJPEGIT || out_fmt == IMGFMT_ZRMJPEGIB || + IMGFMT_IS_HWACCEL(out_fmt)){ + mpi->bpp=0; + return; + } + mpi->num_planes=1; + if (IMGFMT_IS_RGB(out_fmt)) { + if (IMGFMT_RGB_DEPTH(out_fmt) < 8 && !(out_fmt&128)) + mpi->bpp = IMGFMT_RGB_DEPTH(out_fmt); + else + mpi->bpp=(IMGFMT_RGB_DEPTH(out_fmt)+7)&(~7); + return; + } + if (IMGFMT_IS_BGR(out_fmt)) { + if (IMGFMT_BGR_DEPTH(out_fmt) < 8 && !(out_fmt&128)) + mpi->bpp = IMGFMT_BGR_DEPTH(out_fmt); + else + mpi->bpp=(IMGFMT_BGR_DEPTH(out_fmt)+7)&(~7); + mpi->flags|=MP_IMGFLAG_SWAPPED; + return; + } + if (IMGFMT_IS_XYZ(out_fmt)) { + mpi->bpp=3*((IMGFMT_XYZ_DEPTH(out_fmt) + 7) & ~7); + return; + } + mpi->num_planes=3; + if (out_fmt == IMGFMT_GBR24P) { + mpi->bpp=24; + mpi->flags|=MP_IMGFLAG_PLANAR; + return; + } else if (out_fmt == IMGFMT_GBR12P) { + mpi->bpp=36; + mpi->flags|=MP_IMGFLAG_PLANAR; + return; + } else if (out_fmt == IMGFMT_GBR14P) { + mpi->bpp=42; + mpi->flags|=MP_IMGFLAG_PLANAR; + return; + } + mpi->flags|=MP_IMGFLAG_YUV; + if (ff_mp_get_chroma_shift(out_fmt, NULL, NULL, NULL)) { + mpi->flags|=MP_IMGFLAG_PLANAR; + mpi->bpp = ff_mp_get_chroma_shift(out_fmt, &mpi->chroma_x_shift, &mpi->chroma_y_shift, NULL); + mpi->chroma_width = mpi->width >> mpi->chroma_x_shift; + mpi->chroma_height = mpi->height >> mpi->chroma_y_shift; + } + switch(out_fmt){ + case IMGFMT_I420: + case IMGFMT_IYUV: + mpi->flags|=MP_IMGFLAG_SWAPPED; + case IMGFMT_YV12: + return; + case IMGFMT_420A: + case IMGFMT_422A: + case IMGFMT_444A: + case IMGFMT_IF09: + mpi->num_planes=4; + case IMGFMT_YVU9: + case IMGFMT_444P: + case IMGFMT_422P: + case IMGFMT_411P: + case IMGFMT_440P: + case IMGFMT_444P16_LE: + case IMGFMT_444P16_BE: + case IMGFMT_444P14_LE: + case IMGFMT_444P14_BE: + case IMGFMT_444P12_LE: + case IMGFMT_444P12_BE: + case IMGFMT_444P10_LE: + case IMGFMT_444P10_BE: + case IMGFMT_444P9_LE: + case IMGFMT_444P9_BE: + case IMGFMT_422P16_LE: + case IMGFMT_422P16_BE: + case IMGFMT_422P14_LE: + case IMGFMT_422P14_BE: + case IMGFMT_422P12_LE: + case IMGFMT_422P12_BE: + case IMGFMT_422P10_LE: + case IMGFMT_422P10_BE: + case IMGFMT_422P9_LE: + case IMGFMT_422P9_BE: + case IMGFMT_420P16_LE: + case IMGFMT_420P16_BE: + case IMGFMT_420P14_LE: + case IMGFMT_420P14_BE: + case IMGFMT_420P12_LE: + case IMGFMT_420P12_BE: + case IMGFMT_420P10_LE: + case IMGFMT_420P10_BE: + case IMGFMT_420P9_LE: + case IMGFMT_420P9_BE: + return; + case IMGFMT_Y16_LE: + case IMGFMT_Y16_BE: + mpi->bpp=16; + case IMGFMT_Y800: + case IMGFMT_Y8: + /* they're planar ones, but for easier handling use them as packed */ + mpi->flags&=~MP_IMGFLAG_PLANAR; + mpi->num_planes=1; + return; + case IMGFMT_Y8A: + mpi->num_planes=2; + return; + case IMGFMT_UYVY: + mpi->flags|=MP_IMGFLAG_SWAPPED; + case IMGFMT_YUY2: + mpi->chroma_x_shift = 1; + mpi->bpp=16; + mpi->num_planes=1; + return; + case IMGFMT_NV12: + mpi->flags|=MP_IMGFLAG_SWAPPED; + case IMGFMT_NV21: + mpi->flags|=MP_IMGFLAG_PLANAR; + mpi->bpp=12; + mpi->num_planes=2; + mpi->chroma_width=(mpi->width>>0); + mpi->chroma_height=(mpi->height>>1); + mpi->chroma_x_shift=0; + mpi->chroma_y_shift=1; + return; + } + ff_mp_msg(MSGT_DECVIDEO,MSGL_WARN,"mp_image: unknown out_fmt: 0x%X\n",out_fmt); + mpi->bpp=0; +} + +mp_image_t* ff_new_mp_image(int w,int h){ + mp_image_t* mpi = malloc(sizeof(mp_image_t)); + if(!mpi) return NULL; // error! + memset(mpi,0,sizeof(mp_image_t)); + mpi->width=mpi->w=w; + mpi->height=mpi->h=h; + return mpi; +} + +void ff_free_mp_image(mp_image_t* mpi){ + if(!mpi) return; + if(mpi->flags&MP_IMGFLAG_ALLOCATED){ + /* because we allocate the whole image at once */ + av_free(mpi->planes[0]); + if (mpi->flags & MP_IMGFLAG_RGB_PALETTE) + av_free(mpi->planes[1]); + } + free(mpi); +} + diff --git a/libavfilter/libmpcodecs/mp_image.h b/libavfilter/libmpcodecs/mp_image.h new file mode 100644 index 0000000000..aedf4510cd --- /dev/null +++ b/libavfilter/libmpcodecs/mp_image.h @@ -0,0 +1,159 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_MP_IMAGE_H +#define MPLAYER_MP_IMAGE_H + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#undef printf //FIXME +#undef fprintf //FIXME +#include "mp_msg.h" +#include "libavutil/avutil.h" +#include "libavutil/avassert.h" +#undef realloc +#undef malloc +#undef free +#undef rand +#undef srand +#undef printf +#undef strncpy +#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t" +#define CODEC_FLAG2_MEMC_ONLY 0x00001000 ///< Only do ME/MC (I frames -> ref, P frame -> ME+MC). + +enum AVPixelFormat ff_mp2ff_pix_fmt(int mp); + +//--------- codec's requirements (filled by the codec/vf) --------- + +//--- buffer content restrictions: +// set if buffer content shouldn't be modified: +#define MP_IMGFLAG_PRESERVE 0x01 +// set if buffer content will be READ. +// This can be e.g. for next frame's MC: (I/P mpeg frames) - +// then in combination with MP_IMGFLAG_PRESERVE - or it +// can be because a video filter or codec will read a significant +// amount of data while processing that frame (e.g. blending something +// onto the frame, MV based intra prediction). +// A frame marked like this should not be placed in to uncachable +// video RAM for example. +#define MP_IMGFLAG_READABLE 0x02 + +//--- buffer width/stride/plane restrictions: (used for direct rendering) +// stride _have_to_ be aligned to MB boundary: [for DR restrictions] +#define MP_IMGFLAG_ACCEPT_ALIGNED_STRIDE 0x4 +// stride should be aligned to MB boundary: [for buffer allocation] +#define MP_IMGFLAG_PREFER_ALIGNED_STRIDE 0x8 +// codec accept any stride (>=width): +#define MP_IMGFLAG_ACCEPT_STRIDE 0x10 +// codec accept any width (width*bpp=stride -> stride%bpp==0) (>=width): +#define MP_IMGFLAG_ACCEPT_WIDTH 0x20 +//--- for planar formats only: +// uses only stride[0], and stride[1]=stride[2]=stride[0]>>mpi->chroma_x_shift +#define MP_IMGFLAG_COMMON_STRIDE 0x40 +// uses only planes[0], and calculates planes[1,2] from width,height,imgfmt +#define MP_IMGFLAG_COMMON_PLANE 0x80 + +#define MP_IMGFLAGMASK_RESTRICTIONS 0xFF + +//--------- color info (filled by ff_mp_image_setfmt() ) ----------- +// set if number of planes > 1 +#define MP_IMGFLAG_PLANAR 0x100 +// set if it's YUV colorspace +#define MP_IMGFLAG_YUV 0x200 +// set if it's swapped (BGR or YVU) plane/byteorder +#define MP_IMGFLAG_SWAPPED 0x400 +// set if you want memory for palette allocated and managed by ff_vf_get_image etc. +#define MP_IMGFLAG_RGB_PALETTE 0x800 + +#define MP_IMGFLAGMASK_COLORS 0xF00 + +// codec uses drawing/rendering callbacks (draw_slice()-like thing, DR method 2) +// [the codec will set this flag if it supports callbacks, and the vo _may_ +// clear it in get_image() if draw_slice() not implemented] +#define MP_IMGFLAG_DRAW_CALLBACK 0x1000 +// set if it's in video buffer/memory: [set by vo/vf's get_image() !!!] +#define MP_IMGFLAG_DIRECT 0x2000 +// set if buffer is allocated (used in destination images): +#define MP_IMGFLAG_ALLOCATED 0x4000 + +// buffer type was printed (do NOT set this flag - it's for INTERNAL USE!!!) +#define MP_IMGFLAG_TYPE_DISPLAYED 0x8000 + +// codec doesn't support any form of direct rendering - it has own buffer +// allocation. so we just export its buffer pointers: +#define MP_IMGTYPE_EXPORT 0 +// codec requires a static WO buffer, but it does only partial updates later: +#define MP_IMGTYPE_STATIC 1 +// codec just needs some WO memory, where it writes/copies the whole frame to: +#define MP_IMGTYPE_TEMP 2 +// I+P type, requires 2+ independent static R/W buffers +#define MP_IMGTYPE_IP 3 +// I+P+B type, requires 2+ independent static R/W and 1+ temp WO buffers +#define MP_IMGTYPE_IPB 4 +// Upper 16 bits give desired buffer number, -1 means get next available +#define MP_IMGTYPE_NUMBERED 5 +// Doesn't need any buffer, incomplete image (probably a first field only) +// we need this type to be able to differentiate between half frames and +// all other cases +#define MP_IMGTYPE_INCOMPLETE 6 + +#define MP_MAX_PLANES 4 + +#define MP_IMGFIELD_ORDERED 0x01 +#define MP_IMGFIELD_TOP_FIRST 0x02 +#define MP_IMGFIELD_REPEAT_FIRST 0x04 +#define MP_IMGFIELD_TOP 0x08 +#define MP_IMGFIELD_BOTTOM 0x10 +#define MP_IMGFIELD_INTERLACED 0x20 + +typedef struct mp_image { + unsigned int flags; + unsigned char type; + int number; + unsigned char bpp; // bits/pixel. NOT depth! for RGB it will be n*8 + unsigned int imgfmt; + int width,height; // stored dimensions + int x,y,w,h; // visible dimensions + unsigned char* planes[MP_MAX_PLANES]; + int stride[MP_MAX_PLANES]; + char * qscale; + int qstride; + int pict_type; // 0->unknown, 1->I, 2->P, 3->B + int fields; + int qscale_type; // 0->mpeg1/4/h263, 1->mpeg2 + int num_planes; + /* these are only used by planar formats Y,U(Cb),V(Cr) */ + int chroma_width; + int chroma_height; + int chroma_x_shift; // horizontal + int chroma_y_shift; // vertical + int usage_count; + /* for private use by filter or vo driver (to store buffer id or dmpi) */ + void* priv; +} mp_image_t; + +void ff_mp_image_setfmt(mp_image_t* mpi,unsigned int out_fmt); +mp_image_t* ff_new_mp_image(int w,int h); +void ff_free_mp_image(mp_image_t* mpi); + +mp_image_t* ff_alloc_mpi(int w, int h, unsigned long int fmt); +void ff_mp_image_alloc_planes(mp_image_t *mpi); +void ff_copy_mpi(mp_image_t *dmpi, mp_image_t *mpi); + +#endif /* MPLAYER_MP_IMAGE_H */ diff --git a/libavfilter/libmpcodecs/mp_msg.h b/libavfilter/libmpcodecs/mp_msg.h new file mode 100644 index 0000000000..51cdff3cef --- /dev/null +++ b/libavfilter/libmpcodecs/mp_msg.h @@ -0,0 +1,166 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_MP_MSG_H +#define MPLAYER_MP_MSG_H + +#include <stdarg.h> + +// defined in mplayer.c and mencoder.c +extern int verbose; + +// verbosity elevel: + +/* Only messages level MSGL_FATAL-MSGL_STATUS should be translated, + * messages level MSGL_V and above should not be translated. */ + +#define MSGL_FATAL 0 // will exit/abort +#define MSGL_ERR 1 // continues +#define MSGL_WARN 2 // only warning +#define MSGL_HINT 3 // short help message +#define MSGL_INFO 4 // -quiet +#define MSGL_STATUS 5 // v=0 +#define MSGL_V 6 // v=1 +#define MSGL_DBG2 7 // v=2 +#define MSGL_DBG3 8 // v=3 +#define MSGL_DBG4 9 // v=4 +#define MSGL_DBG5 10 // v=5 + +#define MSGL_FIXME 1 // for conversions from printf where the appropriate MSGL is not known; set equal to ERR for obtrusiveness +#define MSGT_FIXME 0 // for conversions from printf where the appropriate MSGT is not known; set equal to GLOBAL for obtrusiveness + +// code/module: + +#define MSGT_GLOBAL 0 // common player stuff errors +#define MSGT_CPLAYER 1 // console player (mplayer.c) +#define MSGT_GPLAYER 2 // gui player + +#define MSGT_VO 3 // libvo +#define MSGT_AO 4 // libao + +#define MSGT_DEMUXER 5 // demuxer.c (general stuff) +#define MSGT_DS 6 // demux stream (add/read packet etc) +#define MSGT_DEMUX 7 // fileformat-specific stuff (demux_*.c) +#define MSGT_HEADER 8 // fileformat-specific header (*header.c) + +#define MSGT_AVSYNC 9 // mplayer.c timer stuff +#define MSGT_AUTOQ 10 // mplayer.c auto-quality stuff + +#define MSGT_CFGPARSER 11 // cfgparser.c + +#define MSGT_DECAUDIO 12 // av decoder +#define MSGT_DECVIDEO 13 + +#define MSGT_SEEK 14 // seeking code +#define MSGT_WIN32 15 // win32 dll stuff +#define MSGT_OPEN 16 // open.c (stream opening) +#define MSGT_DVD 17 // open.c (DVD init/read/seek) + +#define MSGT_PARSEES 18 // parse_es.c (mpeg stream parser) +#define MSGT_LIRC 19 // lirc_mp.c and input lirc driver + +#define MSGT_STREAM 20 // stream.c +#define MSGT_CACHE 21 // cache2.c + +#define MSGT_MENCODER 22 + +#define MSGT_XACODEC 23 // XAnim codecs + +#define MSGT_TV 24 // TV input subsystem + +#define MSGT_OSDEP 25 // OS-dependent parts + +#define MSGT_SPUDEC 26 // spudec.c + +#define MSGT_PLAYTREE 27 // Playtree handeling (playtree.c, playtreeparser.c) + +#define MSGT_INPUT 28 + +#define MSGT_VFILTER 29 + +#define MSGT_OSD 30 + +#define MSGT_NETWORK 31 + +#define MSGT_CPUDETECT 32 + +#define MSGT_CODECCFG 33 + +#define MSGT_SWS 34 + +#define MSGT_VOBSUB 35 +#define MSGT_SUBREADER 36 + +#define MSGT_AFILTER 37 // Audio filter messages + +#define MSGT_NETST 38 // Netstream + +#define MSGT_MUXER 39 // muxer layer + +#define MSGT_OSD_MENU 40 + +#define MSGT_IDENTIFY 41 // -identify output + +#define MSGT_RADIO 42 + +#define MSGT_ASS 43 // libass messages + +#define MSGT_LOADER 44 // dll loader messages + +#define MSGT_STATUSLINE 45 // playback/encoding status line + +#define MSGT_TELETEXT 46 // Teletext decoder + +#define MSGT_MAX 64 + + +extern char *ff_mp_msg_charset; +extern int ff_mp_msg_color; +extern int ff_mp_msg_module; + +extern int ff_mp_msg_levels[MSGT_MAX]; +extern int ff_mp_msg_level_all; + + +void ff_mp_msg_init(void); +int ff_mp_msg_test(int mod, int lev); + +#include "config.h" + +void ff_mp_msg_va(int mod, int lev, const char *format, va_list va); +#ifdef __GNUC__ +void ff_mp_msg(int mod, int lev, const char *format, ... ) __attribute__ ((format (printf, 3, 4))); +# ifdef MP_DEBUG +# define mp_dbg(mod,lev, args... ) ff_mp_msg(mod, lev, ## args ) +# else + // only useful for developers, disable but check syntax +# define mp_dbg(mod,lev, args... ) do { if (0) ff_mp_msg(mod, lev, ## args ); } while (0) +# endif +#else // not GNU C +void ff_mp_msg(int mod, int lev, const char *format, ... ); +# ifdef MP_DEBUG +# define mp_dbg(mod,lev, ... ) ff_mp_msg(mod, lev, __VA_ARGS__) +# else + // only useful for developers, disable but check syntax +# define mp_dbg(mod,lev, ... ) do { if (0) ff_mp_msg(mod, lev, __VA_ARGS__); } while (0) +# endif +#endif /* __GNUC__ */ + +const char* ff_filename_recode(const char* filename); + +#endif /* MPLAYER_MP_MSG_H */ diff --git a/libavfilter/libmpcodecs/mpc_info.h b/libavfilter/libmpcodecs/mpc_info.h new file mode 100644 index 0000000000..8554699120 --- /dev/null +++ b/libavfilter/libmpcodecs/mpc_info.h @@ -0,0 +1,43 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_MPC_INFO_H +#define MPLAYER_MPC_INFO_H + +typedef struct mp_codec_info_s +{ + /* codec long name ("Autodesk FLI/FLC Animation decoder" */ + const char *name; + /* short name (same as driver name in codecs.conf) ("dshow") */ + const char *short_name; + /* interface author/maintainer */ + const char *maintainer; + /* codec author ("Aaron Holtzman <aholtzma@ess.engr.uvic.ca>") */ + const char *author; + /* any additional comments */ + const char *comment; +} mp_codec_info_t; + +#define CONTROL_OK 1 +#define CONTROL_TRUE 1 +#define CONTROL_FALSE 0 +#define CONTROL_UNKNOWN -1 +#define CONTROL_ERROR -2 +#define CONTROL_NA -3 + +#endif /* MPLAYER_MPC_INFO_H */ diff --git a/libavfilter/libmpcodecs/vf.h b/libavfilter/libmpcodecs/vf.h new file mode 100644 index 0000000000..d8fc66be47 --- /dev/null +++ b/libavfilter/libmpcodecs/vf.h @@ -0,0 +1,169 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_VF_H +#define MPLAYER_VF_H + +//#include "m_option.h" +#include "mp_image.h" + +//extern m_obj_settings_t* vf_settings; +//extern const m_obj_list_t vf_obj_list; + +struct vf_instance; +struct vf_priv_s; + +typedef struct vf_info_s { + const char *info; + const char *name; + const char *author; + const char *comment; + int (*vf_open)(struct vf_instance *vf,char* args); + // Ptr to a struct dscribing the options + const void* opts; +} vf_info_t; + +#define NUM_NUMBERED_MPI 50 + +typedef struct vf_image_context_s { + mp_image_t* static_images[2]; + mp_image_t* temp_images[1]; + mp_image_t* export_images[1]; + mp_image_t* numbered_images[NUM_NUMBERED_MPI]; + int static_idx; +} vf_image_context_t; + +typedef struct vf_format_context_t { + int have_configured; + int orig_width, orig_height, orig_fmt; +} vf_format_context_t; + +typedef struct vf_instance { + const vf_info_t* info; + // funcs: + int (*config)(struct vf_instance *vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt); + int (*control)(struct vf_instance *vf, + int request, void* data); + int (*query_format)(struct vf_instance *vf, + unsigned int fmt); + void (*get_image)(struct vf_instance *vf, + mp_image_t *mpi); + int (*put_image)(struct vf_instance *vf, + mp_image_t *mpi, double pts); + void (*start_slice)(struct vf_instance *vf, + mp_image_t *mpi); + void (*draw_slice)(struct vf_instance *vf, + unsigned char** src, int* stride, int w,int h, int x, int y); + void (*uninit)(struct vf_instance *vf); + + int (*continue_buffered_image)(struct vf_instance *vf); + // caps: + unsigned int default_caps; // used by default query_format() + unsigned int default_reqs; // used by default config() + // data: + int w, h; + vf_image_context_t imgctx; + vf_format_context_t fmt; + struct vf_instance *next; + mp_image_t *dmpi; + struct vf_priv_s* priv; +} vf_instance_t; + +// control codes: +#include "mpc_info.h" + +typedef struct vf_seteq_s +{ + const char *item; + int value; +} vf_equalizer_t; + +#define VFCTRL_QUERY_MAX_PP_LEVEL 4 /* test for postprocessing support (max level) */ +#define VFCTRL_SET_PP_LEVEL 5 /* set postprocessing level */ +#define VFCTRL_SET_EQUALIZER 6 /* set color options (brightness,contrast etc) */ +#define VFCTRL_GET_EQUALIZER 8 /* gset color options (brightness,contrast etc) */ +#define VFCTRL_DRAW_OSD 7 +#define VFCTRL_CHANGE_RECTANGLE 9 /* Change the rectangle boundaries */ +#define VFCTRL_FLIP_PAGE 10 /* Tell the vo to flip pages */ +#define VFCTRL_DUPLICATE_FRAME 11 /* For encoding - encode zero-change frame */ +#define VFCTRL_SKIP_NEXT_FRAME 12 /* For encoding - drop the next frame that passes through */ +#define VFCTRL_FLUSH_FRAMES 13 /* For encoding - flush delayed frames */ +#define VFCTRL_SCREENSHOT 14 /* Make a screenshot */ +#define VFCTRL_INIT_EOSD 15 /* Select EOSD renderer */ +#define VFCTRL_DRAW_EOSD 16 /* Render EOSD */ +#define VFCTRL_GET_PTS 17 /* Return last pts value that reached vf_vo*/ +#define VFCTRL_SET_DEINTERLACE 18 /* Set deinterlacing status */ +#define VFCTRL_GET_DEINTERLACE 19 /* Get deinterlacing status */ + +#include "vfcap.h" + +//FIXME this should be in a common header, but i dunno which +#define MP_NOPTS_VALUE (-1LL<<63) //both int64_t and double should be able to represent this exactly + + +// functions: +void ff_vf_mpi_clear(mp_image_t* mpi,int x0,int y0,int w,int h); +mp_image_t* ff_vf_get_image(vf_instance_t* vf, unsigned int outfmt, int mp_imgtype, int mp_imgflag, int w, int h); + +vf_instance_t* vf_open_plugin(const vf_info_t* const* filter_list, vf_instance_t* next, const char *name, char **args); +vf_instance_t* vf_open_filter(vf_instance_t* next, const char *name, char **args); +vf_instance_t* ff_vf_add_before_vo(vf_instance_t **vf, char *name, char **args); +vf_instance_t* vf_open_encoder(vf_instance_t* next, const char *name, char *args); + +unsigned int ff_vf_match_csp(vf_instance_t** vfp,const unsigned int* list,unsigned int preferred); +void ff_vf_clone_mpi_attributes(mp_image_t* dst, mp_image_t* src); +void ff_vf_queue_frame(vf_instance_t *vf, int (*)(vf_instance_t *)); +int ff_vf_output_queued_frame(vf_instance_t *vf); + +// default wrappers: +int ff_vf_next_config(struct vf_instance *vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt); +int ff_vf_next_control(struct vf_instance *vf, int request, void* data); +void ff_vf_extra_flip(struct vf_instance *vf); +int ff_vf_next_query_format(struct vf_instance *vf, unsigned int fmt); +int ff_vf_next_put_image(struct vf_instance *vf,mp_image_t *mpi, double pts); +void ff_vf_next_draw_slice (struct vf_instance *vf, unsigned char** src, int* stride, int w,int h, int x, int y); + +vf_instance_t* ff_append_filters(vf_instance_t* last); + +void ff_vf_uninit_filter(vf_instance_t* vf); +void ff_vf_uninit_filter_chain(vf_instance_t* vf); + +int ff_vf_config_wrapper(struct vf_instance *vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt); + +static inline int norm_qscale(int qscale, int type) +{ + switch (type) { + case 0: // MPEG-1 + return qscale; + case 1: // MPEG-2 + return qscale >> 1; + case 2: // H264 + return qscale >> 2; + case 3: // VP56 + return (63 - qscale + 2) >> 2; + } + return qscale; +} + +#endif /* MPLAYER_VF_H */ diff --git a/libavfilter/libmpcodecs/vf_eq.c b/libavfilter/libmpcodecs/vf_eq.c new file mode 100644 index 0000000000..f8efa846c9 --- /dev/null +++ b/libavfilter/libmpcodecs/vf_eq.c @@ -0,0 +1,240 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> + +#include "config.h" +#include "mp_msg.h" +#include "cpudetect.h" + +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" + +#include "libvo/video_out.h" + +struct vf_priv_s { + unsigned char *buf; + int brightness; + int contrast; +}; + +#if HAVE_MMX && HAVE_6REGS +static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, int sstride, + int w, int h, int brightness, int contrast) +{ + int i; + int pel; + int dstep = dstride-w; + int sstep = sstride-w; + short brvec[4]; + short contvec[4]; + + contrast = ((contrast+100)*256*16)/100; + brightness = ((brightness+100)*511)/200-128 - contrast/32; + + brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness; + contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast; + + while (h--) { + __asm__ volatile ( + "movq (%5), %%mm3 \n\t" + "movq (%6), %%mm4 \n\t" + "pxor %%mm0, %%mm0 \n\t" + "movl %4, %%eax\n\t" + ASMALIGN(4) + "1: \n\t" + "movq (%0), %%mm1 \n\t" + "movq (%0), %%mm2 \n\t" + "punpcklbw %%mm0, %%mm1 \n\t" + "punpckhbw %%mm0, %%mm2 \n\t" + "psllw $4, %%mm1 \n\t" + "psllw $4, %%mm2 \n\t" + "pmulhw %%mm4, %%mm1 \n\t" + "pmulhw %%mm4, %%mm2 \n\t" + "paddw %%mm3, %%mm1 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "packuswb %%mm2, %%mm1 \n\t" + "add $8, %0 \n\t" + "movq %%mm1, (%1) \n\t" + "add $8, %1 \n\t" + "decl %%eax \n\t" + "jnz 1b \n\t" + : "=r" (src), "=r" (dest) + : "0" (src), "1" (dest), "r" (w>>3), "r" (brvec), "r" (contvec) + : "%eax" + ); + + for (i = w&7; i; i--) + { + pel = ((*src++* contrast)>>12) + brightness; + if(pel&768) pel = (-pel)>>31; + *dest++ = pel; + } + + src += sstep; + dest += dstep; + } + __asm__ volatile ( "emms \n\t" ::: "memory" ); +} +#endif + +static void process_C(unsigned char *dest, int dstride, unsigned char *src, int sstride, + int w, int h, int brightness, int contrast) +{ + int i; + int pel; + int dstep = dstride-w; + int sstep = sstride-w; + + contrast = ((contrast+100)*256*256)/100; + brightness = ((brightness+100)*511)/200-128 - contrast/512; + + while (h--) { + for (i = w; i; i--) + { + pel = ((*src++* contrast)>>16) + brightness; + if(pel&768) pel = (-pel)>>31; + *dest++ = pel; + } + src += sstep; + dest += dstep; + } +} + +static void (*process)(unsigned char *dest, int dstride, unsigned char *src, int sstride, + int w, int h, int brightness, int contrast); + +/* FIXME: add packed yuv version of process */ + +static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) +{ + mp_image_t *dmpi; + + dmpi=ff_vf_get_image(vf->next, mpi->imgfmt, + MP_IMGTYPE_EXPORT, 0, + mpi->w, mpi->h); + + dmpi->stride[0] = mpi->stride[0]; + dmpi->planes[1] = mpi->planes[1]; + dmpi->planes[2] = mpi->planes[2]; + dmpi->stride[1] = mpi->stride[1]; + dmpi->stride[2] = mpi->stride[2]; + + if (!vf->priv->buf) vf->priv->buf = malloc(mpi->stride[0]*mpi->h); + + if ((vf->priv->brightness == 0) && (vf->priv->contrast == 0)) + dmpi->planes[0] = mpi->planes[0]; + else { + dmpi->planes[0] = vf->priv->buf; + process(dmpi->planes[0], dmpi->stride[0], + mpi->planes[0], mpi->stride[0], + mpi->w, mpi->h, vf->priv->brightness, + vf->priv->contrast); + } + + return ff_vf_next_put_image(vf,dmpi, pts); +} + +static int control(struct vf_instance *vf, int request, void* data) +{ + vf_equalizer_t *eq; + + switch (request) { + case VFCTRL_SET_EQUALIZER: + eq = data; + if (!strcmp(eq->item,"brightness")) { + vf->priv->brightness = eq->value; + return CONTROL_TRUE; + } + else if (!strcmp(eq->item,"contrast")) { + vf->priv->contrast = eq->value; + return CONTROL_TRUE; + } + break; + case VFCTRL_GET_EQUALIZER: + eq = data; + if (!strcmp(eq->item,"brightness")) { + eq->value = vf->priv->brightness; + return CONTROL_TRUE; + } + else if (!strcmp(eq->item,"contrast")) { + eq->value = vf->priv->contrast; + return CONTROL_TRUE; + } + break; + } + return ff_vf_next_control(vf, request, data); +} + +static int query_format(struct vf_instance *vf, unsigned int fmt) +{ + switch (fmt) { + case IMGFMT_YVU9: + case IMGFMT_IF09: + case IMGFMT_YV12: + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_CLPL: + case IMGFMT_Y800: + case IMGFMT_Y8: + case IMGFMT_NV12: + case IMGFMT_NV21: + case IMGFMT_444P: + case IMGFMT_422P: + case IMGFMT_411P: + return ff_vf_next_query_format(vf, fmt); + } + return 0; +} + +static void uninit(struct vf_instance *vf) +{ + free(vf->priv->buf); + free(vf->priv); +} + +static int vf_open(vf_instance_t *vf, char *args) +{ + vf->control=control; + vf->query_format=query_format; + vf->put_image=put_image; + vf->uninit=uninit; + + vf->priv = malloc(sizeof(struct vf_priv_s)); + memset(vf->priv, 0, sizeof(struct vf_priv_s)); + if (args) sscanf(args, "%d:%d", &vf->priv->brightness, &vf->priv->contrast); + + process = process_C; +#if HAVE_MMX && HAVE_6REGS + if(ff_gCpuCaps.hasMMX) process = process_MMX; +#endif + + return 1; +} + +const vf_info_t ff_vf_info_eq = { + "soft video equalizer", + "eq", + "Richard Felker", + "", + vf_open, +}; diff --git a/libavfilter/libmpcodecs/vf_eq2.c b/libavfilter/libmpcodecs/vf_eq2.c new file mode 100644 index 0000000000..c9c3ff69f4 --- /dev/null +++ b/libavfilter/libmpcodecs/vf_eq2.c @@ -0,0 +1,519 @@ +/* + * Software equalizer (brightness, contrast, gamma, saturation) + * + * Hampa Hug <hampa@hampa.ch> (original LUT gamma/contrast/brightness filter) + * Daniel Moreno <comac@comac.darktech.org> (saturation, R/G/B gamma support) + * Richard Felker (original MMX contrast/brightness code (vf_eq.c)) + * Michael Niedermayer <michalni@gmx.at> (LUT16) + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <inttypes.h> + +#include "config.h" +#include "mp_msg.h" +#include "cpudetect.h" + +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" + +#define LUT16 + +/* Per channel parameters */ +typedef struct eq2_param_t { + unsigned char lut[256]; +#ifdef LUT16 + uint16_t lut16[256*256]; +#endif + int lut_clean; + + void (*adjust) (struct eq2_param_t *par, unsigned char *dst, unsigned char *src, + unsigned w, unsigned h, unsigned dstride, unsigned sstride); + + double c; + double b; + double g; + double w; +} eq2_param_t; + +typedef struct vf_priv_s { + eq2_param_t param[3]; + + double contrast; + double brightness; + double saturation; + + double gamma; + double gamma_weight; + double rgamma; + double ggamma; + double bgamma; + + unsigned buf_w[3]; + unsigned buf_h[3]; + unsigned char *buf[3]; +} vf_eq2_t; + + +static +void create_lut (eq2_param_t *par) +{ + unsigned i; + double g, v; + double lw, gw; + + g = par->g; + gw = par->w; + lw = 1.0 - gw; + + if ((g < 0.001) || (g > 1000.0)) { + g = 1.0; + } + + g = 1.0 / g; + + for (i = 0; i < 256; i++) { + v = (double) i / 255.0; + v = par->c * (v - 0.5) + 0.5 + par->b; + + if (v <= 0.0) { + par->lut[i] = 0; + } + else { + v = v*lw + pow(v, g)*gw; + + if (v >= 1.0) { + par->lut[i] = 255; + } + else { + par->lut[i] = (unsigned char) (256.0 * v); + } + } + } + +#ifdef LUT16 + for(i=0; i<256*256; i++){ + par->lut16[i]= par->lut[i&0xFF] + (par->lut[i>>8]<<8); + } +#endif + + par->lut_clean = 1; +} + +#if HAVE_MMX && HAVE_6REGS +static +void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src, + unsigned w, unsigned h, unsigned dstride, unsigned sstride) +{ + unsigned i; + int contrast, brightness; + unsigned dstep, sstep; + int pel; + short brvec[4]; + short contvec[4]; + +// printf("\nmmx: src=%p dst=%p w=%d h=%d ds=%d ss=%d\n",src,dst,w,h,dstride,sstride); + + contrast = (int) (par->c * 256 * 16); + brightness = ((int) (100.0 * par->b + 100.0) * 511) / 200 - 128 - contrast / 32; + + brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness; + contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast; + + sstep = sstride - w; + dstep = dstride - w; + + while (h-- > 0) { + __asm__ volatile ( + "movq (%5), %%mm3 \n\t" + "movq (%6), %%mm4 \n\t" + "pxor %%mm0, %%mm0 \n\t" + "movl %4, %%eax\n\t" + ASMALIGN(4) + "1: \n\t" + "movq (%0), %%mm1 \n\t" + "movq (%0), %%mm2 \n\t" + "punpcklbw %%mm0, %%mm1 \n\t" + "punpckhbw %%mm0, %%mm2 \n\t" + "psllw $4, %%mm1 \n\t" + "psllw $4, %%mm2 \n\t" + "pmulhw %%mm4, %%mm1 \n\t" + "pmulhw %%mm4, %%mm2 \n\t" + "paddw %%mm3, %%mm1 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "packuswb %%mm2, %%mm1 \n\t" + "add $8, %0 \n\t" + "movq %%mm1, (%1) \n\t" + "add $8, %1 \n\t" + "decl %%eax \n\t" + "jnz 1b \n\t" + : "=r" (src), "=r" (dst) + : "0" (src), "1" (dst), "r" (w >> 3), "r" (brvec), "r" (contvec) + : "%eax" + ); + + for (i = w & 7; i > 0; i--) { + pel = ((*src++ * contrast) >> 12) + brightness; + if (pel & 768) { + pel = (-pel) >> 31; + } + *dst++ = pel; + } + + src += sstep; + dst += dstep; + } + + __asm__ volatile ( "emms \n\t" ::: "memory" ); +} +#endif + +static +void apply_lut (eq2_param_t *par, unsigned char *dst, unsigned char *src, + unsigned w, unsigned h, unsigned dstride, unsigned sstride) +{ + unsigned i, j, w2; + unsigned char *lut; + uint16_t *lut16; + + if (!par->lut_clean) { + create_lut (par); + } + + lut = par->lut; +#ifdef LUT16 + lut16 = par->lut16; + w2= (w>>3)<<2; + for (j = 0; j < h; j++) { + uint16_t *src16= (uint16_t*)src; + uint16_t *dst16= (uint16_t*)dst; + for (i = 0; i < w2; i+=4) { + dst16[i+0] = lut16[src16[i+0]]; + dst16[i+1] = lut16[src16[i+1]]; + dst16[i+2] = lut16[src16[i+2]]; + dst16[i+3] = lut16[src16[i+3]]; + } + i <<= 1; +#else + w2= (w>>3)<<3; + for (j = 0; j < h; j++) { + for (i = 0; i < w2; i+=8) { + dst[i+0] = lut[src[i+0]]; + dst[i+1] = lut[src[i+1]]; + dst[i+2] = lut[src[i+2]]; + dst[i+3] = lut[src[i+3]]; + dst[i+4] = lut[src[i+4]]; + dst[i+5] = lut[src[i+5]]; + dst[i+6] = lut[src[i+6]]; + dst[i+7] = lut[src[i+7]]; + } +#endif + for (; i < w; i++) { + dst[i] = lut[src[i]]; + } + + src += sstride; + dst += dstride; + } +} + +static +int put_image (vf_instance_t *vf, mp_image_t *src, double pts) +{ + unsigned i; + vf_eq2_t *eq2; + mp_image_t *dst; + unsigned long img_n,img_c; + + eq2 = vf->priv; + + if ((eq2->buf_w[0] != src->w) || (eq2->buf_h[0] != src->h)) { + eq2->buf_w[0] = src->w; + eq2->buf_h[0] = src->h; + eq2->buf_w[1] = eq2->buf_w[2] = src->w >> src->chroma_x_shift; + eq2->buf_h[1] = eq2->buf_h[2] = src->h >> src->chroma_y_shift; + img_n = eq2->buf_w[0]*eq2->buf_h[0]; + if(src->num_planes>1){ + img_c = eq2->buf_w[1]*eq2->buf_h[1]; + eq2->buf[0] = realloc (eq2->buf[0], img_n + 2*img_c); + eq2->buf[1] = eq2->buf[0] + img_n; + eq2->buf[2] = eq2->buf[1] + img_c; + } else + eq2->buf[0] = realloc (eq2->buf[0], img_n); + } + + dst = ff_vf_get_image (vf->next, src->imgfmt, MP_IMGTYPE_EXPORT, 0, src->w, src->h); + + for (i = 0; i < ((src->num_planes>1)?3:1); i++) { + if (eq2->param[i].adjust != NULL) { + dst->planes[i] = eq2->buf[i]; + dst->stride[i] = eq2->buf_w[i]; + + eq2->param[i].adjust (&eq2->param[i], dst->planes[i], src->planes[i], + eq2->buf_w[i], eq2->buf_h[i], dst->stride[i], src->stride[i]); + } + else { + dst->planes[i] = src->planes[i]; + dst->stride[i] = src->stride[i]; + } + } + + return ff_vf_next_put_image (vf, dst, pts); +} + +static +void check_values (eq2_param_t *par) +{ + /* yuck! floating point comparisons... */ + + if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) { + par->adjust = NULL; + } +#if HAVE_MMX && HAVE_6REGS + else if (par->g == 1.0 && ff_gCpuCaps.hasMMX) { + par->adjust = &affine_1d_MMX; + } +#endif + else { + par->adjust = &apply_lut; + } +} + +static +void print_values (vf_eq2_t *eq2) +{ + ff_mp_msg (MSGT_VFILTER, MSGL_V, "vf_eq2: c=%.2f b=%.2f g=%.4f s=%.2f \n", + eq2->contrast, eq2->brightness, eq2->gamma, eq2->saturation + ); +} + +static +void set_contrast (vf_eq2_t *eq2, double c) +{ + eq2->contrast = c; + eq2->param[0].c = c; + eq2->param[0].lut_clean = 0; + check_values (&eq2->param[0]); + print_values (eq2); +} + +static +void set_brightness (vf_eq2_t *eq2, double b) +{ + eq2->brightness = b; + eq2->param[0].b = b; + eq2->param[0].lut_clean = 0; + check_values (&eq2->param[0]); + print_values (eq2); +} + +static +void set_gamma (vf_eq2_t *eq2, double g) +{ + eq2->gamma = g; + + eq2->param[0].g = eq2->gamma * eq2->ggamma; + eq2->param[1].g = sqrt (eq2->bgamma / eq2->ggamma); + eq2->param[2].g = sqrt (eq2->rgamma / eq2->ggamma); + eq2->param[0].w = eq2->param[1].w = eq2->param[2].w = eq2->gamma_weight; + + eq2->param[0].lut_clean = 0; + eq2->param[1].lut_clean = 0; + eq2->param[2].lut_clean = 0; + + check_values (&eq2->param[0]); + check_values (&eq2->param[1]); + check_values (&eq2->param[2]); + + print_values (eq2); +} + +static +void set_saturation (vf_eq2_t *eq2, double s) +{ + eq2->saturation = s; + + eq2->param[1].c = s; + eq2->param[2].c = s; + + eq2->param[1].lut_clean = 0; + eq2->param[2].lut_clean = 0; + + check_values (&eq2->param[1]); + check_values (&eq2->param[2]); + + print_values (eq2); +} + +static +int control (vf_instance_t *vf, int request, void *data) +{ + vf_equalizer_t *eq; + + switch (request) { + case VFCTRL_SET_EQUALIZER: + eq = (vf_equalizer_t *) data; + + if (strcmp (eq->item, "gamma") == 0) { + set_gamma (vf->priv, exp (log (8.0) * eq->value / 100.0)); + return CONTROL_TRUE; + } + else if (strcmp (eq->item, "contrast") == 0) { + set_contrast (vf->priv, (1.0 / 100.0) * (eq->value + 100)); + return CONTROL_TRUE; + } + else if (strcmp (eq->item, "brightness") == 0) { + set_brightness (vf->priv, (1.0 / 100.0) * eq->value); + return CONTROL_TRUE; + } + else if (strcmp (eq->item, "saturation") == 0) { + set_saturation (vf->priv, (double) (eq->value + 100) / 100.0); + return CONTROL_TRUE; + } + break; + + case VFCTRL_GET_EQUALIZER: + eq = (vf_equalizer_t *) data; + if (strcmp (eq->item, "gamma") == 0) { + eq->value = (int) (100.0 * log (vf->priv->gamma) / log (8.0)); + return CONTROL_TRUE; + } + else if (strcmp (eq->item, "contrast") == 0) { + eq->value = (int) (100.0 * vf->priv->contrast) - 100; + return CONTROL_TRUE; + } + else if (strcmp (eq->item, "brightness") == 0) { + eq->value = (int) (100.0 * vf->priv->brightness); + return CONTROL_TRUE; + } + else if (strcmp (eq->item, "saturation") == 0) { + eq->value = (int) (100.0 * vf->priv->saturation) - 100; + return CONTROL_TRUE; + } + break; + } + + return ff_vf_next_control (vf, request, data); +} + +static +int query_format (vf_instance_t *vf, unsigned fmt) +{ + switch (fmt) { + case IMGFMT_YVU9: + case IMGFMT_IF09: + case IMGFMT_YV12: + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_Y800: + case IMGFMT_Y8: + case IMGFMT_444P: + case IMGFMT_422P: + case IMGFMT_411P: + return ff_vf_next_query_format (vf, fmt); + } + + return 0; +} + +static +void uninit (vf_instance_t *vf) +{ + if (vf->priv != NULL) { + free (vf->priv->buf[0]); + free (vf->priv); + } +} + +static +int vf_open(vf_instance_t *vf, char *args) +{ + unsigned i; + vf_eq2_t *eq2; + double par[8]; + + vf->control = control; + vf->query_format = query_format; + vf->put_image = put_image; + vf->uninit = uninit; + + vf->priv = malloc (sizeof (vf_eq2_t)); + eq2 = vf->priv; + + for (i = 0; i < 3; i++) { + eq2->buf[i] = NULL; + eq2->buf_w[i] = 0; + eq2->buf_h[i] = 0; + + eq2->param[i].adjust = NULL; + eq2->param[i].c = 1.0; + eq2->param[i].b = 0.0; + eq2->param[i].g = 1.0; + eq2->param[i].lut_clean = 0; + } + + eq2->contrast = 1.0; + eq2->brightness = 0.0; + eq2->saturation = 1.0; + + eq2->gamma = 1.0; + eq2->gamma_weight = 1.0; + eq2->rgamma = 1.0; + eq2->ggamma = 1.0; + eq2->bgamma = 1.0; + + if (args != NULL) { + par[0] = 1.0; + par[1] = 1.0; + par[2] = 0.0; + par[3] = 1.0; + par[4] = 1.0; + par[5] = 1.0; + par[6] = 1.0; + par[7] = 1.0; + sscanf (args, "%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf", + par, par + 1, par + 2, par + 3, par + 4, par + 5, par + 6, par + 7 + ); + + eq2->rgamma = par[4]; + eq2->ggamma = par[5]; + eq2->bgamma = par[6]; + eq2->gamma_weight = par[7]; + + set_gamma (eq2, par[0]); + set_contrast (eq2, par[1]); + set_brightness (eq2, par[2]); + set_saturation (eq2, par[3]); + } + + return 1; +} + +const vf_info_t ff_vf_info_eq2 = { + "Software equalizer", + "eq2", + "Hampa Hug, Daniel Moreno, Richard Felker", + "", + &vf_open, + NULL +}; diff --git a/libavfilter/libmpcodecs/vf_fspp.c b/libavfilter/libmpcodecs/vf_fspp.c new file mode 100644 index 0000000000..e1b26bc625 --- /dev/null +++ b/libavfilter/libmpcodecs/vf_fspp.c @@ -0,0 +1,2125 @@ +/* + * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru> + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * This implementation is based on an algorithm described in + * "Aria Nosratinia Embedded Post-Processing for + * Enhancement of Compressed Images (1999)" + * (http://citeseer.nj.nec.com/nosratinia99embedded.html) + * Further, with splitting (i)dct into hor/ver passes, one of them can be + * performed once per block, not pixel. This allows for much better speed. + */ + +/* + Heavily optimized version of SPP filter by Nikolaj + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <math.h> + +#include "config.h" + +#include "mp_msg.h" +#include "cpudetect.h" +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" +#include "av_helpers.h" +#include "libvo/fastmemcpy.h" + +#include "libavutil/internal.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/mem.h" +#include "libavutil/x86/asm.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/dsputil.h" + +#undef free +#undef malloc + +//===========================================================================// +#define BLOCKSZ 12 + +static const short custom_threshold[64]= +// values (296) can't be too high +// -it causes too big quant dependence +// or maybe overflow(check), which results in some flashing +{ 71, 296, 295, 237, 71, 40, 38, 19, + 245, 193, 185, 121, 102, 73, 53, 27, + 158, 129, 141, 107, 97, 73, 50, 26, + 102, 116, 109, 98, 82, 66, 45, 23, + 71, 94, 95, 81, 70, 56, 38, 20, + 56, 77, 74, 66, 56, 44, 30, 15, + 38, 53, 50, 45, 38, 30, 21, 11, + 20, 27, 26, 23, 20, 15, 11, 5 +}; + +DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = { + { 0, 48, 12, 60, 3, 51, 15, 63, }, + { 32, 16, 44, 28, 35, 19, 47, 31, }, + { 8, 56, 4, 52, 11, 59, 7, 55, }, + { 40, 24, 36, 20, 43, 27, 39, 23, }, + { 2, 50, 14, 62, 1, 49, 13, 61, }, + { 34, 18, 46, 30, 33, 17, 45, 29, }, + { 10, 58, 6, 54, 9, 57, 5, 53, }, + { 42, 26, 38, 22, 41, 25, 37, 21, }, +}; + +struct vf_priv_s { //align 16 ! + uint64_t threshold_mtx_noq[8*2]; + uint64_t threshold_mtx[8*2];//used in both C & MMX (& later SSE2) versions + + int log2_count; + int temp_stride; + int qp; + int mpeg2; + int prev_q; + uint8_t *src; + int16_t *temp; + int bframes; + char *non_b_qp; +}; + + +#if !HAVE_MMX + +//This func reads from 1 slice, 1 and clears 0 & 1 +static void store_slice_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale) +{int y, x; +#define STORE(pos) \ + temp= (src[x + pos] + (d[pos]>>log2_scale))>>(6-log2_scale); \ + src[x + pos]=src[x + pos - 8*src_stride]=0; \ + if(temp & 0x100) temp= ~(temp>>31); \ + dst[x + pos]= temp; + + for(y=0; y<height; y++){ + const uint8_t *d= dither[y]; + for(x=0; x<width; x+=8){ + int temp; + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + STORE(6); + STORE(7); + } + src+=src_stride; + dst+=dst_stride; + } +} + +//This func reads from 2 slices, 0 & 2 and clears 2-nd +static void store_slice2_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale) +{int y, x; +#define STORE2(pos) \ + temp= (src[x + pos] + src[x + pos + 16*src_stride] + (d[pos]>>log2_scale))>>(6-log2_scale); \ + src[x + pos + 16*src_stride]=0; \ + if(temp & 0x100) temp= ~(temp>>31); \ + dst[x + pos]= temp; + + for(y=0; y<height; y++){ + const uint8_t *d= dither[y]; + for(x=0; x<width; x+=8){ + int temp; + STORE2(0); + STORE2(1); + STORE2(2); + STORE2(3); + STORE2(4); + STORE2(5); + STORE2(6); + STORE2(7); + } + src+=src_stride; + dst+=dst_stride; + } +} + +static void mul_thrmat_c(struct vf_priv_s *p,int q) +{ + int a; + for(a=0;a<64;a++) + ((short*)p->threshold_mtx)[a]=q * ((short*)p->threshold_mtx_noq)[a];//ints faster in C +} + +static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt); +static void row_idct_c(int16_t* workspace, + int16_t* output_adr, int output_stride, int cnt); +static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int cnt); + +//this is rather ugly, but there is no need for function pointers +#define store_slice_s store_slice_c +#define store_slice2_s store_slice2_c +#define mul_thrmat_s mul_thrmat_c +#define column_fidct_s column_fidct_c +#define row_idct_s row_idct_c +#define row_fdct_s row_fdct_c + +#else /* HAVE_MMX */ + +//This func reads from 1 slice, 1 and clears 0 & 1 +static void store_slice_mmx(uint8_t *dst, int16_t *src, long dst_stride, long src_stride, long width, long height, long log2_scale) +{ + const uint8_t *od=&dither[0][0]; + const uint8_t *end=&dither[height][0]; + width = (width+7)&~7; + dst_stride-=width; + //src_stride=(src_stride-width)*2; + __asm__ volatile( + "mov %5, %%"REG_d" \n\t" + "mov %6, %%"REG_S" \n\t" + "mov %7, %%"REG_D" \n\t" + "mov %1, %%"REG_a" \n\t" + "movd %%"REG_d", %%mm5 \n\t" + "xor $-1, %%"REG_d" \n\t" + "mov %%"REG_a", %%"REG_c" \n\t" + "add $7, %%"REG_d" \n\t" + "neg %%"REG_a" \n\t" + "sub %0, %%"REG_c" \n\t" + "add %%"REG_c", %%"REG_c" \n\t" + "movd %%"REG_d", %%mm2 \n\t" + "mov %%"REG_c", %1 \n\t" + "mov %2, %%"REG_d" \n\t" + "shl $4, %%"REG_a" \n\t" + + "2: \n\t" + "movq (%%"REG_d"), %%mm3 \n\t" + "movq %%mm3, %%mm4 \n\t" + "pxor %%mm7, %%mm7 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm4 \n\t" + "mov %0, %%"REG_c" \n\t" + "psraw %%mm5, %%mm3 \n\t" + "psraw %%mm5, %%mm4 \n\t" + "1: \n\t" + "movq %%mm7, (%%"REG_S",%%"REG_a") \n\t" + "movq (%%"REG_S"), %%mm0 \n\t" + "movq 8(%%"REG_S"), %%mm1 \n\t" + + "movq %%mm7, 8(%%"REG_S",%%"REG_a") \n\t" + "paddw %%mm3, %%mm0 \n\t" + "paddw %%mm4, %%mm1 \n\t" + + "movq %%mm7, (%%"REG_S") \n\t" + "psraw %%mm2, %%mm0 \n\t" + "psraw %%mm2, %%mm1 \n\t" + + "movq %%mm7, 8(%%"REG_S") \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "add $16, %%"REG_S" \n\t" + + "movq %%mm0, (%%"REG_D") \n\t" + "add $8, %%"REG_D" \n\t" + "sub $8, %%"REG_c" \n\t" + "jg 1b \n\t" + "add %1, %%"REG_S" \n\t" + "add $8, %%"REG_d" \n\t" + "add %3, %%"REG_D" \n\t" + "cmp %4, %%"REG_d" \n\t" + "jl 2b \n\t" + + : + : "m" (width), "m" (src_stride), "erm" (od), "m" (dst_stride), "erm" (end), + "m" (log2_scale), "m" (src), "m" (dst) //input + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D + ); +} + +//This func reads from 2 slices, 0 & 2 and clears 2-nd +static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long src_stride, long width, long height, long log2_scale) +{ + const uint8_t *od=&dither[0][0]; + const uint8_t *end=&dither[height][0]; + width = (width+7)&~7; + dst_stride-=width; + //src_stride=(src_stride-width)*2; + __asm__ volatile( + "mov %5, %%"REG_d" \n\t" + "mov %6, %%"REG_S" \n\t" + "mov %7, %%"REG_D" \n\t" + "mov %1, %%"REG_a" \n\t" + "movd %%"REG_d", %%mm5 \n\t" + "xor $-1, %%"REG_d" \n\t" + "mov %%"REG_a", %%"REG_c" \n\t" + "add $7, %%"REG_d" \n\t" + "sub %0, %%"REG_c" \n\t" + "add %%"REG_c", %%"REG_c" \n\t" + "movd %%"REG_d", %%mm2 \n\t" + "mov %%"REG_c", %1 \n\t" + "mov %2, %%"REG_d" \n\t" + "shl $5, %%"REG_a" \n\t" + + "2: \n\t" + "movq (%%"REG_d"), %%mm3 \n\t" + "movq %%mm3, %%mm4 \n\t" + "pxor %%mm7, %%mm7 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm4 \n\t" + "mov %0, %%"REG_c" \n\t" + "psraw %%mm5, %%mm3 \n\t" + "psraw %%mm5, %%mm4 \n\t" + "1: \n\t" + "movq (%%"REG_S"), %%mm0 \n\t" + "movq 8(%%"REG_S"), %%mm1 \n\t" + "paddw %%mm3, %%mm0 \n\t" + + "paddw (%%"REG_S",%%"REG_a"), %%mm0 \n\t" + "paddw %%mm4, %%mm1 \n\t" + "movq 8(%%"REG_S",%%"REG_a"), %%mm6 \n\t" + + "movq %%mm7, (%%"REG_S",%%"REG_a") \n\t" + "psraw %%mm2, %%mm0 \n\t" + "paddw %%mm6, %%mm1 \n\t" + + "movq %%mm7, 8(%%"REG_S",%%"REG_a") \n\t" + "psraw %%mm2, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + + "movq %%mm0, (%%"REG_D") \n\t" + "add $16, %%"REG_S" \n\t" + "add $8, %%"REG_D" \n\t" + "sub $8, %%"REG_c" \n\t" + "jg 1b \n\t" + "add %1, %%"REG_S" \n\t" + "add $8, %%"REG_d" \n\t" + "add %3, %%"REG_D" \n\t" + "cmp %4, %%"REG_d" \n\t" + "jl 2b \n\t" + + : + : "m" (width), "m" (src_stride), "erm" (od), "m" (dst_stride), "erm" (end), + "m" (log2_scale), "m" (src), "m" (dst) //input + : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_D, "%"REG_S + ); +} + +static void mul_thrmat_mmx(struct vf_priv_s *p, int q) +{ + uint64_t *adr=&p->threshold_mtx_noq[0]; + __asm__ volatile( + "movd %0, %%mm7 \n\t" + "add $8*8*2, %%"REG_D" \n\t" + "movq 0*8(%%"REG_S"), %%mm0 \n\t" + "punpcklwd %%mm7, %%mm7 \n\t" + "movq 1*8(%%"REG_S"), %%mm1 \n\t" + "punpckldq %%mm7, %%mm7 \n\t" + "pmullw %%mm7, %%mm0 \n\t" + + "movq 2*8(%%"REG_S"), %%mm2 \n\t" + "pmullw %%mm7, %%mm1 \n\t" + + "movq 3*8(%%"REG_S"), %%mm3 \n\t" + "pmullw %%mm7, %%mm2 \n\t" + + "movq %%mm0, 0*8(%%"REG_D") \n\t" + "movq 4*8(%%"REG_S"), %%mm4 \n\t" + "pmullw %%mm7, %%mm3 \n\t" + + "movq %%mm1, 1*8(%%"REG_D") \n\t" + "movq 5*8(%%"REG_S"), %%mm5 \n\t" + "pmullw %%mm7, %%mm4 \n\t" + + "movq %%mm2, 2*8(%%"REG_D") \n\t" + "movq 6*8(%%"REG_S"), %%mm6 \n\t" + "pmullw %%mm7, %%mm5 \n\t" + + "movq %%mm3, 3*8(%%"REG_D") \n\t" + "movq 7*8+0*8(%%"REG_S"), %%mm0 \n\t" + "pmullw %%mm7, %%mm6 \n\t" + + "movq %%mm4, 4*8(%%"REG_D") \n\t" + "movq 7*8+1*8(%%"REG_S"), %%mm1 \n\t" + "pmullw %%mm7, %%mm0 \n\t" + + "movq %%mm5, 5*8(%%"REG_D") \n\t" + "movq 7*8+2*8(%%"REG_S"), %%mm2 \n\t" + "pmullw %%mm7, %%mm1 \n\t" + + "movq %%mm6, 6*8(%%"REG_D") \n\t" + "movq 7*8+3*8(%%"REG_S"), %%mm3 \n\t" + "pmullw %%mm7, %%mm2 \n\t" + + "movq %%mm0, 7*8+0*8(%%"REG_D") \n\t" + "movq 7*8+4*8(%%"REG_S"), %%mm4 \n\t" + "pmullw %%mm7, %%mm3 \n\t" + + "movq %%mm1, 7*8+1*8(%%"REG_D") \n\t" + "movq 7*8+5*8(%%"REG_S"), %%mm5 \n\t" + "pmullw %%mm7, %%mm4 \n\t" + + "movq %%mm2, 7*8+2*8(%%"REG_D") \n\t" + "movq 7*8+6*8(%%"REG_S"), %%mm6 \n\t" + "pmullw %%mm7, %%mm5 \n\t" + + "movq %%mm3, 7*8+3*8(%%"REG_D") \n\t" + "movq 14*8+0*8(%%"REG_S"), %%mm0 \n\t" + "pmullw %%mm7, %%mm6 \n\t" + + "movq %%mm4, 7*8+4*8(%%"REG_D") \n\t" + "movq 14*8+1*8(%%"REG_S"), %%mm1 \n\t" + "pmullw %%mm7, %%mm0 \n\t" + + "movq %%mm5, 7*8+5*8(%%"REG_D") \n\t" + "pmullw %%mm7, %%mm1 \n\t" + + "movq %%mm6, 7*8+6*8(%%"REG_D") \n\t" + "movq %%mm0, 14*8+0*8(%%"REG_D") \n\t" + "movq %%mm1, 14*8+1*8(%%"REG_D") \n\t" + + : "+g" (q), "+S" (adr), "+D" (adr) + : + ); +} + +static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt); +static void row_idct_mmx(int16_t* workspace, + int16_t* output_adr, int output_stride, int cnt); +static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, int cnt); + +#define store_slice_s store_slice_mmx +#define store_slice2_s store_slice2_mmx +#define mul_thrmat_s mul_thrmat_mmx +#define column_fidct_s column_fidct_mmx +#define row_idct_s row_idct_mmx +#define row_fdct_s row_fdct_mmx +#endif // HAVE_MMX + +static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, + int dst_stride, int src_stride, + int width, int height, + uint8_t *qp_store, int qp_stride, int is_luma) +{ + int x, x0, y, es, qy, t; + const int stride= is_luma ? p->temp_stride : (width+16);//((width+16+15)&(~15)) + const int step=6-p->log2_count; + const int qps= 3 + is_luma; + DECLARE_ALIGNED(32, int32_t, block_align)[4*8*BLOCKSZ+ 4*8*BLOCKSZ]; + int16_t *block= (int16_t *)block_align; + int16_t *block3=(int16_t *)(block_align+4*8*BLOCKSZ); + + memset(block3, 0, 4*8*BLOCKSZ); + + //p->src=src-src_stride*8-8;//! + if (!src || !dst) return; // HACK avoid crash for Y8 colourspace + for(y=0; y<height; y++){ + int index= 8 + 8*stride + y*stride; + fast_memcpy(p->src + index, src + y*src_stride, width);//this line can be avoided by using DR & user fr.buffers + for(x=0; x<8; x++){ + p->src[index - x - 1]= p->src[index + x ]; + p->src[index + width + x ]= p->src[index + width - x - 1]; + } + } + for(y=0; y<8; y++){ + fast_memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride); + fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride); + } + //FIXME (try edge emu) + + for(y=8; y<24; y++) + memset(p->temp+ 8 +y*stride, 0,width*sizeof(int16_t)); + + for(y=step; y<height+8; y+=step){ //step= 1,2 + qy=y-4; + if (qy>height-1) qy=height-1; + if (qy<0) qy=0; + qy=(qy>>qps)*qp_stride; + row_fdct_s(block, p->src + y*stride +2-(y&1), stride, 2); + for(x0=0; x0<width+8-8*(BLOCKSZ-1); x0+=8*(BLOCKSZ-1)){ + row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, 2*(BLOCKSZ-1)); + if(p->qp) + column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+0*8, block3+0*8, 8*(BLOCKSZ-1)); //yes, this is a HOTSPOT + else + for (x=0; x<8*(BLOCKSZ-1); x+=8) { + t=x+x0-2; //correct t=x+x0-2-(y&1), but its the same + if (t<0) t=0;//t always < width-2 + t=qp_store[qy+(t>>qps)]; + t=norm_qscale(t, p->mpeg2); + if (t!=p->prev_q) p->prev_q=t, mul_thrmat_s(p, t); + column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+x*8, block3+x*8, 8); //yes, this is a HOTSPOT + } + row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, 2*(BLOCKSZ-1)); + memmove(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(int16_t)); //cycling + memmove(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(int16_t)); + } + // + es=width+8-x0; // 8, ... + if (es>8) + row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, (es-4)>>2); + column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block, block3, es&(~1)); + row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, es>>2); + {const int y1=y-8+step;//l5-7 l4-6 + if (!(y1&7) && y1) { + if (y1&8) store_slice_s(dst + (y1-8)*dst_stride, p->temp+ 8 +8*stride, + dst_stride, stride, width, 8, 5-p->log2_count); + else store_slice2_s(dst + (y1-8)*dst_stride, p->temp+ 8 +0*stride, + dst_stride, stride, width, 8, 5-p->log2_count); + } } + } + + if (y&7) { // == height & 7 + if (y&8) store_slice_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +8*stride, + dst_stride, stride, width, y&7, 5-p->log2_count); + else store_slice2_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +0*stride, + dst_stride, stride, width, y&7, 5-p->log2_count); + } +} + +static int config(struct vf_instance *vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt) +{ + int h= (height+16+15)&(~15); + + vf->priv->temp_stride= (width+16+15)&(~15); + vf->priv->temp= (int16_t*)av_mallocz(vf->priv->temp_stride*3*8*sizeof(int16_t)); + //this can also be avoided, see above + vf->priv->src = (uint8_t*)av_malloc(vf->priv->temp_stride*h*sizeof(uint8_t)); + + return ff_vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); +} + +static void get_image(struct vf_instance *vf, mp_image_t *mpi) +{ + if(mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change + // ok, we can do pp in-place (or pp disabled): + vf->dmpi=ff_vf_get_image(vf->next,mpi->imgfmt, + mpi->type, mpi->flags, mpi->width, mpi->height); + mpi->planes[0]=vf->dmpi->planes[0]; + mpi->stride[0]=vf->dmpi->stride[0]; + mpi->width=vf->dmpi->width; + if(mpi->flags&MP_IMGFLAG_PLANAR){ + mpi->planes[1]=vf->dmpi->planes[1]; + mpi->planes[2]=vf->dmpi->planes[2]; + mpi->stride[1]=vf->dmpi->stride[1]; + mpi->stride[2]=vf->dmpi->stride[2]; + } + mpi->flags|=MP_IMGFLAG_DIRECT; +} + +static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) +{ + mp_image_t *dmpi; + if(!(mpi->flags&MP_IMGFLAG_DIRECT)){ + // no DR, so get a new image! hope we'll get DR buffer: + dmpi=ff_vf_get_image(vf->next,mpi->imgfmt, + MP_IMGTYPE_TEMP, + MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE, + mpi->width,mpi->height); + ff_vf_clone_mpi_attributes(dmpi, mpi); + }else{ + dmpi=vf->dmpi; + } + + vf->priv->mpeg2= mpi->qscale_type; + if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){ + int w = mpi->qstride; + int h = (mpi->h + 15) >> 4; + if (!w) { + w = (mpi->w + 15) >> 4; + h = 1; + } + if(!vf->priv->non_b_qp) + vf->priv->non_b_qp= malloc(w*h); + fast_memcpy(vf->priv->non_b_qp, mpi->qscale, w*h); + } + if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){ + char *qp_tab= vf->priv->non_b_qp; + if(vf->priv->bframes || !qp_tab) + qp_tab= mpi->qscale; + + if(qp_tab || vf->priv->qp){ + filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0], + mpi->w, mpi->h, qp_tab, mpi->qstride, 1); + filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1], + mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0); + filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2], + mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0); + }else{ + memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]); + memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]); + memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]); + } + } + +#if HAVE_MMX + if(ff_gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t"); +#endif +#if HAVE_MMX2 + if(ff_gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t"); +#endif + return ff_vf_next_put_image(vf,dmpi, pts); +} + +static void uninit(struct vf_instance *vf) +{ + if(!vf->priv) return; + + av_free(vf->priv->temp); + vf->priv->temp= NULL; + av_free(vf->priv->src); + vf->priv->src= NULL; + //free(vf->priv->avctx); + //vf->priv->avctx= NULL; + free(vf->priv->non_b_qp); + vf->priv->non_b_qp= NULL; + + av_free(vf->priv); + vf->priv=NULL; +} + +//===========================================================================// + +static int query_format(struct vf_instance *vf, unsigned int fmt) +{ + switch(fmt){ + case IMGFMT_YVU9: + case IMGFMT_IF09: + case IMGFMT_YV12: + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_CLPL: + case IMGFMT_Y800: + case IMGFMT_Y8: + case IMGFMT_444P: + case IMGFMT_422P: + case IMGFMT_411P: + return ff_vf_next_query_format(vf,fmt); + } + return 0; +} + +static int control(struct vf_instance *vf, int request, void* data) +{ + switch(request){ + case VFCTRL_QUERY_MAX_PP_LEVEL: + return 5; + case VFCTRL_SET_PP_LEVEL: + vf->priv->log2_count= *((unsigned int*)data); + if (vf->priv->log2_count < 4) vf->priv->log2_count=4; + return CONTROL_TRUE; + } + return ff_vf_next_control(vf,request,data); +} + +static int vf_open(vf_instance_t *vf, char *args) +{ + int i=0, bias; + int custom_threshold_m[64]; + int log2c=-1; + + vf->config=config; + vf->put_image=put_image; + vf->get_image=get_image; + vf->query_format=query_format; + vf->uninit=uninit; + vf->control= control; + vf->priv=av_mallocz(sizeof(struct vf_priv_s));//assumes align 16 ! + + ff_init_avcodec(); + + //vf->priv->avctx= avcodec_alloc_context(); + //dsputil_init(&vf->priv->dsp, vf->priv->avctx); + + vf->priv->log2_count= 4; + vf->priv->bframes = 0; + + if (args) sscanf(args, "%d:%d:%d:%d", &log2c, &vf->priv->qp, &i, &vf->priv->bframes); + + if( log2c >=4 && log2c <=5 ) + vf->priv->log2_count = log2c; + else if( log2c >= 6 ) + vf->priv->log2_count = 5; + + if(vf->priv->qp < 0) + vf->priv->qp = 0; + + if (i < -15) i = -15; + if (i > 32) i = 32; + + bias= (1<<4)+i; //regulable + vf->priv->prev_q=0; + // + for(i=0;i<64;i++) //FIXME: tune custom_threshold[] and remove this ! + custom_threshold_m[i]=(int)(custom_threshold[i]*(bias/71.)+ 0.5); + for(i=0;i<8;i++){ + vf->priv->threshold_mtx_noq[2*i]=(uint64_t)custom_threshold_m[i*8+2] + |(((uint64_t)custom_threshold_m[i*8+6])<<16) + |(((uint64_t)custom_threshold_m[i*8+0])<<32) + |(((uint64_t)custom_threshold_m[i*8+4])<<48); + vf->priv->threshold_mtx_noq[2*i+1]=(uint64_t)custom_threshold_m[i*8+5] + |(((uint64_t)custom_threshold_m[i*8+3])<<16) + |(((uint64_t)custom_threshold_m[i*8+1])<<32) + |(((uint64_t)custom_threshold_m[i*8+7])<<48); + } + + if (vf->priv->qp) vf->priv->prev_q=vf->priv->qp, mul_thrmat_s(vf->priv, vf->priv->qp); + + return 1; +} + +const vf_info_t ff_vf_info_fspp = { + "fast simple postprocess", + "fspp", + "Michael Niedermayer, Nikolaj Poroshin", + "", + vf_open, + NULL +}; + +//==================================================================== +//Specific spp's dct, idct and threshold functions +//I'd prefer to have them in the separate file. + +//#define MANGLE(a) #a + +//typedef int16_t int16_t; //! only int16_t + +#define DCTSIZE 8 +#define DCTSIZE_S "8" + +#define FIX(x,s) ((int) ((x) * (1<<s) + 0.5)&0xffff) +#define C64(x) ((uint64_t)((x)|(x)<<16))<<32 | (uint64_t)(x) | (uint64_t)(x)<<16 +#define FIX64(x,s) C64(FIX(x,s)) + +#define MULTIPLY16H(x,k) (((x)*(k))>>16) +#define THRESHOLD(r,x,t) if(((unsigned)((x)+t))>t*2) r=(x);else r=0; +#define DESCALE(x,n) (((x) + (1 << ((n)-1))) >> n) + +#if HAVE_MMX + +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_382683433)=FIX64(0.382683433, 14); +DECLARE_ALIGNED(8, uint64_t, ff_MM_FIX_0_541196100)=FIX64(0.541196100, 14); +DECLARE_ALIGNED(8, uint64_t, ff_MM_FIX_0_707106781)=FIX64(0.707106781, 14); +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_306562965)=FIX64(1.306562965, 14); + +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_414213562_A)=FIX64(1.414213562, 14); + +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_847759065)=FIX64(1.847759065, 13); +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_2_613125930)=FIX64(-2.613125930, 13); //- +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_414213562)=FIX64(1.414213562, 13); +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_082392200)=FIX64(1.082392200, 13); +//for t3,t5,t7 == 0 shortcut +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_847759065)=FIX64(0.847759065, 14); +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_566454497)=FIX64(0.566454497, 14); +DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_198912367)=FIX64(0.198912367, 14); + +DECLARE_ASM_CONST(8, uint64_t, MM_DESCALE_RND)=C64(4); +DECLARE_ASM_CONST(8, uint64_t, MM_2)=C64(2); + +#else /* !HAVE_MMX */ + +typedef int32_t int_simd16_t; +static const int16_t FIX_0_382683433=FIX(0.382683433, 14); +static const int16_t FIX_0_541196100=FIX(0.541196100, 14); +static const int16_t FIX_0_707106781=FIX(0.707106781, 14); +static const int16_t FIX_1_306562965=FIX(1.306562965, 14); +static const int16_t FIX_1_414213562_A=FIX(1.414213562, 14); +static const int16_t FIX_1_847759065=FIX(1.847759065, 13); +static const int16_t FIX_2_613125930=FIX(-2.613125930, 13); //- +static const int16_t FIX_1_414213562=FIX(1.414213562, 13); +static const int16_t FIX_1_082392200=FIX(1.082392200, 13); + +#endif + +#if !HAVE_MMX + +static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt) +{ + int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_simd16_t tmp10, tmp11, tmp12, tmp13; + int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13; + int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7; + + int16_t* dataptr; + int16_t* wsptr; + int16_t *threshold; + int ctr; + + dataptr = data; + wsptr = output; + + for (; cnt > 0; cnt-=2) { //start positions + threshold=(int16_t*)thr_adr;//threshold_mtx + for (ctr = DCTSIZE; ctr > 0; ctr--) { + // Process columns from input, add to output. + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + // Even part of FDCT + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + d0 = tmp10 + tmp11; + d4 = tmp10 - tmp11; + + z1 = MULTIPLY16H((tmp12 + tmp13) <<2, FIX_0_707106781); + d2 = tmp13 + z1; + d6 = tmp13 - z1; + + // Even part of IDCT + + THRESHOLD(tmp0, d0, threshold[0*8]); + THRESHOLD(tmp1, d2, threshold[2*8]); + THRESHOLD(tmp2, d4, threshold[4*8]); + THRESHOLD(tmp3, d6, threshold[6*8]); + tmp0+=2; + tmp10 = (tmp0 + tmp2)>>2; + tmp11 = (tmp0 - tmp2)>>2; + + tmp13 = (tmp1 + tmp3)>>2; //+2 ! (psnr decides) + tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2 + + tmp0 = tmp10 + tmp13; //->temps + tmp3 = tmp10 - tmp13; //->temps + tmp1 = tmp11 + tmp12; //->temps + tmp2 = tmp11 - tmp12; //->temps + + // Odd part of FDCT + + tmp10 = tmp4 + tmp5; + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + z5 = MULTIPLY16H((tmp10 - tmp12)<<2, FIX_0_382683433); + z2 = MULTIPLY16H(tmp10 <<2, FIX_0_541196100) + z5; + z4 = MULTIPLY16H(tmp12 <<2, FIX_1_306562965) + z5; + z3 = MULTIPLY16H(tmp11 <<2, FIX_0_707106781); + + z11 = tmp7 + z3; + z13 = tmp7 - z3; + + d5 = z13 + z2; + d3 = z13 - z2; + d1 = z11 + z4; + d7 = z11 - z4; + + // Odd part of IDCT + + THRESHOLD(tmp4, d1, threshold[1*8]); + THRESHOLD(tmp5, d3, threshold[3*8]); + THRESHOLD(tmp6, d5, threshold[5*8]); + THRESHOLD(tmp7, d7, threshold[7*8]); + + //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0 + z13 = tmp6 + tmp5; + z10 = (tmp6 - tmp5)<<1; + z11 = tmp4 + tmp7; + z12 = (tmp4 - tmp7)<<1; + + tmp7 = (z11 + z13)>>2; //+2 ! + tmp11 = MULTIPLY16H((z11 - z13)<<1, FIX_1_414213562); + z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065); + tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5; + tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !! + + tmp6 = tmp12 - tmp7; + tmp5 = tmp11 - tmp6; + tmp4 = tmp10 + tmp5; + + wsptr[DCTSIZE*0]+= (tmp0 + tmp7); + wsptr[DCTSIZE*1]+= (tmp1 + tmp6); + wsptr[DCTSIZE*2]+= (tmp2 + tmp5); + wsptr[DCTSIZE*3]+= (tmp3 - tmp4); + wsptr[DCTSIZE*4]+= (tmp3 + tmp4); + wsptr[DCTSIZE*5]+= (tmp2 - tmp5); + wsptr[DCTSIZE*6]= (tmp1 - tmp6); + wsptr[DCTSIZE*7]= (tmp0 - tmp7); + // + dataptr++; //next column + wsptr++; + threshold++; + } + dataptr+=8; //skip each second start pos + wsptr +=8; + } +} + +#else /* HAVE_MMX */ + +static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt) +{ + DECLARE_ALIGNED(8, uint64_t, temps)[4]; + __asm__ volatile( + ASMALIGN(4) + "1: \n\t" + "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t" + // + "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t" + "movq %%mm1, %%mm0 \n\t" + + "paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0 + "movq %%mm7, %%mm3 \n\t" + + "paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3 + "movq %%mm1, %%mm5 \n\t" + + "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t" + "psubw %%mm7, %%mm1 \n\t" //t13 + + "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t" + "movq %%mm6, %%mm4 \n\t" + + "paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1 + "paddw %%mm7, %%mm5 \n\t" //t10 + + "paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2 + "movq %%mm6, %%mm7 \n\t" + + "paddw %%mm2, %%mm6 \n\t" //t11 + "psubw %%mm2, %%mm7 \n\t" //t12 + + "movq %%mm5, %%mm2 \n\t" + "paddw %%mm6, %%mm5 \n\t" //d0 + // i0 t13 t12 i3 i1 d0 - d4 + "psubw %%mm6, %%mm2 \n\t" //d4 + "paddw %%mm1, %%mm7 \n\t" + + "movq 4*16(%%"REG_d"), %%mm6 \n\t" + "psllw $2, %%mm7 \n\t" + + "psubw 0*16(%%"REG_d"), %%mm5 \n\t" + "psubw %%mm6, %%mm2 \n\t" + + "paddusw 0*16(%%"REG_d"), %%mm5 \n\t" + "paddusw %%mm6, %%mm2 \n\t" + + "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm7 \n\t" + // + "paddw 0*16(%%"REG_d"), %%mm5 \n\t" + "paddw %%mm6, %%mm2 \n\t" + + "psubusw 0*16(%%"REG_d"), %%mm5 \n\t" + "psubusw %%mm6, %%mm2 \n\t" + +//This func is totally compute-bound, operates at huge speed. So, DC shortcut +// at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3). +//However, typical numbers: nondc - 29%%, dc - 46%%, zero - 25%%. All <> 0 case is very rare. + "paddw "MANGLE(MM_2)", %%mm5 \n\t" + "movq %%mm2, %%mm6 \n\t" + + "paddw %%mm5, %%mm2 \n\t" + "psubw %%mm6, %%mm5 \n\t" + + "movq %%mm1, %%mm6 \n\t" + "paddw %%mm7, %%mm1 \n\t" //d2 + + "psubw 2*16(%%"REG_d"), %%mm1 \n\t" + "psubw %%mm7, %%mm6 \n\t" //d6 + + "movq 6*16(%%"REG_d"), %%mm7 \n\t" + "psraw $2, %%mm5 \n\t" + + "paddusw 2*16(%%"REG_d"), %%mm1 \n\t" + "psubw %%mm7, %%mm6 \n\t" + // t7 d2 /t11 t4 t6 - d6 /t10 + + "paddw 2*16(%%"REG_d"), %%mm1 \n\t" + "paddusw %%mm7, %%mm6 \n\t" + + "psubusw 2*16(%%"REG_d"), %%mm1 \n\t" + "paddw %%mm7, %%mm6 \n\t" + + "psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t" + "psubusw %%mm7, %%mm6 \n\t" + + //movq [edi+"DCTSIZE_S"*2*2], mm1 + //movq [edi+"DCTSIZE_S"*6*2], mm6 + "movq %%mm1, %%mm7 \n\t" + "psraw $2, %%mm2 \n\t" + + "psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t" + "psubw %%mm6, %%mm1 \n\t" + + "psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t" + "paddw %%mm7, %%mm6 \n\t" //'t13 + + "psraw $2, %%mm6 \n\t" //paddw mm6, MM_2 !! --- + "movq %%mm2, %%mm7 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t" + "paddw %%mm6, %%mm2 \n\t" //'t0 + + "movq %%mm2, 0*8+%3 \n\t" //! + "psubw %%mm6, %%mm7 \n\t" //'t3 + + "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t" + "psubw %%mm6, %%mm1 \n\t" //'t12 + + "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5 + "movq %%mm5, %%mm6 \n\t" + + "movq %%mm7, 3*8+%3 \n\t" + "paddw %%mm2, %%mm3 \n\t" //t10 + + "paddw %%mm4, %%mm2 \n\t" //t11 + "paddw %%mm0, %%mm4 \n\t" //t12 + + "movq %%mm3, %%mm7 \n\t" + "psubw %%mm4, %%mm3 \n\t" + + "psllw $2, %%mm3 \n\t" + "psllw $2, %%mm7 \n\t" //opt for P6 + + "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t" + "psllw $2, %%mm4 \n\t" + + "pmulhw "MANGLE(ff_MM_FIX_0_541196100)", %%mm7 \n\t" + "psllw $2, %%mm2 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t" + "paddw %%mm1, %%mm5 \n\t" //'t1 + + "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm2 \n\t" + "psubw %%mm1, %%mm6 \n\t" //'t2 + // t7 't12 't11 t4 t6 - 't13 't10 --- + + "paddw %%mm3, %%mm7 \n\t" //z2 + + "movq %%mm5, 1*8+%3 \n\t" + "paddw %%mm3, %%mm4 \n\t" //z4 + + "movq 3*16(%%"REG_d"), %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + + "movq %%mm6, 2*8+%3 \n\t" + "psubw %%mm2, %%mm1 \n\t" //z13 + +//=== + "paddw %%mm2, %%mm0 \n\t" //z11 + "movq %%mm1, %%mm5 \n\t" + + "movq 5*16(%%"REG_d"), %%mm2 \n\t" + "psubw %%mm7, %%mm1 \n\t" //d3 + + "paddw %%mm7, %%mm5 \n\t" //d5 + "psubw %%mm3, %%mm1 \n\t" + + "movq 1*16(%%"REG_d"), %%mm7 \n\t" + "psubw %%mm2, %%mm5 \n\t" + + "movq %%mm0, %%mm6 \n\t" + "paddw %%mm4, %%mm0 \n\t" //d1 + + "paddusw %%mm3, %%mm1 \n\t" + "psubw %%mm4, %%mm6 \n\t" //d7 + + // d1 d3 - - - d5 d7 - + "movq 7*16(%%"REG_d"), %%mm4 \n\t" + "psubw %%mm7, %%mm0 \n\t" + + "psubw %%mm4, %%mm6 \n\t" + "paddusw %%mm2, %%mm5 \n\t" + + "paddusw %%mm4, %%mm6 \n\t" + "paddw %%mm3, %%mm1 \n\t" + + "paddw %%mm2, %%mm5 \n\t" + "paddw %%mm4, %%mm6 \n\t" + + "psubusw %%mm3, %%mm1 \n\t" + "psubusw %%mm2, %%mm5 \n\t" + + "psubusw %%mm4, %%mm6 \n\t" + "movq %%mm1, %%mm4 \n\t" + + "por %%mm5, %%mm4 \n\t" + "paddusw %%mm7, %%mm0 \n\t" + + "por %%mm6, %%mm4 \n\t" + "paddw %%mm7, %%mm0 \n\t" + + "packssdw %%mm4, %%mm4 \n\t" + "psubusw %%mm7, %%mm0 \n\t" + + "movd %%mm4, %%"REG_a" \n\t" + "or %%"REG_a", %%"REG_a" \n\t" + "jnz 2f \n\t" + //movq [edi+"DCTSIZE_S"*3*2], mm1 + //movq [edi+"DCTSIZE_S"*5*2], mm5 + //movq [edi+"DCTSIZE_S"*1*2], mm0 + //movq [edi+"DCTSIZE_S"*7*2], mm6 + // t4 t5 - - - t6 t7 - + //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0 +//Typical numbers: nondc - 19%%, dc - 26%%, zero - 55%%. zero case alone isn't worthwhile + "movq 0*8+%3, %%mm4 \n\t" + "movq %%mm0, %%mm1 \n\t" + + "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6 + "movq %%mm1, %%mm2 \n\t" + + "movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t" + "movq %%mm2, %%mm3 \n\t" + + "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5 + "paddw %%mm4, %%mm5 \n\t" + + "movq 1*8+%3, %%mm6 \n\t" + //paddw mm3, MM_2 + "psraw $2, %%mm3 \n\t" //tmp7 + + "pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4 + "psubw %%mm3, %%mm4 \n\t" + + "movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t" + "paddw %%mm3, %%mm5 \n\t" + + "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t" + "paddw %%mm6, %%mm7 \n\t" + + "movq 2*8+%3, %%mm3 \n\t" + "psubw %%mm0, %%mm6 \n\t" + + "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t" + "paddw %%mm0, %%mm7 \n\t" + + "movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t" + "paddw %%mm3, %%mm4 \n\t" + + "movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t" + "psubw %%mm1, %%mm3 \n\t" + + "movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t" + "paddw %%mm1, %%mm4 \n\t" + + "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t" + "paddw %%mm3, %%mm5 \n\t" + + "movq 3*8+%3, %%mm0 \n\t" + "add $8, %%"REG_S" \n\t" + + "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t" + "paddw %%mm0, %%mm6 \n\t" + + "movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t" + "psubw %%mm2, %%mm0 \n\t" + + "movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t" + "paddw %%mm2, %%mm6 \n\t" + + "movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t" + "paddw %%mm0, %%mm7 \n\t" + + "movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" + + "movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t" + "add $8, %%"REG_D" \n\t" + "jmp 4f \n\t" + + "2: \n\t" + //--- non DC2 + //psraw mm1, 2 w/o it -> offset. thr1, thr1, thr1 (actually thr1, thr1, thr1-1) + //psraw mm5, 2 + //psraw mm0, 2 + //psraw mm6, 2 + "movq %%mm5, %%mm3 \n\t" + "psubw %%mm1, %%mm5 \n\t" + + "psllw $1, %%mm5 \n\t" //'z10 + "paddw %%mm1, %%mm3 \n\t" //'z13 + + "movq %%mm0, %%mm2 \n\t" + "psubw %%mm6, %%mm0 \n\t" + + "movq %%mm5, %%mm1 \n\t" + "psllw $1, %%mm0 \n\t" //'z12 + + "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //- + "paddw %%mm0, %%mm5 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5 + "paddw %%mm6, %%mm2 \n\t" //'z11 + + "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t" + "movq %%mm2, %%mm7 \n\t" + + //--- + "movq 0*8+%3, %%mm4 \n\t" + "psubw %%mm3, %%mm2 \n\t" + + "psllw $1, %%mm2 \n\t" + "paddw %%mm3, %%mm7 \n\t" //'t7 + + "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11 + "movq %%mm4, %%mm6 \n\t" + //paddw mm7, MM_2 + "psraw $2, %%mm7 \n\t" + + "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t" + "psubw %%mm7, %%mm6 \n\t" + + "movq 1*8+%3, %%mm3 \n\t" + "paddw %%mm7, %%mm4 \n\t" + + "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t" + "paddw %%mm5, %%mm1 \n\t" //'t12 + + "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t" + "psubw %%mm7, %%mm1 \n\t" //'t6 + + "movq 2*8+%3, %%mm7 \n\t" + "psubw %%mm5, %%mm0 \n\t" //'t10 + + "movq 3*8+%3, %%mm6 \n\t" + "movq %%mm3, %%mm5 \n\t" + + "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t" + "psubw %%mm1, %%mm5 \n\t" + + "psubw %%mm1, %%mm2 \n\t" //'t5 + "paddw %%mm1, %%mm3 \n\t" + + "movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t" + "movq %%mm7, %%mm4 \n\t" + + "paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t" + "psubw %%mm2, %%mm4 \n\t" + + "paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t" + "paddw %%mm2, %%mm7 \n\t" + + "movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t" + "paddw %%mm2, %%mm0 \n\t" //'t4 + + // 't4 't6 't5 - - - - 't7 + "movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t" + "movq %%mm6, %%mm1 \n\t" + + "paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t" + "psubw %%mm0, %%mm1 \n\t" + + "paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t" + "paddw %%mm0, %%mm6 \n\t" + + "movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t" + "add $8, %%"REG_S" \n\t" + + "movq %%mm6, "DCTSIZE_S"*4*2(%%"REG_D") \n\t" + + "movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" + "add $8, %%"REG_D" \n\t" + + "4: \n\t" +//=part 2 (the same)=========================================================== + "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t" + // + "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t" + "movq %%mm1, %%mm0 \n\t" + + "paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0 + "movq %%mm7, %%mm3 \n\t" + + "paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3 + "movq %%mm1, %%mm5 \n\t" + + "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t" + "psubw %%mm7, %%mm1 \n\t" //t13 + + "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t" + "movq %%mm6, %%mm4 \n\t" + + "paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1 + "paddw %%mm7, %%mm5 \n\t" //t10 + + "paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2 + "movq %%mm6, %%mm7 \n\t" + + "paddw %%mm2, %%mm6 \n\t" //t11 + "psubw %%mm2, %%mm7 \n\t" //t12 + + "movq %%mm5, %%mm2 \n\t" + "paddw %%mm6, %%mm5 \n\t" //d0 + // i0 t13 t12 i3 i1 d0 - d4 + "psubw %%mm6, %%mm2 \n\t" //d4 + "paddw %%mm1, %%mm7 \n\t" + + "movq 1*8+4*16(%%"REG_d"), %%mm6 \n\t" + "psllw $2, %%mm7 \n\t" + + "psubw 1*8+0*16(%%"REG_d"), %%mm5 \n\t" + "psubw %%mm6, %%mm2 \n\t" + + "paddusw 1*8+0*16(%%"REG_d"), %%mm5 \n\t" + "paddusw %%mm6, %%mm2 \n\t" + + "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm7 \n\t" + // + "paddw 1*8+0*16(%%"REG_d"), %%mm5 \n\t" + "paddw %%mm6, %%mm2 \n\t" + + "psubusw 1*8+0*16(%%"REG_d"), %%mm5 \n\t" + "psubusw %%mm6, %%mm2 \n\t" + +//This func is totally compute-bound, operates at huge speed. So, DC shortcut +// at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3). +//However, typical numbers: nondc - 29%%, dc - 46%%, zero - 25%%. All <> 0 case is very rare. + "paddw "MANGLE(MM_2)", %%mm5 \n\t" + "movq %%mm2, %%mm6 \n\t" + + "paddw %%mm5, %%mm2 \n\t" + "psubw %%mm6, %%mm5 \n\t" + + "movq %%mm1, %%mm6 \n\t" + "paddw %%mm7, %%mm1 \n\t" //d2 + + "psubw 1*8+2*16(%%"REG_d"), %%mm1 \n\t" + "psubw %%mm7, %%mm6 \n\t" //d6 + + "movq 1*8+6*16(%%"REG_d"), %%mm7 \n\t" + "psraw $2, %%mm5 \n\t" + + "paddusw 1*8+2*16(%%"REG_d"), %%mm1 \n\t" + "psubw %%mm7, %%mm6 \n\t" + // t7 d2 /t11 t4 t6 - d6 /t10 + + "paddw 1*8+2*16(%%"REG_d"), %%mm1 \n\t" + "paddusw %%mm7, %%mm6 \n\t" + + "psubusw 1*8+2*16(%%"REG_d"), %%mm1 \n\t" + "paddw %%mm7, %%mm6 \n\t" + + "psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t" + "psubusw %%mm7, %%mm6 \n\t" + + //movq [edi+"DCTSIZE_S"*2*2], mm1 + //movq [edi+"DCTSIZE_S"*6*2], mm6 + "movq %%mm1, %%mm7 \n\t" + "psraw $2, %%mm2 \n\t" + + "psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t" + "psubw %%mm6, %%mm1 \n\t" + + "psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t" + "paddw %%mm7, %%mm6 \n\t" //'t13 + + "psraw $2, %%mm6 \n\t" //paddw mm6, MM_2 !! --- + "movq %%mm2, %%mm7 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t" + "paddw %%mm6, %%mm2 \n\t" //'t0 + + "movq %%mm2, 0*8+%3 \n\t" //! + "psubw %%mm6, %%mm7 \n\t" //'t3 + + "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t" + "psubw %%mm6, %%mm1 \n\t" //'t12 + + "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5 + "movq %%mm5, %%mm6 \n\t" + + "movq %%mm7, 3*8+%3 \n\t" + "paddw %%mm2, %%mm3 \n\t" //t10 + + "paddw %%mm4, %%mm2 \n\t" //t11 + "paddw %%mm0, %%mm4 \n\t" //t12 + + "movq %%mm3, %%mm7 \n\t" + "psubw %%mm4, %%mm3 \n\t" + + "psllw $2, %%mm3 \n\t" + "psllw $2, %%mm7 \n\t" //opt for P6 + + "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t" + "psllw $2, %%mm4 \n\t" + + "pmulhw "MANGLE(ff_MM_FIX_0_541196100)", %%mm7 \n\t" + "psllw $2, %%mm2 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t" + "paddw %%mm1, %%mm5 \n\t" //'t1 + + "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm2 \n\t" + "psubw %%mm1, %%mm6 \n\t" //'t2 + // t7 't12 't11 t4 t6 - 't13 't10 --- + + "paddw %%mm3, %%mm7 \n\t" //z2 + + "movq %%mm5, 1*8+%3 \n\t" + "paddw %%mm3, %%mm4 \n\t" //z4 + + "movq 1*8+3*16(%%"REG_d"), %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + + "movq %%mm6, 2*8+%3 \n\t" + "psubw %%mm2, %%mm1 \n\t" //z13 + +//=== + "paddw %%mm2, %%mm0 \n\t" //z11 + "movq %%mm1, %%mm5 \n\t" + + "movq 1*8+5*16(%%"REG_d"), %%mm2 \n\t" + "psubw %%mm7, %%mm1 \n\t" //d3 + + "paddw %%mm7, %%mm5 \n\t" //d5 + "psubw %%mm3, %%mm1 \n\t" + + "movq 1*8+1*16(%%"REG_d"), %%mm7 \n\t" + "psubw %%mm2, %%mm5 \n\t" + + "movq %%mm0, %%mm6 \n\t" + "paddw %%mm4, %%mm0 \n\t" //d1 + + "paddusw %%mm3, %%mm1 \n\t" + "psubw %%mm4, %%mm6 \n\t" //d7 + + // d1 d3 - - - d5 d7 - + "movq 1*8+7*16(%%"REG_d"), %%mm4 \n\t" + "psubw %%mm7, %%mm0 \n\t" + + "psubw %%mm4, %%mm6 \n\t" + "paddusw %%mm2, %%mm5 \n\t" + + "paddusw %%mm4, %%mm6 \n\t" + "paddw %%mm3, %%mm1 \n\t" + + "paddw %%mm2, %%mm5 \n\t" + "paddw %%mm4, %%mm6 \n\t" + + "psubusw %%mm3, %%mm1 \n\t" + "psubusw %%mm2, %%mm5 \n\t" + + "psubusw %%mm4, %%mm6 \n\t" + "movq %%mm1, %%mm4 \n\t" + + "por %%mm5, %%mm4 \n\t" + "paddusw %%mm7, %%mm0 \n\t" + + "por %%mm6, %%mm4 \n\t" + "paddw %%mm7, %%mm0 \n\t" + + "packssdw %%mm4, %%mm4 \n\t" + "psubusw %%mm7, %%mm0 \n\t" + + "movd %%mm4, %%"REG_a" \n\t" + "or %%"REG_a", %%"REG_a" \n\t" + "jnz 3f \n\t" + //movq [edi+"DCTSIZE_S"*3*2], mm1 + //movq [edi+"DCTSIZE_S"*5*2], mm5 + //movq [edi+"DCTSIZE_S"*1*2], mm0 + //movq [edi+"DCTSIZE_S"*7*2], mm6 + // t4 t5 - - - t6 t7 - + //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0 +//Typical numbers: nondc - 19%%, dc - 26%%, zero - 55%%. zero case alone isn't worthwhile + "movq 0*8+%3, %%mm4 \n\t" + "movq %%mm0, %%mm1 \n\t" + + "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6 + "movq %%mm1, %%mm2 \n\t" + + "movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t" + "movq %%mm2, %%mm3 \n\t" + + "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5 + "paddw %%mm4, %%mm5 \n\t" + + "movq 1*8+%3, %%mm6 \n\t" + //paddw mm3, MM_2 + "psraw $2, %%mm3 \n\t" //tmp7 + + "pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4 + "psubw %%mm3, %%mm4 \n\t" + + "movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t" + "paddw %%mm3, %%mm5 \n\t" + + "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t" + "paddw %%mm6, %%mm7 \n\t" + + "movq 2*8+%3, %%mm3 \n\t" + "psubw %%mm0, %%mm6 \n\t" + + "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t" + "paddw %%mm0, %%mm7 \n\t" + + "movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t" + "paddw %%mm3, %%mm4 \n\t" + + "movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t" + "psubw %%mm1, %%mm3 \n\t" + + "movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t" + "paddw %%mm1, %%mm4 \n\t" + + "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t" + "paddw %%mm3, %%mm5 \n\t" + + "movq 3*8+%3, %%mm0 \n\t" + "add $24, %%"REG_S" \n\t" + + "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t" + "paddw %%mm0, %%mm6 \n\t" + + "movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t" + "psubw %%mm2, %%mm0 \n\t" + + "movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t" + "paddw %%mm2, %%mm6 \n\t" + + "movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t" + "paddw %%mm0, %%mm7 \n\t" + + "movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" + + "movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t" + "add $24, %%"REG_D" \n\t" + "sub $2, %%"REG_c" \n\t" + "jnz 1b \n\t" + "jmp 5f \n\t" + + "3: \n\t" + //--- non DC2 + //psraw mm1, 2 w/o it -> offset. thr1, thr1, thr1 (actually thr1, thr1, thr1-1) + //psraw mm5, 2 + //psraw mm0, 2 + //psraw mm6, 2 + "movq %%mm5, %%mm3 \n\t" + "psubw %%mm1, %%mm5 \n\t" + + "psllw $1, %%mm5 \n\t" //'z10 + "paddw %%mm1, %%mm3 \n\t" //'z13 + + "movq %%mm0, %%mm2 \n\t" + "psubw %%mm6, %%mm0 \n\t" + + "movq %%mm5, %%mm1 \n\t" + "psllw $1, %%mm0 \n\t" //'z12 + + "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //- + "paddw %%mm0, %%mm5 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5 + "paddw %%mm6, %%mm2 \n\t" //'z11 + + "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t" + "movq %%mm2, %%mm7 \n\t" + + //--- + "movq 0*8+%3, %%mm4 \n\t" + "psubw %%mm3, %%mm2 \n\t" + + "psllw $1, %%mm2 \n\t" + "paddw %%mm3, %%mm7 \n\t" //'t7 + + "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11 + "movq %%mm4, %%mm6 \n\t" + //paddw mm7, MM_2 + "psraw $2, %%mm7 \n\t" + + "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t" + "psubw %%mm7, %%mm6 \n\t" + + "movq 1*8+%3, %%mm3 \n\t" + "paddw %%mm7, %%mm4 \n\t" + + "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t" + "paddw %%mm5, %%mm1 \n\t" //'t12 + + "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t" + "psubw %%mm7, %%mm1 \n\t" //'t6 + + "movq 2*8+%3, %%mm7 \n\t" + "psubw %%mm5, %%mm0 \n\t" //'t10 + + "movq 3*8+%3, %%mm6 \n\t" + "movq %%mm3, %%mm5 \n\t" + + "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t" + "psubw %%mm1, %%mm5 \n\t" + + "psubw %%mm1, %%mm2 \n\t" //'t5 + "paddw %%mm1, %%mm3 \n\t" + + "movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t" + "movq %%mm7, %%mm4 \n\t" + + "paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t" + "psubw %%mm2, %%mm4 \n\t" + + "paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t" + "paddw %%mm2, %%mm7 \n\t" + + "movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t" + "paddw %%mm2, %%mm0 \n\t" //'t4 + + // 't4 't6 't5 - - - - 't7 + "movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t" + "movq %%mm6, %%mm1 \n\t" + + "paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t" + "psubw %%mm0, %%mm1 \n\t" + + "paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t" + "paddw %%mm0, %%mm6 \n\t" + + "movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t" + "add $24, %%"REG_S" \n\t" + + "movq %%mm6, "DCTSIZE_S"*4*2(%%"REG_D") \n\t" + + "movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" + "add $24, %%"REG_D" \n\t" + "sub $2, %%"REG_c" \n\t" + "jnz 1b \n\t" + "5: \n\t" + + : "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps) + : "d"(thr_adr) + NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,MM_2,MM_FIX_1_414213562_A,MM_FIX_1_414213562,MM_FIX_0_382683433, + ff_MM_FIX_0_541196100,MM_FIX_1_306562965,MM_FIX_0_847759065) + NAMED_CONSTRAINTS_ADD(MM_FIX_0_566454497,MM_FIX_0_198912367,MM_FIX_2_613125930,MM_FIX_1_847759065, + MM_FIX_1_082392200) + : "%"REG_a + ); +} + +#endif // HAVE_MMX + +#if !HAVE_MMX + +static void row_idct_c(int16_t* workspace, + int16_t* output_adr, int output_stride, int cnt) +{ + int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_simd16_t tmp10, tmp11, tmp12, tmp13; + int_simd16_t z5, z10, z11, z12, z13; + int16_t* outptr; + int16_t* wsptr; + + cnt*=4; + wsptr = workspace; + outptr = output_adr; + for (; cnt > 0; cnt--) { + // Even part + //Simd version reads 4x4 block and transposes it + tmp10 = ( wsptr[2] + wsptr[3]); + tmp11 = ( wsptr[2] - wsptr[3]); + + tmp13 = ( wsptr[0] + wsptr[1]); + tmp12 = (MULTIPLY16H( wsptr[0] - wsptr[1], FIX_1_414213562_A)<<2) - tmp13;//this shift order to avoid overflow + + tmp0 = tmp10 + tmp13; //->temps + tmp3 = tmp10 - tmp13; //->temps + tmp1 = tmp11 + tmp12; + tmp2 = tmp11 - tmp12; + + // Odd part + //Also transpose, with previous: + // ---- ---- |||| + // ---- ---- idct |||| + // ---- ---- ---> |||| + // ---- ---- |||| + z13 = wsptr[4] + wsptr[5]; + z10 = wsptr[4] - wsptr[5]; + z11 = wsptr[6] + wsptr[7]; + z12 = wsptr[6] - wsptr[7]; + + tmp7 = z11 + z13; + tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562); + + z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065); + tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5; + tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_ + + tmp6 = (tmp12<<3) - tmp7; + tmp5 = (tmp11<<3) - tmp6; + tmp4 = (tmp10<<3) + tmp5; + + // Final output stage: descale and write column + outptr[0*output_stride]+= DESCALE(tmp0 + tmp7, 3); + outptr[1*output_stride]+= DESCALE(tmp1 + tmp6, 3); + outptr[2*output_stride]+= DESCALE(tmp2 + tmp5, 3); + outptr[3*output_stride]+= DESCALE(tmp3 - tmp4, 3); + outptr[4*output_stride]+= DESCALE(tmp3 + tmp4, 3); + outptr[5*output_stride]+= DESCALE(tmp2 - tmp5, 3); + outptr[6*output_stride]+= DESCALE(tmp1 - tmp6, 3); //no += ? + outptr[7*output_stride]+= DESCALE(tmp0 - tmp7, 3); //no += ? + outptr++; + + wsptr += DCTSIZE; // advance pointer to next row + } +} + +#else /* HAVE_MMX */ + +static void row_idct_mmx (int16_t* workspace, + int16_t* output_adr, int output_stride, int cnt) +{ + DECLARE_ALIGNED(8, uint64_t, temps)[4]; + __asm__ volatile( + "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" + "1: \n\t" + "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm0 \n\t" + // + + "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm1 \n\t" + "movq %%mm0, %%mm4 \n\t" + + "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t" + "punpcklwd %%mm1, %%mm0 \n\t" + + "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm3 \n\t" + "punpckhwd %%mm1, %%mm4 \n\t" + + //transpose 4x4 + "movq %%mm2, %%mm7 \n\t" + "punpcklwd %%mm3, %%mm2 \n\t" + + "movq %%mm0, %%mm6 \n\t" + "punpckldq %%mm2, %%mm0 \n\t" //0 + + "punpckhdq %%mm2, %%mm6 \n\t" //1 + "movq %%mm0, %%mm5 \n\t" + + "punpckhwd %%mm3, %%mm7 \n\t" + "psubw %%mm6, %%mm0 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm0 \n\t" + "movq %%mm4, %%mm2 \n\t" + + "punpckldq %%mm7, %%mm4 \n\t" //2 + "paddw %%mm6, %%mm5 \n\t" + + "punpckhdq %%mm7, %%mm2 \n\t" //3 + "movq %%mm4, %%mm1 \n\t" + + "psllw $2, %%mm0 \n\t" + "paddw %%mm2, %%mm4 \n\t" //t10 + + "movq "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_S"), %%mm3 \n\t" + "psubw %%mm2, %%mm1 \n\t" //t11 + + "movq "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_S"), %%mm2 \n\t" + "psubw %%mm5, %%mm0 \n\t" + + "movq %%mm4, %%mm6 \n\t" + "paddw %%mm5, %%mm4 \n\t" //t0 + + "psubw %%mm5, %%mm6 \n\t" //t3 + "movq %%mm1, %%mm7 \n\t" + + "movq "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_S"), %%mm5 \n\t" + "paddw %%mm0, %%mm1 \n\t" //t1 + + "movq %%mm4, 0*8+%3 \n\t" //t0 + "movq %%mm3, %%mm4 \n\t" + + "movq %%mm6, 1*8+%3 \n\t" //t3 + "punpcklwd %%mm2, %%mm3 \n\t" + + //transpose 4x4 + "movq "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_S"), %%mm6 \n\t" + "punpckhwd %%mm2, %%mm4 \n\t" + + "movq %%mm5, %%mm2 \n\t" + "punpcklwd %%mm6, %%mm5 \n\t" + + "psubw %%mm0, %%mm7 \n\t" //t2 + "punpckhwd %%mm6, %%mm2 \n\t" + + "movq %%mm3, %%mm0 \n\t" + "punpckldq %%mm5, %%mm3 \n\t" //4 + + "punpckhdq %%mm5, %%mm0 \n\t" //5 + "movq %%mm4, %%mm5 \n\t" + + // + "movq %%mm3, %%mm6 \n\t" + "punpckldq %%mm2, %%mm4 \n\t" //6 + + "psubw %%mm0, %%mm3 \n\t" //z10 + "punpckhdq %%mm2, %%mm5 \n\t" //7 + + "paddw %%mm0, %%mm6 \n\t" //z13 + "movq %%mm4, %%mm2 \n\t" + + "movq %%mm3, %%mm0 \n\t" + "psubw %%mm5, %%mm4 \n\t" //z12 + + "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm0 \n\t" //- + "paddw %%mm4, %%mm3 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm3 \n\t" //z5 + "paddw %%mm5, %%mm2 \n\t" //z11 > + + "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm4 \n\t" + "movq %%mm2, %%mm5 \n\t" + + "psubw %%mm6, %%mm2 \n\t" + "paddw %%mm6, %%mm5 \n\t" //t7 + + "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //t11 + "paddw %%mm3, %%mm0 \n\t" //t12 + + "psllw $3, %%mm0 \n\t" + "psubw %%mm3, %%mm4 \n\t" //t10 + + "movq 0*8+%3, %%mm6 \n\t" + "movq %%mm1, %%mm3 \n\t" + + "psllw $3, %%mm4 \n\t" + "psubw %%mm5, %%mm0 \n\t" //t6 + + "psllw $3, %%mm2 \n\t" + "paddw %%mm0, %%mm1 \n\t" //d1 + + "psubw %%mm0, %%mm2 \n\t" //t5 + "psubw %%mm0, %%mm3 \n\t" //d6 + + "paddw %%mm2, %%mm4 \n\t" //t4 + "movq %%mm7, %%mm0 \n\t" + + "paddw %%mm2, %%mm7 \n\t" //d2 + "psubw %%mm2, %%mm0 \n\t" //d5 + + "movq "MANGLE(MM_DESCALE_RND)", %%mm2 \n\t" //4 + "psubw %%mm5, %%mm6 \n\t" //d7 + + "paddw 0*8+%3, %%mm5 \n\t" //d0 + "paddw %%mm2, %%mm1 \n\t" + + "paddw %%mm2, %%mm5 \n\t" + "psraw $3, %%mm1 \n\t" + + "paddw %%mm2, %%mm7 \n\t" + "psraw $3, %%mm5 \n\t" + + "paddw (%%"REG_D"), %%mm5 \n\t" + "psraw $3, %%mm7 \n\t" + + "paddw (%%"REG_D",%%"REG_a"), %%mm1 \n\t" + "paddw %%mm2, %%mm0 \n\t" + + "paddw (%%"REG_D",%%"REG_a",2), %%mm7 \n\t" + "paddw %%mm2, %%mm3 \n\t" + + "movq %%mm5, (%%"REG_D") \n\t" + "paddw %%mm2, %%mm6 \n\t" + + "movq %%mm1, (%%"REG_D",%%"REG_a") \n\t" + "psraw $3, %%mm0 \n\t" + + "movq %%mm7, (%%"REG_D",%%"REG_a",2) \n\t" + "add %%"REG_d", %%"REG_D" \n\t" //3*ls + + "movq 1*8+%3, %%mm5 \n\t" //t3 + "psraw $3, %%mm3 \n\t" + + "paddw (%%"REG_D",%%"REG_a",2), %%mm0 \n\t" + "psubw %%mm4, %%mm5 \n\t" //d3 + + "paddw (%%"REG_D",%%"REG_d"), %%mm3 \n\t" + "psraw $3, %%mm6 \n\t" + + "paddw 1*8+%3, %%mm4 \n\t" //d4 + "paddw %%mm2, %%mm5 \n\t" + + "paddw (%%"REG_D",%%"REG_a",4), %%mm6 \n\t" + "paddw %%mm2, %%mm4 \n\t" + + "movq %%mm0, (%%"REG_D",%%"REG_a",2) \n\t" + "psraw $3, %%mm5 \n\t" + + "paddw (%%"REG_D"), %%mm5 \n\t" + "psraw $3, %%mm4 \n\t" + + "paddw (%%"REG_D",%%"REG_a"), %%mm4 \n\t" + "add $"DCTSIZE_S"*2*4, %%"REG_S" \n\t" //4 rows + + "movq %%mm3, (%%"REG_D",%%"REG_d") \n\t" + "movq %%mm6, (%%"REG_D",%%"REG_a",4) \n\t" + "movq %%mm5, (%%"REG_D") \n\t" + "movq %%mm4, (%%"REG_D",%%"REG_a") \n\t" + + "sub %%"REG_d", %%"REG_D" \n\t" + "add $8, %%"REG_D" \n\t" + "dec %%"REG_c" \n\t" + "jnz 1b \n\t" + + : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps) + : "a"(output_stride*sizeof(short)) + NAMED_CONSTRAINTS_ADD(MM_FIX_1_414213562_A,MM_FIX_2_613125930,MM_FIX_1_847759065,MM_FIX_1_082392200, + MM_FIX_1_414213562,MM_DESCALE_RND) + : "%"REG_d + ); +} + +#endif // HAVE_MMX + +#if !HAVE_MMX + +static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int cnt) +{ + int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_simd16_t tmp10, tmp11, tmp12, tmp13; + int_simd16_t z1, z2, z3, z4, z5, z11, z13; + int16_t *dataptr; + + cnt*=4; + // Pass 1: process rows. + + dataptr = data; + for (; cnt > 0; cnt--) { + tmp0 = pixels[line_size*0] + pixels[line_size*7]; + tmp7 = pixels[line_size*0] - pixels[line_size*7]; + tmp1 = pixels[line_size*1] + pixels[line_size*6]; + tmp6 = pixels[line_size*1] - pixels[line_size*6]; + tmp2 = pixels[line_size*2] + pixels[line_size*5]; + tmp5 = pixels[line_size*2] - pixels[line_size*5]; + tmp3 = pixels[line_size*3] + pixels[line_size*4]; + tmp4 = pixels[line_size*3] - pixels[line_size*4]; + + // Even part + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + //Even columns are written first, this leads to different order of columns + //in column_fidct(), but they are processed independently, so all ok. + //Later in the row_idct() columns readed at the same order. + dataptr[2] = tmp10 + tmp11; + dataptr[3] = tmp10 - tmp11; + + z1 = MULTIPLY16H((tmp12 + tmp13)<<2, FIX_0_707106781); + dataptr[0] = tmp13 + z1; + dataptr[1] = tmp13 - z1; + + // Odd part + + tmp10 = (tmp4 + tmp5) <<2; + tmp11 = (tmp5 + tmp6) <<2; + tmp12 = (tmp6 + tmp7) <<2; + + z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433); + z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5; + z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5; + z3 = MULTIPLY16H(tmp11, FIX_0_707106781); + + z11 = tmp7 + z3; + z13 = tmp7 - z3; + + dataptr[4] = z13 + z2; + dataptr[5] = z13 - z2; + dataptr[6] = z11 + z4; + dataptr[7] = z11 - z4; + + pixels++; // advance pointer to next column + dataptr += DCTSIZE; + } +} + +#else /* HAVE_MMX */ + +static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, int cnt) +{ + DECLARE_ALIGNED(8, uint64_t, temps)[4]; + __asm__ volatile( + "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" + "6: \n\t" + "movd (%%"REG_S"), %%mm0 \n\t" + "pxor %%mm7, %%mm7 \n\t" + + "movd (%%"REG_S",%%"REG_a"), %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + + "movd (%%"REG_S",%%"REG_a",2), %%mm2 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + + "punpcklbw %%mm7, %%mm2 \n\t" + "add %%"REG_d", %%"REG_S" \n\t" + + "movq %%mm0, %%mm5 \n\t" + // + + "movd (%%"REG_S",%%"REG_a",4), %%mm3 \n\t" //7 ;prefetch! + "movq %%mm1, %%mm6 \n\t" + + "movd (%%"REG_S",%%"REG_d"), %%mm4 \n\t" //6 + "punpcklbw %%mm7, %%mm3 \n\t" + + "psubw %%mm3, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + + "paddw %%mm3, %%mm0 \n\t" + "psubw %%mm4, %%mm6 \n\t" + + "movd (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" //5 + "paddw %%mm4, %%mm1 \n\t" + + "movq %%mm5, %3 \n\t" //t7 + "punpcklbw %%mm7, %%mm3 \n\t" + + "movq %%mm6, %4 \n\t" //t6 + "movq %%mm2, %%mm4 \n\t" + + "movd (%%"REG_S"), %%mm5 \n\t" //3 + "paddw %%mm3, %%mm2 \n\t" + + "movd (%%"REG_S",%%"REG_a"), %%mm6 \n\t" //4 + "punpcklbw %%mm7, %%mm5 \n\t" + + "psubw %%mm3, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm6 \n\t" + + "movq %%mm5, %%mm3 \n\t" + "paddw %%mm6, %%mm5 \n\t" //t3 + + "psubw %%mm6, %%mm3 \n\t" //t4 ; t0 t1 t2 t4 t5 t3 - - + "movq %%mm0, %%mm6 \n\t" + + "movq %%mm1, %%mm7 \n\t" + "psubw %%mm5, %%mm0 \n\t" //t13 + + "psubw %%mm2, %%mm1 \n\t" + "paddw %%mm2, %%mm7 \n\t" //t11 + + "paddw %%mm0, %%mm1 \n\t" + "movq %%mm7, %%mm2 \n\t" + + "psllw $2, %%mm1 \n\t" + "paddw %%mm5, %%mm6 \n\t" //t10 + + "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm1 \n\t" + "paddw %%mm6, %%mm7 \n\t" //d2 + + "psubw %%mm2, %%mm6 \n\t" //d3 + "movq %%mm0, %%mm5 \n\t" + + //transpose 4x4 + "movq %%mm7, %%mm2 \n\t" + "punpcklwd %%mm6, %%mm7 \n\t" + + "paddw %%mm1, %%mm0 \n\t" //d0 + "punpckhwd %%mm6, %%mm2 \n\t" + + "psubw %%mm1, %%mm5 \n\t" //d1 + "movq %%mm0, %%mm6 \n\t" + + "movq %4, %%mm1 \n\t" + "punpcklwd %%mm5, %%mm0 \n\t" + + "punpckhwd %%mm5, %%mm6 \n\t" + "movq %%mm0, %%mm5 \n\t" + + "punpckldq %%mm7, %%mm0 \n\t" //0 + "paddw %%mm4, %%mm3 \n\t" + + "punpckhdq %%mm7, %%mm5 \n\t" //1 + "movq %%mm6, %%mm7 \n\t" + + "movq %%mm0, "DCTSIZE_S"*0*2(%%"REG_D") \n\t" + "punpckldq %%mm2, %%mm6 \n\t" //2 + + "movq %%mm5, "DCTSIZE_S"*1*2(%%"REG_D") \n\t" + "punpckhdq %%mm2, %%mm7 \n\t" //3 + + "movq %%mm6, "DCTSIZE_S"*2*2(%%"REG_D") \n\t" + "paddw %%mm1, %%mm4 \n\t" + + "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" + "psllw $2, %%mm3 \n\t" //t10 + + "movq %3, %%mm2 \n\t" + "psllw $2, %%mm4 \n\t" //t11 + + "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm4 \n\t" //z3 + "paddw %%mm2, %%mm1 \n\t" + + "psllw $2, %%mm1 \n\t" //t12 + "movq %%mm3, %%mm0 \n\t" + + "pmulhw "MANGLE(ff_MM_FIX_0_541196100)", %%mm0 \n\t" + "psubw %%mm1, %%mm3 \n\t" + + "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t" //z5 + "movq %%mm2, %%mm5 \n\t" + + "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm1 \n\t" + "psubw %%mm4, %%mm2 \n\t" //z13 + + "paddw %%mm4, %%mm5 \n\t" //z11 + "movq %%mm2, %%mm6 \n\t" + + "paddw %%mm3, %%mm0 \n\t" //z2 + "movq %%mm5, %%mm7 \n\t" + + "paddw %%mm0, %%mm2 \n\t" //d4 + "psubw %%mm0, %%mm6 \n\t" //d5 + + "movq %%mm2, %%mm4 \n\t" + "paddw %%mm3, %%mm1 \n\t" //z4 + + //transpose 4x4 + "punpcklwd %%mm6, %%mm2 \n\t" + "paddw %%mm1, %%mm5 \n\t" //d6 + + "punpckhwd %%mm6, %%mm4 \n\t" + "psubw %%mm1, %%mm7 \n\t" //d7 + + "movq %%mm5, %%mm6 \n\t" + "punpcklwd %%mm7, %%mm5 \n\t" + + "punpckhwd %%mm7, %%mm6 \n\t" + "movq %%mm2, %%mm7 \n\t" + + "punpckldq %%mm5, %%mm2 \n\t" //4 + "sub %%"REG_d", %%"REG_S" \n\t" + + "punpckhdq %%mm5, %%mm7 \n\t" //5 + "movq %%mm4, %%mm5 \n\t" + + "movq %%mm2, "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_D") \n\t" + "punpckldq %%mm6, %%mm4 \n\t" //6 + + "movq %%mm7, "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_D") \n\t" + "punpckhdq %%mm6, %%mm5 \n\t" //7 + + "movq %%mm4, "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_D") \n\t" + "add $4, %%"REG_S" \n\t" + + "movq %%mm5, "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_D") \n\t" + "add $"DCTSIZE_S"*2*4, %%"REG_D" \n\t" //4 rows + "dec %%"REG_c" \n\t" + "jnz 6b \n\t" + + : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps), "=o"(temps[1]) + : "a"(line_size) + NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,ff_MM_FIX_0_541196100,MM_FIX_0_382683433,MM_FIX_1_306562965) + : "%"REG_d); +} + +#endif // HAVE_MMX diff --git a/libavfilter/libmpcodecs/vf_ilpack.c b/libavfilter/libmpcodecs/vf_ilpack.c new file mode 100644 index 0000000000..fbf5817062 --- /dev/null +++ b/libavfilter/libmpcodecs/vf_ilpack.c @@ -0,0 +1,458 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> + +#include "config.h" +#include "mp_msg.h" +#include "cpudetect.h" + +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" +#include "libavutil/attributes.h" +#include "libavutil/x86/asm.h" + +typedef void (pack_func_t)(unsigned char *dst, unsigned char *y, + unsigned char *u, unsigned char *v, int w, int us, int vs); + +struct vf_priv_s { + int mode; + pack_func_t *pack[2]; +}; + +static void pack_nn_C(unsigned char *dst, unsigned char *y, + unsigned char *u, unsigned char *v, int w, + int av_unused us, int av_unused vs) +{ + int j; + for (j = w/2; j; j--) { + *dst++ = *y++; + *dst++ = *u++; + *dst++ = *y++; + *dst++ = *v++; + } +} + +static void pack_li_0_C(unsigned char *dst, unsigned char *y, + unsigned char *u, unsigned char *v, int w, int us, int vs) +{ + int j; + for (j = w/2; j; j--) { + *dst++ = *y++; + *dst++ = (u[us+us] + 7*u[0])>>3; + *dst++ = *y++; + *dst++ = (v[vs+vs] + 7*v[0])>>3; + u++; v++; + } +} + +static void pack_li_1_C(unsigned char *dst, unsigned char *y, + unsigned char *u, unsigned char *v, int w, int us, int vs) +{ + int j; + for (j = w/2; j; j--) { + *dst++ = *y++; + *dst++ = (3*u[us+us] + 5*u[0])>>3; + *dst++ = *y++; + *dst++ = (3*v[vs+vs] + 5*v[0])>>3; + u++; v++; + } +} + +#if HAVE_MMX +static void pack_nn_MMX(unsigned char *dst, unsigned char *y, + unsigned char *u, unsigned char *v, int w, + int av_unused us, int av_unused vs) +{ + __asm__ volatile ("" + ASMALIGN(4) + "1: \n\t" + "movq (%0), %%mm1 \n\t" + "movq (%0), %%mm2 \n\t" + "movq (%1), %%mm4 \n\t" + "movq (%2), %%mm6 \n\t" + "punpcklbw %%mm6, %%mm4 \n\t" + "punpcklbw %%mm4, %%mm1 \n\t" + "punpckhbw %%mm4, %%mm2 \n\t" + + "add $8, %0 \n\t" + "add $4, %1 \n\t" + "add $4, %2 \n\t" + "movq %%mm1, (%3) \n\t" + "movq %%mm2, 8(%3) \n\t" + "add $16, %3 \n\t" + "decl %4 \n\t" + "jnz 1b \n\t" + "emms \n\t" + : + : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8) + : "memory" + ); + pack_nn_C(dst, y, u, v, (w&7), 0, 0); +} + +#if HAVE_EBX_AVAILABLE +static void pack_li_0_MMX(unsigned char *dst, unsigned char *y, + unsigned char *u, unsigned char *v, int w, int us, int vs) +{ + __asm__ volatile ("" + "push %%"REG_BP" \n\t" +#if ARCH_X86_64 + "mov %6, %%"REG_BP" \n\t" +#else + "movl 4(%%"REG_d"), %%"REG_BP" \n\t" + "movl (%%"REG_d"), %%"REG_d" \n\t" +#endif + "pxor %%mm0, %%mm0 \n\t" + + ASMALIGN(4) + "2: \n\t" + "movq (%%"REG_S"), %%mm1 \n\t" + "movq (%%"REG_S"), %%mm2 \n\t" + + "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" + "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" + "punpcklbw %%mm0, %%mm4 \n\t" + "punpcklbw %%mm0, %%mm6 \n\t" + "movq (%%"REG_a"), %%mm3 \n\t" + "movq (%%"REG_b"), %%mm5 \n\t" + "punpcklbw %%mm0, %%mm3 \n\t" + "punpcklbw %%mm0, %%mm5 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "psrlw $3, %%mm4 \n\t" + "psrlw $3, %%mm6 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "packuswb %%mm6, %%mm6 \n\t" + "punpcklbw %%mm6, %%mm4 \n\t" + "punpcklbw %%mm4, %%mm1 \n\t" + "punpckhbw %%mm4, %%mm2 \n\t" + + "movq %%mm1, (%%"REG_D") \n\t" + "movq %%mm2, 8(%%"REG_D") \n\t" + + "movq 8(%%"REG_S"), %%mm1 \n\t" + "movq 8(%%"REG_S"), %%mm2 \n\t" + + "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" + "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" + "punpckhbw %%mm0, %%mm4 \n\t" + "punpckhbw %%mm0, %%mm6 \n\t" + "movq (%%"REG_a"), %%mm3 \n\t" + "movq (%%"REG_b"), %%mm5 \n\t" + "punpckhbw %%mm0, %%mm3 \n\t" + "punpckhbw %%mm0, %%mm5 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "psrlw $3, %%mm4 \n\t" + "psrlw $3, %%mm6 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "packuswb %%mm6, %%mm6 \n\t" + "punpcklbw %%mm6, %%mm4 \n\t" + "punpcklbw %%mm4, %%mm1 \n\t" + "punpckhbw %%mm4, %%mm2 \n\t" + + "add $16, %%"REG_S" \n\t" + "add $8, %%"REG_a" \n\t" + "add $8, %%"REG_b" \n\t" + + "movq %%mm1, 16(%%"REG_D") \n\t" + "movq %%mm2, 24(%%"REG_D") \n\t" + "add $32, %%"REG_D" \n\t" + + "decl %%ecx \n\t" + "jnz 2b \n\t" + "emms \n\t" + "pop %%"REG_BP" \n\t" + : + : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), +#if ARCH_X86_64 + "d" ((x86_reg)us), "r" ((x86_reg)vs) +#else + "d" (&us) +#endif + : "memory" + ); + pack_li_0_C(dst, y, u, v, (w&15), us, vs); +} + +static void pack_li_1_MMX(unsigned char *dst, unsigned char *y, + unsigned char *u, unsigned char *v, int w, int us, int vs) +{ + __asm__ volatile ("" + "push %%"REG_BP" \n\t" +#if ARCH_X86_64 + "mov %6, %%"REG_BP" \n\t" +#else + "movl 4(%%"REG_d"), %%"REG_BP" \n\t" + "movl (%%"REG_d"), %%"REG_d" \n\t" +#endif + "pxor %%mm0, %%mm0 \n\t" + + ASMALIGN(4) + "3: \n\t" + "movq (%%"REG_S"), %%mm1 \n\t" + "movq (%%"REG_S"), %%mm2 \n\t" + + "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" + "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" + "punpcklbw %%mm0, %%mm4 \n\t" + "punpcklbw %%mm0, %%mm6 \n\t" + "movq (%%"REG_a"), %%mm3 \n\t" + "movq (%%"REG_b"), %%mm5 \n\t" + "punpcklbw %%mm0, %%mm3 \n\t" + "punpcklbw %%mm0, %%mm5 \n\t" + "movq %%mm4, %%mm7 \n\t" + "paddw %%mm4, %%mm4 \n\t" + "paddw %%mm7, %%mm4 \n\t" + "movq %%mm6, %%mm7 \n\t" + "paddw %%mm6, %%mm6 \n\t" + "paddw %%mm7, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "psrlw $3, %%mm4 \n\t" + "psrlw $3, %%mm6 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "packuswb %%mm6, %%mm6 \n\t" + "punpcklbw %%mm6, %%mm4 \n\t" + "punpcklbw %%mm4, %%mm1 \n\t" + "punpckhbw %%mm4, %%mm2 \n\t" + + "movq %%mm1, (%%"REG_D") \n\t" + "movq %%mm2, 8(%%"REG_D") \n\t" + + "movq 8(%%"REG_S"), %%mm1 \n\t" + "movq 8(%%"REG_S"), %%mm2 \n\t" + + "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t" + "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t" + "punpckhbw %%mm0, %%mm4 \n\t" + "punpckhbw %%mm0, %%mm6 \n\t" + "movq (%%"REG_a"), %%mm3 \n\t" + "movq (%%"REG_b"), %%mm5 \n\t" + "punpckhbw %%mm0, %%mm3 \n\t" + "punpckhbw %%mm0, %%mm5 \n\t" + "movq %%mm4, %%mm7 \n\t" + "paddw %%mm4, %%mm4 \n\t" + "paddw %%mm7, %%mm4 \n\t" + "movq %%mm6, %%mm7 \n\t" + "paddw %%mm6, %%mm6 \n\t" + "paddw %%mm7, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "paddw %%mm3, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "psrlw $3, %%mm4 \n\t" + "psrlw $3, %%mm6 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "packuswb %%mm6, %%mm6 \n\t" + "punpcklbw %%mm6, %%mm4 \n\t" + "punpcklbw %%mm4, %%mm1 \n\t" + "punpckhbw %%mm4, %%mm2 \n\t" + + "add $16, %%"REG_S" \n\t" + "add $8, %%"REG_a" \n\t" + "add $8, %%"REG_b" \n\t" + + "movq %%mm1, 16(%%"REG_D") \n\t" + "movq %%mm2, 24(%%"REG_D") \n\t" + "add $32, %%"REG_D" \n\t" + + "decl %%ecx \n\t" + "jnz 3b \n\t" + "emms \n\t" + "pop %%"REG_BP" \n\t" + : + : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16), +#if ARCH_X86_64 + "d" ((x86_reg)us), "r" ((x86_reg)vs) +#else + "d" (&us) +#endif + : "memory" + ); + pack_li_1_C(dst, y, u, v, (w&15), us, vs); +} +#endif /* HAVE_EBX_AVAILABLE */ +#endif + +static pack_func_t *pack_nn; +static pack_func_t *pack_li_0; +static pack_func_t *pack_li_1; + +static void ilpack(unsigned char *dst, unsigned char *src[3], + int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2]) +{ + int i; + unsigned char *y, *u, *v; + int ys = srcstride[0], us = srcstride[1], vs = srcstride[2]; + int a, b; + + y = src[0]; + u = src[1]; + v = src[2]; + + pack_nn(dst, y, u, v, w, 0, 0); + y += ys; dst += dststride; + pack_nn(dst, y, u+us, v+vs, w, 0, 0); + y += ys; dst += dststride; + for (i=2; i<h-2; i++) { + a = (i&2) ? 1 : -1; + b = (i&1) ^ ((i&2)>>1); + pack[b](dst, y, u, v, w, us*a, vs*a); + y += ys; + if ((i&3) == 1) { + u -= us; + v -= vs; + } else { + u += us; + v += vs; + } + dst += dststride; + } + pack_nn(dst, y, u, v, w, 0, 0); + y += ys; dst += dststride; u += us; v += vs; + pack_nn(dst, y, u, v, w, 0, 0); +} + + +static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) +{ + mp_image_t *dmpi; + + // hope we'll get DR buffer: + dmpi=ff_vf_get_image(vf->next, IMGFMT_YUY2, + MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE, + mpi->w, mpi->h); + + ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride, mpi->w, mpi->h, vf->priv->pack); + + return ff_vf_next_put_image(vf,dmpi, pts); +} + +static int config(struct vf_instance *vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt) +{ + /* FIXME - also support UYVY output? */ + return ff_vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2); +} + + +static int query_format(struct vf_instance *vf, unsigned int fmt) +{ + /* FIXME - really any YUV 4:2:0 input format should work */ + switch (fmt) { + case IMGFMT_YV12: + case IMGFMT_IYUV: + case IMGFMT_I420: + return ff_vf_next_query_format(vf,IMGFMT_YUY2); + } + return 0; +} + +static int vf_open(vf_instance_t *vf, char *args) +{ + vf->config=config; + vf->query_format=query_format; + vf->put_image=put_image; + vf->priv = calloc(1, sizeof(struct vf_priv_s)); + vf->priv->mode = 1; + if (args) sscanf(args, "%d", &vf->priv->mode); + + pack_nn = pack_nn_C; + pack_li_0 = pack_li_0_C; + pack_li_1 = pack_li_1_C; +#if HAVE_MMX + if(ff_gCpuCaps.hasMMX) { + pack_nn = pack_nn_MMX; +#if HAVE_EBX_AVAILABLE + pack_li_0 = pack_li_0_MMX; + pack_li_1 = pack_li_1_MMX; +#endif + } +#endif + + switch(vf->priv->mode) { + case 0: + vf->priv->pack[0] = vf->priv->pack[1] = pack_nn; + break; + default: + ff_mp_msg(MSGT_VFILTER, MSGL_WARN, + "ilpack: unknown mode %d (fallback to linear)\n", + vf->priv->mode); + /* Fallthrough */ + case 1: + vf->priv->pack[0] = pack_li_0; + vf->priv->pack[1] = pack_li_1; + break; + } + + return 1; +} + +const vf_info_t ff_vf_info_ilpack = { + "4:2:0 planar -> 4:2:2 packed reinterlacer", + "ilpack", + "Richard Felker", + "", + vf_open, + NULL +}; diff --git a/libavfilter/libmpcodecs/vf_pp7.c b/libavfilter/libmpcodecs/vf_pp7.c new file mode 100644 index 0000000000..89ed4fe679 --- /dev/null +++ b/libavfilter/libmpcodecs/vf_pp7.c @@ -0,0 +1,491 @@ +/* + * Copyright (C) 2005 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <math.h> + +#include "config.h" + +#include "mp_msg.h" +#include "cpudetect.h" + +#if HAVE_MALLOC_H +#include <malloc.h> +#endif + +#include "libavutil/mem.h" + +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" +#include "libvo/fastmemcpy.h" + +#define XMIN(a,b) ((a) < (b) ? (a) : (b)) +#define XMAX(a,b) ((a) > (b) ? (a) : (b)) + +//===========================================================================// +DECLARE_ALIGNED(8, static const uint8_t, dither)[8][8] = { +{ 0, 48, 12, 60, 3, 51, 15, 63, }, +{ 32, 16, 44, 28, 35, 19, 47, 31, }, +{ 8, 56, 4, 52, 11, 59, 7, 55, }, +{ 40, 24, 36, 20, 43, 27, 39, 23, }, +{ 2, 50, 14, 62, 1, 49, 13, 61, }, +{ 34, 18, 46, 30, 33, 17, 45, 29, }, +{ 10, 58, 6, 54, 9, 57, 5, 53, }, +{ 42, 26, 38, 22, 41, 25, 37, 21, }, +}; + +struct vf_priv_s { + int qp; + int mode; + int mpeg2; + int temp_stride; + uint8_t *src; +}; +#if 0 +static inline void dct7_c(int16_t *dst, int s0, int s1, int s2, int s3, int step){ + int s, d; + int dst2[64]; +//#define S0 (1024/0.37796447300922719759) +#define C0 ((int)(1024*0.37796447300922719759+0.5)) //sqrt(1/7) +#define C1 ((int)(1024*0.53452248382484879308/6+0.5)) //sqrt(2/7)/6 + +#define C2 ((int)(1024*0.45221175985034745004/2+0.5)) +#define C3 ((int)(1024*0.36264567479870879474/2+0.5)) + +//0.1962505182412941918 0.0149276808419397944-0.2111781990832339584 +#define C4 ((int)(1024*0.1962505182412941918+0.5)) +#define C5 ((int)(1024*0.0149276808419397944+0.5)) +//#define C6 ((int)(1024*0.2111781990832339584+0.5)) +#if 0 + s= s0 + s1 + s2; + dst[0*step] = ((s + s3)*C0 + 512) >> 10; + s= (s - 6*s3)*C1 + 512; + d= (s0-s2)*C4 + (s1-s2)*C5; + dst[1*step] = (s + 2*d)>>10; + s -= d; + d= (s1-s0)*C2 + (s1-s2)*C3; + dst[2*step] = (s + d)>>10; + dst[3*step] = (s - d)>>10; +#elif 1 + s = s3+s3; + s3= s-s0; + s0= s+s0; + s = s2+s1; + s2= s2-s1; + dst[0*step]= s0 + s; + dst[2*step]= s0 - s; + dst[1*step]= 2*s3 + s2; + dst[3*step]= s3 - 2*s2; +#else + int i,j,n=7; + for(i=0; i<7; i+=2){ + dst2[i*step/2]= 0; + for(j=0; j<4; j++) + dst2[i*step/2] += src[j*step] * cos(i*M_PI/n*(j+0.5)) * sqrt((i?2.0:1.0)/n); + if(fabs(dst2[i*step/2] - dst[i*step/2]) > 20) + printf("%d %d %d (%d %d %d %d) -> (%d %d %d %d)\n", i,dst2[i*step/2], dst[i*step/2],src[0*step], src[1*step], src[2*step], src[3*step], dst[0*step], dst[1*step],dst[2*step],dst[3*step]); + } +#endif +} +#endif + +static inline void dctA_c(int16_t *dst, uint8_t *src, int stride){ + int i; + + for(i=0; i<4; i++){ + int s0= src[0*stride] + src[6*stride]; + int s1= src[1*stride] + src[5*stride]; + int s2= src[2*stride] + src[4*stride]; + int s3= src[3*stride]; + int s= s3+s3; + s3= s-s0; + s0= s+s0; + s = s2+s1; + s2= s2-s1; + dst[0]= s0 + s; + dst[2]= s0 - s; + dst[1]= 2*s3 + s2; + dst[3]= s3 - 2*s2; + src++; + dst+=4; + } +} + +static void dctB_c(int16_t *dst, int16_t *src){ + int i; + + for(i=0; i<4; i++){ + int s0= src[0*4] + src[6*4]; + int s1= src[1*4] + src[5*4]; + int s2= src[2*4] + src[4*4]; + int s3= src[3*4]; + int s= s3+s3; + s3= s-s0; + s0= s+s0; + s = s2+s1; + s2= s2-s1; + dst[0*4]= s0 + s; + dst[2*4]= s0 - s; + dst[1*4]= 2*s3 + s2; + dst[3*4]= s3 - 2*s2; + src++; + dst++; + } +} + +#if HAVE_MMX +static void dctB_mmx(int16_t *dst, int16_t *src){ + __asm__ volatile ( + "movq (%0), %%mm0 \n\t" + "movq 1*4*2(%0), %%mm1 \n\t" + "paddw 6*4*2(%0), %%mm0 \n\t" + "paddw 5*4*2(%0), %%mm1 \n\t" + "movq 2*4*2(%0), %%mm2 \n\t" + "movq 3*4*2(%0), %%mm3 \n\t" + "paddw 4*4*2(%0), %%mm2 \n\t" + "paddw %%mm3, %%mm3 \n\t" //s + "movq %%mm3, %%mm4 \n\t" //s + "psubw %%mm0, %%mm3 \n\t" //s-s0 + "paddw %%mm0, %%mm4 \n\t" //s+s0 + "movq %%mm2, %%mm0 \n\t" //s2 + "psubw %%mm1, %%mm2 \n\t" //s2-s1 + "paddw %%mm1, %%mm0 \n\t" //s2+s1 + "movq %%mm4, %%mm1 \n\t" //s0' + "psubw %%mm0, %%mm4 \n\t" //s0'-s' + "paddw %%mm0, %%mm1 \n\t" //s0'+s' + "movq %%mm3, %%mm0 \n\t" //s3' + "psubw %%mm2, %%mm3 \n\t" + "psubw %%mm2, %%mm3 \n\t" + "paddw %%mm0, %%mm2 \n\t" + "paddw %%mm0, %%mm2 \n\t" + "movq %%mm1, (%1) \n\t" + "movq %%mm4, 2*4*2(%1) \n\t" + "movq %%mm2, 1*4*2(%1) \n\t" + "movq %%mm3, 3*4*2(%1) \n\t" + :: "r" (src), "r"(dst) + ); +} +#endif + +static void (*dctB)(int16_t *dst, int16_t *src)= dctB_c; + +#define N0 4 +#define N1 5 +#define N2 10 +#define SN0 2 +#define SN1 2.2360679775 +#define SN2 3.16227766017 +#define N (1<<16) + +static const int factor[16]={ + N/(N0*N0), N/(N0*N1), N/(N0*N0),N/(N0*N2), + N/(N1*N0), N/(N1*N1), N/(N1*N0),N/(N1*N2), + N/(N0*N0), N/(N0*N1), N/(N0*N0),N/(N0*N2), + N/(N2*N0), N/(N2*N1), N/(N2*N0),N/(N2*N2), +}; + +static const int thres[16]={ + N/(SN0*SN0), N/(SN0*SN2), N/(SN0*SN0),N/(SN0*SN2), + N/(SN2*SN0), N/(SN2*SN2), N/(SN2*SN0),N/(SN2*SN2), + N/(SN0*SN0), N/(SN0*SN2), N/(SN0*SN0),N/(SN0*SN2), + N/(SN2*SN0), N/(SN2*SN2), N/(SN2*SN0),N/(SN2*SN2), +}; + +static int thres2[99][16]; + +static void init_thres2(void){ + int qp, i; + int bias= 0; //FIXME + + for(qp=0; qp<99; qp++){ + for(i=0; i<16; i++){ + thres2[qp][i]= ((i&1)?SN2:SN0) * ((i&4)?SN2:SN0) * XMAX(1,qp) * (1<<2) - 1 - bias; + } + } +} + +static int hardthresh_c(int16_t *src, int qp){ + int i; + int a; + + a= src[0] * factor[0]; + for(i=1; i<16; i++){ + unsigned int threshold1= thres2[qp][i]; + unsigned int threshold2= (threshold1<<1); + int level= src[i]; + if(((unsigned)(level+threshold1))>threshold2){ + a += level * factor[i]; + } + } + return (a + (1<<11))>>12; +} + +static int mediumthresh_c(int16_t *src, int qp){ + int i; + int a; + + a= src[0] * factor[0]; + for(i=1; i<16; i++){ + unsigned int threshold1= thres2[qp][i]; + unsigned int threshold2= (threshold1<<1); + int level= src[i]; + if(((unsigned)(level+threshold1))>threshold2){ + if(((unsigned)(level+2*threshold1))>2*threshold2){ + a += level * factor[i]; + }else{ + if(level>0) a+= 2*(level - (int)threshold1)*factor[i]; + else a+= 2*(level + (int)threshold1)*factor[i]; + } + } + } + return (a + (1<<11))>>12; +} + +static int softthresh_c(int16_t *src, int qp){ + int i; + int a; + + a= src[0] * factor[0]; + for(i=1; i<16; i++){ + unsigned int threshold1= thres2[qp][i]; + unsigned int threshold2= (threshold1<<1); + int level= src[i]; + if(((unsigned)(level+threshold1))>threshold2){ + if(level>0) a+= (level - (int)threshold1)*factor[i]; + else a+= (level + (int)threshold1)*factor[i]; + } + } + return (a + (1<<11))>>12; +} + +static int (*requantize)(int16_t *src, int qp)= hardthresh_c; + +static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma){ + int x, y; + const int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15)); + uint8_t *p_src= p->src + 8*stride; + int16_t *block= (int16_t *)p->src; + int16_t *temp= (int16_t *)(p->src + 32); + + if (!src || !dst) return; // HACK avoid crash for Y8 colourspace + for(y=0; y<height; y++){ + int index= 8 + 8*stride + y*stride; + fast_memcpy(p_src + index, src + y*src_stride, width); + for(x=0; x<8; x++){ + p_src[index - x - 1]= p_src[index + x ]; + p_src[index + width + x ]= p_src[index + width - x - 1]; + } + } + for(y=0; y<8; y++){ + fast_memcpy(p_src + ( 7-y)*stride, p_src + ( y+8)*stride, stride); + fast_memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride); + } + //FIXME (try edge emu) + + for(y=0; y<height; y++){ + for(x=-8; x<0; x+=4){ + const int index= x + y*stride + (8-3)*(1+stride) + 8; //FIXME silly offset + uint8_t *src = p_src + index; + int16_t *tp= temp+4*x; + + dctA_c(tp+4*8, src, stride); + } + for(x=0; x<width; ){ + const int qps= 3 + is_luma; + int qp; + int end= XMIN(x+8, width); + + if(p->qp) + qp= p->qp; + else{ + qp= qp_store[ (XMIN(x, width-1)>>qps) + (XMIN(y, height-1)>>qps) * qp_stride]; + qp=norm_qscale(qp, p->mpeg2); + } + for(; x<end; x++){ + const int index= x + y*stride + (8-3)*(1+stride) + 8; //FIXME silly offset + uint8_t *src = p_src + index; + int16_t *tp= temp+4*x; + int v; + + if((x&3)==0) + dctA_c(tp+4*8, src, stride); + + dctB(block, tp); + + v= requantize(block, qp); + v= (v + dither[y&7][x&7])>>6; + if((unsigned)v > 255) + v= (-v)>>31; + dst[x + y*dst_stride]= v; + } + } + } +} + +static int config(struct vf_instance *vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt){ + int h= (height+16+15)&(~15); + + vf->priv->temp_stride= (width+16+15)&(~15); + vf->priv->src = av_malloc(vf->priv->temp_stride*(h+8)*sizeof(uint8_t)); + + return ff_vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); +} + +static void get_image(struct vf_instance *vf, mp_image_t *mpi){ + if(mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change + // ok, we can do pp in-place (or pp disabled): + vf->dmpi=ff_vf_get_image(vf->next,mpi->imgfmt, + mpi->type, mpi->flags | MP_IMGFLAG_READABLE, mpi->width, mpi->height); + mpi->planes[0]=vf->dmpi->planes[0]; + mpi->stride[0]=vf->dmpi->stride[0]; + mpi->width=vf->dmpi->width; + if(mpi->flags&MP_IMGFLAG_PLANAR){ + mpi->planes[1]=vf->dmpi->planes[1]; + mpi->planes[2]=vf->dmpi->planes[2]; + mpi->stride[1]=vf->dmpi->stride[1]; + mpi->stride[2]=vf->dmpi->stride[2]; + } + mpi->flags|=MP_IMGFLAG_DIRECT; +} + +static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){ + mp_image_t *dmpi; + + if(mpi->flags&MP_IMGFLAG_DIRECT){ + dmpi=vf->dmpi; + }else{ + // no DR, so get a new image! hope we'll get DR buffer: + dmpi=ff_vf_get_image(vf->next,mpi->imgfmt, + MP_IMGTYPE_TEMP, + MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE, + mpi->width,mpi->height); + ff_vf_clone_mpi_attributes(dmpi, mpi); + } + + vf->priv->mpeg2= mpi->qscale_type; + if(mpi->qscale || vf->priv->qp){ + filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0], mpi->w, mpi->h, mpi->qscale, mpi->qstride, 1); + filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, mpi->qscale, mpi->qstride, 0); + filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, mpi->qscale, mpi->qstride, 0); + }else{ + memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]); + memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]); + memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]); + } + +#if HAVE_MMX + if(ff_gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t"); +#endif +#if HAVE_MMX2 + if(ff_gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t"); +#endif + + return ff_vf_next_put_image(vf,dmpi, pts); +} + +static void uninit(struct vf_instance *vf){ + if(!vf->priv) return; + + av_free(vf->priv->src); + vf->priv->src= NULL; + + free(vf->priv); + vf->priv=NULL; +} + +//===========================================================================// +static int query_format(struct vf_instance *vf, unsigned int fmt){ + switch(fmt){ + case IMGFMT_YVU9: + case IMGFMT_IF09: + case IMGFMT_YV12: + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_CLPL: + case IMGFMT_Y800: + case IMGFMT_Y8: + case IMGFMT_444P: + case IMGFMT_422P: + case IMGFMT_411P: + return ff_vf_next_query_format(vf,fmt); + } + return 0; +} + +static int control(struct vf_instance *vf, int request, void* data){ + return ff_vf_next_control(vf,request,data); +} + +static int vf_open(vf_instance_t *vf, char *args){ + vf->config=config; + vf->put_image=put_image; + vf->get_image=get_image; + vf->query_format=query_format; + vf->uninit=uninit; + vf->control= control; + vf->priv=malloc(sizeof(struct vf_priv_s)); + memset(vf->priv, 0, sizeof(struct vf_priv_s)); + + if (args) sscanf(args, "%d:%d", &vf->priv->qp, &vf->priv->mode); + + if(vf->priv->qp < 0) + vf->priv->qp = 0; + + init_thres2(); + + switch(vf->priv->mode){ + case 0: requantize= hardthresh_c; break; + case 1: requantize= softthresh_c; break; + default: + case 2: requantize= mediumthresh_c; break; + } + +#if HAVE_MMX + if(ff_gCpuCaps.hasMMX){ + dctB= dctB_mmx; + } +#endif +#if 0 + if(ff_gCpuCaps.hasMMX){ + switch(vf->priv->mode){ + case 0: requantize= hardthresh_mmx; break; + case 1: requantize= softthresh_mmx; break; + } + } +#endif + + return 1; +} + +const vf_info_t ff_vf_info_pp7 = { + "postprocess 7", + "pp7", + "Michael Niedermayer", + "", + vf_open, + NULL +}; diff --git a/libavfilter/libmpcodecs/vf_softpulldown.c b/libavfilter/libmpcodecs/vf_softpulldown.c new file mode 100644 index 0000000000..556374eb06 --- /dev/null +++ b/libavfilter/libmpcodecs/vf_softpulldown.c @@ -0,0 +1,163 @@ +/* + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "config.h" +#include "mp_msg.h" + +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" + +#include "libvo/fastmemcpy.h" + +struct vf_priv_s { + int state; + long long in; + long long out; +}; + +static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts) +{ + mp_image_t *dmpi; + int ret = 0; + int flags = mpi->fields; + int state = vf->priv->state; + + dmpi = ff_vf_get_image(vf->next, mpi->imgfmt, + MP_IMGTYPE_STATIC, MP_IMGFLAG_ACCEPT_STRIDE | + MP_IMGFLAG_PRESERVE, mpi->width, mpi->height); + + vf->priv->in++; + + if ((state == 0 && + !(flags & MP_IMGFIELD_TOP_FIRST)) || + (state == 1 && + flags & MP_IMGFIELD_TOP_FIRST)) { + ff_mp_msg(MSGT_VFILTER, MSGL_WARN, + "softpulldown: Unexpected field flags: state=%d top_field_first=%d repeat_first_field=%d\n", + state, + (flags & MP_IMGFIELD_TOP_FIRST) != 0, + (flags & MP_IMGFIELD_REPEAT_FIRST) != 0); + state ^= 1; + } + + if (state == 0) { + ret = ff_vf_next_put_image(vf, mpi, MP_NOPTS_VALUE); + vf->priv->out++; + if (flags & MP_IMGFIELD_REPEAT_FIRST) { + my_memcpy_pic(dmpi->planes[0], + mpi->planes[0], mpi->w, mpi->h/2, + dmpi->stride[0]*2, mpi->stride[0]*2); + if (mpi->flags & MP_IMGFLAG_PLANAR) { + my_memcpy_pic(dmpi->planes[1], + mpi->planes[1], + mpi->chroma_width, + mpi->chroma_height/2, + dmpi->stride[1]*2, + mpi->stride[1]*2); + my_memcpy_pic(dmpi->planes[2], + mpi->planes[2], + mpi->chroma_width, + mpi->chroma_height/2, + dmpi->stride[2]*2, + mpi->stride[2]*2); + } + state=1; + } + } else { + my_memcpy_pic(dmpi->planes[0]+dmpi->stride[0], + mpi->planes[0]+mpi->stride[0], mpi->w, mpi->h/2, + dmpi->stride[0]*2, mpi->stride[0]*2); + if (mpi->flags & MP_IMGFLAG_PLANAR) { + my_memcpy_pic(dmpi->planes[1]+dmpi->stride[1], + mpi->planes[1]+mpi->stride[1], + mpi->chroma_width, mpi->chroma_height/2, + dmpi->stride[1]*2, mpi->stride[1]*2); + my_memcpy_pic(dmpi->planes[2]+dmpi->stride[2], + mpi->planes[2]+mpi->stride[2], + mpi->chroma_width, mpi->chroma_height/2, + dmpi->stride[2]*2, mpi->stride[2]*2); + } + ret = ff_vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE); + vf->priv->out++; + if (flags & MP_IMGFIELD_REPEAT_FIRST) { + ret |= ff_vf_next_put_image(vf, mpi, MP_NOPTS_VALUE); + vf->priv->out++; + state=0; + } else { + my_memcpy_pic(dmpi->planes[0], + mpi->planes[0], mpi->w, mpi->h/2, + dmpi->stride[0]*2, mpi->stride[0]*2); + if (mpi->flags & MP_IMGFLAG_PLANAR) { + my_memcpy_pic(dmpi->planes[1], + mpi->planes[1], + mpi->chroma_width, + mpi->chroma_height/2, + dmpi->stride[1]*2, + mpi->stride[1]*2); + my_memcpy_pic(dmpi->planes[2], + mpi->planes[2], + mpi->chroma_width, + mpi->chroma_height/2, + dmpi->stride[2]*2, + mpi->stride[2]*2); + } + } + } + + vf->priv->state = state; + + return ret; +} + +static int config(struct vf_instance *vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt) +{ + return ff_vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); +} + +static void uninit(struct vf_instance *vf) +{ + ff_mp_msg(MSGT_VFILTER, MSGL_INFO, "softpulldown: %lld frames in, %lld frames out\n", vf->priv->in, vf->priv->out); + free(vf->priv); +} + +static int vf_open(vf_instance_t *vf, char *args) +{ + vf->config = config; + vf->put_image = put_image; + vf->uninit = uninit; + vf->default_reqs = VFCAP_ACCEPT_STRIDE; + vf->priv = calloc(1, sizeof(struct vf_priv_s)); + vf->priv->state = 0; + return 1; +} + +const vf_info_t ff_vf_info_softpulldown = { + "mpeg2 soft 3:2 pulldown", + "softpulldown", + "Tobias Diedrich <ranma+mplayer@tdiedrich.de>", + "", + vf_open, + NULL +}; diff --git a/libavfilter/libmpcodecs/vf_uspp.c b/libavfilter/libmpcodecs/vf_uspp.c new file mode 100644 index 0000000000..c9d9c1fd16 --- /dev/null +++ b/libavfilter/libmpcodecs/vf_uspp.c @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2005 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <math.h> +#include <assert.h> + +#include "config.h" + +#include "mp_msg.h" +#include "cpudetect.h" + +#include "libavutil/mem.h" +#include "libavcodec/avcodec.h" + +#include "img_format.h" +#include "mp_image.h" +#include "vf.h" +#include "av_helpers.h" +#include "libvo/fastmemcpy.h" + +#define XMIN(a,b) ((a) < (b) ? (a) : (b)) + +#define BLOCK 16 + +//===========================================================================// +DECLARE_ALIGNED(8, static const uint8_t, dither)[8][8] = { +{ 0*4, 48*4, 12*4, 60*4, 3*4, 51*4, 15*4, 63*4, }, +{ 32*4, 16*4, 44*4, 28*4, 35*4, 19*4, 47*4, 31*4, }, +{ 8*4, 56*4, 4*4, 52*4, 11*4, 59*4, 7*4, 55*4, }, +{ 40*4, 24*4, 36*4, 20*4, 43*4, 27*4, 39*4, 23*4, }, +{ 2*4, 50*4, 14*4, 62*4, 1*4, 49*4, 13*4, 61*4, }, +{ 34*4, 18*4, 46*4, 30*4, 33*4, 17*4, 45*4, 29*4, }, +{ 10*4, 58*4, 6*4, 54*4, 9*4, 57*4, 5*4, 53*4, }, +{ 42*4, 26*4, 38*4, 22*4, 41*4, 25*4, 37*4, 21*4, }, +}; + +static const uint8_t offset[511][2]= { +{ 0, 0}, +{ 0, 0}, { 8, 8}, +{ 0, 0}, { 4, 4}, {12, 8}, { 8,12}, +{ 0, 0}, {10, 2}, { 4, 4}, {14, 6}, { 8, 8}, { 2,10}, {12,12}, { 6,14}, + +{ 0, 0}, {10, 2}, { 4, 4}, {14, 6}, { 8, 8}, { 2,10}, {12,12}, { 6,14}, +{ 5, 1}, {15, 3}, { 9, 5}, { 3, 7}, {13, 9}, { 7,11}, { 1,13}, {11,15}, + +{ 0, 0}, { 8, 0}, { 0, 8}, { 8, 8}, { 5, 1}, {13, 1}, { 5, 9}, {13, 9}, +{ 2, 2}, {10, 2}, { 2,10}, {10,10}, { 7, 3}, {15, 3}, { 7,11}, {15,11}, +{ 4, 4}, {12, 4}, { 4,12}, {12,12}, { 1, 5}, { 9, 5}, { 1,13}, { 9,13}, +{ 6, 6}, {14, 6}, { 6,14}, {14,14}, { 3, 7}, {11, 7}, { 3,15}, {11,15}, + +{ 0, 0}, { 8, 0}, { 0, 8}, { 8, 8}, { 4, 0}, {12, 0}, { 4, 8}, {12, 8}, +{ 1, 1}, { 9, 1}, { 1, 9}, { 9, 9}, { 5, 1}, {13, 1}, { 5, 9}, {13, 9}, +{ 3, 2}, {11, 2}, { 3,10}, {11,10}, { 7, 2}, {15, 2}, { 7,10}, {15,10}, +{ 2, 3}, {10, 3}, { 2,11}, {10,11}, { 6, 3}, {14, 3}, { 6,11}, {14,11}, +{ 0, 4}, { 8, 4}, { 0,12}, { 8,12}, { 4, 4}, {12, 4}, { 4,12}, {12,12}, +{ 1, 5}, { 9, 5}, { 1,13}, { 9,13}, { 5, 5}, {13, 5}, { 5,13}, {13,13}, +{ 3, 6}, {11, 6}, { 3,14}, {11,14}, { 7, 6}, {15, 6}, { 7,14}, {15,14}, +{ 2, 7}, {10, 7}, { 2,15}, {10,15}, { 6, 7}, {14, 7}, { 6,15}, {14,15}, + +{ 0, 0}, { 8, 0}, { 0, 8}, { 8, 8}, { 0, 2}, { 8, 2}, { 0,10}, { 8,10}, +{ 0, 4}, { 8, 4}, { 0,12}, { 8,12}, { 0, 6}, { 8, 6}, { 0,14}, { 8,14}, +{ 1, 1}, { 9, 1}, { 1, 9}, { 9, 9}, { 1, 3}, { 9, 3}, { 1,11}, { 9,11}, +{ 1, 5}, { 9, 5}, { 1,13}, { 9,13}, { 1, 7}, { 9, 7}, { 1,15}, { 9,15}, +{ 2, 0}, {10, 0}, { 2, 8}, {10, 8}, { 2, 2}, {10, 2}, { 2,10}, {10,10}, +{ 2, 4}, {10, 4}, { 2,12}, {10,12}, { 2, 6}, {10, 6}, { 2,14}, {10,14}, +{ 3, 1}, {11, 1}, { 3, 9}, {11, 9}, { 3, 3}, {11, 3}, { 3,11}, {11,11}, +{ 3, 5}, {11, 5}, { 3,13}, {11,13}, { 3, 7}, {11, 7}, { 3,15}, {11,15}, +{ 4, 0}, {12, 0}, { 4, 8}, {12, 8}, { 4, 2}, {12, 2}, { 4,10}, {12,10}, +{ 4, 4}, {12, 4}, { 4,12}, {12,12}, { 4, 6}, {12, 6}, { 4,14}, {12,14}, +{ 5, 1}, {13, 1}, { 5, 9}, {13, 9}, { 5, 3}, {13, 3}, { 5,11}, {13,11}, +{ 5, 5}, {13, 5}, { 5,13}, {13,13}, { 5, 7}, {13, 7}, { 5,15}, {13,15}, +{ 6, 0}, {14, 0}, { 6, 8}, {14, 8}, { 6, 2}, {14, 2}, { 6,10}, {14,10}, +{ 6, 4}, {14, 4}, { 6,12}, {14,12}, { 6, 6}, {14, 6}, { 6,14}, {14,14}, +{ 7, 1}, {15, 1}, { 7, 9}, {15, 9}, { 7, 3}, {15, 3}, { 7,11}, {15,11}, +{ 7, 5}, {15, 5}, { 7,13}, {15,13}, { 7, 7}, {15, 7}, { 7,15}, {15,15}, + +{ 0, 0}, { 8, 0}, { 0, 8}, { 8, 8}, { 4, 4}, {12, 4}, { 4,12}, {12,12}, { 0, 4}, { 8, 4}, { 0,12}, { 8,12}, { 4, 0}, {12, 0}, { 4, 8}, {12, 8}, { 2, 2}, {10, 2}, { 2,10}, {10,10}, { 6, 6}, {14, 6}, { 6,14}, {14,14}, { 2, 6}, {10, 6}, { 2,14}, {10,14}, { 6, 2}, {14, 2}, { 6,10}, {14,10}, { 0, 2}, { 8, 2}, { 0,10}, { 8,10}, { 4, 6}, {12, 6}, { 4,14}, {12,14}, { 0, 6}, { 8, 6}, { 0,14}, { 8,14}, { 4, 2}, {12, 2}, { 4,10}, {12,10}, { 2, 0}, {10, 0}, { 2, 8}, {10, 8}, { 6, 4}, {14, 4}, { 6,12}, {14,12}, { 2, 4}, {10, 4}, { 2,12}, {10,12}, { 6, 0}, {14, 0}, { 6, 8}, {14, 8}, { 1, 1}, { 9, 1}, { 1, 9}, { 9, 9}, { 5, 5}, {13, 5}, { 5,13}, {13,13}, { 1, 5}, { 9, 5}, { 1,13}, { 9,13}, { 5, 1}, {13, 1}, { 5, 9}, {13, 9}, { 3, 3}, {11, 3}, { 3,11}, {11,11}, { 7, 7}, {15, 7}, { 7,15}, {15,15}, { 3, 7}, {11, 7}, { 3,15}, {11,15}, { 7, 3}, {15, 3}, { 7,11}, {15,11}, { 1, 3}, { 9, 3}, { 1,11}, { 9,11}, { 5, 7}, {13, 7}, { 5,15}, {13,15}, { 1, 7}, { 9, 7}, { 1,15}, { 9,15}, { 5, 3}, {13, 3}, { 5,11}, {13,11}, { 3, 1}, {11, 1} +, { 3, 9}, {11, 9}, { 7, 5}, {15, 5}, { 7,13}, {15,13}, { 3, 5}, {11, 5}, { 3,13}, {11,13}, { 7, 1}, {15, 1}, { 7, 9}, {15, 9}, { 0, 1}, { 8, 1}, { 0, 9}, { 8, 9}, { 4, 5}, {12, 5}, { 4,13}, {12,13}, { 0, 5}, { 8, 5}, { 0,13}, { 8,13}, { 4, 1}, {12, 1}, { 4, 9}, {12, 9}, { 2, 3}, {10, 3}, { 2,11}, {10,11}, { 6, 7}, {14, 7}, { 6,15}, {14,15}, { 2, 7}, {10, 7}, { 2,15}, {10,15}, { 6, 3}, {14, 3}, { 6,11}, {14,11}, { 0, 3}, { 8, 3}, { 0,11}, { 8,11}, { 4, 7}, {12, 7}, { 4,15}, {12,15}, { 0, 7}, { 8, 7}, { 0,15}, { 8,15}, { 4, 3}, {12, 3}, { 4,11}, {12,11}, { 2, 1}, {10, 1}, { 2, 9}, {10, 9}, { 6, 5}, {14, 5}, { 6,13}, {14,13}, { 2, 5}, {10, 5}, { 2,13}, {10,13}, { 6, 1}, {14, 1}, { 6, 9}, {14, 9}, { 1, 0}, { 9, 0}, { 1, 8}, { 9, 8}, { 5, 4}, {13, 4}, { 5,12}, {13,12}, { 1, 4}, { 9, 4}, { 1,12}, { 9,12}, { 5, 0}, {13, 0}, { 5, 8}, {13, 8}, { 3, 2}, {11, 2}, { 3,10}, {11,10}, { 7, 6}, {15, 6}, { 7,14}, {15,14}, { 3, 6}, {11, 6}, { 3,14}, {11,14}, { 7, 2}, {15, 2}, { 7,10}, {15,10}, { 1, 2}, { 9, 2}, { 1,10}, { 9, +10}, { 5, 6}, {13, 6}, { 5,14}, {13,14}, { 1, 6}, { 9, 6}, { 1,14}, { 9,14}, { 5, 2}, {13, 2}, { 5,10}, {13,10}, { 3, 0}, {11, 0}, { 3, 8}, {11, 8}, { 7, 4}, {15, 4}, { 7,12}, {15,12}, { 3, 4}, {11, 4}, { 3,12}, {11,12}, { 7, 0}, {15, 0}, { 7, 8}, {15, 8}, +}; + +struct vf_priv_s { + int log2_count; + int qp; + int mode; + int mpeg2; + int temp_stride[3]; + uint8_t *src[3]; + int16_t *temp[3]; + int outbuf_size; + uint8_t *outbuf; + AVCodecContext *avctx_enc[BLOCK*BLOCK]; + AVFrame *frame; + AVFrame *frame_dec; +}; + +static void store_slice_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale){ + int y, x; + +#define STORE(pos) \ + temp= ((src[x + y*src_stride + pos]<<log2_scale) + d[pos])>>8;\ + if(temp & 0x100) temp= ~(temp>>31);\ + dst[x + y*dst_stride + pos]= temp; + + for(y=0; y<height; y++){ + const uint8_t *d= dither[y&7]; + for(x=0; x<width; x+=8){ + int temp; + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + STORE(6); + STORE(7); + } + } +} + +static void filter(struct vf_priv_s *p, uint8_t *dst[3], uint8_t *src[3], int dst_stride[3], int src_stride[3], int width, int height, uint8_t *qp_store, int qp_stride){ + int x, y, i, j; + const int count= 1<<p->log2_count; + + for(i=0; i<3; i++){ + int is_chroma= !!i; + int w= width >>is_chroma; + int h= height>>is_chroma; + int stride= p->temp_stride[i]; + int block= BLOCK>>is_chroma; + + if (!src[i] || !dst[i]) + continue; // HACK avoid crash for Y8 colourspace + for(y=0; y<h; y++){ + int index= block + block*stride + y*stride; + fast_memcpy(p->src[i] + index, src[i] + y*src_stride[i], w); + for(x=0; x<block; x++){ + p->src[i][index - x - 1]= p->src[i][index + x ]; + p->src[i][index + w + x ]= p->src[i][index + w - x - 1]; + } + } + for(y=0; y<block; y++){ + fast_memcpy(p->src[i] + ( block-1-y)*stride, p->src[i] + ( y+block )*stride, stride); + fast_memcpy(p->src[i] + (h+block +y)*stride, p->src[i] + (h-y+block-1)*stride, stride); + } + + p->frame->linesize[i]= stride; + memset(p->temp[i], 0, (h+2*block)*stride*sizeof(int16_t)); + } + + if(p->qp) + p->frame->quality= p->qp * FF_QP2LAMBDA; + else + p->frame->quality= norm_qscale(qp_store[0], p->mpeg2) * FF_QP2LAMBDA; +// init per MB qscale stuff FIXME + + for(i=0; i<count; i++){ + const int x1= offset[i+count-1][0]; + const int y1= offset[i+count-1][1]; + int offset; + p->frame->data[0]= p->src[0] + x1 + y1 * p->frame->linesize[0]; + p->frame->data[1]= p->src[1] + x1/2 + y1/2 * p->frame->linesize[1]; + p->frame->data[2]= p->src[2] + x1/2 + y1/2 * p->frame->linesize[2]; + + avcodec_encode_video(p->avctx_enc[i], p->outbuf, p->outbuf_size, p->frame); + p->frame_dec = p->avctx_enc[i]->coded_frame; + + offset= (BLOCK-x1) + (BLOCK-y1)*p->frame_dec->linesize[0]; + //FIXME optimize + for(y=0; y<height; y++){ + for(x=0; x<width; x++){ + p->temp[0][ x + y*p->temp_stride[0] ] += p->frame_dec->data[0][ x + y*p->frame_dec->linesize[0] + offset ]; + } + } + offset= (BLOCK/2-x1/2) + (BLOCK/2-y1/2)*p->frame_dec->linesize[1]; + for(y=0; y<height/2; y++){ + for(x=0; x<width/2; x++){ + p->temp[1][ x + y*p->temp_stride[1] ] += p->frame_dec->data[1][ x + y*p->frame_dec->linesize[1] + offset ]; + p->temp[2][ x + y*p->temp_stride[2] ] += p->frame_dec->data[2][ x + y*p->frame_dec->linesize[2] + offset ]; + } + } + } + + for(j=0; j<3; j++){ + int is_chroma= !!j; + if (!dst[j]) + continue; // HACK avoid crash for Y8 colourspace + store_slice_c(dst[j], p->temp[j], dst_stride[j], p->temp_stride[j], width>>is_chroma, height>>is_chroma, 8-p->log2_count); + } +} + +static int config(struct vf_instance *vf, + int width, int height, int d_width, int d_height, + unsigned int flags, unsigned int outfmt){ + int i; + AVCodec *enc= avcodec_find_encoder(AV_CODEC_ID_SNOW); + + for(i=0; i<3; i++){ + int is_chroma= !!i; + int w= ((width + 4*BLOCK-1) & (~(2*BLOCK-1)))>>is_chroma; + int h= ((height + 4*BLOCK-1) & (~(2*BLOCK-1)))>>is_chroma; + + vf->priv->temp_stride[i]= w; + vf->priv->temp[i]= malloc(vf->priv->temp_stride[i]*h*sizeof(int16_t)); + vf->priv->src [i]= malloc(vf->priv->temp_stride[i]*h*sizeof(uint8_t)); + } + for(i=0; i< (1<<vf->priv->log2_count); i++){ + AVCodecContext *avctx_enc; + AVDictionary *opts = NULL; + + avctx_enc= + vf->priv->avctx_enc[i]= avcodec_alloc_context3(NULL); + avctx_enc->width = width + BLOCK; + avctx_enc->height = height + BLOCK; + avctx_enc->time_base= (AVRational){1,25}; // meaningless + avctx_enc->gop_size = 300; + avctx_enc->max_b_frames= 0; + avctx_enc->pix_fmt = AV_PIX_FMT_YUV420P; + avctx_enc->flags = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY; + avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; + avctx_enc->global_quality= 123; + av_dict_set(&opts, "no_bitstream", "1", 0); + if (avcodec_open2(avctx_enc, enc, &opts) < 0) + return 0; + av_dict_free(&opts); + assert(avctx_enc->codec); + } + vf->priv->frame= av_frame_alloc(); + vf->priv->frame_dec= av_frame_alloc(); + + vf->priv->outbuf_size= (width + BLOCK)*(height + BLOCK)*10; + vf->priv->outbuf= malloc(vf->priv->outbuf_size); + + return ff_vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); +} + +static void get_image(struct vf_instance *vf, mp_image_t *mpi){ + if(mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change + // ok, we can do pp in-place (or pp disabled): + vf->dmpi=ff_vf_get_image(vf->next,mpi->imgfmt, + mpi->type, mpi->flags | MP_IMGFLAG_READABLE, mpi->width, mpi->height); + mpi->planes[0]=vf->dmpi->planes[0]; + mpi->stride[0]=vf->dmpi->stride[0]; + mpi->width=vf->dmpi->width; + if(mpi->flags&MP_IMGFLAG_PLANAR){ + mpi->planes[1]=vf->dmpi->planes[1]; + mpi->planes[2]=vf->dmpi->planes[2]; + mpi->stride[1]=vf->dmpi->stride[1]; + mpi->stride[2]=vf->dmpi->stride[2]; + } + mpi->flags|=MP_IMGFLAG_DIRECT; +} + +static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){ + mp_image_t *dmpi; + + if(!(mpi->flags&MP_IMGFLAG_DIRECT)){ + // no DR, so get a new image! hope we'll get DR buffer: + dmpi=ff_vf_get_image(vf->next,mpi->imgfmt, + MP_IMGTYPE_TEMP, + MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE, + mpi->width,mpi->height); + ff_vf_clone_mpi_attributes(dmpi, mpi); + }else{ + dmpi=vf->dmpi; + } + + vf->priv->mpeg2= mpi->qscale_type; + if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){ + if(mpi->qscale || vf->priv->qp){ + filter(vf->priv, dmpi->planes, mpi->planes, dmpi->stride, mpi->stride, mpi->w, mpi->h, mpi->qscale, mpi->qstride); + }else{ + memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]); + memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]); + memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]); + } + } + +#if HAVE_MMX + if(ff_gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t"); +#endif +#if HAVE_MMX2 + if(ff_gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t"); +#endif + + return ff_vf_next_put_image(vf,dmpi, pts); +} + +static void uninit(struct vf_instance *vf){ + int i; + if(!vf->priv) return; + + for(i=0; i<3; i++){ + free(vf->priv->temp[i]); + vf->priv->temp[i]= NULL; + free(vf->priv->src[i]); + vf->priv->src[i]= NULL; + } + for(i=0; i<BLOCK*BLOCK; i++){ + av_freep(&vf->priv->avctx_enc[i]); + } + + free(vf->priv); + vf->priv=NULL; +} + +//===========================================================================// +static int query_format(struct vf_instance *vf, unsigned int fmt){ + switch(fmt){ + case IMGFMT_YV12: + case IMGFMT_I420: + case IMGFMT_IYUV: + case IMGFMT_Y800: + case IMGFMT_Y8: + return ff_vf_next_query_format(vf,fmt); + } + return 0; +} + +static int control(struct vf_instance *vf, int request, void* data){ + switch(request){ + case VFCTRL_QUERY_MAX_PP_LEVEL: + return 8; + case VFCTRL_SET_PP_LEVEL: + vf->priv->log2_count= *((unsigned int*)data); + //FIXME we have to realloc a few things here + return CONTROL_TRUE; + } + return ff_vf_next_control(vf,request,data); +} + +static int vf_open(vf_instance_t *vf, char *args){ + + int log2c=-1; + + vf->config=config; + vf->put_image=put_image; + vf->get_image=get_image; + vf->query_format=query_format; + vf->uninit=uninit; + vf->control= control; + vf->priv=malloc(sizeof(struct vf_priv_s)); + memset(vf->priv, 0, sizeof(struct vf_priv_s)); + + ff_init_avcodec(); + + vf->priv->log2_count= 4; + + if (args) sscanf(args, "%d:%d:%d", &log2c, &vf->priv->qp, &vf->priv->mode); + + if( log2c >=0 && log2c <=8 ) + vf->priv->log2_count = log2c; + + if(vf->priv->qp < 0) + vf->priv->qp = 0; + +// #if HAVE_MMX +// if(ff_gCpuCaps.hasMMX){ +// store_slice= store_slice_mmx; +// } +// #endif + + return 1; +} + +const vf_info_t ff_vf_info_uspp = { + "ultra simple/slow postprocess", + "uspp", + "Michael Niedermayer", + "", + vf_open, + NULL +}; diff --git a/libavfilter/libmpcodecs/vfcap.h b/libavfilter/libmpcodecs/vfcap.h new file mode 100644 index 0000000000..611d642869 --- /dev/null +++ b/libavfilter/libmpcodecs/vfcap.h @@ -0,0 +1,56 @@ +/* VFCAP_* values: they are flags, returned by query_format(): + * + * This file is part of MPlayer. + * + * MPlayer is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * MPlayer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with MPlayer; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPLAYER_VFCAP_H +#define MPLAYER_VFCAP_H + +// set, if the given colorspace is supported (with or without conversion) +#define VFCAP_CSP_SUPPORTED 0x1 +// set, if the given colorspace is supported _without_ conversion +#define VFCAP_CSP_SUPPORTED_BY_HW 0x2 +// set if the driver/filter can draw OSD +#define VFCAP_OSD 0x4 +// set if the driver/filter can handle compressed SPU stream +#define VFCAP_SPU 0x8 +// scaling up/down by hardware, or software: +#define VFCAP_HWSCALE_UP 0x10 +#define VFCAP_HWSCALE_DOWN 0x20 +#define VFCAP_SWSCALE 0x40 +// driver/filter can do vertical flip (upside-down) +#define VFCAP_FLIP 0x80 + +// driver/hardware handles timing (blocking) +#define VFCAP_TIMER 0x100 +// driver _always_ flip image upside-down (for ve_vfw) +#define VFCAP_FLIPPED 0x200 +// vf filter: accepts stride (put_image) +// vo driver: has draw_slice() support for the given csp +#define VFCAP_ACCEPT_STRIDE 0x400 +// filter does postprocessing (so you shouldn't scale/filter image before it) +#define VFCAP_POSTPROC 0x800 +// filter cannot be reconfigured to different size & format +#define VFCAP_CONSTANT 0x1000 +// filter can draw EOSD +#define VFCAP_EOSD 0x2000 +// filter will draw EOSD at screen resolution (without scaling) +#define VFCAP_EOSD_UNSCALED 0x4000 +// used by libvo and vf_vo, indicates the VO does not support draw_slice for this format +#define VOCAP_NOSLICES 0x8000 + +#endif /* MPLAYER_VFCAP_H */ |