summaryrefslogtreecommitdiff
path: root/libswscale/ppc/swscale_ppc_template.c
diff options
context:
space:
mode:
author: Lauri Kasanen <cand@gmx.com> 2019-01-13 10:26:20 +0200
committer: Lauri Kasanen <cand@gmx.com> 2019-02-05 09:34:53 +0200
commit: 8522d219ce805ce69ff302f259e6f083fdb4887c (patch)
tree: 676d1b3db053d630f59b8dd3cac32ccf41dd5bcd /libswscale/ppc/swscale_ppc_template.c
parent: fc6022e1088df068b72159e8836c59643795e39e (diff)
libswscale/ppc: VSX-optimize 9-16 bit yuv2planeX
./ffmpeg_g -f rawvideo -pix_fmt rgb24 -s hd1080 -i /dev/zero -pix_fmt yuv420p16be \
    -s 1920x1728 -f null -vframes 100 -v error -nostats -

9-14 bit funcs get about 6x speedup, 16-bit gets about 15x.
Fate passes, each format tested with an image to video conversion.

Only POWER8 includes 32-bit vector multiplies, so POWER7 is locked out
of the 16-bit function. This includes the vec_mulo/mule functions too,
not just vmuluwm.

With TIMER_REPORT skips disabled:

yuv420p9le
  12412 UNITS in planarX, 131072 runs, 0 skips
  73136 UNITS in planarX, 131072 runs, 0 skips
yuv420p9be
  12481 UNITS in planarX, 131072 runs, 0 skips
  73410 UNITS in planarX, 131072 runs, 0 skips
yuv420p10le
  12322 UNITS in planarX, 131072 runs, 0 skips
  72546 UNITS in planarX, 131072 runs, 0 skips
yuv420p10be
  12291 UNITS in planarX, 131072 runs, 0 skips
  72935 UNITS in planarX, 131072 runs, 0 skips
yuv420p12le
  12316 UNITS in planarX, 131072 runs, 0 skips
  72708 UNITS in planarX, 131072 runs, 0 skips
yuv420p12be
  12319 UNITS in planarX, 131072 runs, 0 skips
  72577 UNITS in planarX, 131072 runs, 0 skips
yuv420p14le
  12259 UNITS in planarX, 131072 runs, 0 skips
  72516 UNITS in planarX, 131072 runs, 0 skips
yuv420p14be
  12440 UNITS in planarX, 131072 runs, 0 skips
  72962 UNITS in planarX, 131072 runs, 0 skips
yuv420p16le
  10548 UNITS in planarX, 131072 runs, 0 skips
  73429 UNITS in planarX, 131072 runs, 0 skips
yuv420p16be
  10634 UNITS in planarX, 131072 runs, 0 skips
  150959 UNITS in planarX, 131072 runs, 0 skips

Signed-off-by: Lauri Kasanen <cand@gmx.com>
Diffstat (limited to 'libswscale/ppc/swscale_ppc_template.c')
-rw-r--r-- libswscale/ppc/swscale_ppc_template.c | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/libswscale/ppc/swscale_ppc_template.c b/libswscale/ppc/swscale_ppc_template.c
index 00e4b99e00..11decab0b9 100644
--- a/libswscale/ppc/swscale_ppc_template.c
+++ b/libswscale/ppc/swscale_ppc_template.c
@@ -21,7 +21,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-static void FUNC(yuv2planeX_16)(const int16_t *filter, int filterSize,
+static void FUNC(yuv2planeX_8_16)(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest,
const uint8_t *dither, int offset, int x)
{
@@ -88,7 +88,7 @@ static void FUNC(yuv2planeX)(const int16_t *filter, int filterSize,
yuv2planeX_u(filter, filterSize, src, dest, dst_u, dither, offset, 0);
for (i = dst_u; i < dstW - 15; i += 16)
- FUNC(yuv2planeX_16)(filter, filterSize, src, dest + i, dither,
+ FUNC(yuv2planeX_8_16)(filter, filterSize, src, dest + i, dither,
offset, i);
yuv2planeX_u(filter, filterSize, src, dest, dstW, dither, offset, i);