summaryrefslogtreecommitdiff
path: root/libavcodec/x86/snowdsp_mmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/snowdsp_mmx.c')
-rw-r--r-- libavcodec/x86/snowdsp_mmx.c 37
1 files changed, 31 insertions, 6 deletions
diff --git a/libavcodec/x86/snowdsp_mmx.c b/libavcodec/x86/snowdsp_mmx.c
index 1e1d2e22d8..263f0bbf69 100644
--- a/libavcodec/x86/snowdsp_mmx.c
+++ b/libavcodec/x86/snowdsp_mmx.c
@@ -22,9 +22,10 @@
#include "libavutil/x86_cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/snow.h"
+#include "libavcodec/dwt.h"
#include "dsputil_mmx.h"
-void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
+static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
const int w2= (width+1)>>1;
DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1];
const int w_l= (width>>1);
@@ -213,7 +214,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
}
}
-void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
+static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
const int w2= (width+1)>>1;
IDWTELEM temp[width >> 1];
const int w_l= (width>>1);
@@ -436,7 +437,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
"movdqa %%"s2", %%"t2" \n\t"\
"movdqa %%"s3", %%"t3" \n\t"
-void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
+static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
x86_reg i = width;
while(i & 0x1F)
@@ -534,7 +535,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
"movq %%"s3", %%"t3" \n\t"
-void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
+static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
x86_reg i = width;
while(i & 15)
{
@@ -847,7 +848,7 @@ snow_inner_add_yblock_mmx_mix("16", "8")
snow_inner_add_yblock_mmx_end("32")
}
-void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
if (b_w == 16)
@@ -861,7 +862,7 @@ void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, u
ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
}
-void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
if (b_w == 16)
inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
@@ -870,3 +871,27 @@ void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, ui
else
ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
}
+/**
+ * Install the x86 SIMD routines defined in this file into a DWT context.
+ *
+ * Stores the result of mm_support() in mm_flags and then overwrites function
+ * pointers in c depending on which flag bits are set:
+ *  - FF_MM_MMX clear: c is not modified at all.
+ *  - FF_MM_MMX set: c->inner_add_yblock becomes ff_snow_inner_add_yblock_mmx.
+ *  - FF_MM_MMX and FF_MM_MMX2 set: c->horizontal_compose97i, and when
+ *    HAVE_7REGS is true also c->vertical_compose97i, become the _mmx versions.
+ * The SSE2 assignments are unreachable as written, because the SSE2 test is
+ * masked with a constant 0.
+ *
+ * @param c context whose horizontal_compose97i, vertical_compose97i and
+ *          inner_add_yblock pointers may be replaced; no other member is
+ *          touched by this function
+ */
+void ff_dwt_init_x86(DWTContext *c)
+{
+ mm_flags = mm_support(); /* mm_flags is not declared in this function; presumably a shared global -- TODO confirm */
+
+ if (mm_flags & FF_MM_MMX) { /* every assignment below requires the MMX bit */
+ if(mm_flags & FF_MM_SSE2 & 0){ /* the trailing & 0 forces this to zero, so this branch never runs; NOTE(review): looks like a deliberate disable of the SSE2 path -- confirm the reason */
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
+#if HAVE_7REGS
+ c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
+#endif
+ c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
+ }
+ else{ /* always taken, since the condition above is constant zero */
+ if(mm_flags & FF_MM_MMX2){ /* the _mmx compose routines are installed only when the MMX2 bit is also set */
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
+#if HAVE_7REGS
+ c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
+#endif
+ }
+ c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; /* installed whenever MMX is reported, with or without MMX2 */
+ }
+ }
+}