summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- libavcodec/dsputil.c 5
-rw-r--r-- libavcodec/dsputil.h 9
-rw-r--r-- libavcodec/i386/mmx.h 2
-rw-r--r-- libavcodec/snow.c 184
-rw-r--r-- libavcodec/snow.h 123
5 files changed, 190 insertions, 133 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 5a518f4c8c..6bc59db9fa 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -30,6 +30,7 @@
#include "mpegvideo.h"
#include "simple_idct.h"
#include "faandct.h"
+#include "snow.h"
/* snow.c */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
@@ -4047,6 +4048,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->try_8x8basis= try_8x8basis_c;
c->add_8x8basis= add_8x8basis_c;
+ c->vertical_compose97i = ff_snow_vertical_compose97i;
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i;
+ c->inner_add_yblock = ff_snow_inner_add_yblock;
+
#ifdef HAVE_MMX
dsputil_init_mmx(c, avctx);
#endif
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index e165eede2d..8fe9be7798 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -35,6 +35,7 @@
//#define DEBUG
/* dct code */
typedef short DCTELEM;
+typedef int DWTELEM; /* element type of the snow wavelet buffers; takes over from the "#define DWTELEM int" that this patch drops from snow.c */
void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data);
@@ -133,6 +134,9 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
+// for snow slices
+typedef struct slice_buffer_s slice_buffer;
+
/**
* DSPContext.
*/
@@ -334,6 +338,11 @@ typedef struct DSPContext {
void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+
+ /* snow wavelet */
+ void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
+ void (*horizontal_compose97i)(DWTELEM *b, int width);
+ void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
} DSPContext;
void dsputil_static_init(void);
diff --git a/libavcodec/i386/mmx.h b/libavcodec/i386/mmx.h
index df4620e0ab..df1791823b 100644
--- a/libavcodec/i386/mmx.h
+++ b/libavcodec/i386/mmx.h
@@ -12,6 +12,7 @@
# define REG_d "rdx"
# define REG_D "rdi"
# define REG_S "rsi"
+# define PTR_SIZE "8"
#else
# define REG_a "eax"
# define REG_b "ebx"
@@ -19,6 +20,7 @@
# define REG_d "edx"
# define REG_D "edi"
# define REG_S "esi"
+# define PTR_SIZE "4"
#endif
/*
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index b22d8b8a74..8fdc956c2b 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -19,23 +19,15 @@
#include "avcodec.h"
#include "common.h"
#include "dsputil.h"
+#include "snow.h"
#include "rangecoder.h"
-#define MID_STATE 128
#include "mpegvideo.h"
#undef NDEBUG
#include <assert.h>
-#define MAX_DECOMPOSITIONS 8
-#define MAX_PLANES 4
-#define DWTELEM int
-#define QSHIFT 5
-#define QROOT (1<<QSHIFT)
-#define LOSSLESS_QLOG -128
-#define FRAC_BITS 8
-
static const int8_t quant3[256]={
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -181,8 +173,6 @@ static const int8_t quant13[256]={
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
};
-#define LOG2_OBMC_MAX 6
-#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
#if 0 //64*cubic
static const uint8_t obmc32[1024]={
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -425,17 +415,6 @@ typedef struct Plane{
SubBand band[MAX_DECOMPOSITIONS][4];
}Plane;
-/** Used to minimize the amount of memory used in order to optimize cache performance. **/
-typedef struct {
- DWTELEM * * line; ///< For use by idwt and predict_slices.
- DWTELEM * * data_stack; ///< Used for internal purposes.
- int data_stack_top;
- int line_count;
- int line_width;
- int data_count;
- DWTELEM * base_buffer; ///< Buffer that this structure is caching.
-} slice_buffer;
-
typedef struct SnowContext{
// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
@@ -741,6 +720,7 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst
}
}
+#ifndef lift5
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
const int mirror_left= !highpass;
const int mirror_right= (width&1) ^ highpass;
@@ -770,7 +750,9 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds
dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
}
}
+#endif
+#ifndef liftS
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
const int mirror_left= !highpass;
const int mirror_right= (width&1) ^ highpass;
@@ -793,6 +775,7 @@ static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds
dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
}
}
+#endif
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
@@ -1111,76 +1094,6 @@ STOP_TIMER("vertical_decompose53i*")}
}
}
-#define liftS lift
-#define lift5 lift
-#if 1
-#define W_AM 3
-#define W_AO 0
-#define W_AS 1
-
-#undef liftS
-#define W_BM 1
-#define W_BO 8
-#define W_BS 4
-
-#define W_CM 1
-#define W_CO 0
-#define W_CS 0
-
-#define W_DM 3
-#define W_DO 4
-#define W_DS 3
-#elif 0
-#define W_AM 55
-#define W_AO 16
-#define W_AS 5
-
-#define W_BM 3
-#define W_BO 32
-#define W_BS 6
-
-#define W_CM 127
-#define W_CO 64
-#define W_CS 7
-
-#define W_DM 7
-#define W_DO 8
-#define W_DS 4
-#elif 0
-#define W_AM 97
-#define W_AO 32
-#define W_AS 6
-
-#define W_BM 63
-#define W_BO 512
-#define W_BS 10
-
-#define W_CM 13
-#define W_CO 8
-#define W_CS 4
-
-#define W_DM 15
-#define W_DO 16
-#define W_DS 5
-
-#else
-
-#define W_AM 203
-#define W_AO 64
-#define W_AS 7
-
-#define W_BM 217
-#define W_BO 2048
-#define W_BS 12
-
-#define W_CM 113
-#define W_CO 64
-#define W_CS 7
-
-#define W_DM 227
-#define W_DO 128
-#define W_DS 9
-#endif
static void horizontal_decompose97i(DWTELEM *b, int width){
DWTELEM temp[width];
const int w2= (width+1)>>1;
@@ -1410,7 +1323,7 @@ static void spatial_compose53i(DWTELEM *buffer, int width, int height, int strid
}
-static void horizontal_compose97i(DWTELEM *b, int width){
+void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
DWTELEM temp[width];
const int w2= (width+1)>>1;
@@ -1463,7 +1376,7 @@ static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
}
}
-static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
+void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
int i;
for(i=0; i<width; i++){
@@ -1504,7 +1417,7 @@ static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int heig
cs->y = -3;
}
-static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
+static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
int y = cs->y;
DWTELEM *b0= cs->b0;
@@ -1516,7 +1429,7 @@ static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb,
{START_TIMER
if(y>0 && y+4<height){
- vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
+ dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
}else{
if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
@@ -1527,8 +1440,8 @@ if(width>400){
STOP_TIMER("vertical_compose97i")}}
{START_TIMER
- if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
- if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
+ if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
+ if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
if(width>400 && y+0<(unsigned)height){
STOP_TIMER("horizontal_compose97i")}}
@@ -1557,8 +1470,8 @@ if(width>400){
STOP_TIMER("vertical_compose97i")}}
{START_TIMER
- if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
- if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
+ if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
+ if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
if(width>400 && b0 <= b2){
STOP_TIMER("horizontal_compose97i")}}
@@ -1619,7 +1532,7 @@ static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width,
}
}
-static void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
+static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
const int support = type==1 ? 3 : 5;
int level;
if(type==2) return;
@@ -1627,7 +1540,7 @@ static void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * sli
for(level=decomposition_count-1; level>=0; level--){
while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
switch(type){
- case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
+ case 0: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
break;
case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
break;
@@ -2545,6 +2458,40 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp,
}
}
+void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, /* blends the four predictions block[0..3] with the obmc weights over a b_w x b_h area */
+ int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ /* add!=0: sum with the sb lines, round, clip, store to dst8; add==0: subtract the blend from the sb lines */
+ int y, x;
+ DWTELEM * dst; /* slice-buffer line for row src_y + y, fetched anew for every row */
+ for(y=0; y<b_h; y++){
+ //FIXME ugly misuse of obmc_stride (it also sets the vertical distance to obmc3/obmc4 below)
+ uint8_t *obmc1= obmc + y*obmc_stride; // weight row y at the position passed in
+ uint8_t *obmc2= obmc1+ (obmc_stride>>1); // same row, half a stride further
+ uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); // obmc_stride/2 rows further down
+ uint8_t *obmc4= obmc3+ (obmc_stride>>1); // obmc_stride/2 rows down and half a stride further
+ dst = slice_buffer_get_line(sb, src_y + y);
+ for(x=0; x<b_w; x++){
+ int v= obmc1[x] * block[3][x + y*src_stride] // weighted sum; note the reversed pairing: obmc1 goes with block[3] ... obmc4 with block[0]
+ +obmc2[x] * block[2][x + y*src_stride]
+ +obmc3[x] * block[1][x + y*src_stride]
+ +obmc4[x] * block[0][x + y*src_stride];
+
+ v <<= 8 - LOG2_OBMC_MAX; // scale the sum up by 2^(8 - LOG2_OBMC_MAX)
+ if(FRAC_BITS != 8){ // never taken while FRAC_BITS is 8; otherwise rounds and shifts down by 8 - FRAC_BITS
+ v += 1<<(7 - FRAC_BITS);
+ v >>= 8 - FRAC_BITS;
+ }
+ if(add){
+ v += dst[x + src_x]; // combine with the value already stored in the slice-buffer line
+ v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; // round and drop the FRAC_BITS fractional bits
+ if(v&(~255)) v= ~(v>>31); // outside 0..255: negative becomes 0, too large becomes all ones (255 once stored as uint8_t); assumes 32-bit int with arithmetic right shift
+ dst8[x + y*src_stride] = v; // the 8-bit output is indexed with src_stride, like block[]
+ }else{
+ dst[x + src_x] -= v; // take the blended prediction out of the slice-buffer line
+ }
+ }
+ }
+}
+
//FIXME name clenup (b_w, block_w, b_width stuff)
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
DWTELEM * dst = NULL;
@@ -2669,36 +2616,7 @@ assert(src_stride > 2*MB_SIZE + 5);
START_TIMER
- for(y=0; y<b_h; y++){
- //FIXME ugly missue of obmc_stride
- uint8_t *obmc1= obmc + y*obmc_stride;
- uint8_t *obmc2= obmc1+ (obmc_stride>>1);
- uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
- uint8_t *obmc4= obmc3+ (obmc_stride>>1);
- dst = slice_buffer_get_line(sb, src_y + y);
- for(x=0; x<b_w; x++){
- int v= obmc1[x] * block[3][x + y*src_stride]
- +obmc2[x] * block[2][x + y*src_stride]
- +obmc3[x] * block[1][x + y*src_stride]
- +obmc4[x] * block[0][x + y*src_stride];
-
- v <<= 8 - LOG2_OBMC_MAX;
- if(FRAC_BITS != 8){
- v += 1<<(7 - FRAC_BITS);
- v >>= 8 - FRAC_BITS;
- }
- if(add){
-// v += old_dst[x + y*dst_stride];
- v += dst[x + src_x];
- v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
- if(v&(~255)) v= ~(v>>31);
- dst8[x + y*src_stride] = v;
- }else{
-// old_dst[x + y*dst_stride] -= v;
- dst[x + src_x] -= v;
- }
- }
- }
+ s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
STOP_TIMER("Inner add y block")
}
#endif
@@ -4399,7 +4317,7 @@ if(s->avctx->debug&2048){
{ START_TIMER
for(; yd<slice_h; yd+=4){
- ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
+ ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
}
STOP_TIMER("idwt slice");}
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
new file mode 100644
index 0000000000..11fd15a295
--- /dev/null
+++ b/libavcodec/snow.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SNOW_H /* include guard; the former _SNOW_H spelling (underscore + uppercase letter) is reserved for the implementation */
+#define AVCODEC_SNOW_H
+
+#include "dsputil.h"
+
+#define MID_STATE 128 /* NOTE(review): presumably the initial range-coder context state -- confirm in snow.c */
+
+#define MAX_DECOMPOSITIONS 8 /* first dimension of Plane.band[][4] in snow.c */
+#define MAX_PLANES 4
+#define QSHIFT 5
+#define QROOT (1<<QSHIFT) /* 32 */
+#define LOSSLESS_QLOG -128 /* NOTE(review): presumably the qlog value that selects lossless mode -- confirm in snow.c */
+#define FRAC_BITS 8 /* ff_snow_inner_add_yblock rounds and shifts right by this amount before storing 8-bit output */
+
+#define LOG2_OBMC_MAX 6 /* ff_snow_inner_add_yblock shifts its weighted sum left by 8 - LOG2_OBMC_MAX */
+#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) /* 64 */
+
+/** Used to minimize the amount of memory used in order to optimize cache performance. Completes the slice_buffer type that dsputil.h only forward-declares. **/
+struct slice_buffer_s {
+ DWTELEM * * line; ///< For use by idwt and predict_slices.
+ DWTELEM * * data_stack; ///< Used for internal purposes.
+ int data_stack_top; ///< NOTE(review): presumably the index of the top entry of data_stack -- confirm in snow.c
+ int line_count; ///< NOTE(review): presumably the number of entries in line -- confirm in snow.c
+ int line_width; ///< NOTE(review): presumably the length of one line in DWTELEM units -- confirm in snow.c
+ int data_count; ///< NOTE(review): presumably the number of buffers managed through data_stack -- confirm in snow.c
+ DWTELEM * base_buffer; ///< Buffer that this structure is caching.
+};
+
+#define liftS lift /* default: liftS() calls go to the generic lift(); undone again in the "#if 1" branch below */
+#define lift5 lift /* lift5() calls go to the generic lift(); snow.c therefore skips its own lift5 (guarded by #ifndef lift5) */
+#if 1 /* active coefficient set; NOTE(review): W_xM/W_xO/W_xS are presumably the mul/add/shift arguments of lifting steps A-D -- confirm at the call sites */
+#define W_AM 3
+#define W_AO 0
+#define W_AS 1
+
+#undef liftS /* with this set snow.c compiles its dedicated liftS (guarded by #ifndef liftS) instead of aliasing lift */
+#define W_BM 1
+#define W_BO 8
+#define W_BS 4
+
+#define W_CM 1
+#define W_CO 0
+#define W_CS 0
+
+#define W_DM 3
+#define W_DO 4
+#define W_DS 3
+#elif 0 /* disabled alternative coefficient set */
+#define W_AM 55
+#define W_AO 16
+#define W_AS 5
+
+#define W_BM 3
+#define W_BO 32
+#define W_BS 6
+
+#define W_CM 127
+#define W_CO 64
+#define W_CS 7
+
+#define W_DM 7
+#define W_DO 8
+#define W_DS 4
+#elif 0 /* disabled alternative coefficient set */
+#define W_AM 97
+#define W_AO 32
+#define W_AS 6
+
+#define W_BM 63
+#define W_BO 512
+#define W_BS 10
+
+#define W_CM 13
+#define W_CO 8
+#define W_CS 4
+
+#define W_DM 15
+#define W_DO 16
+#define W_DS 5
+
+#else /* never selected while the first branch is "#if 1" */
+
+#define W_AM 203
+#define W_AO 64
+#define W_AS 7
+
+#define W_BM 217
+#define W_BO 2048
+#define W_BS 12
+
+#define W_CM 113
+#define W_CO 64
+#define W_CS 7
+
+#define W_DM 227
+#define W_DO 128
+#define W_DS 9
+#endif
+
+extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); /* defined in snow.c; dsputil_init() stores it in DSPContext.vertical_compose97i */
+extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width); /* defined in snow.c; dsputil_init() stores it in DSPContext.horizontal_compose97i */
+extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); /* defined in snow.c; dsputil_init() stores it in DSPContext.inner_add_yblock */
+
+#endif