From 502ab21af0ca68f76d6112722c46d2f35c004053 Mon Sep 17 00:00:00 2001
From: Loren Merritt
Date: Tue, 18 Jun 2013 21:30:42 +0000
Subject: x86: lpc: simd av_update_lls

4x-6x faster on sandybridge

Signed-off-by: Luca Barbato
---
 libavutil/lls.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'libavutil/lls.h')

diff --git a/libavutil/lls.h b/libavutil/lls.h
index 81834402f7..27c0d5e3fe 100644
--- a/libavutil/lls.h
+++ b/libavutil/lls.h
@@ -23,9 +23,12 @@
 #ifndef AVUTIL_LLS_H
 #define AVUTIL_LLS_H
 
+#include "common.h"
+#include "mem.h"
 #include "version.h"
 
 #define MAX_VARS 32
+#define MAX_VARS_ALIGN FFALIGN(MAX_VARS+1,4)
 
 //FIXME avoid direct access to LLSModel from outside
 
@@ -33,26 +36,29 @@
  * Linear least squares model.
  */
 typedef struct LLSModel {
-    double covariance[MAX_VARS + 1][MAX_VARS + 1];
-    double coeff[MAX_VARS][MAX_VARS];
+    DECLARE_ALIGNED(32, double, covariance[MAX_VARS_ALIGN][MAX_VARS_ALIGN]);
+    DECLARE_ALIGNED(32, double, coeff[MAX_VARS][MAX_VARS]);
     double variance[MAX_VARS];
     int indep_count;
     /**
      * Take the outer-product of var[] with itself, and add to the covariance matrix.
      * @param m this context
      * @param var training samples, starting with the value to be predicted
+     *            32-byte aligned, and any padding elements must be initialized
+     *            (i.e not denormal/nan).
      */
     void (*update_lls)(struct LLSModel *m, double *var);
     /**
      * Inner product of var[] and the LPC coefs.
      * @param m this context
-     * @param var training samples, excluding the value to be predicted
+     * @param var training samples, excluding the value to be predicted. unaligned.
      * @param order lpc order
      */
     double (*evaluate_lls)(struct LLSModel *m, double *var, int order);
 } LLSModel;
 
 void avpriv_init_lls(LLSModel *m, int indep_count);
+void ff_init_lls_x86(LLSModel *m);
 void avpriv_solve_lls(LLSModel *m, double threshold, unsigned short min_order);
 
 #if FF_API_LLS_PRIVATE
-- 
cgit v1.2.3