From 95867280ab72ce6ade22f2cf7fcbc5b47b6cc95d Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 11 Mar 2016 20:00:45 +0100 Subject: solve: export aligned coefficients --- init.c | 2 +- internal.h | 6 ++++++ solve.c | 28 ++++++++++++++++++++++++---- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/init.c b/init.c index 6ea45de..292c555 100644 --- a/init.c +++ b/init.c @@ -75,7 +75,7 @@ int bd_solve(BDContext *bd) return ret; bd->psi_minus1_coeffs = s->coeffs; - bd->stride = s->nb_coeffs[0]; + bd->stride = s->coeffs_stride; return 0; } diff --git a/internal.h b/internal.h index e0cbf60..19e6d7f 100644 --- a/internal.h +++ b/internal.h @@ -18,12 +18,17 @@ #ifndef BRILL_DATA_INTERNAL_H #define BRILL_DATA_INTERNAL_H +#include + #include "brill_data.h" #define MAX(x, y) ((x) > (y) ? (x) : (y)) #define SQR(x) ((x) * (x)) #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(x[0])) +#define ALIGN(x, a) (((x)+(a)-1)&~((a)-1)) + +#define REQ_ALIGNMENT(x) (32 / sizeof(x)) /* * small number to avoid r=0 singularities @@ -62,6 +67,7 @@ typedef struct BDPriv { int nb_coeffs[2]; double *coeffs; + ptrdiff_t coeffs_stride; } BDPriv; #define NB_COEFFS(s) (s->nb_coeffs[0] * s->nb_coeffs[1]) diff --git a/solve.c b/solve.c index b9b26a2..64dcb55 100644 --- a/solve.c +++ b/solve.c @@ -169,6 +169,26 @@ static int brill_solve_svd(const BDContext *bd, double *mat, return 0; } +static int brill_export_coeffs(BDPriv *s, const double *coeffs) +{ + int alignment = REQ_ALIGNMENT(*coeffs); + int nb_coeffs_aligned[2] = { ALIGN(s->nb_coeffs[0], alignment), ALIGN(s->nb_coeffs[0], alignment) }; + + int ret, i; + + ret = posix_memalign((void**)&s->coeffs, REQ_ALIGNMENT(char), nb_coeffs_aligned[0] * nb_coeffs_aligned[1] * sizeof(*s->coeffs)); + if (ret) + return -ret; + + memset(s->coeffs, 0, nb_coeffs_aligned[0] * nb_coeffs_aligned[1] * sizeof(*s->coeffs)); + for (i = 0; i < s->nb_coeffs[1]; i++) + memcpy(s->coeffs + i * nb_coeffs_aligned[0], coeffs + i * s->nb_coeffs[0], sizeof(*coeffs) * s->nb_coeffs[0]); + + s->coeffs_stride = nb_coeffs_aligned[0]; + + return 0; +} + /* * Solve the equation * Δψ + ¼ ψ (∂²q/∂r² + ∂²q/∂z²) = 0 @@ -213,12 +233,12 @@ int bdi_solve(BDContext *bd) ret = brill_solve_linear(bd, mat, &coeffs, &rhs); /* export the result */ - s->coeffs = coeffs; - -fail: + ret = brill_export_coeffs(s, coeffs); if (ret < 0) - free(coeffs); + goto fail; +fail: + free(coeffs); free(rhs); free(mat); -- cgit v1.2.3