diff options
author | Anton Khirnov <anton@khirnov.net> | 2016-03-11 20:00:45 +0100 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2016-08-28 10:18:38 +0200 |
commit | 95867280ab72ce6ade22f2cf7fcbc5b47b6cc95d (patch) | |
tree | 60c555a283b8d31d95d6e4583539cb7f4b7705dc | |
parent | a10a89ae75b6215349aebc9257c56d2f79597f6e (diff) |
solve: export aligned coefficients
-rw-r--r-- | init.c | 2 | ||||
-rw-r--r-- | internal.h | 6 | ||||
-rw-r--r-- | solve.c | 28 |
3 files changed, 31 insertions, 5 deletions
@@ -75,7 +75,7 @@ int bd_solve(BDContext *bd) return ret; bd->psi_minus1_coeffs = s->coeffs; - bd->stride = s->nb_coeffs[0]; + bd->stride = s->coeffs_stride; return 0; } @@ -18,12 +18,17 @@ #ifndef BRILL_DATA_INTERNAL_H #define BRILL_DATA_INTERNAL_H +#include <stddef.h> + #include "brill_data.h" #define MAX(x, y) ((x) > (y) ? (x) : (y)) #define SQR(x) ((x) * (x)) #define ARRAY_ELEMS(x) (sizeof(x) / sizeof(x[0])) +#define ALIGN(x, a) (((x)+(a)-1)&~((a)-1)) + +#define REQ_ALIGNMENT(x) (32 / sizeof(x)) /* * small number to avoid r=0 singularities @@ -62,6 +67,7 @@ typedef struct BDPriv { int nb_coeffs[2]; double *coeffs; + ptrdiff_t coeffs_stride; } BDPriv; #define NB_COEFFS(s) (s->nb_coeffs[0] * s->nb_coeffs[1]) @@ -169,6 +169,26 @@ static int brill_solve_svd(const BDContext *bd, double *mat, return 0; } +static int brill_export_coeffs(BDPriv *s, const double *coeffs) +{ + int alignment = REQ_ALIGNMENT(*coeffs); + int nb_coeffs_aligned[2] = { ALIGN(s->nb_coeffs[0], alignment), ALIGN(s->nb_coeffs[0], alignment) }; + + int ret, i; + + ret = posix_memalign((void**)&s->coeffs, REQ_ALIGNMENT(char), nb_coeffs_aligned[0] * nb_coeffs_aligned[1] * sizeof(*s->coeffs)); + if (ret) + return -ret; + + memset(s->coeffs, 0, nb_coeffs_aligned[0] * nb_coeffs_aligned[1] * sizeof(*s->coeffs)); + for (i = 0; i < s->nb_coeffs[1]; i++) + memcpy(s->coeffs + i * nb_coeffs_aligned[0], coeffs + i * s->nb_coeffs[0], sizeof(*coeffs) * s->nb_coeffs[0]); + + s->coeffs_stride = nb_coeffs_aligned[0]; + + return 0; +} + /* * Solve the equation * Δψ + ¼ ψ (∂²q/∂r² + ∂²q/∂z²) = 0 @@ -213,12 +233,12 @@ int bdi_solve(BDContext *bd) ret = brill_solve_linear(bd, mat, &coeffs, &rhs); /* export the result */ - s->coeffs = coeffs; - -fail: + ret = brill_export_coeffs(s, coeffs); if (ret < 0) - free(coeffs); + goto fail; +fail: + free(coeffs); free(rhs); free(mat); |