aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2016-03-11 20:00:45 +0100
committerAnton Khirnov <anton@khirnov.net>2016-08-28 10:18:38 +0200
commit95867280ab72ce6ade22f2cf7fcbc5b47b6cc95d (patch)
tree60c555a283b8d31d95d6e4583539cb7f4b7705dc
parenta10a89ae75b6215349aebc9257c56d2f79597f6e (diff)
solve: export aligned coefficients
-rw-r--r--init.c2
-rw-r--r--internal.h6
-rw-r--r--solve.c28
3 files changed, 31 insertions, 5 deletions
diff --git a/init.c b/init.c
index 6ea45de..292c555 100644
--- a/init.c
+++ b/init.c
@@ -75,7 +75,7 @@ int bd_solve(BDContext *bd)
return ret;
bd->psi_minus1_coeffs = s->coeffs;
- bd->stride = s->nb_coeffs[0];
+ bd->stride = s->coeffs_stride;
return 0;
}
diff --git a/internal.h b/internal.h
index e0cbf60..19e6d7f 100644
--- a/internal.h
+++ b/internal.h
@@ -18,12 +18,17 @@
#ifndef BRILL_DATA_INTERNAL_H
#define BRILL_DATA_INTERNAL_H
+#include <stddef.h>
+
#include "brill_data.h"
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define SQR(x) ((x) * (x))
#define ARRAY_ELEMS(x) (sizeof(x) / sizeof(x[0]))
+#define ALIGN(x, a) (((x)+(a)-1)&~((a)-1))
+
+#define REQ_ALIGNMENT(x) (32 / sizeof(x))
/*
* small number to avoid r=0 singularities
@@ -62,6 +67,7 @@ typedef struct BDPriv {
int nb_coeffs[2];
double *coeffs;
+ ptrdiff_t coeffs_stride;
} BDPriv;
#define NB_COEFFS(s) (s->nb_coeffs[0] * s->nb_coeffs[1])
diff --git a/solve.c b/solve.c
index b9b26a2..64dcb55 100644
--- a/solve.c
+++ b/solve.c
@@ -169,6 +169,26 @@ static int brill_solve_svd(const BDContext *bd, double *mat,
return 0;
}
+static int brill_export_coeffs(BDPriv *s, const double *coeffs)
+{
+ int alignment = REQ_ALIGNMENT(*coeffs);
+ int nb_coeffs_aligned[2] = { ALIGN(s->nb_coeffs[0], alignment), ALIGN(s->nb_coeffs[0], alignment) };
+
+ int ret, i;
+
+ ret = posix_memalign((void**)&s->coeffs, REQ_ALIGNMENT(char), nb_coeffs_aligned[0] * nb_coeffs_aligned[1] * sizeof(*s->coeffs));
+ if (ret)
+ return -ret;
+
+ memset(s->coeffs, 0, nb_coeffs_aligned[0] * nb_coeffs_aligned[1] * sizeof(*s->coeffs));
+ for (i = 0; i < s->nb_coeffs[1]; i++)
+ memcpy(s->coeffs + i * nb_coeffs_aligned[0], coeffs + i * s->nb_coeffs[0], sizeof(*coeffs) * s->nb_coeffs[0]);
+
+ s->coeffs_stride = nb_coeffs_aligned[0];
+
+ return 0;
+}
+
/*
* Solve the equation
* Δψ + ¼ ψ (∂²q/∂r² + ∂²q/∂z²) = 0
@@ -213,12 +233,12 @@ int bdi_solve(BDContext *bd)
ret = brill_solve_linear(bd, mat, &coeffs, &rhs);
/* export the result */
- s->coeffs = coeffs;
-
-fail:
+ ret = brill_export_coeffs(s, coeffs);
if (ret < 0)
- free(coeffs);
+ goto fail;
+fail:
+ free(coeffs);
free(rhs);
free(mat);