aboutsummaryrefslogtreecommitdiff
path: root/init.c
diff options
context:
space:
mode:
authorAnton Khirnov <anton@khirnov.net>2018-02-09 15:34:25 +0100
committerAnton Khirnov <anton@khirnov.net>2018-02-09 15:36:14 +0100
commit234e722c5abfb15c9850b4224cabbb3e2d0c3657 (patch)
treefafd3c0afc7d4c8a6f948069d7f4daf2722dea24 /init.c
parent5981560b3b54e1c129a0b14c9b2dec80609f9dcd (diff)
Add the remaining parts for teukolsky waves initial data
Diffstat (limited to 'init.c')
-rw-r--r--init.c438
1 files changed, 438 insertions, 0 deletions
diff --git a/init.c b/init.c
new file mode 100644
index 0000000..4fdece4
--- /dev/null
+++ b/init.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright 2017 Anton Khirnov <anton@khirnov.net>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <float.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <cblas.h>
+
+#if HAVE_OPENCL
+#include <cl.h>
+#include <clBLAS.h>
+#endif
+
+#include "basis.h"
+#include "common.h"
+#include "cpu.h"
+#include "log.h"
+#include "nlsolve.h"
+#include "teukolsky_data.h"
+#include "threadpool.h"
+
+#define NB_EQUATIONS 3
+
+typedef struct TDPriv {
+ unsigned int basis_order[NB_EQUATIONS][2];
+ BasisSetContext *basis[NB_EQUATIONS][2];
+
+ NLSolveContext *solver;
+
+ ThreadPoolContext *tp;
+ TDLogger logger;
+
+ double *coeffs;
+ ptrdiff_t coeffs_stride;
+
+ int cpu_flags;
+
+ double (*scalarproduct_metric)(size_t len1, size_t len2, double *mat,
+ double *vec1, double *vec2);
+} TDPriv;
+
+double tdi_scalarproduct_metric_fma3(size_t len1, size_t len2, double *mat,
+ double *vec1, double *vec2);
+double tdi_scalarproduct_metric_avx(size_t len1, size_t len2, double *mat,
+ double *vec1, double *vec2);
+double tdi_scalarproduct_metric_sse3(size_t len1, size_t len2, double *mat,
+ double *vec1, double *vec2);
+double tdi_scalarproduct_metric_c(size_t len1, size_t len2, double *mat,
+ double *vec1, double *vec2);
+
+static void init_opencl(TDPriv *s)
+#if HAVE_OPENCL
+{
+ int err, count;
+ cl_platform_id platform;
+ cl_context_properties props[3];
+ cl_device_id ocl_device;
+
+ err = clGetPlatformIDs(1, &platform, &count);
+ if (err != CL_SUCCESS || count < 1) {
+ tdi_log(&s->logger, 0, "Could not get an OpenCL platform ID\n");
+ return;
+ }
+
+ err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ocl_device, &count);
+ if (err != CL_SUCCESS || count < 1) {
+ tdi_log(&s->logger, 0, "Could not get an OpenCL device ID\n");
+ return;
+ }
+
+ props[0] = CL_CONTEXT_PLATFORM;
+ props[1] = (cl_context_properties)platform;
+ props[2] = 0;
+
+ s->ocl_ctx = clCreateContext(props, 1, &ocl_device, NULL, NULL, &err);
+ if (err != CL_SUCCESS || !s->ocl_ctx) {
+ tdi_log(&s->logger, 0, "Could not create an OpenCL context\n");
+ return;
+ }
+
+ s->ocl_queue = clCreateCommandQueue(s->ocl_ctx, ocl_device, 0, &err);
+ if (err != CL_SUCCESS || !s->ocl_queue) {
+ tdi_log(&s->logger, 0, "Could not create an OpenCL command queue: %d\n", err);
+ goto fail;
+ }
+
+ err = clblasSetup();
+ if (err != CL_SUCCESS) {
+ tdi_log(&s->logger, 0, "Error setting up clBLAS\n");
+ goto fail;
+ }
+
+ return;
+fail:
+ if (s->ocl_queue)
+ clReleaseCommandQueue(s->ocl_queue);
+ s->ocl_queue = 0;
+
+ if (s->ocl_ctx)
+ clReleaseContext(s->ocl_ctx);
+ s->ocl_ctx = 0;
+}
+#else
+{
+}
+#endif
+
+static const enum BasisFamily basis_sets[NB_EQUATIONS][2] = {
+ { BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN },
+ { BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN },
+ { BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN },
+};
+
+static int teukolsky_init_check_options(TDContext *td)
+{
+ TDPriv *s = td->priv;
+ int ret;
+
+ s->cpu_flags = tdi_init_cpu_flags();
+
+ if (!td->nb_threads) {
+ td->nb_threads = tdi_cpu_count();
+ if (!td->nb_threads)
+ td->nb_threads = 1;
+ }
+
+ ret = tdi_threadpool_init(&s->tp, td->nb_threads);
+ if (ret < 0)
+ return ret;
+
+ init_opencl(s);
+
+ s->logger.log = tdi_log_default_callback;
+
+ //s->scalarproduct_metric = tdi_scalarproduct_metric_c;
+ //if (EXTERNAL_SSE3(s->cpu_flags))
+ // s->scalarproduct_metric = tdi_scalarproduct_metric_sse3;
+ //if (EXTERNAL_AVX(s->cpu_flags))
+ // s->scalarproduct_metric = tdi_scalarproduct_metric_avx;
+ //if (EXTERNAL_FMA3(s->cpu_flags))
+ // s->scalarproduct_metric = tdi_scalarproduct_metric_fma3;
+// for (int i = 0; i < ctx->nb_equations; i++)
+// for (int j = 0; j < 2; j++) {
+// s->ps_ctx->basis[i][j] = ctx->basis[i][j];
+// s->ps_ctx->solve_order[i][j] = basis_order[i][j];
+// max_order = MAX(max_order, basis_order[i][j]);
+// }
+//
+ //for (int i = 0; i < max_order; i++) {
+ // tdi_log(&s->logger, 2, "%d ", i);
+ // for (int j = 0; j < ctx->nb_equations; j++)
+ // for (int k = 0; k < 2; k++) {
+ // if (i < s->ps_ctx->solve_order[j][k])
+ // tdi_log(&s->logger, 2, "%8.8g\t", s->ps_ctx->colloc_grid[j][k][i]);
+ // else
+ // tdi_log(&s->logger, 2, " ");
+ // }
+ // tdi_log(&s->logger, 2, "\n");
+ //}
+
+ s->basis_order[0][0] = td->nb_coeffs[0];
+ s->basis_order[0][1] = td->nb_coeffs[1];
+ s->basis_order[1][0] = td->nb_coeffs[0];
+ s->basis_order[1][1] = td->nb_coeffs[1];
+ s->basis_order[2][0] = td->nb_coeffs[0];
+ s->basis_order[2][1] = td->nb_coeffs[1];
+
+ ret = posix_memalign((void**)&s->coeffs, 32,
+ sizeof(*s->coeffs) * NB_EQUATIONS * td->nb_coeffs[0] * td->nb_coeffs[1]);
+ if (ret)
+ return -ENOMEM;
+ memset(s->coeffs, 0, sizeof(*s->coeffs) * NB_EQUATIONS * td->nb_coeffs[0] * td->nb_coeffs[1]);
+
+ for (int i = 0; i < NB_EQUATIONS; i++)
+ td->coeffs[i] = s->coeffs + i * td->nb_coeffs[0] * td->nb_coeffs[1];
+
+ for (int i = 0; i < ARRAY_ELEMS(basis_sets); i++)
+ for (int j = 0; j < ARRAY_ELEMS(basis_sets[i]); j++) {
+ double sf;
+
+ ret = tdi_basis_init(&s->basis[i][j], basis_sets[i][j], td->basis_scale_factor[j]);
+ if (ret < 0)
+ return ret;
+ }
+
+
+ ret = tdi_nlsolve_context_alloc(&s->solver, ARRAY_ELEMS(basis_sets));
+ if (ret < 0) {
+ tdi_log(&s->logger, 0, "Error allocating the non-linear solver\n");
+ return ret;
+ }
+
+ s->solver->logger = s->logger;
+ s->solver->tp = s->tp;
+ s->solver->maxiter = td->max_iter;
+ s->solver->atol = td->atol;
+
+ memcpy(s->solver->basis, s->basis, sizeof(s->basis));
+ memcpy(s->solver->solve_order, s->basis_order, sizeof(s->basis_order));
+
+#if HAVE_OPENCL
+ s->solver->ocl_ctx = s->ocl_ctx;
+ s->solver->ocl_queue = s->ocl_queue;
+#endif
+
+ ret = tdi_nlsolve_context_init(s->solver);
+ if (ret < 0) {
+ tdi_log(&s->logger, 0, "Error initializing the non-linear solver\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int teukolsky_constraint_eval(void *opaque, unsigned int eq_idx,
+ const unsigned int *colloc_grid_order,
+ const double * const *colloc_grid,
+ const double * const (*vars)[PSSOLVE_DIFF_ORDER_NB],
+ double *dst)
+{
+ const double *amplitude = opaque;
+
+ return tdi_constraints_eval(eq_idx, *amplitude, colloc_grid_order, colloc_grid,
+ vars, dst);
+}
+
+#define AMPLITUDE_DIRECT_SOLVE 1e-3
+
+int td_solve(TDContext *td, double *coeffs_init[3])
+{
+ TDPriv *s = td->priv;
+ double cur_amplitude = td->amplitude;//MIN(td->amplitude, AMPLITUDE_DIRECT_SOLVE);
+ int ret;
+
+ ret = teukolsky_init_check_options(td);
+ if (ret < 0)
+ return ret;
+
+ if (coeffs_init) {
+ for (int i = 0; i < 3; i++) {
+ memcpy(td->coeffs[i], coeffs_init[i], sizeof(*td->coeffs[i]) *
+ td->nb_coeffs[0] * td->nb_coeffs[1]);
+ }
+ }
+
+ while (1) {
+ double scale_factor;
+
+ ret = tdi_nlsolve_solve(s->solver, teukolsky_constraint_eval, NULL,
+ &cur_amplitude, s->coeffs);
+ if (ret < 0)
+ return ret;
+
+ if (fabs(cur_amplitude - td->amplitude) < DBL_EPSILON)
+ goto finish;
+
+ scale_factor = cur_amplitude;
+ cur_amplitude = MIN(td->amplitude, 2 * cur_amplitude);
+ scale_factor = cur_amplitude / scale_factor;
+
+ cblas_dscal(td->nb_coeffs[0] * td->nb_coeffs[1], SQR(scale_factor), td->coeffs[0], 1);
+ cblas_dscal(td->nb_coeffs[0] * td->nb_coeffs[1], scale_factor, td->coeffs[1], 1);
+ cblas_dscal(td->nb_coeffs[0] * td->nb_coeffs[1], scale_factor, td->coeffs[2], 1);
+
+ tdi_log(&s->logger, 2, "Trying amplitude %g %g\n", cur_amplitude, scale_factor);
+ }
+
+finish:
+ tdi_solver_print_stats(s->solver);
+
+ return 0;
+}
+
+TDContext *td_context_alloc(void)
+{
+ TDContext *td = calloc(1, sizeof(*td));
+
+ if (!td)
+ return NULL;
+
+ td->nb_threads = 1;
+
+ td->amplitude = 1.0;
+
+ td->nb_coeffs[0] = 32;
+ td->nb_coeffs[1] = 16;
+
+ td->basis_scale_factor[0] = 3.0;
+ td->basis_scale_factor[1] = 3.0;
+
+ td->max_iter = 16;
+ td->atol = 1e-12;
+
+ td->log_callback = tdi_log_default_callback;
+
+ td->priv = calloc(1, sizeof(TDPriv));
+ if (!td->priv) {
+ free(td);
+ return NULL;
+ }
+
+ return td;
+}
+
+void td_context_free(TDContext **ptd)
+{
+ TDContext *td = *ptd;
+ TDPriv *s;
+
+ if (!td)
+ return;
+
+ s = td->priv;
+
+ tdi_nlsolve_context_free(&s->solver);
+ tdi_threadpool_free(&s->tp);
+
+#if HAVE_OPENCL
+ if (s->ocl_queue)
+ clReleaseCommandQueue(s->ocl_queue);
+ if (s->ocl_ctx)
+ clReleaseContext(s->ocl_ctx);
+#endif
+
+ for (int i = 0; i < ARRAY_ELEMS(s->basis); i++)
+ for (int j = 0; j < ARRAY_ELEMS(s->basis[i]); j++)
+ tdi_basis_free(&s->basis[i][j]);
+
+ free(s->coeffs);
+
+ free(td->priv);
+ free(td);
+ *ptd = NULL;
+}
+
+static double scalarproduct_metric_c(size_t len1, size_t len2, double *mat,
+ double *vec1, double *vec2)
+{
+ double val = 0.0;
+ for (int m = 0; m < len2; m++) {
+ double tmp = 0.0;
+ for (int n = 0; n < len1; n++)
+ tmp += mat[m * len1 + n] * vec1[n];
+ val += tmp * vec2[m];
+ }
+ return val;
+}
+
+static int eval_var(const TDContext *td, unsigned int var_idx,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ TDPriv *s = td->priv;
+ double *basis_val[2] = { NULL };
+ int ret = 0;
+
+ if (diff_order[0] > 0 || diff_order[1] > 0) {
+ tdi_log(&s->logger, 0, "Derivatives of higher order than 2 are not supported.\n");
+ return -ENOSYS;
+ }
+
+ posix_memalign((void**)&basis_val[0], 32, sizeof(*basis_val[0]) * td->nb_coeffs[0]);
+ posix_memalign((void**)&basis_val[1], 32, sizeof(*basis_val[1]) * td->nb_coeffs[1]);
+ if (!basis_val[0] || !basis_val[1]) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ memset(basis_val[0], 0, sizeof(*basis_val[0]) * td->nb_coeffs[0]);
+ memset(basis_val[1], 0, sizeof(*basis_val[1]) * td->nb_coeffs[1]);
+
+ for (int i = 0; i < nb_coords; i++) {
+ double theta_val = theta[i];
+ double r_val = r[i];
+
+ double val = (var_idx == 0) ? 1.0 : 0.0;
+
+ for (int k = 0; k < td->nb_coeffs[0]; k++)
+ basis_val[0][k] = tdi_basis_eval(s->basis[var_idx][0], BS_EVAL_TYPE_VALUE, r_val, k);
+ for (int k = 0; k < td->nb_coeffs[1]; k++)
+ basis_val[1][k] = tdi_basis_eval(s->basis[var_idx][1], BS_EVAL_TYPE_VALUE, theta_val, k);
+
+ val += scalarproduct_metric_c(td->nb_coeffs[0], td->nb_coeffs[1], td->coeffs[var_idx],
+ basis_val[0], basis_val[1]);
+
+ out[i] = val;
+ }
+
+
+fail:
+ free(basis_val[0]);
+ free(basis_val[1]);
+
+ return ret;
+}
+
+int td_eval_psi(const TDContext *td,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ return eval_var(td, 0, nb_coords, r, theta, diff_order, out);
+}
+
+int td_eval_krr(const TDContext *td,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ return eval_var(td, 1, nb_coords, r, theta, diff_order, out);
+}
+int td_eval_kpp(const TDContext *td,
+ size_t nb_coords, const double *r, const double *theta,
+ const unsigned int diff_order[2],
+ double *out)
+{
+ return eval_var(td, 2, nb_coords, r, theta, diff_order, out);
+}