From 56b797850884656ae28eef80560563ccf42db007 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Thu, 16 Nov 2017 13:11:07 +0100 Subject: Add the generic non-linear solver layer. --- nlsolve.c | 529 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 529 insertions(+) create mode 100644 nlsolve.c (limited to 'nlsolve.c') diff --git a/nlsolve.c b/nlsolve.c new file mode 100644 index 0000000..5d691e0 --- /dev/null +++ b/nlsolve.c @@ -0,0 +1,529 @@ +/* + * Newton-Kantorovich iterative solver for nonlinear PDE systems + * Copyright (C) 2017 Anton Khirnov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#if HAVE_OPENCL +#include +#include +#endif + +#include "basis.h" +#include "common.h" +#include "log.h" +#include "pssolve.h" +#include "nlsolve.h" +#include "threadpool.h" + +#define NB_COEFFS(td) (td->nb_coeffs[0] * td->nb_coeffs[1]) +#define NB_COLLOC_POINTS(td) (td->nb_colloc_points[0] * td->nb_colloc_points[1]) +#define SOLVE_ORDER(ctx, idx) (ctx->solve_order[idx][0] * ctx->solve_order[idx][1]) +#define VAR_EPS 1e-4 + +/* per-equation state */ +typedef struct NLEquationContext { + /* eq_coeffs[i][j] is an array of coefficients at the collocation points + * for j-th derivative of i-th unknown function */ + double *(*eq_coeffs)[PSSOLVE_DIFF_ORDER_NB]; + const double *(*var_ptrs)[PSSOLVE_DIFF_ORDER_NB]; + const double *(*var_mod_ptrs)[PSSOLVE_DIFF_ORDER_NB]; + + double *rhs; + +} NLEquationContext; + +/* per-variable state */ +typedef struct NLVarContext { + ptrdiff_t coeffs_offset; + double *vars[PSSOLVE_DIFF_ORDER_NB]; + double *val_tmp[3]; + double *eps; + + double *transform_matrix[2][3]; + double *transform_tmp; +} NLVarContext; + +struct NLSolvePriv { + PSSolveContext *ps_ctx; + + TDLogger logger; + + double amplitude; + + unsigned int nb_equations; + unsigned int nb_vars; + unsigned int solve_order; + + NLEquationContext *eqs; + NLVarContext *vars; + + const double *(**eq_coeffs)[PSSOLVE_DIFF_ORDER_NB]; + double *delta; + double *rhs; + + ThreadPoolContext *tp; + ThreadPoolContext *tp_internal; + + uint64_t solve_count; + uint64_t solve_time; + + uint64_t calc_eq_coeffs_count; + uint64_t calc_eq_coeffs_time; +}; + +int tdi_nlsolve_solve(NLSolveContext *ctx, NLEqCallback eq_eval, + NLEqJacobianCallback eq_jac_eval, void *opaque, double *coeffs) +{ + NLSolvePriv *s = ctx->priv; + int64_t start, totaltime_start; + int ret = 0; + + totaltime_start = gettime(); + + for (unsigned int it = 0; it < ctx->maxiter; it++) { + size_t max_idx; + + start = gettime(); + + // evaluate the unknowns and their derivatives on the pseudospectral grid + for (int i = 0; i < s->nb_vars; i++) { + NLVarContext *var_ctx = &s->vars[i]; + + for (int j = 0; j < PSSOLVE_DIFF_ORDER_NB; j++) { + int diff0 = tdi_pssolve_diff_order(j, 0); + int diff1 = tdi_pssolve_diff_order(j, 1); + + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + ctx->solve_order[i][0], ctx->solve_order[i][1], ctx->solve_order[i][1], 1.0, + coeffs + var_ctx->coeffs_offset, ctx->solve_order[i][0], + var_ctx->transform_matrix[1][diff1], ctx->solve_order[i][1], + 0.0, var_ctx->transform_tmp, ctx->solve_order[i][0]); + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + ctx->solve_order[i][0], ctx->solve_order[i][1], ctx->solve_order[i][0], 1.0, + var_ctx->transform_matrix[0][diff0], ctx->solve_order[i][0], + var_ctx->transform_tmp, ctx->solve_order[i][0], 0.0, var_ctx->vars[j], ctx->solve_order[i][0]); + } + } + + // build equation coeffs + for (int i = 0; i < s->nb_equations; i++) { + NLEquationContext *eq_ctx = &s->eqs[i]; + const int N = SOLVE_ORDER(ctx, i); + + for (int j = 0; j < s->nb_vars; j++) + for (int k = 0; k < ARRAY_ELEMS(eq_ctx->var_ptrs[j]); k++) + eq_ctx->var_ptrs[j][k] = s->vars[j].vars[k]; + + ret = eq_eval(opaque, i, s->ps_ctx->solve_order[i], + s->ps_ctx->colloc_grid[i], eq_ctx->var_ptrs, + eq_ctx->rhs); + if (ret < 0) + return ret; + cblas_dscal(N, -1.0, eq_ctx->rhs, 1); + + for (int j = 0; j < s->nb_vars; j++) { + NLVarContext *var_ctx = &s->vars[j]; + + for (int k = 0; k < PSSOLVE_DIFF_ORDER_NB; k++) { + double *dst = eq_ctx->eq_coeffs[j][k]; + + if (eq_jac_eval) { + ret = eq_jac_eval(opaque, i, j, k, s->ps_ctx->solve_order[i], + s->ps_ctx->colloc_grid[i], eq_ctx->var_ptrs, dst); + if (ret < 0) + return ret; + + continue; + } + + // jacobian not available -- try to approximate with + // finite differences + memcpy(eq_ctx->var_mod_ptrs, eq_ctx->var_ptrs, s->nb_vars * sizeof(*eq_ctx->var_ptrs)); + memcpy(var_ctx->val_tmp[0], eq_ctx->var_ptrs[j][k], N * sizeof(*var_ctx->eps)); + + cblas_daxpy(N, 1.0, var_ctx->eps, 1, var_ctx->val_tmp[0], 1); + eq_ctx->var_mod_ptrs[j][k] = var_ctx->val_tmp[0]; + + ret = eq_eval(opaque, i, + s->ps_ctx->solve_order[i], s->ps_ctx->colloc_grid[i], + eq_ctx->var_mod_ptrs, dst); + if (ret < 0) + return ret; + + cblas_daxpy(N, -2.0, var_ctx->eps, 1, var_ctx->val_tmp[0], 1); + ret = eq_eval(opaque, i, + s->ps_ctx->solve_order[i], s->ps_ctx->colloc_grid[i], + eq_ctx->var_mod_ptrs, var_ctx->val_tmp[1]); + if (ret < 0) + return ret; + + cblas_daxpy(N, -1.0, var_ctx->val_tmp[1], 1, dst, 1); + cblas_dscal(N, 1.0 / (2.0 * VAR_EPS), dst, 1); + } + } + } + + // solve for delta + ret = tdi_pssolve_solve(s->ps_ctx, s->eq_coeffs, s->rhs, s->delta); + if (ret < 0) + return ret; + + cblas_daxpy(s->solve_order, 1.0, s->delta, 1, coeffs, 1); + max_idx = cblas_idamax(s->solve_order, s->delta, 1); + if (fabs(s->delta[max_idx]) < ctx->atol) { + tdi_log(&ctx->logger, 2, "Converged on iteration %d: max(delta) %g, tolerance %g\n", + it, s->delta[max_idx], ctx->atol); + ret = 0; + goto finish; + } + + tdi_log(&ctx->logger, 3, "Iteration %d, max(delta) %g coeffs[0] %g\n", + it, s->delta[max_idx], coeffs[0]); + } + + tdi_log(&ctx->logger, 0, "The solver did not converge\n"); + ret = -EDOM; + +finish: + s->solve_count++; + s->solve_time += gettime() - totaltime_start; + + return ret; +} + +void tdi_solver_print_stats(NLSolveContext *ctx) +{ + NLSolvePriv *s = ctx->priv; + + tdi_log(&ctx->logger, 2, + "%g%% calc equation coefficients: %lu, " + "total time %g s, avg time per call %g ms\n", + (double)s->calc_eq_coeffs_time * 100 / s->solve_time, + s->calc_eq_coeffs_count, (double)s->calc_eq_coeffs_time / 1e6, + (double)s->calc_eq_coeffs_time / s->calc_eq_coeffs_count / 1e3); + tdi_log(&ctx->logger, 2, + "%g%% pseudospectral matrix construction: %lu, " + "total time %g s, avg time per call %g ms\n", + (double)s->ps_ctx->construct_matrix_time * 100 / s->solve_time, + s->ps_ctx->construct_matrix_count, (double)s->ps_ctx->construct_matrix_time / 1e6, + (double)s->ps_ctx->construct_matrix_time / s->ps_ctx->construct_matrix_count / 1e3); + tdi_log(&ctx->logger, 2, + "%g%% BiCGSTAB %lu solves, " + "%lu iterations, total time %g s, " + "avg iterations per solve %g, avg time per solve %g ms, " + "avg time per iteration %g ms\n", + (double)s->ps_ctx->cg_time_total * 100 / s->solve_time, + s->ps_ctx->cg_solve_count, s->ps_ctx->cg_iter_count, (double)s->ps_ctx->cg_time_total / 1e6, + (double)s->ps_ctx->cg_iter_count / s->ps_ctx->cg_solve_count, + (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_solve_count / 1e3, + (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_iter_count / 1e3); + tdi_log(&ctx->logger, 2, + "%g%% LU %lu solves, total time %g s, avg time per solve %g ms\n", + (double)s->ps_ctx->lu_solves_time * 100 / s->solve_time, + s->ps_ctx->lu_solves_count, (double)s->ps_ctx->lu_solves_time / 1e6, + (double)s->ps_ctx->lu_solves_time / s->ps_ctx->lu_solves_count / 1e3); +} + +static void eq_ctx_free(NLSolveContext *ctx, unsigned int eq_idx) +{ + NLSolvePriv *s = ctx->priv; + NLEquationContext *eq_ctx = &s->eqs[eq_idx]; + int i, j; + + if (eq_ctx->eq_coeffs) { + for (int i = 0; i < s->nb_vars; i++) + for (int j = 0; j < ARRAY_ELEMS(eq_ctx->eq_coeffs[i]); j++) + free(eq_ctx->eq_coeffs[i][j]); + } + free(eq_ctx->eq_coeffs); + free(eq_ctx->var_ptrs); + free(eq_ctx->var_mod_ptrs); +} + +static int eq_ctx_init(NLSolveContext *ctx, unsigned int eq_idx) +{ + NLSolvePriv *s = ctx->priv; + NLEquationContext *eq_ctx = &s->eqs[eq_idx]; + int ret; + + /* allocate the equation coefficients */ + eq_ctx->eq_coeffs = calloc(s->nb_vars, sizeof(*eq_ctx->eq_coeffs)); + if (!eq_ctx->eq_coeffs) + return -ENOMEM; + for (int i = 0; i < s->nb_vars; i++) + for (int j = 0; j < ARRAY_ELEMS(eq_ctx->eq_coeffs[i]); j++) { + ret = posix_memalign((void**)&eq_ctx->eq_coeffs[i][j], 32, + SOLVE_ORDER(ctx, i) * sizeof(*eq_ctx->eq_coeffs[i][j])); + if (ret) + return -ENOMEM; + } + + eq_ctx->var_ptrs = calloc(s->nb_vars, sizeof(*eq_ctx->var_ptrs)); + eq_ctx->var_mod_ptrs = calloc(s->nb_vars, sizeof(*eq_ctx->var_mod_ptrs)); + if (!eq_ctx->var_ptrs || !eq_ctx->var_mod_ptrs) + return -ENOMEM; + + /* setup the RHS pointer */ + if (eq_idx == 0) + eq_ctx->rhs = s->rhs; + else + eq_ctx->rhs = s->eqs[eq_idx - 1].rhs + SOLVE_ORDER(ctx, eq_idx - 1); + + return 0; +} + +static void var_ctx_free(NLSolveContext *ctx, unsigned int var_idx) +{ + NLSolvePriv *s = ctx->priv; + NLVarContext *var_ctx = &s->vars[var_idx]; + + for (int i = 0; i < ARRAY_ELEMS(var_ctx->vars); i++) + free(var_ctx->vars[i]); + + for (int i = 0; i < ARRAY_ELEMS(var_ctx->transform_matrix); i++) + for (int j = 0; j < ARRAY_ELEMS(var_ctx->transform_matrix[i]); j++) + free(var_ctx->transform_matrix[i][j]); + + for (int i = 0; i < ARRAY_ELEMS(var_ctx->val_tmp); i++) + free(var_ctx->val_tmp[i]); + + free(var_ctx->transform_tmp); + free(var_ctx->eps); +} + +static int var_ctx_init(NLSolveContext *ctx, unsigned int var_idx) +{ + NLSolvePriv *s = ctx->priv; + NLVarContext *var_ctx = &s->vars[var_idx]; + int ret; + + for (int i = 0; i < ARRAY_ELEMS(var_ctx->vars); i++) { + ret = posix_memalign((void**)&var_ctx->vars[i], 32, + SOLVE_ORDER(ctx, var_idx) * sizeof(*var_ctx->vars[i])); + if (ret) + return -ENOMEM; + } + + for (int i = 0; i < ARRAY_ELEMS(var_ctx->transform_matrix); i++) + for (int j = 0; j < ARRAY_ELEMS(var_ctx->transform_matrix[i]); j++) { + ret = posix_memalign((void**)&var_ctx->transform_matrix[i][j], 32, + SQR(ctx->solve_order[var_idx][i]) * sizeof(*var_ctx->transform_matrix[i][j])); + if (ret) + return -ENOMEM; + + for (int k = 0; k < ctx->solve_order[var_idx][i]; k++) + for (int l = 0; l < ctx->solve_order[var_idx][i]; l++) { + int idx = (i == 0) ? ctx->solve_order[var_idx][i] * l + k : + ctx->solve_order[var_idx][i] * k + l; + var_ctx->transform_matrix[i][j][idx] = tdi_basis_eval(ctx->basis[var_idx][i], j, + s->ps_ctx->colloc_grid[var_idx][i][k], l); + } + } + + for (int i = 0; i < ARRAY_ELEMS(var_ctx->val_tmp); i++) { + ret = posix_memalign((void**)&var_ctx->val_tmp[i], 32, + SOLVE_ORDER(ctx, var_idx) * sizeof(*var_ctx->val_tmp[i])); + if (ret) + return -ENOMEM; + } + + ret = posix_memalign((void**)&var_ctx->transform_tmp, 32, + SOLVE_ORDER(ctx, var_idx) * sizeof(*var_ctx->transform_tmp)); + if (ret) + return -ENOMEM; + + ret = posix_memalign((void**)&var_ctx->eps, 32, + SOLVE_ORDER(ctx, var_idx) * sizeof(*var_ctx->eps)); + if (ret) + return -ENOMEM; + for (int i = 0; i < SOLVE_ORDER(ctx, var_idx); i++) + var_ctx->eps[i] = VAR_EPS; + + /* setup the coeffs offset */ + if (var_idx > 0) + var_ctx->coeffs_offset = s->vars[var_idx - 1].coeffs_offset + SOLVE_ORDER(ctx, var_idx - 1); + + return 0; +} + +int tdi_nlsolve_context_alloc(NLSolveContext **pctx, unsigned int nb_equations) +{ + NLSolveContext *ctx; + NLSolvePriv *s; + int ret; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return -ENOMEM; + + ctx->priv = calloc(1, sizeof(*ctx->priv)); + if (!ctx->priv) { + ret = -ENOMEM; + goto fail; + } + s = ctx->priv; + + ctx->nb_equations = nb_equations; + s->nb_equations = nb_equations; + s->nb_vars = nb_equations; + + s->eq_coeffs = calloc(s->nb_equations, sizeof(*s->eq_coeffs)); + s->eqs = calloc(s->nb_equations, sizeof(*s->eqs)); + s->vars = calloc(s->nb_vars, sizeof(*s->vars)); + if (!s->eq_coeffs || !s->eqs || !s->vars) { + ret = -ENOMEM; + goto fail; + } + + ctx->basis = calloc(s->nb_vars, sizeof(*ctx->basis)); + if (!ctx->basis) { + ret = -ENOMEM; + goto fail; + } + + ctx->solve_order = calloc(s->nb_vars, sizeof(*ctx->solve_order)); + if (!ctx->solve_order) { + ret = -ENOMEM; + goto fail; + } + + *pctx = ctx; + return 0; +fail: + tdi_nlsolve_context_free(&ctx); + return ret; +} + +int tdi_nlsolve_context_init(NLSolveContext *ctx) +{ + NLSolvePriv *s = ctx->priv; + unsigned int solve_order = 0; + int ret; + + if (ctx->tp) { + s->tp = ctx->tp; + } else { + ret = tdi_threadpool_init(&s->tp_internal, 1); + if (ret < 0) + return ret; + s->tp = s->tp_internal; + } + + for (int i = 0; i < s->nb_vars; i++) { + if (ctx->solve_order[i][0] >= UINT_MAX / ctx->solve_order[i][1] || + solve_order >= UINT_MAX - ctx->solve_order[i][0] * ctx->solve_order[i][1]) { + return -EDOM; + } + solve_order += ctx->solve_order[i][0] * ctx->solve_order[i][1]; + } + s->solve_order = solve_order; + + ret = posix_memalign((void**)&s->rhs, 32, sizeof(*s->rhs) * solve_order); + ret |= posix_memalign((void**)&s->delta, 32, sizeof(*s->delta) * solve_order); + if (ret) { + return -ENOMEM; + } + + ret = tdi_pssolve_context_alloc(&s->ps_ctx, s->nb_equations); + if (ret < 0) { + tdi_log(&ctx->logger, 0, "Error allocating the pseudospectral solver"); + return ret; + } + + s->ps_ctx->logger = ctx->logger; + s->ps_ctx->tp = s->tp; + +#if HAVE_OPENCL + s->ps_ctx->ocl_ctx = ctx->ocl_ctx; + s->ps_ctx->ocl_queue = ctx->ocl_queue; +#endif + + memcpy(s->ps_ctx->basis, ctx->basis, s->nb_vars * sizeof(*ctx->basis)); + memcpy(s->ps_ctx->solve_order, ctx->solve_order, s->nb_vars * sizeof(*ctx->solve_order)); + + ret = tdi_pssolve_context_init(s->ps_ctx); + if (ret < 0) { + tdi_log(&ctx->logger, 0, "Error initializing the pseudospectral solver\n"); + return ret; + } + + /* init the per-equation state */ + for (int i = 0; i < s->nb_equations; i++) { + ret = eq_ctx_init(ctx, i); + if (ret < 0) + return ret; + + s->eq_coeffs[i] = s->eqs[i].eq_coeffs; + } + + /* init the per-variable state */ + for (int i = 0; i < s->nb_vars; i++) { + ret = var_ctx_init(ctx, i); + if (ret < 0) + return ret; + } + + return 0; +} + +void tdi_nlsolve_context_free(NLSolveContext **pctx) +{ + NLSolveContext *ctx = *pctx; + + if (!ctx) + return; + + if (ctx->priv) { + free(ctx->priv->rhs); + free(ctx->priv->delta); + + if (ctx->priv->eqs) + for (int i = 0; i < ctx->nb_equations; i++) + eq_ctx_free(ctx, i); + free(ctx->priv->eqs); + + if (ctx->priv->vars) + for (int i = 0; i < ctx->nb_equations; i++) + var_ctx_free(ctx, i); + free(ctx->priv->vars); + + free(ctx->priv->eq_coeffs); + + tdi_pssolve_context_free(&ctx->priv->ps_ctx); + + tdi_threadpool_free(&ctx->priv->tp_internal); + } + + free(ctx->priv); + + free(ctx->basis); + free(ctx->solve_order); + + free(ctx); + *pctx = NULL; +} -- cgit v1.2.3