/* * Newton-Kantorovich iterative solver for nonlinear PDE systems * Copyright (C) 2017 Anton Khirnov * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "config.h" #include #include #include #include #include #include #include #include #include #include #include "basis.h" #include "common.h" #include "log.h" #include "pssolve.h" #include "nlsolve.h" #define NB_COEFFS(td) (td->nb_coeffs[0] * td->nb_coeffs[1]) #define NB_COLLOC_POINTS(td) (td->nb_colloc_points[0] * td->nb_colloc_points[1]) #define SOLVE_ORDER(ctx, idx) (ctx->solve_order[idx][0] * ctx->solve_order[idx][1]) #define VAR_EPS 1e-4 /* per-equation state */ typedef struct NLEquationContext { /* eq_coeffs[i][j] is an array of coefficients at the collocation points * for j-th derivative of i-th unknown function */ double *(*eq_coeffs)[PSSOLVE_DIFF_ORDER_NB]; const double *(*var_ptrs)[PSSOLVE_DIFF_ORDER_NB]; const double *(*var_mod_ptrs)[PSSOLVE_DIFF_ORDER_NB]; double *rhs; } NLEquationContext; /* per-variable state */ typedef struct NLVarContext { ptrdiff_t coeffs_offset; double *vars[PSSOLVE_DIFF_ORDER_NB]; double *val_tmp[3]; double *eps; double *transform_matrix[2][3]; double *transform_tmp; } NLVarContext; struct NLSolvePriv { PSSolveContext *ps_ctx; TDLogger logger; double amplitude; unsigned int nb_equations; unsigned int nb_vars; unsigned int solve_order; NLEquationContext *eqs; NLVarContext *vars; PSEqCoeffs *eq_coeffs; double *delta; double *rhs; TPContext *tp; TPContext *tp_internal; uint64_t solve_count; uint64_t solve_time; uint64_t calc_eq_coeffs_count; uint64_t calc_eq_coeffs_time; uint64_t var_eval_count; uint64_t var_eval_time; }; int tdi_nlsolve_solve(NLSolveContext *ctx, NLEqCallback eq_eval, NLEqJacobianCallback eq_jac_eval, void *opaque, double *coeffs, int fast_abort) { NLSolvePriv *s = ctx->priv; int64_t start, totaltime_start; int ret = 0; totaltime_start = gettime(); for (unsigned int it = 0; it < ctx->maxiter; it++) { size_t max_idx; start = gettime(); // evaluate the unknowns and their derivatives on the pseudospectral grid for (int i = 0; i < s->nb_vars; i++) { NLVarContext *var_ctx = &s->vars[i]; for (int j = 0; j < PSSOLVE_DIFF_ORDER_NB; j++) { int diff0 = tdi_pssolve_diff_order(j, 0); int diff1 = tdi_pssolve_diff_order(j, 1); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, ctx->solve_order[i][0], ctx->solve_order[i][1], ctx->solve_order[i][1], 1.0, coeffs + var_ctx->coeffs_offset, ctx->solve_order[i][0], var_ctx->transform_matrix[1][diff1], ctx->solve_order[i][1], 0.0, var_ctx->transform_tmp, ctx->solve_order[i][0]); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, ctx->solve_order[i][0], ctx->solve_order[i][1], ctx->solve_order[i][0], 1.0, var_ctx->transform_matrix[0][diff0], ctx->solve_order[i][0], var_ctx->transform_tmp, ctx->solve_order[i][0], 0.0, var_ctx->vars[j], ctx->solve_order[i][0]); } } s->var_eval_count++; s->var_eval_time += gettime() - start; start = gettime(); // build equation coeffs for (int i = 0; i < s->nb_equations; i++) { NLEquationContext *eq_ctx = &s->eqs[i]; const int N = SOLVE_ORDER(ctx, i); for (int j = 0; j < s->nb_vars; j++) for (int k = 0; k < ARRAY_ELEMS(eq_ctx->var_ptrs[j]); k++) eq_ctx->var_ptrs[j][k] = s->vars[j].vars[k]; ret = eq_eval(opaque, i, s->ps_ctx->solve_order[i], s->ps_ctx->colloc_grid[i], eq_ctx->var_ptrs, eq_ctx->rhs); if (ret < 0) return ret; cblas_dscal(N, -1.0, eq_ctx->rhs, 1); for (int j = 0; j < s->nb_vars; j++) { NLVarContext *var_ctx = &s->vars[j]; for (int k = 0; k < PSSOLVE_DIFF_ORDER_NB; k++) { double *dst = eq_ctx->eq_coeffs[j][k]; if (eq_jac_eval) { ret = eq_jac_eval(opaque, i, j, k, s->ps_ctx->solve_order[i], s->ps_ctx->colloc_grid[i], eq_ctx->var_ptrs, dst); if (ret < 0) return ret; continue; } // jacobian not available -- try to approximate with // finite differences memcpy(eq_ctx->var_mod_ptrs, eq_ctx->var_ptrs, s->nb_vars * sizeof(*eq_ctx->var_ptrs)); memcpy(var_ctx->val_tmp[0], eq_ctx->var_ptrs[j][k], N * sizeof(*var_ctx->eps)); cblas_daxpy(N, 1.0, var_ctx->eps, 1, var_ctx->val_tmp[0], 1); eq_ctx->var_mod_ptrs[j][k] = var_ctx->val_tmp[0]; ret = eq_eval(opaque, i, s->ps_ctx->solve_order[i], s->ps_ctx->colloc_grid[i], eq_ctx->var_mod_ptrs, dst); if (ret < 0) return ret; cblas_daxpy(N, -2.0, var_ctx->eps, 1, var_ctx->val_tmp[0], 1); ret = eq_eval(opaque, i, s->ps_ctx->solve_order[i], s->ps_ctx->colloc_grid[i], eq_ctx->var_mod_ptrs, var_ctx->val_tmp[1]); if (ret < 0) return ret; cblas_daxpy(N, -1.0, var_ctx->val_tmp[1], 1, dst, 1); cblas_dscal(N, 1.0 / (2.0 * VAR_EPS), dst, 1); } } } s->calc_eq_coeffs_count++; s->calc_eq_coeffs_time += gettime() - start; // solve for delta ret = tdi_pssolve_solve(s->ps_ctx, s->eq_coeffs, s->rhs, s->delta); if (ret < 0) return ret; max_idx = cblas_idamax(s->solve_order, s->delta, 1); if (fabs(s->delta[max_idx]) < ctx->atol) { tdi_log(&ctx->logger, 2, "Converged on iteration %d: max(delta) %g, tolerance %g\n", it, s->delta[max_idx], ctx->atol); ret = 0; goto finish; } else if ((fast_abort && fabs(s->delta[max_idx]) > 1e6) || fabs(s->delta[max_idx]) > 1e12) { tdi_log(&ctx->logger, 2, "max(delta) %g, aborting\n", s->delta[max_idx]); ret = -EDOM; goto finish; } cblas_daxpy(s->solve_order, 1.0, s->delta, 1, coeffs, 1); tdi_log(&ctx->logger, 3, "Iteration %d, max(delta) %g ", it, s->delta[max_idx]); for (int i = 0; i < s->nb_vars; i++) tdi_log(&ctx->logger, 3, "coeffs%d[0] %g ", i, coeffs[s->vars[i].coeffs_offset]); tdi_log(&ctx->logger, 3, "\n"); } tdi_log(&ctx->logger, 0, "The solver did not converge\n"); ret = -EDOM; finish: s->solve_count++; s->solve_time += gettime() - totaltime_start; return ret; } void tdi_nlsolve_print_stats(NLSolveContext *ctx) { NLSolvePriv *s = ctx->priv; tdi_log(&ctx->logger, 2, "%g%% variables evaluation: %lu, " "total time %g s, avg time per call %g ms\n", (double)s->var_eval_time * 100 / s->solve_time, s->var_eval_count, (double)s->var_eval_time / 1e6, (double)s->var_eval_time / s->var_eval_count / 1e3); tdi_log(&ctx->logger, 2, "%g%% calc equation coefficients: %lu, " "total time %g s, avg time per call %g ms\n", (double)s->calc_eq_coeffs_time * 100 / s->solve_time, s->calc_eq_coeffs_count, (double)s->calc_eq_coeffs_time / 1e6, (double)s->calc_eq_coeffs_time / s->calc_eq_coeffs_count / 1e3); tdi_log(&ctx->logger, 2, "%g%% pseudospectral matrix construction: %lu, " "total time %g s, avg time per call %g ms\n", (double)s->ps_ctx->construct_matrix_time * 100 / s->solve_time, s->ps_ctx->construct_matrix_count, (double)s->ps_ctx->construct_matrix_time / 1e6, (double)s->ps_ctx->construct_matrix_time / s->ps_ctx->construct_matrix_count / 1e3); tdi_log(&ctx->logger, 2, "%g%% BiCGSTAB %lu solves, " "%lu iterations, total time %g s, " "avg iterations per solve %g, avg time per solve %g ms, " "avg time per iteration %g ms\n", (double)s->ps_ctx->cg_time_total * 100 / s->solve_time, s->ps_ctx->cg_solve_count, s->ps_ctx->cg_iter_count, (double)s->ps_ctx->cg_time_total / 1e6, (double)s->ps_ctx->cg_iter_count / s->ps_ctx->cg_solve_count, (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_solve_count / 1e3, (double)s->ps_ctx->cg_time_total / s->ps_ctx->cg_iter_count / 1e3); tdi_log(&ctx->logger, 2, "%g%% LU %lu solves, total time %g s, avg time per solve %g ms\n", (double)s->ps_ctx->lu_solves_time * 100 / s->solve_time, s->ps_ctx->lu_solves_count, (double)s->ps_ctx->lu_solves_time / 1e6, (double)s->ps_ctx->lu_solves_time / s->ps_ctx->lu_solves_count / 1e3); } static void eq_ctx_free(NLSolveContext *ctx, unsigned int eq_idx) { NLSolvePriv *s = ctx->priv; NLEquationContext *eq_ctx = &s->eqs[eq_idx]; int i, j; if (eq_ctx->eq_coeffs) { for (int i = 0; i < s->nb_vars; i++) for (int j = 0; j < ARRAY_ELEMS(eq_ctx->eq_coeffs[i]); j++) free(eq_ctx->eq_coeffs[i][j]); } free(eq_ctx->eq_coeffs); free(eq_ctx->var_ptrs); free(eq_ctx->var_mod_ptrs); } static int eq_ctx_init(NLSolveContext *ctx, unsigned int eq_idx) { NLSolvePriv *s = ctx->priv; NLEquationContext *eq_ctx = &s->eqs[eq_idx]; int ret; /* allocate the equation coefficients */ eq_ctx->eq_coeffs = calloc(s->nb_vars, sizeof(*eq_ctx->eq_coeffs)); if (!eq_ctx->eq_coeffs) return -ENOMEM; for (int i = 0; i < s->nb_vars; i++) for (int j = 0; j < ARRAY_ELEMS(eq_ctx->eq_coeffs[i]); j++) { ret = posix_memalign((void**)&eq_ctx->eq_coeffs[i][j], 32, SOLVE_ORDER(ctx, i) * sizeof(*eq_ctx->eq_coeffs[i][j])); if (ret) return -ENOMEM; } eq_ctx->var_ptrs = calloc(s->nb_vars, sizeof(*eq_ctx->var_ptrs)); eq_ctx->var_mod_ptrs = calloc(s->nb_vars, sizeof(*eq_ctx->var_mod_ptrs)); if (!eq_ctx->var_ptrs || !eq_ctx->var_mod_ptrs) return -ENOMEM; /* setup the RHS pointer */ if (eq_idx == 0) eq_ctx->rhs = s->rhs; else eq_ctx->rhs = s->eqs[eq_idx - 1].rhs + SOLVE_ORDER(ctx, eq_idx - 1); return 0; } static void var_ctx_free(NLSolveContext *ctx, unsigned int var_idx) { NLSolvePriv *s = ctx->priv; NLVarContext *var_ctx = &s->vars[var_idx]; for (int i = 0; i < ARRAY_ELEMS(var_ctx->vars); i++) free(var_ctx->vars[i]); for (int i = 0; i < ARRAY_ELEMS(var_ctx->transform_matrix); i++) for (int j = 0; j < ARRAY_ELEMS(var_ctx->transform_matrix[i]); j++) free(var_ctx->transform_matrix[i][j]); for (int i = 0; i < ARRAY_ELEMS(var_ctx->val_tmp); i++) free(var_ctx->val_tmp[i]); free(var_ctx->transform_tmp); free(var_ctx->eps); } static int var_ctx_init(NLSolveContext *ctx, unsigned int var_idx) { NLSolvePriv *s = ctx->priv; NLVarContext *var_ctx = &s->vars[var_idx]; int ret; for (int i = 0; i < ARRAY_ELEMS(var_ctx->vars); i++) { ret = posix_memalign((void**)&var_ctx->vars[i], 32, SOLVE_ORDER(ctx, var_idx) * sizeof(*var_ctx->vars[i])); if (ret) return -ENOMEM; } for (int i = 0; i < ARRAY_ELEMS(var_ctx->transform_matrix); i++) for (int j = 0; j < ARRAY_ELEMS(var_ctx->transform_matrix[i]); j++) { ret = posix_memalign((void**)&var_ctx->transform_matrix[i][j], 32, SQR(ctx->solve_order[var_idx][i]) * sizeof(*var_ctx->transform_matrix[i][j])); if (ret) return -ENOMEM; for (int k = 0; k < ctx->solve_order[var_idx][i]; k++) for (int l = 0; l < ctx->solve_order[var_idx][i]; l++) { int idx = (i == 0) ? ctx->solve_order[var_idx][i] * l + k : ctx->solve_order[var_idx][i] * k + l; var_ctx->transform_matrix[i][j][idx] = tdi_basis_eval(ctx->basis[var_idx][i], j, s->ps_ctx->colloc_grid[var_idx][i][k], l); } } for (int i = 0; i < ARRAY_ELEMS(var_ctx->val_tmp); i++) { ret = posix_memalign((void**)&var_ctx->val_tmp[i], 32, SOLVE_ORDER(ctx, var_idx) * sizeof(*var_ctx->val_tmp[i])); if (ret) return -ENOMEM; } ret = posix_memalign((void**)&var_ctx->transform_tmp, 32, SOLVE_ORDER(ctx, var_idx) * sizeof(*var_ctx->transform_tmp)); if (ret) return -ENOMEM; ret = posix_memalign((void**)&var_ctx->eps, 32, SOLVE_ORDER(ctx, var_idx) * sizeof(*var_ctx->eps)); if (ret) return -ENOMEM; for (int i = 0; i < SOLVE_ORDER(ctx, var_idx); i++) var_ctx->eps[i] = VAR_EPS; /* setup the coeffs offset */ if (var_idx > 0) var_ctx->coeffs_offset = s->vars[var_idx - 1].coeffs_offset + SOLVE_ORDER(ctx, var_idx - 1); return 0; } int tdi_nlsolve_context_alloc(NLSolveContext **pctx, unsigned int nb_equations) { NLSolveContext *ctx; NLSolvePriv *s; int ret; ctx = calloc(1, sizeof(*ctx)); if (!ctx) return -ENOMEM; ctx->priv = calloc(1, sizeof(*ctx->priv)); if (!ctx->priv) { ret = -ENOMEM; goto fail; } s = ctx->priv; ctx->nb_equations = nb_equations; s->nb_equations = nb_equations; s->nb_vars = nb_equations; s->eq_coeffs = calloc(s->nb_equations, sizeof(*s->eq_coeffs)); s->eqs = calloc(s->nb_equations, sizeof(*s->eqs)); s->vars = calloc(s->nb_vars, sizeof(*s->vars)); if (!s->eq_coeffs || !s->eqs || !s->vars) { ret = -ENOMEM; goto fail; } ctx->basis = calloc(s->nb_vars, sizeof(*ctx->basis)); if (!ctx->basis) { ret = -ENOMEM; goto fail; } ctx->solve_order = calloc(s->nb_vars, sizeof(*ctx->solve_order)); if (!ctx->solve_order) { ret = -ENOMEM; goto fail; } *pctx = ctx; return 0; fail: tdi_nlsolve_context_free(&ctx); return ret; } int tdi_nlsolve_context_init(NLSolveContext *ctx) { NLSolvePriv *s = ctx->priv; unsigned int solve_order = 0; int ret; if (ctx->tp) { s->tp = ctx->tp; } else { ret = tp_init(&s->tp_internal, 1); if (ret < 0) return ret; s->tp = s->tp_internal; } for (int i = 0; i < s->nb_vars; i++) { if (ctx->solve_order[i][0] >= UINT_MAX / ctx->solve_order[i][1] || solve_order >= UINT_MAX - ctx->solve_order[i][0] * ctx->solve_order[i][1]) { return -EDOM; } solve_order += ctx->solve_order[i][0] * ctx->solve_order[i][1]; } s->solve_order = solve_order; ret = posix_memalign((void**)&s->rhs, 32, sizeof(*s->rhs) * solve_order); ret |= posix_memalign((void**)&s->delta, 32, sizeof(*s->delta) * solve_order); if (ret) { return -ENOMEM; } ret = tdi_pssolve_context_alloc(&s->ps_ctx, s->nb_equations); if (ret < 0) { tdi_log(&ctx->logger, 0, "Error allocating the pseudospectral solver"); return ret; } s->ps_ctx->logger = ctx->logger; s->ps_ctx->tp = s->tp; memcpy(s->ps_ctx->basis, ctx->basis, s->nb_vars * sizeof(*ctx->basis)); memcpy(s->ps_ctx->solve_order, ctx->solve_order, s->nb_vars * sizeof(*ctx->solve_order)); ret = tdi_pssolve_context_init(s->ps_ctx); if (ret < 0) { tdi_log(&ctx->logger, 0, "Error initializing the pseudospectral solver\n"); return ret; } ctx->colloc_grid = s->ps_ctx->colloc_grid; /* init the per-equation state */ for (int i = 0; i < s->nb_equations; i++) { ret = eq_ctx_init(ctx, i); if (ret < 0) return ret; s->eq_coeffs[i].func_coeffs = (const double * const (*)[PSSOLVE_DIFF_ORDER_NB])s->eqs[i].eq_coeffs; } /* init the per-variable state */ for (int i = 0; i < s->nb_vars; i++) { ret = var_ctx_init(ctx, i); if (ret < 0) return ret; } return 0; } void tdi_nlsolve_context_free(NLSolveContext **pctx) { NLSolveContext *ctx = *pctx; if (!ctx) return; if (ctx->priv) { free(ctx->priv->rhs); free(ctx->priv->delta); if (ctx->priv->eqs) for (int i = 0; i < ctx->nb_equations; i++) eq_ctx_free(ctx, i); free(ctx->priv->eqs); if (ctx->priv->vars) for (int i = 0; i < ctx->nb_equations; i++) var_ctx_free(ctx, i); free(ctx->priv->vars); free(ctx->priv->eq_coeffs); tdi_pssolve_context_free(&ctx->priv->ps_ctx); tp_free(&ctx->priv->tp_internal); } free(ctx->priv); free(ctx->basis); free(ctx->solve_order); free(ctx); *pctx = NULL; }