#include #include #include #include #include #include #include #include "cctk.h" #include "cctk_Arguments.h" #include "cctk_Parameters.h" #include "util_ErrorCodes.h" #include "util_Table.h" #include "carpet.hh" #include "TATelliptic.h" namespace Carpet { // TODO: fix this void Restrict (const cGH* cgh); }; namespace CarpetCG { using namespace std; using namespace Carpet; extern "C" { int CarpetCG_register (); } int CarpetCG_solve (const cGH * const cctkGH, const int * const var, const int * const res, const int nvars, const int options_table, const calcfunc calcres, const calcfunc applybnds, void * const userdata); namespace common { const int * var; const int * res; int nvars; int options_table; calcfunc calcres; calcfunc applybnds; void * userdata; int ierr; vector nboundaryzones(2*dim); CCTK_REAL factor; vector factors; vector fromindex; // Can't pass things through CallLocalFunction. vector toindex; // Instead assume everything is going from a GF to a GF. CCTK_REAL realconstant; // With only one required constant CCTK_REAL realoutput; // And one output CCTK_REAL realoutput_count; // And one _more_ output void call_calcres (cGH * const cctkGH) { if (ierr) return; ierr = calcres (cctkGH, options_table, userdata); } void call_applybnds (cGH * const cctkGH) { if (ierr) return; ierr = applybnds (cctkGH, options_table, userdata); } void global_sum (cGH const * restrict const cctkGH, CCTK_REAL * restrict const var) { static int initialised = 0; static int sum_handle; CCTK_REAL local, global; int ierr; if (! initialised) { initialised = 1; sum_handle = CCTK_ReductionArrayHandle ("sum"); assert (sum_handle >= 0); } local = *var; ierr = CCTK_ReduceLocScalar (cctkGH, -1, sum_handle, &local, &global, CCTK_VARIABLE_REAL); assert (!ierr); *var = global; } // Copy void call_copy (cGH * const cctkGH) { cGroup groupdata; ierr = CCTK_GroupData (CCTK_GroupIndexFromVarI(var[0]), &groupdata); assert (!ierr); cGroupDynamicData groupdyndata; ierr = CCTK_GroupDynamicData (cctkGH, CCTK_GroupIndexFromVarI(var[0]), &groupdyndata); assert (!ierr); const int thedim = groupdata.dim; assert (thedim>=0 && thedim<=dim); assert (thedim == groupdyndata.dim); int lsh[dim], nghostzones[dim]; for (int d=0; d=0); assert (nghostzones[d] >= 0 && 2*nghostzones[d] <= lsh[d]); } int const lsize = lsh[0] * lsh[1] * lsh[2]; int n; double chksum=0; double chksum2=0; for (n=0; n=0 && thedim<=dim); assert (thedim == groupdyndata.dim); int lsh[dim], nghostzones[dim], bbox[2*dim]; int lbnd[dim], ubnd[dim]; for (int d=0; d=0); assert (nghostzones[d] >= 0 && 2*nghostzones[d] <= lsh[d]); } for (int d=0; d=0) ? nboundaryzones[2*d] : nghostzones[d]) : nghostzones[d]); ubnd[d] = lsh[d] - (bbox[2*d+1] ? ( (nboundaryzones[2*d+1]>=0) ? nboundaryzones[2*d+1] : nghostzones[d]) : nghostzones[d]); } int const lsize = lsh[0] * lsh[1] * lsh[2]; int n; int i, j, k; int ind; for (n=0; n=0 && thedim<=dim); assert (thedim == groupdyndata.dim); int lsh[dim], nghostzones[dim], bbox[2*dim]; int lbnd[dim], ubnd[dim]; for (int d=0; d=0); assert (nghostzones[d] >= 0 && 2*nghostzones[d] <= lsh[d]); } for (int d=0; d=0) ? nboundaryzones[2*d] : nghostzones[d]) : nghostzones[d]); ubnd[d] = lsh[d] - (bbox[2*d+1] ? ( (nboundaryzones[2*d+1]>=0) ? nboundaryzones[2*d+1] : nghostzones[d]) : nghostzones[d]); } int const lsize = lsh[0] * lsh[1] * lsh[2]; int n; int i, j, k; int ind; for (n=0; n=0 && thedim<=dim); assert (thedim == groupdyndata.dim); int lsh[dim], nghostzones[dim], bbox[2*dim]; int lbnd[dim], ubnd[dim]; for (int d=0; d=0); assert (nghostzones[d] >= 0 && 2*nghostzones[d] <= lsh[d]); } for (int d=0; d=0) ? nboundaryzones[2*d] : nghostzones[d]) : nghostzones[d]); ubnd[d] = lsh[d] - (bbox[2*d+1] ? ( (nboundaryzones[2*d+1]>=0) ? nboundaryzones[2*d+1] : nghostzones[d]) : nghostzones[d]); } int n; int i, j, k; int ind; CCTK_REAL const alpha = realconstant; double chksum=0; double chksum2=0; for (n=0; n=0 && thedim<=dim); assert (thedim == groupdyndata.dim); int lsh[dim], nghostzones[dim], bbox[2*dim]; int lbnd[dim], ubnd[dim]; for (int d=0; d=0); assert (nghostzones[d] >= 0 && 2*nghostzones[d] <= lsh[d]); } for (int d=0; d=0) ? nboundaryzones[2*d] : nghostzones[d]) : nghostzones[d]); ubnd[d] = lsh[d] - (bbox[2*d+1] ? ( (nboundaryzones[2*d+1]>=0) ? nboundaryzones[2*d+1] : nghostzones[d]) : nghostzones[d]); } int n; int i, j, k; int ind; CCTK_REAL const alpha = realconstant; for (n=0; n=0 && thedim<=dim); assert (thedim == groupdyndata.dim); int lsh[dim], nghostzones[dim], bbox[2*dim]; int lbnd[dim], ubnd[dim]; for (int d=0; d=0); assert (nghostzones[d] >= 0 && 2*nghostzones[d] <= lsh[d]); } for (int d=0; d=0) ? nboundaryzones[2*d] : nghostzones[d]) : nghostzones[d]); ubnd[d] = lsh[d] - (bbox[2*d+1] ? ( (nboundaryzones[2*d+1]>=0) ? nboundaryzones[2*d+1] : nghostzones[d]) : nghostzones[d]); } int n; int i, j, k; int ind; CCTK_REAL res; double chksum=0; double chksum2=0; res = 0; realoutput_count=0; // Shift the count to here instead - what a waste of space for (k=0; k 0); assert (var); assert (res); for (n=0; n= 0 && var[n] < CCTK_NumVars()); assert (CCTK_GroupTypeFromVarI(var[n]) == CCTK_GF || CCTK_GroupTypeFromVarI(var[n]) == CCTK_ARRAY); assert (CCTK_GroupDimFromVarI(var[n]) <= dim); assert (CCTK_VarTypeI(var[n]) == CCTK_VARIABLE_REAL); assert (CCTK_QueryGroupStorageI(cctkGH, CCTK_GroupIndexFromVarI(var[n]))); } for (n=0; n= 0 && res[n] < CCTK_NumVars()); assert (CCTK_GroupTypeFromVarI(res[n]) == CCTK_GF || CCTK_GroupTypeFromVarI(res[n]) == CCTK_ARRAY); assert (CCTK_GroupDimFromVarI(res[n]) <= dim); assert (CCTK_VarTypeI(res[n]) == CCTK_VARIABLE_REAL); assert (CCTK_QueryGroupStorageI(cctkGH, CCTK_GroupIndexFromVarI(res[n]))); } for (n=0; n= 0); assert (calcres); assert (applybounds); nelems = Util_TableGetIntArray (options_table, 2*dim, &(common::nboundaryzones[0]), "nboundaryzones"); if (nelems == UTIL_ERROR_TABLE_NO_SUCH_KEY) { for (d=0; d= 0); assert (maxiters2 >= 0); assert (smaxiters >= 0); assert (minerror >= 0); assert (sminerror >= 0); assert (stepsize > 0); // Switch to global mode BEGIN_GLOBAL_MODE(cctkGH) { // Fill common block common::var = var; common::res = res; common::nvars = nvars; common::options_table = options_table; common::calcres = calcres; common::applybnds = applybounds; common::userdata = userdata; if (verbose || veryverbose) { CCTK_VInfo (CCTK_THORNSTRING, "Solving through nonlinear conjugate gradient iterations"); } /* * Literature: * * Jonathan Richard Shewchuk, "An Introduction to the Conjugate * Gradient Method Without the Agonizing Pain", Technical Report * CMU-CS-94-125, Aug. 1994 * * Available from http://www-2.cs.cmu.edu/~jrs/jrspapers.html */ /* * Notation: * * here there meaning * * iter i current cg iteration * siter j current secant iteration * iter2 k current cg iteration since last restart * maxiters2 n PARA cg iterations before restart * alpha secant step size * beta cg step size * delta cg distance * epsilon PARA secant error tolerance (straight epsilon) * minerror vareps PARA cg error tolerance (curved epsilon) * eta secant distance * stepsize sigma PARA secand method step parameter * * dir d cg direction * -calcres f' PARA nonlinear operator * res r cg residual * prs s preconditioned cg residual * var x INIT unknown variable * * -wgt M preconditioning matrix (not in this version) */ /* * Algorithm: * (Preconditioned nonlinear conjugate gradients with secant and * Polak-Ribière) * * 01. i <= 0 * 02. k <= 0 * 03. r <= - f'(x) * 04. calculate M \approx f''(x) * 05. s <= M^-1 r * 06. d <= s * 07. delta_new <= r^T d * 08. delta_0 <= delta_new * 09. WHILE i < i_max AND delta_new > vareps^2 delta_0 DO * 10. j <= 0 * 11. delta_d <= d^T d * 12. alpha <= - sigma_0 * 13. eta_prev <= [f'(x + sigma_0 d)]^T d * 14. DO * 15. eta <= [f'(x)]^T d * 16. alpha <= alpha (eta) / (eta_prev - eta) * 17. x <= x + alpha d * 18. eta_prev <= eta * 19. j <= j + 1 * 20. WHILE j < j_max AND alpha^2 delta_d > epsilon^2 * 21. r <= - f'(x) * 22. delta_old <= delta_new * 23. delta_mid <= r^T s * 24. calculate M \approx f''(x) * 25. s <= M^-1 r * 26. delta_new <= r^T s * 27. beta <= (delta_new - delta_mid) / (delta_old) * 28. k <= k + 1 * 29. IF k = n OR beta <= 0 THEN * 30. d <= s * 31. k <= 0 * 32. ELSE * 33. d <= s + beta d * 34. i <= i + 1 */ // Pointers to grid variables and temporary storage vector varptrs(nvars); vector resptrs(nvars); // CCTK_REAL * restrict * restrict prsptrs; // CCTK_REAL * restrict * restrict dirptrs; // Get storage pointers for (n=0; n= nvars); vector prsptrs(cg_maxsolvevars); vector dirptrs(cg_maxsolvevars); for (n=0; n= 0)&&(prsptrs[n] < CCTK_NumVars())); sprintf(varname,"CarpetCG::dirvars[%d]",n); dirptrs[n] = CCTK_VarIndex(varname); assert ((dirptrs[n] >= 0)&&(dirptrs[n] < CCTK_NumVars())); } nexttime = time(0); /* 01. i <= 0 */ iter = 0; /* 02. k <= 0 */ iter2 = 0; /* 03. r <= - f'(x) */ /* 04. calculate M \approx f''(x) */ common::ierr = 0; CallLocalFunction((cGH *)cctkGH, common::call_calcres); ierr = common::ierr; assert (! ierr); for (int n = 0; n < nvars; n++) { common::fromindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_correct_residual_sign); /* 05. s <= M^-1 r */ // No preconditioning for (int n = 0; n < nvars; n++) { common::fromindex[n] = resptrs[n]; common::toindex[n] = prsptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } // cout << "dirptrs " << dirptrs[0] << endl; CallLocalFunction((cGH *)cctkGH, common::call_apply_preconditioner); // cout << "dirptrs " << dirptrs[0] << endl; /* 06. d <= s */ for (int n = 0; n < nvars; n++) { common::fromindex[n] = prsptrs[n]; common::toindex[n] = dirptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } // cout << "dirptrs " << dirptrs[0] << endl; CallLocalFunction((cGH *)cctkGH, common::call_copy); // cout << "dirptrs " << dirptrs[0] << endl; /* 07. delta_new <= r^T d */ // output (cctkGH, var, res, wgt, nvars, iter); common::realoutput_count = 0; for (int n = 0; n < nvars; n++) { common::fromindex[n] = prsptrs[n]; common::toindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } // cout << "dirptrs " << dirptrs[0] << endl; CallLocalFunction((cGH *)cctkGH, common::call_dot_product); delta_new = common::realoutput; gsize = common::realoutput_count; // cout << "delta_new " << delta_new << " gsize " << gsize << endl; for (int n = 0; n < nvars; n++) { common::fromindex[n] = resptrs[n]; common::toindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_dot_product); epsilon = common::realoutput; // cout << "epsilon " << epsilon << endl; /* 08. delta_0 <= delta_new */ delta_0 = delta_new; /* 09. WHILE i < i_max AND delta_new > vareps^2 delta_0 DO */ while (iter < maxiters && epsilon / (nvars * gsize) > ipow(minerror,2)) { if (verbose || veryverbose) { currenttime = time(0); if (veryverbose || (iter % outevery == 0 && currenttime >= nexttime)) { CCTK_VInfo (CCTK_THORNSTRING, "Iteration %d (%d since restart): residual is %g (%g,%d,%g)", iter, iter2, (double)sqrt(epsilon / (nvars * gsize)), (double)epsilon,nvars,(double)gsize); if (outeveryseconds > 0) { while (nexttime <= currenttime) nexttime += outeveryseconds; } } } /* 10. j <= 0 */ siter = 0; /* 11. delta_d <= d^T d */ for (int n = 0; n < nvars; n++) { common::fromindex[n] = dirptrs[n]; common::toindex[n] = dirptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_dot_product); delta_d = common::realoutput; // cout << "delta_d " << delta_d << endl; /* 12. alpha <= - sigma_0 */ alpha = - stepsize; sum_alpha = alpha; do_abort = 0; /* 13. eta_prev <= [f'(x + sigma_0 d)]^T d */ for (int n = 0; n < nvars; n++) { common::fromindex[n] = dirptrs[n]; common::toindex[n] = varptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } common::realconstant = -alpha; // cout << "Add scaled, const " << -alpha << endl; CallLocalFunction((cGH *)cctkGH, common::call_add_scaled); common::ierr = 0; CallLocalFunction((cGH *)cctkGH, common::call_applybnds); assert (! ierr); ierr = 0; CallLocalFunction((cGH *)cctkGH, common::call_calcres); assert (! ierr); for (int n = 0; n < nvars; n++) { common::fromindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_correct_residual_sign); for (int n = 0; n < nvars; n++) { common::fromindex[n] = dirptrs[n]; common::toindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_dot_product); eta = - common::realoutput; // cout << "eta " << eta << endl; for (int n = 0; n < nvars; n++) { common::fromindex[n] = dirptrs[n]; common::toindex[n] = varptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } common::realconstant = alpha; // cout << "Add scaled, const " << alpha << endl; CallLocalFunction((cGH *)cctkGH, common::call_add_scaled); ierr = 0; CallLocalFunction((cGH *)cctkGH, common::call_applybnds); assert (! ierr); eta_prev = eta; /* 14. DO */ do { if (veryverbose) { CCTK_VInfo (CCTK_THORNSTRING, " Secant iteration %d: step size is %g, orthogonality is %g", siter, (double)alpha, (double)sqrt(fabs(eta_prev) / (nvars * gsize))); } /* 15. eta <= [f'(x)]^T d */ ierr = 0; CallLocalFunction((cGH *)cctkGH, common::call_calcres); assert (! ierr); for (int n = 0; n < nvars; n++) { common::fromindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_correct_residual_sign); for (int n = 0; n < nvars; n++) { common::fromindex[n] = dirptrs[n]; common::toindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_dot_product); eta = - common::realoutput; // cout << "eta " << eta << endl; /* 16. alpha <= alpha (eta) / (eta_prev - eta) */ alpha_old = alpha; alpha *= eta / (eta_prev - eta); sum_alpha += alpha; if (veryverbose) { CCTK_VInfo (CCTK_THORNSTRING, " Changing step size, iteration %d: was %g, now %g (%g, %g)", siter, (double)alpha_old, (double)alpha, (double)eta, (double)eta_prev); } assert (sum_alpha > - maxstepsize); if (sum_alpha > maxstepsize) { alpha = maxstepsize - alpha_old; do_abort = 1; if (verbose) { CCTK_VInfo (CCTK_THORNSTRING, " Secant iteration %d: limiting total step size", siter); } } /* 17. x <= x + alpha d */ for (int n = 0; n < nvars; n++) { common::fromindex[n] = dirptrs[n]; common::toindex[n] = varptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } common::realconstant = alpha; // cout << "Add scaled, const " << alpha << endl; CallLocalFunction((cGH *)cctkGH, common::call_add_scaled); ierr = 0; CallLocalFunction((cGH *)cctkGH, common::call_applybnds); assert (! ierr); /* 18. eta_prev <= eta */ eta_prev = eta; /* 19. j <= j + 1 */ ++ siter; /* 20. WHILE j < j_max AND alpha^2 delta_d > epsilon^2 */ } while (siter < smaxiters && ipow(alpha,2) * delta_d > ipow(sminerror,2) && !do_abort); if (veryverbose) { CCTK_VInfo (CCTK_THORNSTRING, " Secant iteration %d: step size is %g, orthogonality is %g", siter, (double)alpha, (double)sqrt(fabs(eta_prev) / (nvars * gsize))); } /* 21. r <= - f'(x) */ /* 24. calculate M \approx f''(x) */ ierr = 0; CallLocalFunction((cGH *)cctkGH, common::call_calcres); assert (! ierr); for (int n = 0; n < nvars; n++) { common::fromindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_correct_residual_sign); /* 23. delta_mid <= r^T s */ for (int n = 0; n < nvars; n++) { common::fromindex[n] = prsptrs[n]; common::toindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_dot_product); delta_mid = common::realoutput; // cout << "delta_mid " << delta_mid << endl; /* 25. s <= M^-1 r */ for (int n = 0; n < nvars; n++) { common::fromindex[n] = resptrs[n]; common::toindex[n] = prsptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_apply_preconditioner); /* 22. delta_old <= delta_new */ delta_old = delta_new; /* 26. delta_new <= r^T s */ // output (cctkGH, var, res, wgt, nvars, iter+1); for (int n = 0; n < nvars; n++) { common::fromindex[n] = prsptrs[n]; common::toindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_dot_product); delta_new = common::realoutput; // cout << "delta_new " << delta_new << endl; for (int n = 0; n < nvars; n++) { common::fromindex[n] = resptrs[n]; common::toindex[n] = resptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_dot_product); epsilon = common::realoutput; // cout << "epsilon " << epsilon << endl; /* 27. beta <= (delta_new - delta_mid) / (delta_old) */ beta = (delta_new - delta_mid) / delta_old; /* 28. k <= k + 1 */ ++ iter2; /* 29. IF k = n OR beta <= 0 THEN */ if (iter2 >= maxiters2 || beta <= 0 || do_abort) { /* Restart */ /* 30. d <= s */ for (int n = 0; n < nvars; n++) { common::fromindex[n] = prsptrs[n]; common::toindex[n] = dirptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } CallLocalFunction((cGH *)cctkGH, common::call_copy); /* 31. k <= 0 */ iter2 = 0; /* 32. ELSE */ } else { /* Continue with CG iterations */ /* 33. d <= s + beta d */ for (int n = 0; n < nvars; n++) { common::fromindex[n] = prsptrs[n]; common::toindex[n] = dirptrs[n]; } for (int n = nvars; n < cg_maxsolvevars; n++) { common::fromindex[n] = -1; common::toindex[n] = -1; } common::realconstant = beta; // cout << "beta " << beta << endl; CallLocalFunction((cGH *)cctkGH, common::call_scale_and_add); } /* if iter2 */ /* 34. i <= i + 1 */ ++ iter; } /* for iter */ if (verbose || veryverbose) { CCTK_VInfo (CCTK_THORNSTRING, "Iteration %d (%d since restart): residual is %g", iter, iter2, (double)sqrt(epsilon / (nvars * gsize))); } /* Free temporary memory */ // for (n=0; n