From e8a9716dc8f5a069be7485fae8b8ef3e92cf152b Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Tue, 22 Nov 2016 15:45:58 +0100
Subject: qms: make sure the context is always initialized

---
 src/qms.c | 116 ++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 72 insertions(+), 44 deletions(-)

diff --git a/src/qms.c b/src/qms.c
index b5b8100..71c5cba 100644
--- a/src/qms.c
+++ b/src/qms.c
@@ -176,12 +176,78 @@ static CoordPatch *get_coord_patch(QMSContext *ms,
     posix_memalign((void**)&cp->transform_z, 32, sizeof(*cp->transform_z) * cctkGH->cctk_lsh[2] * ms->solver->nb_coeffs[0]);
 #endif
 
+#if 0
+    nb_threads = getenv("OMP_NUM_THREADS");
+    if (nb_threads)
+        cp->nb_threads = atoi(nb_threads);
+    if (cp->nb_threads <= 0)
+        cp->nb_threads = 1;
+    qms_threadpool_init(&cp->tp, cp->nb_threads);
+    cp->ec = calloc(cp->nb_threads, sizeof(*cp->ec));
+
+    block_size = (ms->gh->cctk_lsh[2] + cp->nb_threads - 1) / cp->nb_threads;
+
+    for (int i = 0; i < cp->nb_threads; i++) {
+        EvalContext *ec = &cp->ec[i];
+
+        ec->qms = ms;
+
+        ec->nb_coeffs[0] = ms->solver->nb_coeffs[0];
+        ec->nb_coeffs[1] = ms->solver->nb_coeffs[1];
+
+        posix_memalign((void**)&ec->eval_tmp[0], 32, sizeof(*ec->eval_tmp[0]) * ec->nb_coeffs[0]);
+        posix_memalign((void**)&ec->eval_tmp[1], 32, sizeof(*ec->eval_tmp[1]) * ec->nb_coeffs[1]);
+
+        ec->x_idx_start = 0;
+        ec->x_idx_end   = ms->gh->cctk_lsh[0];
+
+        ec->z_idx_start = block_size * i;
+        ec->z_idx_end   = MIN(block_size * (i + 1), ms->gh->cctk_lsh[2]);
+    }
+#endif
+
     ms->nb_patches++;
     return cp;
 }
 
 static QMSContext *qms_context;
 
+static int context_init(cGH *cctkGH)
+{
+    QMSContext *qms;
+    int ret;
+
+    DECLARE_CCTK_ARGUMENTS;
+    DECLARE_CCTK_PARAMETERS;
+
+    qms = calloc(1, sizeof(*qms));
+    if (!qms)
+        return -ENOMEM;
+
+    qms->gh = cctkGH;
+
+    ret = qms_solver_init(&qms->solver, cctkGH, basis_order_r, basis_order_z,
+                          scale_factor, filter_power, 0.0);
+    if (ret < 0)
+        return ret;
+
+    ret = posix_memalign((void**)&qms->coeffs_eval, 32,
+                         basis_order_r * basis_order_z * sizeof(*qms->coeffs_eval));
+    if (ret)
+        return -ENOMEM;
+
+    for (int i = 0; i < ARRAY_ELEMS(qms->solution_cache); i++) {
+        ret = posix_memalign((void**)&qms->solution_cache[i].coeffs, 32,
+                             basis_order_r * basis_order_z * sizeof(*qms->solution_cache[i].coeffs));
+        if (ret)
+            return -ENOMEM;
+    }
+
+    qms_context = qms;
+
+    return 0;
+}
+
 void quasimaximal_slicing_axi_solve(CCTK_ARGUMENTS)
 {
     QMSContext *ms;
@@ -191,6 +257,9 @@ void quasimaximal_slicing_axi_solve(CCTK_ARGUMENTS)
 
     double time;
 
+    if (!qms_context)
+        context_init(cctkGH);
+
     ms = qms_context;
 
     time = cctkGH->cctk_time / ms->gh->cctk_delta_time;
@@ -234,6 +303,9 @@ void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS)
     double *coeffs = NULL;
     int i, ret;
 
+    if (!qms_context)
+        context_init(cctkGH);
+
     ms = qms_context;
 
     cp = get_coord_patch(ms, x, y, z, scale_factor, scale_power);
@@ -315,7 +387,6 @@ void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS)
             W[CCTK_GFINDEX3D(cctkGH, i, cp->y_idx, j)] = val;
         }
 #else
-    //memcpy(alp, cp->one, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*alp));
     memset(W, 0, cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2] * sizeof(*W));
     cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                 ms->solver->nb_coeffs[0], cctk_lsh[2], ms->solver->nb_coeffs[1], 1.0,
@@ -325,13 +396,6 @@ void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS)
                 cctk_lsh[1] * cctk_lsh[0], cctk_lsh[2], ms->solver->nb_coeffs[0], 1.0,
                 cp->basis_val_r, cctk_lsh[0] * cctk_lsh[1], cp->transform_z, ms->solver->nb_coeffs[0],
                 1.0, W, cctk_lsh[0] * cctk_lsh[1]);
-
-//    {
-//        const int grid_size = cctk_lsh[0] * cctk_lsh[1] * cctk_lsh[2];
-//#pragma omp parallel for
-//        for (int i = 0; i < grid_size; i++)
-//            W[i] *= cp->w_scale[i];
-//    }
 #endif
 
     ms->grid_expand_time += gettime() - expand_start;
@@ -352,42 +416,6 @@ void quasimaximal_slicing_axi_eval(CCTK_ARGUMENTS)
     }
 }
 
-static int context_init(cGH *cctkGH)
-{
-    QMSContext *qms;
-    int ret;
-
-    DECLARE_CCTK_ARGUMENTS;
-    DECLARE_CCTK_PARAMETERS;
-
-    qms = calloc(1, sizeof(*qms));
-    if (!qms)
-        return -ENOMEM;
-
-    qms->gh = cctkGH;
-
-    ret = qms_solver_init(&qms->solver, cctkGH, basis_order_r, basis_order_z,
-                          scale_factor, filter_power, 0.0);
-    if (ret < 0)
-        return ret;
-
-    ret = posix_memalign((void**)&qms->coeffs_eval, 32,
-                         basis_order_r * basis_order_z * sizeof(*qms->coeffs_eval));
-    if (ret)
-        return -ENOMEM;
-
-    for (int i = 0; i < ARRAY_ELEMS(qms->solution_cache); i++) {
-        ret = posix_memalign((void**)&qms->solution_cache[i].coeffs, 32,
-                             basis_order_r * basis_order_z * sizeof(*qms->solution_cache[i].coeffs));
-        if (ret)
-            return -ENOMEM;
-    }
-
-    qms_context = qms;
-
-    return 0;
-}
-
 void qms_init(CCTK_ARGUMENTS)
 {
     DECLARE_CCTK_ARGUMENTS;
-- 
cgit v1.2.3