From 9c32be19bf768706a874aa68d7e90964d1d80f08 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 16 Sep 2022 11:25:35 +0200 Subject: Remove disabled and pointless OpenCL support. --- bicgstab.c | 220 ++----------------------------------------------------------- bicgstab.h | 13 +--- config.h | 1 - init.c | 81 ----------------------- nlsolve.c | 10 --- nlsolve.h | 5 -- pssolve.c | 2 +- pssolve.h | 3 - 8 files changed, 6 insertions(+), 329 deletions(-) diff --git a/bicgstab.c b/bicgstab.c index ab3e862..a3ef27a 100644 --- a/bicgstab.c +++ b/bicgstab.c @@ -18,11 +18,6 @@ #include "config.h" -#if HAVE_OPENCL -#include -#include -#endif - #include #include #include @@ -41,138 +36,8 @@ struct BiCGStabContext { double *p, *v, *y, *z, *t; double *res, *res0; double *k; - -#if HAVE_OPENCL - cl_context ocl_ctx; - cl_command_queue ocl_queue; - - cl_mem cl_x; - cl_mem cl_p, cl_v, cl_y, cl_z, cl_t; - cl_mem cl_res, cl_res0; - cl_mem cl_k, cl_mat; - cl_mem cl_rho, cl_alpha, cl_beta, cl_omega, cl_omega1; - cl_mem cl_tmp, cl_tmp1; -#endif }; -#if HAVE_OPENCL -static int solve_cl(BiCGStabContext *ctx, - const double *mat, const double *rhs, double *x) -{ - cl_command_queue ocl_q = ctx->ocl_queue; - const int N = ctx->N; - const double rhs_norm = cblas_dnrm2(N, rhs, 1); - - double rho, rho_prev = 1.0; - double omega[2] = { 1.0 }; - double alpha = 1.0; - - double err; - int i; - - cl_event events[8]; - - // upload the matrix and RHS - clEnqueueWriteBuffer(ocl_q, ctx->cl_res, 0, 0, N * sizeof(double), rhs, 0, NULL, &events[0]); - clEnqueueWriteBuffer(ocl_q, ctx->cl_mat, 0, 0, N * N * sizeof(double), mat, 0, NULL, &events[1]); - - // initialize the residual - clblasDgemv(CblasColMajor, CblasNoTrans, N, N, -1.0, - ctx->cl_mat, 0, N, ctx->cl_x, 0, 1, 1.0, ctx->cl_res, 0, 1, - 1, &ocl_q, 2, events, &events[2]); - clEnqueueCopyBuffer(ocl_q, ctx->cl_res, ctx->cl_res0, 0, 0, N * sizeof(double), - 1, &events[2], &events[3]); - clEnqueueCopyBuffer(ocl_q, ctx->cl_res, ctx->cl_p, 0, 0, N * sizeof(double), - 1, &events[2], &events[4]); - - clWaitForEvents(5, events); - // BARRIER - - for (i = 0; i < MAXITER; i++) { - clblasDdot(N, ctx->cl_rho, 0, ctx->cl_res, 0, 1, ctx->cl_res0, 0, 1, - ctx->cl_tmp, 1, &ocl_q, 0, NULL, &events[0]); - clEnqueueReadBuffer(ocl_q, ctx->cl_rho, 1, 0, sizeof(double), &rho, - 1, &events[0], NULL); - // BARRIER - - if (i) { - double beta = (rho / rho_prev) * (alpha / omega[0]); - - clblasDaxpy(N, -omega[0], ctx->cl_v, 0, 1, ctx->cl_p, 0, 1, - 1, &ocl_q, 0, NULL, &events[0]); - clblasDscal(N, beta, ctx->cl_p, 0, 1, - 1, &ocl_q, 1, &events[0], &events[1]); - clblasDaxpy(N, 1, ctx->cl_res, 0, 1, ctx->cl_p, 0, 1, - 1, &ocl_q, 1, &events[1], &events[0]); - clWaitForEvents(1, &events[0]); - // BARRIER - } - - clblasDgemv(CblasColMajor, CblasNoTrans, N, N, 1.0, - ctx->cl_k, 0, N, ctx->cl_p, 0, 1, 0.0, ctx->cl_y, 0, 1, - 1, &ocl_q, 0, NULL, &events[0]); - - clblasDgemv(CblasColMajor, CblasNoTrans, N, N, 1.0, - ctx->cl_mat, 0, N, ctx->cl_y, 0, 1, 0.0, ctx->cl_v, 0, 1, - 1, &ocl_q, 1, &events[0], &events[1]); - - clblasDdot(N, ctx->cl_alpha, 0, ctx->cl_res0, 0, 1, ctx->cl_v, 0, 1, - ctx->cl_tmp, 1, &ocl_q, 1, &events[1], &events[0]); - clEnqueueReadBuffer(ocl_q, ctx->cl_alpha, 1, 0, sizeof(double), &alpha, - 1, &events[0], NULL); - // BARRIER - - alpha = rho / alpha; - - clblasDaxpy(N, -alpha, ctx->cl_v, 0, 1, ctx->cl_res, 0, 1, - 1, &ocl_q, 0, NULL, &events[0]); - - clblasDgemv(CblasColMajor, CblasNoTrans, N, N, 1.0, - ctx->cl_k, 0, N, ctx->cl_res, 0, 1, 0.0, ctx->cl_z, 0, 1, - 1, &ocl_q, 1, &events[0], &events[1]); - clblasDgemv(CblasColMajor, CblasNoTrans, N, N, 1.0, - ctx->cl_mat, 0, N, ctx->cl_z, 0, 1, 0.0, ctx->cl_t, 0, 1, - 1, &ocl_q, 1, &events[1], &events[0]); - - clblasDdot(N, ctx->cl_omega, 0, ctx->cl_t, 0, 1, ctx->cl_res, 0, 1, - ctx->cl_tmp, 1, &ocl_q, 1, &events[0], &events[1]); - clblasDdot(N, ctx->cl_omega, 1, ctx->cl_t, 0, 1, ctx->cl_t, 0, 1, - ctx->cl_tmp1, 1, &ocl_q, 1, &events[0], &events[2]); - - clEnqueueReadBuffer(ocl_q, ctx->cl_omega, 1, 0, sizeof(omega), omega, - 2, &events[1], NULL); - // BARRIER - - omega[0] /= omega[1]; - - clblasDaxpy(N, alpha, ctx->cl_y, 0, 1, ctx->cl_x, 0, 1, - 1, &ocl_q, 0, NULL, &events[0]); - clblasDaxpy(N, omega[0], ctx->cl_z, 0, 1, ctx->cl_x, 0, 1, - 1, &ocl_q, 1, &events[0], &events[1]); - - clblasDaxpy(N, -omega[0], ctx->cl_t, 0, 1, ctx->cl_res, 0, 1, - 1, &ocl_q, 0, NULL, &events[0]); - clblasDnrm2(N, ctx->cl_tmp, 0, ctx->cl_res, 0, 1, ctx->cl_tmp1, - 1, &ocl_q, 1, &events[0], &events[2]); - clEnqueueReadBuffer(ocl_q, ctx->cl_tmp, 1, 0, sizeof(double), &err, - 1, &events[2], NULL); - clWaitForEvents(1, &events[1]); - // BARRIER - - if (err < BICGSTAB_TOL) - break; - - rho_prev = rho; - } - if (i == ctx->maxiter) - return -1; - - clEnqueueReadBuffer(ocl_q, ctx->cl_x, 1, 0, sizeof(double) * N, - x, 0, NULL, NULL); - return i; -} -#endif - // based on the wikipedia article // and http://www.netlib.org/templates/matlab/bicgstab.m static int solve_sw(BiCGStabContext *ctx, @@ -249,14 +114,7 @@ static int solve_sw(BiCGStabContext *ctx, int tdi_bicgstab_solve(BiCGStabContext *ctx, const double *mat, const double *rhs, double *x) { - int ret; - -#if HAVE_OPENCL - if (ctx->ocl_ctx) - ret = solve_cl(ctx, mat, rhs, x); - else -#endif - ret = solve_sw(ctx, mat, rhs, x); + int ret = solve_sw(ctx, mat, rhs, x); if (ret < 0) return ret; @@ -280,26 +138,13 @@ int tdi_bicgstab_solve(BiCGStabContext *ctx, const double *mat, const double *rh int tdi_bicgstab_init(BiCGStabContext *ctx, const double *k, const double *x0) { -#if HAVE_OPENCL - if (ctx->ocl_ctx) { - cl_event events[2]; - clEnqueueWriteBuffer(ctx->ocl_queue, ctx->cl_k, 0, 0, ctx->N * ctx->N * sizeof(double), - k, 0, NULL, &events[0]); - clEnqueueWriteBuffer(ctx->ocl_queue, ctx->cl_x, 0, 0, ctx->N * sizeof(double), - x0, 0, NULL, &events[1]); - clWaitForEvents(2, events); - } else -#endif - { - memcpy(ctx->x, x0, ctx->N * sizeof(*x0)); - memcpy(ctx->k, k, ctx->N * ctx->N * sizeof(*k)); - } + memcpy(ctx->x, x0, ctx->N * sizeof(*x0)); + memcpy(ctx->k, k, ctx->N * ctx->N * sizeof(*k)); return 0; } -int tdi_bicgstab_context_alloc(BiCGStabContext **pctx, int N, int maxiter, - cl_context ocl_ctx, cl_command_queue ocl_q) +int tdi_bicgstab_context_alloc(BiCGStabContext **pctx, int N, int maxiter) { BiCGStabContext *ctx; int ret = 0; @@ -311,39 +156,6 @@ int tdi_bicgstab_context_alloc(BiCGStabContext **pctx, int N, int maxiter, ctx->N = N; ctx->maxiter = maxiter; -#if HAVE_OPENCL - if (ocl_ctx) { - ctx->ocl_ctx = ocl_ctx; - ctx->ocl_queue = ocl_q; - -#define ALLOC(dst, size) \ -do { \ - ctx->dst = clCreateBuffer(ocl_ctx, 0, size, NULL, &ret); \ - if (ret != CL_SUCCESS) \ - goto fail; \ -} while (0) - - ALLOC(cl_x, N * sizeof(double)); - ALLOC(cl_p, N * sizeof(double)); - ALLOC(cl_v, N * sizeof(double)); - ALLOC(cl_y, N * sizeof(double)); - ALLOC(cl_z, N * sizeof(double)); - ALLOC(cl_t, N * sizeof(double)); - ALLOC(cl_res, N * sizeof(double)); - ALLOC(cl_res0, N * sizeof(double)); - ALLOC(cl_tmp, N * sizeof(double)); - ALLOC(cl_tmp1, N * 2 * sizeof(double)); - - ALLOC(cl_k, N * N * sizeof(double)); - ALLOC(cl_mat, N * N * sizeof(double)); - - ALLOC(cl_rho, sizeof(double)); - ALLOC(cl_alpha, sizeof(double)); - ALLOC(cl_beta, sizeof(double)); - ALLOC(cl_omega, 2 * sizeof(double)); - ALLOC(cl_omega1, sizeof(double)); - } else -#endif { ret |= posix_memalign((void**)&ctx->x, 32, sizeof(double) * N); ret |= posix_memalign((void**)&ctx->p, 32, sizeof(double) * N); @@ -383,30 +195,6 @@ void tdi_bicgstab_context_free(BiCGStabContext **pctx) free(ctx->res0); free(ctx->k); -#if HAVE_OPENCL - if (ctx->ocl_ctx) { - clReleaseMemObject(ctx->cl_x); - clReleaseMemObject(ctx->cl_p); - clReleaseMemObject(ctx->cl_v); - clReleaseMemObject(ctx->cl_y); - clReleaseMemObject(ctx->cl_z); - clReleaseMemObject(ctx->cl_t); - clReleaseMemObject(ctx->cl_res); - clReleaseMemObject(ctx->cl_res0); - clReleaseMemObject(ctx->cl_tmp); - clReleaseMemObject(ctx->cl_tmp1); - - clReleaseMemObject(ctx->cl_k); - clReleaseMemObject(ctx->cl_mat); - - clReleaseMemObject(ctx->cl_rho); - clReleaseMemObject(ctx->cl_alpha); - clReleaseMemObject(ctx->cl_beta); - clReleaseMemObject(ctx->cl_omega); - clReleaseMemObject(ctx->cl_omega1); - } -#endif - free(ctx); *pctx = NULL; } diff --git a/bicgstab.h b/bicgstab.h index cecc06d..249da18 100644 --- a/bicgstab.h +++ b/bicgstab.h @@ -21,23 +21,12 @@ #include "config.h" -#if HAVE_OPENCL -#include -#else -typedef void* cl_context; -typedef void* cl_command_queue; -#endif - typedef struct BiCGStabContext BiCGStabContext; /** * Allocate and initialize the solver for the NxN system. - * - * If the OpenCL context and command queue are provided (non-NULL), the solver - * will run using clBLAS. */ -int tdi_bicgstab_context_alloc(BiCGStabContext **ctx, int N, int maxiter, - cl_context ocl_ctx, cl_command_queue ocl_q); +int tdi_bicgstab_context_alloc(BiCGStabContext **ctx, int N, int maxiter); /** * Free the solver and all its internal state. diff --git a/config.h b/config.h index da1df56..228549d 100644 --- a/config.h +++ b/config.h @@ -6,6 +6,5 @@ #define ARCH_X86_64 1 #define ARCH_X86 1 #define HAVE_SCHED_GETAFFINITY 1 -#define HAVE_OPENCL 0 #define TD_VERIFY 0 #define TD_POLAR 1 diff --git a/init.c b/init.c index 26b3b27..0370d46 100644 --- a/init.c +++ b/init.c @@ -26,11 +26,6 @@ #include -#if HAVE_OPENCL -#include -#include -#endif - #include #include "basis.h" @@ -71,63 +66,6 @@ double tdi_scalarproduct_metric_sse3(size_t len1, size_t len2, double *mat, double tdi_scalarproduct_metric_c(size_t len1, size_t len2, double *mat, double *vec1, double *vec2); -static void init_opencl(TDPriv *s) -#if HAVE_OPENCL -{ - int err, count; - cl_platform_id platform; - cl_context_properties props[3]; - cl_device_id ocl_device; - - err = clGetPlatformIDs(1, &platform, &count); - if (err != CL_SUCCESS || count < 1) { - tdi_log(&s->logger, 0, "Could not get an OpenCL platform ID\n"); - return; - } - - err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ocl_device, &count); - if (err != CL_SUCCESS || count < 1) { - tdi_log(&s->logger, 0, "Could not get an OpenCL device ID\n"); - return; - } - - props[0] = CL_CONTEXT_PLATFORM; - props[1] = (cl_context_properties)platform; - props[2] = 0; - - s->ocl_ctx = clCreateContext(props, 1, &ocl_device, NULL, NULL, &err); - if (err != CL_SUCCESS || !s->ocl_ctx) { - tdi_log(&s->logger, 0, "Could not create an OpenCL context\n"); - return; - } - - s->ocl_queue = clCreateCommandQueue(s->ocl_ctx, ocl_device, 0, &err); - if (err != CL_SUCCESS || !s->ocl_queue) { - tdi_log(&s->logger, 0, "Could not create an OpenCL command queue: %d\n", err); - goto fail; - } - - err = clblasSetup(); - if (err != CL_SUCCESS) { - tdi_log(&s->logger, 0, "Error setting up clBLAS\n"); - goto fail; - } - - return; -fail: - if (s->ocl_queue) - clReleaseCommandQueue(s->ocl_queue); - s->ocl_queue = 0; - - if (s->ocl_ctx) - clReleaseContext(s->ocl_ctx); - s->ocl_ctx = 0; -} -#else -{ -} -#endif - static const enum BasisFamily basis_sets[NB_EQUATIONS][2] = { { BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN }, { BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN }, @@ -157,8 +95,6 @@ static int teukolsky_init_check_options(TDContext *td) if (ret < 0) return ret; - init_opencl(s); - s->logger.log = log_callback; s->logger.opaque = td; @@ -276,11 +212,6 @@ static int nlsolve_alloc(const TDContext *td, NLSolveContext **pnl) memcpy(nl->basis, s->basis, sizeof(s->basis)); memcpy(nl->solve_order, s->basis_order, sizeof(s->basis_order)); -#if HAVE_OPENCL - nl->ocl_ctx = s->ocl_ctx; - nl->ocl_queue = s->ocl_queue; -#endif - ret = tdi_nlsolve_context_init(nl); if (ret < 0) { tdi_log(&s->logger, 0, "Error initializing the non-linear solver\n"); @@ -491,13 +422,6 @@ void td_context_free(TDContext **ptd) tp_free(&s->tp); -#if HAVE_OPENCL - if (s->ocl_queue) - clReleaseCommandQueue(s->ocl_queue); - if (s->ocl_ctx) - clReleaseContext(s->ocl_ctx); -#endif - for (int i = 0; i < ARRAY_ELEMS(s->basis); i++) for (int j = 0; j < ARRAY_ELEMS(s->basis[i]); j++) tdi_basis_free(&s->basis[i][j]); @@ -669,11 +593,6 @@ static int lapse_solve_max(const TDContext *td) nl->solve_order[0][0] = priv->basis_order[0][0]; nl->solve_order[0][1] = priv->basis_order[0][1]; -#if HAVE_OPENCL - nl->ocl_ctx = priv->ocl_ctx; - nl->ocl_queue = priv->ocl_queue; -#endif - ret = tdi_nlsolve_context_init(nl); if (ret < 0) { tdi_log(&priv->logger, 0, "Error initializing the non-linear solver\n"); diff --git a/nlsolve.c b/nlsolve.c index 6f89e8b..ee08ec7 100644 --- a/nlsolve.c +++ b/nlsolve.c @@ -29,11 +29,6 @@ #include -#if HAVE_OPENCL -#include -#include -#endif - #include #include "basis.h" @@ -486,11 +481,6 @@ int tdi_nlsolve_context_init(NLSolveContext *ctx) s->ps_ctx->logger = ctx->logger; s->ps_ctx->tp = s->tp; -#if HAVE_OPENCL - s->ps_ctx->ocl_ctx = ctx->ocl_ctx; - s->ps_ctx->ocl_queue = ctx->ocl_queue; -#endif - memcpy(s->ps_ctx->basis, ctx->basis, s->nb_vars * sizeof(*ctx->basis)); memcpy(s->ps_ctx->solve_order, ctx->solve_order, s->nb_vars * sizeof(*ctx->solve_order)); diff --git a/nlsolve.h b/nlsolve.h index eaa2b64..d0e3c38 100644 --- a/nlsolve.h +++ b/nlsolve.h @@ -84,11 +84,6 @@ typedef struct NLSolveContext { */ double *(*colloc_grid)[2]; -#if HAVE_OPENCL - cl_context ocl_ctx; - cl_command_queue ocl_queue; -#endif - // solver parameters unsigned int maxiter; double atol; diff --git a/pssolve.c b/pssolve.c index 36e09cf..5de63f6 100644 --- a/pssolve.c +++ b/pssolve.c @@ -414,7 +414,7 @@ int tdi_pssolve_context_init(PSSolveContext *ctx) s->steps_since_inverse = INT_MAX; /* init the BiCGStab solver */ - ret = tdi_bicgstab_context_alloc(&s->bicgstab, N, 64, ctx->ocl_ctx, ctx->ocl_queue); + ret = tdi_bicgstab_context_alloc(&s->bicgstab, N, 64); if (ret < 0) return ret; diff --git a/pssolve.h b/pssolve.h index 160e179..b350a1a 100644 --- a/pssolve.h +++ b/pssolve.h @@ -125,9 +125,6 @@ typedef struct PSSolveContext { */ TPContext *tp; - cl_context ocl_ctx; - cl_command_queue ocl_queue; - uint64_t lu_solves_count; uint64_t lu_solves_time; -- cgit v1.2.3