about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Anton Khirnov <anton@khirnov.net> 2022-09-16 11:25:35 +0200
committer Anton Khirnov <anton@khirnov.net> 2022-09-16 11:27:10 +0200
commit 9c32be19bf768706a874aa68d7e90964d1d80f08 (patch)
tree 256ac6229adfd7df88c61c8b8f7877ff8d9d1e0c
parent b43cc61bc85781e384eb183c3818b08421f0b84f (diff)
Remove disabled and pointless OpenCL support.
-rw-r--r-- bicgstab.c 220
-rw-r--r-- bicgstab.h 13
-rw-r--r-- config.h 1
-rw-r--r-- init.c 81
-rw-r--r-- nlsolve.c 10
-rw-r--r-- nlsolve.h 5
-rw-r--r-- pssolve.c 2
-rw-r--r-- pssolve.h 3
8 files changed, 6 insertions, 329 deletions
diff --git a/bicgstab.c b/bicgstab.c
index ab3e862..a3ef27a 100644
--- a/bicgstab.c
+++ b/bicgstab.c
@@ -18,11 +18,6 @@
#include "config.h"
-#if HAVE_OPENCL
-#include <cl.h>
-#include <clBLAS.h>
-#endif
-
#include <cblas.h>
#include <errno.h>
#include <stdlib.h>
@@ -41,138 +36,8 @@ struct BiCGStabContext {
double *p, *v, *y, *z, *t;
double *res, *res0;
double *k;
-
-#if HAVE_OPENCL
- cl_context ocl_ctx;
- cl_command_queue ocl_queue;
-
- cl_mem cl_x;
- cl_mem cl_p, cl_v, cl_y, cl_z, cl_t;
- cl_mem cl_res, cl_res0;
- cl_mem cl_k, cl_mat;
- cl_mem cl_rho, cl_alpha, cl_beta, cl_omega, cl_omega1;
- cl_mem cl_tmp, cl_tmp1;
-#endif
};
-#if HAVE_OPENCL
-static int solve_cl(BiCGStabContext *ctx,
- const double *mat, const double *rhs, double *x)
-{
- cl_command_queue ocl_q = ctx->ocl_queue;
- const int N = ctx->N;
- const double rhs_norm = cblas_dnrm2(N, rhs, 1);
-
- double rho, rho_prev = 1.0;
- double omega[2] = { 1.0 };
- double alpha = 1.0;
-
- double err;
- int i;
-
- cl_event events[8];
-
- // upload the matrix and RHS
- clEnqueueWriteBuffer(ocl_q, ctx->cl_res, 0, 0, N * sizeof(double), rhs, 0, NULL, &events[0]);
- clEnqueueWriteBuffer(ocl_q, ctx->cl_mat, 0, 0, N * N * sizeof(double), mat, 0, NULL, &events[1]);
-
- // initialize the residual
- clblasDgemv(CblasColMajor, CblasNoTrans, N, N, -1.0,
- ctx->cl_mat, 0, N, ctx->cl_x, 0, 1, 1.0, ctx->cl_res, 0, 1,
- 1, &ocl_q, 2, events, &events[2]);
- clEnqueueCopyBuffer(ocl_q, ctx->cl_res, ctx->cl_res0, 0, 0, N * sizeof(double),
- 1, &events[2], &events[3]);
- clEnqueueCopyBuffer(ocl_q, ctx->cl_res, ctx->cl_p, 0, 0, N * sizeof(double),
- 1, &events[2], &events[4]);
-
- clWaitForEvents(5, events);
- // BARRIER
-
- for (i = 0; i < MAXITER; i++) {
- clblasDdot(N, ctx->cl_rho, 0, ctx->cl_res, 0, 1, ctx->cl_res0, 0, 1,
- ctx->cl_tmp, 1, &ocl_q, 0, NULL, &events[0]);
- clEnqueueReadBuffer(ocl_q, ctx->cl_rho, 1, 0, sizeof(double), &rho,
- 1, &events[0], NULL);
- // BARRIER
-
- if (i) {
- double beta = (rho / rho_prev) * (alpha / omega[0]);
-
- clblasDaxpy(N, -omega[0], ctx->cl_v, 0, 1, ctx->cl_p, 0, 1,
- 1, &ocl_q, 0, NULL, &events[0]);
- clblasDscal(N, beta, ctx->cl_p, 0, 1,
- 1, &ocl_q, 1, &events[0], &events[1]);
- clblasDaxpy(N, 1, ctx->cl_res, 0, 1, ctx->cl_p, 0, 1,
- 1, &ocl_q, 1, &events[1], &events[0]);
- clWaitForEvents(1, &events[0]);
- // BARRIER
- }
-
- clblasDgemv(CblasColMajor, CblasNoTrans, N, N, 1.0,
- ctx->cl_k, 0, N, ctx->cl_p, 0, 1, 0.0, ctx->cl_y, 0, 1,
- 1, &ocl_q, 0, NULL, &events[0]);
-
- clblasDgemv(CblasColMajor, CblasNoTrans, N, N, 1.0,
- ctx->cl_mat, 0, N, ctx->cl_y, 0, 1, 0.0, ctx->cl_v, 0, 1,
- 1, &ocl_q, 1, &events[0], &events[1]);
-
- clblasDdot(N, ctx->cl_alpha, 0, ctx->cl_res0, 0, 1, ctx->cl_v, 0, 1,
- ctx->cl_tmp, 1, &ocl_q, 1, &events[1], &events[0]);
- clEnqueueReadBuffer(ocl_q, ctx->cl_alpha, 1, 0, sizeof(double), &alpha,
- 1, &events[0], NULL);
- // BARRIER
-
- alpha = rho / alpha;
-
- clblasDaxpy(N, -alpha, ctx->cl_v, 0, 1, ctx->cl_res, 0, 1,
- 1, &ocl_q, 0, NULL, &events[0]);
-
- clblasDgemv(CblasColMajor, CblasNoTrans, N, N, 1.0,
- ctx->cl_k, 0, N, ctx->cl_res, 0, 1, 0.0, ctx->cl_z, 0, 1,
- 1, &ocl_q, 1, &events[0], &events[1]);
- clblasDgemv(CblasColMajor, CblasNoTrans, N, N, 1.0,
- ctx->cl_mat, 0, N, ctx->cl_z, 0, 1, 0.0, ctx->cl_t, 0, 1,
- 1, &ocl_q, 1, &events[1], &events[0]);
-
- clblasDdot(N, ctx->cl_omega, 0, ctx->cl_t, 0, 1, ctx->cl_res, 0, 1,
- ctx->cl_tmp, 1, &ocl_q, 1, &events[0], &events[1]);
- clblasDdot(N, ctx->cl_omega, 1, ctx->cl_t, 0, 1, ctx->cl_t, 0, 1,
- ctx->cl_tmp1, 1, &ocl_q, 1, &events[0], &events[2]);
-
- clEnqueueReadBuffer(ocl_q, ctx->cl_omega, 1, 0, sizeof(omega), omega,
- 2, &events[1], NULL);
- // BARRIER
-
- omega[0] /= omega[1];
-
- clblasDaxpy(N, alpha, ctx->cl_y, 0, 1, ctx->cl_x, 0, 1,
- 1, &ocl_q, 0, NULL, &events[0]);
- clblasDaxpy(N, omega[0], ctx->cl_z, 0, 1, ctx->cl_x, 0, 1,
- 1, &ocl_q, 1, &events[0], &events[1]);
-
- clblasDaxpy(N, -omega[0], ctx->cl_t, 0, 1, ctx->cl_res, 0, 1,
- 1, &ocl_q, 0, NULL, &events[0]);
- clblasDnrm2(N, ctx->cl_tmp, 0, ctx->cl_res, 0, 1, ctx->cl_tmp1,
- 1, &ocl_q, 1, &events[0], &events[2]);
- clEnqueueReadBuffer(ocl_q, ctx->cl_tmp, 1, 0, sizeof(double), &err,
- 1, &events[2], NULL);
- clWaitForEvents(1, &events[1]);
- // BARRIER
-
- if (err < BICGSTAB_TOL)
- break;
-
- rho_prev = rho;
- }
- if (i == ctx->maxiter)
- return -1;
-
- clEnqueueReadBuffer(ocl_q, ctx->cl_x, 1, 0, sizeof(double) * N,
- x, 0, NULL, NULL);
- return i;
-}
-#endif
-
// based on the wikipedia article
// and http://www.netlib.org/templates/matlab/bicgstab.m
static int solve_sw(BiCGStabContext *ctx,
@@ -249,14 +114,7 @@ static int solve_sw(BiCGStabContext *ctx,
int tdi_bicgstab_solve(BiCGStabContext *ctx, const double *mat, const double *rhs, double *x)
{
- int ret;
-
-#if HAVE_OPENCL
- if (ctx->ocl_ctx)
- ret = solve_cl(ctx, mat, rhs, x);
- else
-#endif
- ret = solve_sw(ctx, mat, rhs, x);
+ int ret = solve_sw(ctx, mat, rhs, x);
if (ret < 0)
return ret;
@@ -280,26 +138,13 @@ int tdi_bicgstab_solve(BiCGStabContext *ctx, const double *mat, const double *rh
int tdi_bicgstab_init(BiCGStabContext *ctx, const double *k, const double *x0)
{
-#if HAVE_OPENCL
- if (ctx->ocl_ctx) {
- cl_event events[2];
- clEnqueueWriteBuffer(ctx->ocl_queue, ctx->cl_k, 0, 0, ctx->N * ctx->N * sizeof(double),
- k, 0, NULL, &events[0]);
- clEnqueueWriteBuffer(ctx->ocl_queue, ctx->cl_x, 0, 0, ctx->N * sizeof(double),
- x0, 0, NULL, &events[1]);
- clWaitForEvents(2, events);
- } else
-#endif
- {
- memcpy(ctx->x, x0, ctx->N * sizeof(*x0));
- memcpy(ctx->k, k, ctx->N * ctx->N * sizeof(*k));
- }
+ memcpy(ctx->x, x0, ctx->N * sizeof(*x0));
+ memcpy(ctx->k, k, ctx->N * ctx->N * sizeof(*k));
return 0;
}
-int tdi_bicgstab_context_alloc(BiCGStabContext **pctx, int N, int maxiter,
- cl_context ocl_ctx, cl_command_queue ocl_q)
+int tdi_bicgstab_context_alloc(BiCGStabContext **pctx, int N, int maxiter)
{
BiCGStabContext *ctx;
int ret = 0;
@@ -311,39 +156,6 @@ int tdi_bicgstab_context_alloc(BiCGStabContext **pctx, int N, int maxiter,
ctx->N = N;
ctx->maxiter = maxiter;
-#if HAVE_OPENCL
- if (ocl_ctx) {
- ctx->ocl_ctx = ocl_ctx;
- ctx->ocl_queue = ocl_q;
-
-#define ALLOC(dst, size) \
-do { \
- ctx->dst = clCreateBuffer(ocl_ctx, 0, size, NULL, &ret); \
- if (ret != CL_SUCCESS) \
- goto fail; \
-} while (0)
-
- ALLOC(cl_x, N * sizeof(double));
- ALLOC(cl_p, N * sizeof(double));
- ALLOC(cl_v, N * sizeof(double));
- ALLOC(cl_y, N * sizeof(double));
- ALLOC(cl_z, N * sizeof(double));
- ALLOC(cl_t, N * sizeof(double));
- ALLOC(cl_res, N * sizeof(double));
- ALLOC(cl_res0, N * sizeof(double));
- ALLOC(cl_tmp, N * sizeof(double));
- ALLOC(cl_tmp1, N * 2 * sizeof(double));
-
- ALLOC(cl_k, N * N * sizeof(double));
- ALLOC(cl_mat, N * N * sizeof(double));
-
- ALLOC(cl_rho, sizeof(double));
- ALLOC(cl_alpha, sizeof(double));
- ALLOC(cl_beta, sizeof(double));
- ALLOC(cl_omega, 2 * sizeof(double));
- ALLOC(cl_omega1, sizeof(double));
- } else
-#endif
{
ret |= posix_memalign((void**)&ctx->x, 32, sizeof(double) * N);
ret |= posix_memalign((void**)&ctx->p, 32, sizeof(double) * N);
@@ -383,30 +195,6 @@ void tdi_bicgstab_context_free(BiCGStabContext **pctx)
free(ctx->res0);
free(ctx->k);
-#if HAVE_OPENCL
- if (ctx->ocl_ctx) {
- clReleaseMemObject(ctx->cl_x);
- clReleaseMemObject(ctx->cl_p);
- clReleaseMemObject(ctx->cl_v);
- clReleaseMemObject(ctx->cl_y);
- clReleaseMemObject(ctx->cl_z);
- clReleaseMemObject(ctx->cl_t);
- clReleaseMemObject(ctx->cl_res);
- clReleaseMemObject(ctx->cl_res0);
- clReleaseMemObject(ctx->cl_tmp);
- clReleaseMemObject(ctx->cl_tmp1);
-
- clReleaseMemObject(ctx->cl_k);
- clReleaseMemObject(ctx->cl_mat);
-
- clReleaseMemObject(ctx->cl_rho);
- clReleaseMemObject(ctx->cl_alpha);
- clReleaseMemObject(ctx->cl_beta);
- clReleaseMemObject(ctx->cl_omega);
- clReleaseMemObject(ctx->cl_omega1);
- }
-#endif
-
free(ctx);
*pctx = NULL;
}
diff --git a/bicgstab.h b/bicgstab.h
index cecc06d..249da18 100644
--- a/bicgstab.h
+++ b/bicgstab.h
@@ -21,23 +21,12 @@
#include "config.h"
-#if HAVE_OPENCL
-#include <cl.h>
-#else
-typedef void* cl_context;
-typedef void* cl_command_queue;
-#endif
-
typedef struct BiCGStabContext BiCGStabContext;
/**
* Allocate and initialize the solver for the NxN system.
- *
- * If the OpenCL context and command queue are provided (non-NULL), the solver
- * will run using clBLAS.
*/
-int tdi_bicgstab_context_alloc(BiCGStabContext **ctx, int N, int maxiter,
- cl_context ocl_ctx, cl_command_queue ocl_q);
+int tdi_bicgstab_context_alloc(BiCGStabContext **ctx, int N, int maxiter);
/**
* Free the solver and all its internal state.
diff --git a/config.h b/config.h
index da1df56..228549d 100644
--- a/config.h
+++ b/config.h
@@ -6,6 +6,5 @@
#define ARCH_X86_64 1
#define ARCH_X86 1
#define HAVE_SCHED_GETAFFINITY 1
-#define HAVE_OPENCL 0
#define TD_VERIFY 0
#define TD_POLAR 1
diff --git a/init.c b/init.c
index 26b3b27..0370d46 100644
--- a/init.c
+++ b/init.c
@@ -26,11 +26,6 @@
#include <cblas.h>
-#if HAVE_OPENCL
-#include <cl.h>
-#include <clBLAS.h>
-#endif
-
#include <threadpool.h>
#include "basis.h"
@@ -71,63 +66,6 @@ double tdi_scalarproduct_metric_sse3(size_t len1, size_t len2, double *mat,
double tdi_scalarproduct_metric_c(size_t len1, size_t len2, double *mat,
double *vec1, double *vec2);
-static void init_opencl(TDPriv *s)
-#if HAVE_OPENCL
-{
- int err, count;
- cl_platform_id platform;
- cl_context_properties props[3];
- cl_device_id ocl_device;
-
- err = clGetPlatformIDs(1, &platform, &count);
- if (err != CL_SUCCESS || count < 1) {
- tdi_log(&s->logger, 0, "Could not get an OpenCL platform ID\n");
- return;
- }
-
- err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ocl_device, &count);
- if (err != CL_SUCCESS || count < 1) {
- tdi_log(&s->logger, 0, "Could not get an OpenCL device ID\n");
- return;
- }
-
- props[0] = CL_CONTEXT_PLATFORM;
- props[1] = (cl_context_properties)platform;
- props[2] = 0;
-
- s->ocl_ctx = clCreateContext(props, 1, &ocl_device, NULL, NULL, &err);
- if (err != CL_SUCCESS || !s->ocl_ctx) {
- tdi_log(&s->logger, 0, "Could not create an OpenCL context\n");
- return;
- }
-
- s->ocl_queue = clCreateCommandQueue(s->ocl_ctx, ocl_device, 0, &err);
- if (err != CL_SUCCESS || !s->ocl_queue) {
- tdi_log(&s->logger, 0, "Could not create an OpenCL command queue: %d\n", err);
- goto fail;
- }
-
- err = clblasSetup();
- if (err != CL_SUCCESS) {
- tdi_log(&s->logger, 0, "Error setting up clBLAS\n");
- goto fail;
- }
-
- return;
-fail:
- if (s->ocl_queue)
- clReleaseCommandQueue(s->ocl_queue);
- s->ocl_queue = 0;
-
- if (s->ocl_ctx)
- clReleaseContext(s->ocl_ctx);
- s->ocl_ctx = 0;
-}
-#else
-{
-}
-#endif
-
static const enum BasisFamily basis_sets[NB_EQUATIONS][2] = {
{ BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN },
{ BASIS_FAMILY_SB_EVEN, BASIS_FAMILY_COS_EVEN },
@@ -157,8 +95,6 @@ static int teukolsky_init_check_options(TDContext *td)
if (ret < 0)
return ret;
- init_opencl(s);
-
s->logger.log = log_callback;
s->logger.opaque = td;
@@ -276,11 +212,6 @@ static int nlsolve_alloc(const TDContext *td, NLSolveContext **pnl)
memcpy(nl->basis, s->basis, sizeof(s->basis));
memcpy(nl->solve_order, s->basis_order, sizeof(s->basis_order));
-#if HAVE_OPENCL
- nl->ocl_ctx = s->ocl_ctx;
- nl->ocl_queue = s->ocl_queue;
-#endif
-
ret = tdi_nlsolve_context_init(nl);
if (ret < 0) {
tdi_log(&s->logger, 0, "Error initializing the non-linear solver\n");
@@ -491,13 +422,6 @@ void td_context_free(TDContext **ptd)
tp_free(&s->tp);
-#if HAVE_OPENCL
- if (s->ocl_queue)
- clReleaseCommandQueue(s->ocl_queue);
- if (s->ocl_ctx)
- clReleaseContext(s->ocl_ctx);
-#endif
-
for (int i = 0; i < ARRAY_ELEMS(s->basis); i++)
for (int j = 0; j < ARRAY_ELEMS(s->basis[i]); j++)
tdi_basis_free(&s->basis[i][j]);
@@ -669,11 +593,6 @@ static int lapse_solve_max(const TDContext *td)
nl->solve_order[0][0] = priv->basis_order[0][0];
nl->solve_order[0][1] = priv->basis_order[0][1];
-#if HAVE_OPENCL
- nl->ocl_ctx = priv->ocl_ctx;
- nl->ocl_queue = priv->ocl_queue;
-#endif
-
ret = tdi_nlsolve_context_init(nl);
if (ret < 0) {
tdi_log(&priv->logger, 0, "Error initializing the non-linear solver\n");
diff --git a/nlsolve.c b/nlsolve.c
index 6f89e8b..ee08ec7 100644
--- a/nlsolve.c
+++ b/nlsolve.c
@@ -29,11 +29,6 @@
#include <cblas.h>
-#if HAVE_OPENCL
-#include <cl.h>
-#include <clBLAS.h>
-#endif
-
#include <threadpool.h>
#include "basis.h"
@@ -486,11 +481,6 @@ int tdi_nlsolve_context_init(NLSolveContext *ctx)
s->ps_ctx->logger = ctx->logger;
s->ps_ctx->tp = s->tp;
-#if HAVE_OPENCL
- s->ps_ctx->ocl_ctx = ctx->ocl_ctx;
- s->ps_ctx->ocl_queue = ctx->ocl_queue;
-#endif
-
memcpy(s->ps_ctx->basis, ctx->basis, s->nb_vars * sizeof(*ctx->basis));
memcpy(s->ps_ctx->solve_order, ctx->solve_order, s->nb_vars * sizeof(*ctx->solve_order));
diff --git a/nlsolve.h b/nlsolve.h
index eaa2b64..d0e3c38 100644
--- a/nlsolve.h
+++ b/nlsolve.h
@@ -84,11 +84,6 @@ typedef struct NLSolveContext {
*/
double *(*colloc_grid)[2];
-#if HAVE_OPENCL
- cl_context ocl_ctx;
- cl_command_queue ocl_queue;
-#endif
-
// solver parameters
unsigned int maxiter;
double atol;
diff --git a/pssolve.c b/pssolve.c
index 36e09cf..5de63f6 100644
--- a/pssolve.c
+++ b/pssolve.c
@@ -414,7 +414,7 @@ int tdi_pssolve_context_init(PSSolveContext *ctx)
s->steps_since_inverse = INT_MAX;
/* init the BiCGStab solver */
- ret = tdi_bicgstab_context_alloc(&s->bicgstab, N, 64, ctx->ocl_ctx, ctx->ocl_queue);
+ ret = tdi_bicgstab_context_alloc(&s->bicgstab, N, 64);
if (ret < 0)
return ret;
diff --git a/pssolve.h b/pssolve.h
index 160e179..b350a1a 100644
--- a/pssolve.h
+++ b/pssolve.h
@@ -125,9 +125,6 @@ typedef struct PSSolveContext {
*/
TPContext *tp;
- cl_context ocl_ctx;
- cl_command_queue ocl_queue;
-
uint64_t lu_solves_count;
uint64_t lu_solves_time;