CarpetLib: Move some OpenMP parallelisation into the operators

author: Erik Schnetter <schnetter@cct.lsu.edu> 2012-02-24 15:47:08 -0500
committer: Erik Schnetter <schnetter@cct.lsu.edu> 2012-02-24 15:47:08 -0500
commit: 34181a5c6c82ece159f7466fea9e61e083b87f8c (patch)
tree: 88370f24f0ecd94a9506853c0cd504dc50d079ba
parent: 3bd376a48fe9f635d336f2fc669da3fcb6288906 (diff)
8 files changed, 79 insertions, 75 deletions
diff --git a/Carpet/CarpetLib/src/copy_3d.cc b/Carpet/CarpetLib/src/copy_3d.cc
index 81544a265..5e2eba236 100644
--- a/Carpet/CarpetLib/src/copy_3d.cc
+++ b/Carpet/CarpetLib/src/copy_3d.cc
@@ -133,19 +133,14 @@ namespace CarpetLib {
     
     
     // Loop over region
+#pragma omp parallel for collapse(3)
     for (int k=0; k<regkext; ++k) {
       for (int j=0; j<regjext; ++j) {
-#if 1
         for (int i=0; i<regiext; ++i) {
           
           dst [DSTIND3(i, j, k)] = src [SRCIND3(i, j, k)];
           
         }
-#else
-        memcpy (& dst [DSTIND3(0, j, k)],
-                & src [SRCIND3(0, j, k)],
-                regiext * sizeof(T));
-#endif
       }
     }
     
diff --git a/Carpet/CarpetLib/src/copy_4d.cc b/Carpet/CarpetLib/src/copy_4d.cc
index 232ab0914..b0012e196 100644
--- a/Carpet/CarpetLib/src/copy_4d.cc
+++ b/Carpet/CarpetLib/src/copy_4d.cc
@@ -112,6 +112,7 @@ namespace CarpetLib {
     
     
     // Loop over region
+#pragma omp parallel for collapse(4)
     for (int l=0; l<reglext; ++l) {
       for (int k=0; k<regkext; ++k) {
         for (int j=0; j<regjext; ++j) {
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index 1ee6c94e7..633b8bf5d 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -543,21 +543,21 @@ copy_from_innerloop (gdata const * const gsrc,
   ibbox const& dstbox = this->extent();
   
 #if CARPET_DIM == 3
-  call_operator<T> (& copy_3d,
-                    static_cast <T const *> (src->storage()),
-                    src->shape(),
-                    static_cast <T *> (this->storage()),
-                    this->shape(),
-                    srcbox, dstbox,
-                    srcregbox, dstregbox, (void*)slabinfo);
+  // Don't use call_operator, because we parallelise ourselves
+  copy_3d(static_cast <T const *> (src->storage()),
+          src->shape(),
+          static_cast <T *> (this->storage()),
+          this->shape(),
+          srcbox, dstbox,
+          srcregbox, dstregbox, (void*)slabinfo);
 #elif CARPET_DIM == 4
-  call_operator<T> (& copy_4d,
-                    static_cast <T const *> (src->storage()),
-                    src->shape(),
-                    static_cast <T *> (this->storage()),
-                    this->shape(),
-                    srcbox, dstbox,
-                    srcregbox, dstregbox, (void*)slabinfo);
+  // Don't use call_operator, because we parallelise ourselves
+  copy_4d(static_cast <T const *> (src->storage()),
+          src->shape(),
+          static_cast <T *> (this->storage()),
+          this->shape(),
+          srcbox, dstbox,
+          srcregbox, dstregbox, (void*)slabinfo);
 #else
 #  error "Value for CARPET_DIM not supported"
 #endif
@@ -776,14 +776,14 @@ transfer_prolongate (data const * const src,
     }
     case cell_centered: {
       if (use_dgfe) {
-        call_operator<T>(prolongate_3d_dgfe_rf2<T,5>,
-                         static_cast<T const *>(src->storage()),
-                         src->shape(),
-                         static_cast<T *>(this->storage()),
-                         this->shape(),
-                         src->extent(),
-                         this->extent(),
-                         srcbox, dstbox, NULL);
+        // Don't use call_operator, because we parallelise ourselves
+        prolongate_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()),
+                                    src->shape(),
+                                    static_cast<T *>(this->storage()),
+                                    this->shape(),
+                                    src->extent(),
+                                    this->extent(),
+                                    srcbox, dstbox, NULL);
         break;
       }
       static
@@ -1135,51 +1135,51 @@ transfer_restrict (data const * const src,
       
       if (all(is_centered == ivect(1,1,1))) {
         if (use_dgfe) {
-          call_operator<T>(restrict_3d_dgfe_rf2<T,5>,
-                           static_cast<T const *>(src->storage()),
+          // Don't use call_operator, because we parallelise ourselves
+          restrict_3d_dgfe_rf2<T,5>(static_cast<T const *>(src->storage()),
+                                    src->shape(),
+                                    static_cast<T *>(this->storage()),
+                                    this->shape(),
+                                    srcbox,
+                                    dstbox,
+                                    srcregbox, dstregbox, NULL);
+          break;
+        }
+        // Don't use call_operator, because we parallelise ourselves
+        restrict_3d_cc_rf2(static_cast <T const *> (src->storage()),
                            src->shape(),
-                           static_cast<T *>(this->storage()),
+                           static_cast <T *> (this->storage()),
                            this->shape(),
                            srcbox,
                            dstbox,
                            srcregbox, dstregbox, NULL);
-          break;
-        }
-        call_operator<T> (& restrict_3d_cc_rf2,
-                          static_cast <T const *> (src->storage()),
-                          src->shape(),
-                          static_cast <T *> (this->storage()),
-                          this->shape(),
-                          srcbox,
-                          dstbox,
-                          srcregbox, dstregbox, NULL);
       } else if (all(is_centered == ivect(0,1,1))) {
-        call_operator<T> (& restrict_3d_vc_rf2<T,0,1,1>,
-                          static_cast <T const *> (src->storage()),
-                          src->shape(),
-                          static_cast <T *> (this->storage()),
-                          this->shape(),
-                          srcbox,
-                          dstbox,
-                          srcregbox, dstregbox, NULL);
+        // Don't use call_operator, because we parallelise ourselves
+        restrict_3d_vc_rf2<T,0,1,1>(static_cast <T const *> (src->storage()),
+                                    src->shape(),
+                                    static_cast <T *> (this->storage()),
+                                    this->shape(),
+                                    srcbox,
+                                    dstbox,
+                                    srcregbox, dstregbox, NULL);
       } else if (all(is_centered == ivect(1,0,1))) {
-        call_operator<T> (& restrict_3d_vc_rf2<T,1,0,1>,
-                          static_cast <T const *> (src->storage()),
-                          src->shape(),
-                          static_cast <T *> (this->storage()),
-                          this->shape(),
-                          srcbox,
-                          dstbox,
-                          srcregbox, dstregbox, NULL);
+        // Don't use call_operator, because we parallelise ourselves
+        restrict_3d_vc_rf2<T,1,0,1>(static_cast <T const *> (src->storage()),
+                                    src->shape(),
+                                    static_cast <T *> (this->storage()),
+                                    this->shape(),
+                                    srcbox,
+                                    dstbox,
+                                    srcregbox, dstregbox, NULL);
       } else if (all(is_centered == ivect(1,1,0))) {
-        call_operator<T> (& restrict_3d_vc_rf2<T,1,1,0>,
-                          static_cast <T const *> (src->storage()),
-                          src->shape(),
-                          static_cast <T *> (this->storage()),
-                          this->shape(),
-                          srcbox,
-                          dstbox,
-                          srcregbox, dstregbox, NULL);
+        // Don't use call_operator, because we parallelise ourselves
+        restrict_3d_vc_rf2<T,1,1,0>(static_cast <T const *> (src->storage()),
+                                    src->shape(),
+                                    static_cast <T *> (this->storage()),
+                                    this->shape(),
+                                    srcbox,
+                                    dstbox,
+                                    srcregbox, dstregbox, NULL);
       } else {
         assert (0);
       }
@@ -1203,14 +1203,14 @@ transfer_restrict (data const * const src,
     // enum centering { vertex_centered, cell_centered };
     switch (cent) {
     case vertex_centered:
-      call_operator<T> (& restrict_4d_rf2,
-                        static_cast <T const *> (src->storage()),
-                        src->shape(),
-                        static_cast <T *> (this->storage()),
-                        this->shape(),
-                        src->extent(),
-                        this->extent(),
-                        srcregbox, dstregbox, NULL);
+      // Don't use call_operator, because we parallelise ourselves
+      restrict_4d_rf2(static_cast <T const *> (src->storage()),
+                      src->shape(),
+                      static_cast <T *> (this->storage()),
+                      this->shape(),
+                      src->extent(),
+                      this->extent(),
+                      srcregbox, dstregbox, NULL);
       break;
     default:
       assert (0);
diff --git a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
index 87a219007..b6bd0c5d2 100644
--- a/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
+++ b/Carpet/CarpetLib/src/prolongate_3d_dgfe_rf2.cc
@@ -138,9 +138,10 @@ namespace CarpetLib {
       int const dststr2d[2] = {dstdj, dstdk};
       
       // Loop over fine region
+      ptrdiff_t const i=0;
+#pragma omp parallel for collapse(2)
       for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
         for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
-          ptrdiff_t const i=0;
 #ifdef HRSCC_HH
           GLLElement<ORDER>::prolongate_2D
             (&src[SRCIND3(srcioff+i, srcjoff+j, srckoff+k)], srcstr2d,
@@ -176,8 +177,9 @@ namespace CarpetLib {
       int const dststr2d[2]= {dstdi, dstdk};
       
       // Loop over fine region
+      ptrdiff_t const j=0;
+#pragma omp parallel for collapse(2)
       for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
-        ptrdiff_t const j=0;
         for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
 #ifdef HRSCC_HH
           GLLElement<ORDER>::prolongate_2D
@@ -215,6 +217,7 @@ namespace CarpetLib {
       
       // Loop over fine region
       ptrdiff_t const k=0;
+#pragma omp parallel for collapse(2)
       for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
         for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
 #ifdef HRSCC_HH
@@ -250,6 +253,7 @@ namespace CarpetLib {
       int const dststr[3] = {dstdi, dstdj, dstdk};
       
       // Loop over fine region
+#pragma omp parallel for collapse(3)
       for (ptrdiff_t k=0; k<regkext; k+=2*(ORDER+1)) {
         for (ptrdiff_t j=0; j<regjext; j+=2*(ORDER+1)) {
           for (ptrdiff_t i=0; i<regiext; i+=2*(ORDER+1)) {
diff --git a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
index 767476db3..c9c4bfc2d 100644
--- a/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_cc_rf2.cc
@@ -119,6 +119,7 @@ namespace CarpetLib {
     
     
     // Loop over coarse region
+#pragma omp parallel for collapse(3)
     for (int k=0; k<regkext; ++k) {
       for (int j=0; j<regjext; ++j) {
         for (int i=0; i<regiext; ++i) {
diff --git a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
index 42a087692..cab088347 100644
--- a/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_dgfe_rf2.cc
@@ -123,6 +123,7 @@ namespace CarpetLib {
     assert(all(regext % (ORDER+1) == 0));
     
     // Loop over coarse region
+#pragma omp parallel for collapse(3)
     for (ptrdiff_t k=0; k<regkext; k+=ORDER+1) {
       for (ptrdiff_t j=0; j<regjext; j+=ORDER+1) {
         for (ptrdiff_t i=0; i<regiext; i+=ORDER+1) {
diff --git a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
index acf27fb95..f750ca149 100644
--- a/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_3d_vc_rf2.cc
@@ -229,6 +229,7 @@ namespace CarpetLib {
     
     
     // Loop over coarse region
+#pragma omp parallel for collapse(3)
     for (int k=0; k<regkext; ++k) {
       for (int j=0; j<regjext; ++j) {
         for (int i=0; i<regiext; ++i) {
diff --git a/Carpet/CarpetLib/src/restrict_4d_rf2.cc b/Carpet/CarpetLib/src/restrict_4d_rf2.cc
index 10d84b890..1a89171d7 100644
--- a/Carpet/CarpetLib/src/restrict_4d_rf2.cc
+++ b/Carpet/CarpetLib/src/restrict_4d_rf2.cc
@@ -111,6 +111,7 @@ namespace CarpetLib {
     
     
     // Loop over coarse region
+#pragma omp parallel for collapse(4)
     for (int l=0; l<reglext; ++l) {
       for (int k=0; k<regkext; ++k) {
         for (int j=0; j<regjext; ++j) {
author	Erik Schnetter <schnetter@cct.lsu.edu>	2012-02-24 15:47:08 -0500
committer	Erik Schnetter <schnetter@cct.lsu.edu>	2012-02-24 15:47:08 -0500
commit	34181a5c6c82ece159f7466fea9e61e083b87f8c (patch)
tree	88370f24f0ecd94a9506853c0cd504dc50d079ba
parent	3bd376a48fe9f635d336f2fc669da3fcb6288906 (diff)