aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@aei.mpg.de> 2004-12-08 22:25:00 +0000
committer Erik Schnetter <schnetter@aei.mpg.de> 2004-12-08 22:25:00 +0000
commit 841ec5b3a4246d1a626debc96b3d6f37ea413992 (patch)
tree 5f854b9fc28c284e3a66c10ac4648437bced3867
parent 6781c02ef3265d592dc407c03c53552943979c4b (diff)
CarpetLib: Introduce parameter to post MPI_Irecv and MPI_Isend at the same time
Introduce a parameter to post MPI_Irecv and MPI_Isend at the same time. Use two queues instead of one vector to store the MPI_Requests. darcs-hash:20041208222541-891bb-c7c8994a0c41b6cfb37f6dc023bc1172238f3619.gz
-rw-r--r-- Carpet/CarpetLib/param.ccl 4
-rw-r--r-- Carpet/CarpetLib/src/data.cc 23
-rw-r--r-- Carpet/CarpetLib/src/data.hh 3
-rw-r--r-- Carpet/CarpetLib/src/gdata.cc 144
-rw-r--r-- Carpet/CarpetLib/src/gdata.hh 10
5 files changed, 126 insertions, 58 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl
index 8ba0207d2..30b205a2d 100644
--- a/Carpet/CarpetLib/param.ccl
+++ b/Carpet/CarpetLib/param.ccl
@@ -33,3 +33,7 @@ INT max_mpi_tags "Maximum number of MPI tags to use"
BOOLEAN use_waitall "Use MPI_Waitall instead many MPI_Wait statements"
{
} "no"
+
+BOOLEAN combine_recv_send "Combine MPI_Irecv and MPI_Isend calls"
+{
+} "no"
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index e4993a900..6c4c677d9 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -224,28 +224,6 @@ T* data<T,D>::vectordata (const int vectorindex) const
// Processor management
template<class T, int D>
-void data<T,D>::change_processor (comm_state<D>& state,
- const int newproc,
- void* const mem)
-{
- switch (state.thestate) {
- case state_recv:
- change_processor_recv (state, newproc, mem);
- break;
- case state_send:
- change_processor_send (state, newproc, mem);
- break;
- case state_wait:
- change_processor_wait (state, newproc, mem);
- break;
- default:
- assert(0);
- }
-}
-
-
-
-template<class T, int D>
void data<T,D>::change_processor_recv (comm_state<D>& state,
const int newproc,
void* const mem)
@@ -354,6 +332,7 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
if (use_waitall) {
if (! state.requests.empty()) {
+ // wait for all requests at once
const double wtime1 = MPI_Wtime();
MPI_Waitall
(state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE);
diff --git a/Carpet/CarpetLib/src/data.hh b/Carpet/CarpetLib/src/data.hh
index 61953b30b..9445bf165 100644
--- a/Carpet/CarpetLib/src/data.hh
+++ b/Carpet/CarpetLib/src/data.hh
@@ -77,9 +77,6 @@ private:
public:
// Processor management
- virtual void change_processor (comm_state<D>& state,
- const int newproc,
- void* const mem=0);
private:
virtual void change_processor_recv (comm_state<D>& state,
const int newproc,
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index bec027fce..e6881aa8f 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -25,18 +25,55 @@ using namespace std;
// Communication state control
template<int D>
comm_state<D>::comm_state ()
- : thestate(state_recv),
- current(0)
+ : thestate(state_recv)
{
}
template<int D>
void comm_state<D>::step ()
{
+ DECLARE_CCTK_PARAMETERS;
+
assert (thestate!=state_done);
- assert (current==tmps.size());
- thestate=astate(size_t(thestate)+1);
- current=0;
+ if (combine_recv_send) {
+ switch (thestate) {
+ case state_recv:
+ assert (tmps1.empty());
+ thestate = state_wait;
+ break;
+ case state_send:
+ assert (0);
+ case state_wait:
+ assert (tmps1.empty());
+ assert (tmps2.empty());
+ thestate = state_done;
+ break;
+ case state_done:
+ assert (0);
+ default:
+ assert (0);
+ }
+ } else {
+ switch (thestate) {
+ case state_recv:
+ assert (tmps2.empty());
+ thestate = state_send;
+ break;
+ case state_send:
+ assert (tmps1.empty());
+ thestate = state_wait;
+ break;
+ case state_wait:
+ assert (tmps1.empty());
+ assert (tmps2.empty());
+ thestate = state_done;
+ break;
+ case state_done:
+ assert (0);
+ default:
+ assert (0);
+ }
+ }
}
template<int D>
@@ -49,10 +86,8 @@ template<int D>
comm_state<D>::~comm_state ()
{
assert (thestate==state_recv || thestate==state_done);
- assert (current == 0);
- for (size_t n=0; n<tmps.size(); ++n) {
- assert (tmps.at(n) == NULL);
- }
+ assert (tmps1.empty());
+ assert (tmps2.empty());
assert (requests.empty());
}
@@ -100,17 +135,62 @@ gdata<D>::~gdata ()
+// Processor management
+template<int D>
+void gdata<D>::change_processor (comm_state<D>& state,
+ const int newproc,
+ void* const mem)
+{
+ DECLARE_CCTK_PARAMETERS;
+
+ switch (state.thestate) {
+ case state_recv:
+ if (combine_recv_send) {
+ change_processor_recv (state, newproc, mem);
+ change_processor_send (state, newproc, mem);
+ } else {
+ change_processor_recv (state, newproc, mem);
+ }
+ break;
+ case state_send:
+ if (combine_recv_send) {
+ // do nothing
+ } else {
+ change_processor_send (state, newproc, mem);
+ }
+ break;
+ case state_wait:
+ change_processor_wait (state, newproc, mem);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+
// Data manipulators
template<int D>
void gdata<D>::copy_from (comm_state<D>& state,
const gdata* src, const ibbox& box)
{
+ DECLARE_CCTK_PARAMETERS;
+
switch (state.thestate) {
case state_recv:
- copy_from_recv (state, src, box);
+ if (combine_recv_send) {
+ copy_from_recv (state, src, box);
+ copy_from_send (state, src, box);
+ } else {
+ copy_from_recv (state, src, box);
+ }
break;
case state_send:
- copy_from_send (state, src, box);
+ if (combine_recv_send) {
+ // do nothing
+ } else {
+ copy_from_send (state, src, box);
+ }
break;
case state_wait:
copy_from_wait (state, src, box);
@@ -171,9 +251,7 @@ void gdata<D>::copy_from_recv (comm_state<D>& state,
// copy to different processor
gdata<D>* const tmp = make_typed(varindex, transport_operator);
- // TODO: is this efficient?
- state.tmps.push_back (tmp);
- ++state.current;
+ state.tmps1.push (tmp);
tmp->allocate (box, src->proc());
tmp->change_processor_recv (state, proc());
@@ -206,8 +284,9 @@ void gdata<D>::copy_from_send (comm_state<D>& state,
} else {
// copy to different processor
- gdata<D>* const tmp = state.tmps.at(state.current);
- ++state.current;
+ gdata<D>* const tmp = state.tmps1.front();
+ state.tmps1.pop();
+ state.tmps2.push (tmp);
assert (tmp);
tmp->copy_from_nocomm (src, box);
tmp->change_processor_send (state, proc());
@@ -239,13 +318,12 @@ void gdata<D>::copy_from_wait (comm_state<D>& state,
} else {
// copy to different processor
- gdata<D>* const tmp = state.tmps.at(state.current);
+ gdata<D>* const tmp = state.tmps2.front();
+ state.tmps2.pop();
assert (tmp);
tmp->change_processor_wait (state, proc());
copy_from_nocomm (tmp, box);
delete tmp;
- state.tmps.at(state.current) = NULL;
- ++state.current;
}
}
@@ -261,14 +339,25 @@ void gdata<D>
const int order_space,
const int order_time)
{
+ DECLARE_CCTK_PARAMETERS;
+
assert (transport_operator != op_error);
if (transport_operator == op_none) return;
switch (state.thestate) {
case state_recv:
- interpolate_from_recv (state, srcs, times, box, time, order_space, order_time);
+ if (combine_recv_send) {
+ interpolate_from_recv (state, srcs, times, box, time, order_space, order_time);
+ interpolate_from_send (state, srcs, times, box, time, order_space, order_time);
+ } else {
+ interpolate_from_recv (state, srcs, times, box, time, order_space, order_time);
+ }
break;
case state_send:
- interpolate_from_send (state, srcs, times, box, time, order_space, order_time);
+ if (combine_recv_send) {
+ // do nothing
+ } else {
+ interpolate_from_send (state, srcs, times, box, time, order_space, order_time);
+ }
break;
case state_wait:
interpolate_from_wait (state, srcs, times, box, time, order_space, order_time);
@@ -348,9 +437,7 @@ void gdata<D>
// interpolate from other processor
gdata<D>* const tmp = make_typed(varindex, transport_operator);
- // TODO: is this efficient?
- state.tmps.push_back (tmp);
- ++state.current;
+ state.tmps1.push (tmp);
tmp->allocate (box, srcs.at(0)->proc());
tmp->change_processor_recv (state, proc());
@@ -391,7 +478,9 @@ void gdata<D>
} else {
// interpolate from other processor
- gdata<D>* const tmp = state.tmps.at(state.current++);
+ gdata<D>* const tmp = state.tmps1.front();
+ state.tmps1.pop();
+ state.tmps2.push (tmp);
assert (tmp);
tmp->interpolate_from_nocomm (srcs, times, box, time, order_space, order_time);
tmp->change_processor_send (state, proc());
@@ -431,13 +520,12 @@ void gdata<D>
} else {
// interpolate from other processor
- gdata<D>* const tmp = state.tmps.at(state.current);
+ gdata<D>* const tmp = state.tmps2.front();
+ state.tmps2.pop();
assert (tmp);
tmp->change_processor_wait (state, proc());
copy_from_nocomm (tmp, box);
delete tmp;
- state.tmps.at(state.current) = NULL;
- ++state.current;
}
}
diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh
index 9099dcc6a..12ff7b4cc 100644
--- a/Carpet/CarpetLib/src/gdata.hh
+++ b/Carpet/CarpetLib/src/gdata.hh
@@ -6,6 +6,7 @@
#include <assert.h>
#include <stdlib.h>
+#include <queue>
#include <iostream>
#include <string>
#include <vector>
@@ -44,9 +45,8 @@ private:
comm_state& operator= (comm_state const &);
public:
- vector<gdata<D>*> tmps;
+ queue<gdata<D>*> tmps1, tmps2;
vector<MPI_Request> requests; // for use_waitall
- size_t current;
};
@@ -100,9 +100,9 @@ public:
const operator_type transport_operator = op_error) const = 0;
// Processor management
- virtual void change_processor (comm_state<D>& state,
- const int newproc,
- void* const mem=0) = 0;
+ void change_processor (comm_state<D>& state,
+ const int newproc,
+ void* const mem=0);
protected:
virtual void change_processor_recv (comm_state<D>& state,
const int newproc,