aboutsummaryrefslogtreecommitdiff
path: root/Carpet
diff options
context:
space:
mode:
Diffstat (limited to 'Carpet')
-rw-r--r--Carpet/CarpetLib/param.ccl4
-rw-r--r--Carpet/CarpetLib/schedule.ccl6
-rw-r--r--Carpet/CarpetLib/src/data.cc53
-rw-r--r--Carpet/CarpetLib/src/gdata.cc133
-rw-r--r--Carpet/CarpetLib/src/gdata.hh46
5 files changed, 219 insertions, 23 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl
index 30b205a2d..cce1c3e21 100644
--- a/Carpet/CarpetLib/param.ccl
+++ b/Carpet/CarpetLib/param.ccl
@@ -19,6 +19,10 @@ BOOLEAN output_bboxes "Output bounding box information to the screen" STEERABLE=
{
} "no"
+BOOLEAN print_timestats "Print timing statistics at every iteration" STEERABLE=always
+{
+} "no"
+
BOOLEAN save_memory_during_regridding "Save some memory during regridding at the expense of speed"
{
} "no"
diff --git a/Carpet/CarpetLib/schedule.ccl b/Carpet/CarpetLib/schedule.ccl
index 7c7935593..e102e6f36 100644
--- a/Carpet/CarpetLib/schedule.ccl
+++ b/Carpet/CarpetLib/schedule.ccl
@@ -1,2 +1,8 @@
# Schedule definitions for thorn CarpetLib
# $Header:$
+
+SCHEDULE CarpetLib_printtimestats AT analysis
+{
+ LANG: C
+ OPTIONS: global
+} "Print timing statistics if desired"
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index f1b4672dd..67a11fd3f 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -27,7 +27,10 @@ using namespace std;
-static size_t total_allocated_bytes; // total number of allocated bytes
+// Total number of currently allocated bytes and objects
+static size_t total_allocated_bytes = 0;
+static size_t total_allocated_objects = 0;
+
static const CCTK_REAL eps = 1.0e-10;
@@ -128,12 +131,15 @@ void data<T,D>::getmem (const size_t nelems)
} catch (...) {
T Tdummy;
CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING,
- "Failed to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated.",
- (double)nbytes, nbytes/1.0e6,
+ "Failed to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated in %d objects.",
+ double(nbytes), double(nbytes/1.0e6),
typestring(Tdummy),
- (double)total_allocated_bytes, total_allocated_bytes/1.0e6);
+ double(total_allocated_bytes),
+ double(total_allocated_bytes/1.0e6),
+ int(total_allocated_objects));
}
total_allocated_bytes += nbytes;
+ ++ total_allocated_objects;
}
@@ -142,7 +148,10 @@ template<class T, int D>
void data<T,D>::freemem ()
{
delete [] _storage;
+ assert (total_allocated_bytes > this->_allocated_bytes);
+ assert (total_allocated_objects > 0);
total_allocated_bytes -= this->_allocated_bytes;
+ -- total_allocated_objects;
this->_allocated_bytes = 0;
}
@@ -237,6 +246,8 @@ void data<T,D>::change_processor_recv (comm_state<D>& state,
return;
}
+ wtime_changeproc_recv.start();
+
if (this->_has_storage) {
if (this->this_processor_is ( newproc)) {
// copy from other processor
@@ -249,12 +260,12 @@ void data<T,D>::change_processor_recv (comm_state<D>& state,
_storage = (T*)mem;
}
+ wtime_irecv.start();
const double wtime1 = MPI_Wtime();
T dummy;
MPI_Irecv (_storage, this->_size, dist::datatype(dummy), this->proc(),
this->tag, dist::comm, &this->request);
- const double wtime2 = MPI_Wtime();
- this->wtime_irecv += wtime2 - wtime1;
+ wtime_irecv.stop();
if (use_waitall) {
state.requests.push_back (this->request);
}
@@ -267,6 +278,8 @@ void data<T,D>::change_processor_recv (comm_state<D>& state,
assert (!_storage);
}
}
+
+ wtime_changeproc_recv.stop();
}
@@ -285,6 +298,8 @@ void data<T,D>::change_processor_send (comm_state<D>& state,
return;
}
+ wtime_changeproc_send.start();
+
if (this->_has_storage) {
if (this->this_processor_is ( newproc)) {
// copy from other processor
@@ -295,12 +310,11 @@ void data<T,D>::change_processor_send (comm_state<D>& state,
assert (!mem);
assert (_storage);
- const double wtime1 = MPI_Wtime();
+ wtime_isend.start();
T dummy;
MPI_Isend (_storage, this->_size, dist::datatype(dummy), newproc,
this->tag, dist::comm, &this->request);
- const double wtime2 = MPI_Wtime();
- this->wtime_isend += wtime2 - wtime1;
+ wtime_isend.stop();
if (use_waitall) {
state.requests.push_back (this->request);
}
@@ -310,6 +324,8 @@ void data<T,D>::change_processor_send (comm_state<D>& state,
assert (!_storage);
}
}
+
+ wtime_changeproc_send.stop();
}
@@ -328,15 +344,16 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
assert (!mem);
return;
}
+
+ wtime_changeproc_wait.start();
if (use_waitall) {
if (! state.requests.empty()) {
// wait for all requests at once
- const double wtime1 = MPI_Wtime();
+ wtime_irecvwait.start();
MPI_Waitall
(state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE);
- const double wtime2 = MPI_Wtime();
- this->wtime_irecvwait += wtime2 - wtime1;
+ wtime_irecvwait.stop();
state.requests.clear();
}
}
@@ -346,11 +363,10 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
// copy from other processor
if (! use_waitall) {
- const double wtime1 = MPI_Wtime();
+ wtime_irecvwait.start();
MPI_Status status;
MPI_Wait (&this->request, &status);
- const double wtime2 = MPI_Wtime();
- this->wtime_irecvwait += wtime2 - wtime1;
+ wtime_irecvwait.stop();
}
} else if (this->lives_on_this_processor()) {
@@ -360,11 +376,10 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
assert (_storage);
if (! use_waitall) {
- const double wtime1 = MPI_Wtime();
+ wtime_isendwait.start();
MPI_Status status;
MPI_Wait (&this->request, &status);
- const double wtime2 = MPI_Wtime();
- this->wtime_isendwait += wtime2 - wtime1;
+ wtime_isendwait.stop();
}
if (this->_owns_storage) {
@@ -379,6 +394,8 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
}
this->_proc = newproc;
+
+ wtime_changeproc_wait.stop();
}
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index f04267508..dacb24115 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -1,10 +1,15 @@
// $Header:$
+#include <algorithm>
#include <cassert>
+#include <cmath>
#include <cstdlib>
#include <iostream>
+#include <mpi.h>
+
#include "cctk.h"
+#include "cctk_Arguments.h"
#include "cctk_Parameters.h"
#include "util_ErrorCodes.h"
@@ -106,12 +111,106 @@ static int nexttag ()
+timestat::timestat () : wtime(0.0), wtime2(0.0), count(0.0), running(false)
+{
+}
+
+void timestat::addstat (double const t)
+{
+ wtime += t;
+ wtime2 += t*t;
+ ++count;
+}
+
+void timestat::start ()
+{
+ assert (! running);
+ running = true;
+ starttime = MPI_Wtime();
+}
+
+void timestat::stop ()
+{
+ assert (running);
+ running = false;
+ double const endtime = MPI_Wtime();
+ addstat (endtime - starttime);
+}
+
+ostream& operator<< (ostream& os, const timestat& wt)
+{
+ double const avg = wt.wtime / wt.count;
+ double const stddev = sqrt(max(0.0, wt.wtime2 / wt.count - avg * avg));
+ os << "timestat[seconds]:"
+ << " cnt: " << wt.count
+ << " sum: " << wt.wtime
+ << " avg: " << avg
+ << " stddev: " << stddev;
+ return os;
+}
+
+timestat wtime_copyfrom_recv;
+timestat wtime_copyfrom_send;
+timestat wtime_copyfrom_wait;
+
+timestat wtime_copyfrom_recv_maketyped;
+timestat wtime_copyfrom_recv_allocate;
+timestat wtime_copyfrom_recv_changeproc_recv;
+timestat wtime_copyfrom_send_copyfrom_nocomm1;
+timestat wtime_copyfrom_send_copyfrom_nocomm2;
+timestat wtime_copyfrom_send_changeproc_send;
+timestat wtime_copyfrom_wait_changeproc_wait;
+timestat wtime_copyfrom_wait_copyfrom_nocomm;
+timestat wtime_copyfrom_wait_delete;
+
+timestat wtime_changeproc_recv;
+timestat wtime_changeproc_send;
+timestat wtime_changeproc_wait;
+
+timestat wtime_irecv;
+timestat wtime_isend;
+timestat wtime_isendwait;
+timestat wtime_irecvwait;
+
+extern "C" void CarpetLib_printtimestats (CCTK_ARGUMENTS);
+void CarpetLib_printtimestats (CCTK_ARGUMENTS)
+{
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+ if (print_timestats) {
+ cout << "Timing statistics from CarpetLib:" << endl
+ << " wtime_copyfrom_recv: " << wtime_copyfrom_recv << endl
+ << " wtime_copyfrom_send: " << wtime_copyfrom_send << endl
+ << " wtime_copyfrom_wait: " << wtime_copyfrom_wait << endl
+ << endl
+ << " wtime_copyfrom_recv_maketyped: " << wtime_copyfrom_recv_maketyped << endl
+ << " wtime_copyfrom_recv_allocate: " << wtime_copyfrom_recv_allocate << endl
+ << " wtime_copyfrom_recv_changeproc_recv: " << wtime_copyfrom_recv_changeproc_recv << endl
+ << " wtime_copyfrom_send_copyfrom_nocomm1: " << wtime_copyfrom_send_copyfrom_nocomm1 << endl
+ << " wtime_copyfrom_send_copyfrom_nocomm2: " << wtime_copyfrom_send_copyfrom_nocomm2 << endl
+ << " wtime_copyfrom_send_changeproc_send: " << wtime_copyfrom_send_changeproc_send << endl
+ << " wtime_copyfrom_wait_changeproc_wait: " << wtime_copyfrom_wait_changeproc_wait << endl
+ << " wtime_copyfrom_wait_copyfrom_nocomm2: " << wtime_copyfrom_wait_copyfrom_nocomm << endl
+ << " wtime_copyfrom_wait_delete: " << wtime_copyfrom_wait_delete << endl
+ << endl
+ << " wtime_changeproc_recv: " << wtime_changeproc_recv << endl
+ << " wtime_changeproc_send: " << wtime_changeproc_send << endl
+ << " wtime_changeproc_wait: " << wtime_changeproc_wait << endl
+ << endl
+ << " wtime_irecv: " << wtime_irecv << endl
+ << " wtime_isend: " << wtime_isend << endl
+ << " wtime_isendwait: " << wtime_isendwait << endl
+ << " wtime_irecvwait: " << wtime_irecvwait << endl
+ << endl;
+ }
+}
+
+
+
// Constructors
template<int D>
gdata<D>::gdata (const int varindex_, const operator_type transport_operator_)
: varindex(varindex_), transport_operator(transport_operator_),
- wtime_isend(0.0), wtime_isendwait(0.0),
- wtime_irecv(0.0), wtime_irecvwait(0.0),
_has_storage(false),
comm_active(false),
tag(nexttag())
@@ -243,18 +342,28 @@ void gdata<D>::copy_from_recv (comm_state<D>& state,
if (box.empty()) return;
+ wtime_copyfrom_recv.start();
+
if (proc() == src->proc()) {
// copy on same processor
} else {
// copy to different processor
+ wtime_copyfrom_recv_maketyped.start();
gdata<D>* const tmp = make_typed(varindex, transport_operator);
+ wtime_copyfrom_recv_maketyped.stop();
state.tmps1.push (tmp);
+ wtime_copyfrom_recv_allocate.start();
tmp->allocate (box, src->proc());
+ wtime_copyfrom_recv_allocate.stop();
+ wtime_copyfrom_recv_changeproc_recv.start();
tmp->change_processor_recv (state, proc());
+ wtime_copyfrom_recv_changeproc_recv.stop();
}
+
+ wtime_copyfrom_recv.stop();
}
@@ -275,10 +384,14 @@ void gdata<D>::copy_from_send (comm_state<D>& state,
if (box.empty()) return;
+ wtime_copyfrom_send.start();
+
if (proc() == src->proc()) {
// copy on same processor
+ wtime_copyfrom_send_copyfrom_nocomm1.start();
copy_from_nocomm (src, box);
+ wtime_copyfrom_send_copyfrom_nocomm1.stop();
} else {
@@ -287,10 +400,16 @@ void gdata<D>::copy_from_send (comm_state<D>& state,
state.tmps1.pop();
state.tmps2.push (tmp);
assert (tmp);
+ wtime_copyfrom_send_copyfrom_nocomm2.start();
tmp->copy_from_nocomm (src, box);
+ wtime_copyfrom_send_copyfrom_nocomm2.stop();
+ wtime_copyfrom_send_changeproc_send.start();
tmp->change_processor_send (state, proc());
+ wtime_copyfrom_send_changeproc_send.stop();
}
+
+ wtime_copyfrom_send.stop();
}
@@ -311,6 +430,8 @@ void gdata<D>::copy_from_wait (comm_state<D>& state,
if (box.empty()) return;
+ wtime_copyfrom_wait.start();
+
if (proc() == src->proc()) {
// copy on same processor
@@ -320,11 +441,19 @@ void gdata<D>::copy_from_wait (comm_state<D>& state,
gdata<D>* const tmp = state.tmps2.front();
state.tmps2.pop();
assert (tmp);
+ wtime_copyfrom_wait_changeproc_wait.start();
tmp->change_processor_wait (state, proc());
+ wtime_copyfrom_wait_changeproc_wait.stop();
+ wtime_copyfrom_wait_copyfrom_nocomm.start();
copy_from_nocomm (tmp, box);
+ wtime_copyfrom_wait_copyfrom_nocomm.stop();
+ wtime_copyfrom_wait_delete.start();
delete tmp;
+ wtime_copyfrom_wait_delete.stop();
}
+
+ wtime_copyfrom_wait.stop();
}
diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh
index fc65004e6..de4d512c3 100644
--- a/Carpet/CarpetLib/src/gdata.hh
+++ b/Carpet/CarpetLib/src/gdata.hh
@@ -50,6 +50,49 @@ public:
+// Time (in seconds) spend during various operations
+struct timestat {
+ double wtime;
+ double wtime2;
+ double count;
+ timestat ();
+private:
+ bool running;
+ double starttime;
+public:
+ void start();
+ void stop();
+private:
+ void addstat (double const t);
+};
+
+ostream& operator<< (ostream& os, const timestat& wt);
+
+extern timestat wtime_copyfrom_recv;
+extern timestat wtime_copyfrom_send;
+extern timestat wtime_copyfrom_wait;
+
+extern timestat wtime_copyfrom_recv_maketyped;
+extern timestat wtime_copyfrom_recv_allocate;
+extern timestat wtime_copyfrom_recv_changeproc_recv;
+extern timestat wtime_copyfrom_send_copyfrom_nocomm1;
+extern timestat wtime_copyfrom_send_copyfrom_nocomm2;
+extern timestat wtime_copyfrom_send_changeproc_send;
+extern timestat wtime_copyfrom_wait_changeproc_wait;
+extern timestat wtime_copyfrom_wait_copyfrom_nocomm;
+extern timestat wtime_copyfrom_wait_delete;
+
+extern timestat wtime_changeproc_recv;
+extern timestat wtime_changeproc_send;
+extern timestat wtime_changeproc_wait;
+
+extern timestat wtime_irecv;
+extern timestat wtime_isend;
+extern timestat wtime_irecvwait;
+extern timestat wtime_isendwait;
+
+
+
// A generic data storage without type information
template<int D>
class gdata {
@@ -64,9 +107,6 @@ protected: // should be readonly
const int varindex; // Cactus variable index, or -1
operator_type transport_operator;
- double wtime_isend, wtime_isendwait;
- double wtime_irecv, wtime_irecvwait;
-
bool _has_storage; // has storage associated (on some processor)
bool _owns_storage; // owns the storage
// (only valid if there is storage on this processor; it means that