aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErik Schnetter <schnetter@aei.mpg.de>2004-12-30 21:21:00 +0000
committerErik Schnetter <schnetter@aei.mpg.de>2004-12-30 21:21:00 +0000
commitf527714bcb9e2a2c672cee14c7e612a177475309 (patch)
treea061092722eab7d8fd46c7d71c673803a6ba3005
parent191478e48a3c8236e023d4c302145d6b443fc8f2 (diff)
CarpetLib: Collect more timing statistics
Collect more timing statistics in the data class. Print these statistics to stdout when the Cactus parameter print_timestats is set. Create a timer class "timestat". This is a timer that can be started, stopped, and it prints the total time as well as some statistics. For memory allocation statistics, count the number of objects as well as the number of bytes. darcs-hash:20041230212136-891bb-c14edfa7d539ae9b135eee76afadaad51fd0b098.gz
-rw-r--r--Carpet/CarpetLib/param.ccl4
-rw-r--r--Carpet/CarpetLib/schedule.ccl6
-rw-r--r--Carpet/CarpetLib/src/data.cc53
-rw-r--r--Carpet/CarpetLib/src/gdata.cc133
-rw-r--r--Carpet/CarpetLib/src/gdata.hh46
5 files changed, 219 insertions, 23 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl
index 30b205a2d..cce1c3e21 100644
--- a/Carpet/CarpetLib/param.ccl
+++ b/Carpet/CarpetLib/param.ccl
@@ -19,6 +19,10 @@ BOOLEAN output_bboxes "Output bounding box information to the screen" STEERABLE=
{
} "no"
+BOOLEAN print_timestats "Print timing statistics at every iteration" STEERABLE=always
+{
+} "no"
+
BOOLEAN save_memory_during_regridding "Save some memory during regridding at the expense of speed"
{
} "no"
diff --git a/Carpet/CarpetLib/schedule.ccl b/Carpet/CarpetLib/schedule.ccl
index 7c7935593..e102e6f36 100644
--- a/Carpet/CarpetLib/schedule.ccl
+++ b/Carpet/CarpetLib/schedule.ccl
@@ -1,2 +1,8 @@
# Schedule definitions for thorn CarpetLib
# $Header:$
+
+SCHEDULE CarpetLib_printtimestats AT analysis
+{
+ LANG: C
+ OPTIONS: global
+} "Print timing statistics if desired"
diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc
index f1b4672dd..67a11fd3f 100644
--- a/Carpet/CarpetLib/src/data.cc
+++ b/Carpet/CarpetLib/src/data.cc
@@ -27,7 +27,10 @@ using namespace std;
-static size_t total_allocated_bytes; // total number of allocated bytes
+// Total number of currently allocated bytes and objects
+static size_t total_allocated_bytes = 0;
+static size_t total_allocated_objects = 0;
+
static const CCTK_REAL eps = 1.0e-10;
@@ -128,12 +131,15 @@ void data<T,D>::getmem (const size_t nelems)
} catch (...) {
T Tdummy;
CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING,
- "Failed to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated.",
- (double)nbytes, nbytes/1.0e6,
+ "Failed to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated in %d objects.",
+ double(nbytes), double(nbytes/1.0e6),
typestring(Tdummy),
- (double)total_allocated_bytes, total_allocated_bytes/1.0e6);
+ double(total_allocated_bytes),
+ double(total_allocated_bytes/1.0e6),
+ int(total_allocated_objects));
}
total_allocated_bytes += nbytes;
+ ++ total_allocated_objects;
}
@@ -142,7 +148,10 @@ template<class T, int D>
void data<T,D>::freemem ()
{
delete [] _storage;
+ assert (total_allocated_bytes > this->_allocated_bytes);
+ assert (total_allocated_objects > 0);
total_allocated_bytes -= this->_allocated_bytes;
+ -- total_allocated_objects;
this->_allocated_bytes = 0;
}
@@ -237,6 +246,8 @@ void data<T,D>::change_processor_recv (comm_state<D>& state,
return;
}
+ wtime_changeproc_recv.start();
+
if (this->_has_storage) {
if (this->this_processor_is ( newproc)) {
// copy from other processor
@@ -249,12 +260,12 @@ void data<T,D>::change_processor_recv (comm_state<D>& state,
_storage = (T*)mem;
}
+ wtime_irecv.start();
const double wtime1 = MPI_Wtime();
T dummy;
MPI_Irecv (_storage, this->_size, dist::datatype(dummy), this->proc(),
this->tag, dist::comm, &this->request);
- const double wtime2 = MPI_Wtime();
- this->wtime_irecv += wtime2 - wtime1;
+ wtime_irecv.stop();
if (use_waitall) {
state.requests.push_back (this->request);
}
@@ -267,6 +278,8 @@ void data<T,D>::change_processor_recv (comm_state<D>& state,
assert (!_storage);
}
}
+
+ wtime_changeproc_recv.stop();
}
@@ -285,6 +298,8 @@ void data<T,D>::change_processor_send (comm_state<D>& state,
return;
}
+ wtime_changeproc_send.start();
+
if (this->_has_storage) {
if (this->this_processor_is ( newproc)) {
// copy from other processor
@@ -295,12 +310,11 @@ void data<T,D>::change_processor_send (comm_state<D>& state,
assert (!mem);
assert (_storage);
- const double wtime1 = MPI_Wtime();
+ wtime_isend.start();
T dummy;
MPI_Isend (_storage, this->_size, dist::datatype(dummy), newproc,
this->tag, dist::comm, &this->request);
- const double wtime2 = MPI_Wtime();
- this->wtime_isend += wtime2 - wtime1;
+ wtime_isend.stop();
if (use_waitall) {
state.requests.push_back (this->request);
}
@@ -310,6 +324,8 @@ void data<T,D>::change_processor_send (comm_state<D>& state,
assert (!_storage);
}
}
+
+ wtime_changeproc_send.stop();
}
@@ -328,15 +344,16 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
assert (!mem);
return;
}
+
+ wtime_changeproc_wait.start();
if (use_waitall) {
if (! state.requests.empty()) {
// wait for all requests at once
- const double wtime1 = MPI_Wtime();
+ wtime_irecvwait.start();
MPI_Waitall
(state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE);
- const double wtime2 = MPI_Wtime();
- this->wtime_irecvwait += wtime2 - wtime1;
+ wtime_irecvwait.stop();
state.requests.clear();
}
}
@@ -346,11 +363,10 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
// copy from other processor
if (! use_waitall) {
- const double wtime1 = MPI_Wtime();
+ wtime_irecvwait.start();
MPI_Status status;
MPI_Wait (&this->request, &status);
- const double wtime2 = MPI_Wtime();
- this->wtime_irecvwait += wtime2 - wtime1;
+ wtime_irecvwait.stop();
}
} else if (this->lives_on_this_processor()) {
@@ -360,11 +376,10 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
assert (_storage);
if (! use_waitall) {
- const double wtime1 = MPI_Wtime();
+ wtime_isendwait.start();
MPI_Status status;
MPI_Wait (&this->request, &status);
- const double wtime2 = MPI_Wtime();
- this->wtime_isendwait += wtime2 - wtime1;
+ wtime_isendwait.stop();
}
if (this->_owns_storage) {
@@ -379,6 +394,8 @@ void data<T,D>::change_processor_wait (comm_state<D>& state,
}
this->_proc = newproc;
+
+ wtime_changeproc_wait.stop();
}
diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc
index f04267508..dacb24115 100644
--- a/Carpet/CarpetLib/src/gdata.cc
+++ b/Carpet/CarpetLib/src/gdata.cc
@@ -1,10 +1,15 @@
// $Header:$
+#include <algorithm>
#include <cassert>
+#include <cmath>
#include <cstdlib>
#include <iostream>
+#include <mpi.h>
+
#include "cctk.h"
+#include "cctk_Arguments.h"
#include "cctk_Parameters.h"
#include "util_ErrorCodes.h"
@@ -106,12 +111,106 @@ static int nexttag ()
+timestat::timestat () : wtime(0.0), wtime2(0.0), count(0.0), running(false)
+{
+}
+
+void timestat::addstat (double const t)
+{
+ wtime += t;
+ wtime2 += t*t;
+ ++count;
+}
+
+void timestat::start ()
+{
+ assert (! running);
+ running = true;
+ starttime = MPI_Wtime();
+}
+
+void timestat::stop ()
+{
+ assert (running);
+ running = false;
+ double const endtime = MPI_Wtime();
+ addstat (endtime - starttime);
+}
+
+ostream& operator<< (ostream& os, const timestat& wt)
+{
+ double const avg = wt.wtime / wt.count;
+ double const stddev = sqrt(max(0.0, wt.wtime2 / wt.count - avg * avg));
+ os << "timestat[seconds]:"
+ << " cnt: " << wt.count
+ << " sum: " << wt.wtime
+ << " avg: " << avg
+ << " stddev: " << stddev;
+ return os;
+}
+
+timestat wtime_copyfrom_recv;
+timestat wtime_copyfrom_send;
+timestat wtime_copyfrom_wait;
+
+timestat wtime_copyfrom_recv_maketyped;
+timestat wtime_copyfrom_recv_allocate;
+timestat wtime_copyfrom_recv_changeproc_recv;
+timestat wtime_copyfrom_send_copyfrom_nocomm1;
+timestat wtime_copyfrom_send_copyfrom_nocomm2;
+timestat wtime_copyfrom_send_changeproc_send;
+timestat wtime_copyfrom_wait_changeproc_wait;
+timestat wtime_copyfrom_wait_copyfrom_nocomm;
+timestat wtime_copyfrom_wait_delete;
+
+timestat wtime_changeproc_recv;
+timestat wtime_changeproc_send;
+timestat wtime_changeproc_wait;
+
+timestat wtime_irecv;
+timestat wtime_isend;
+timestat wtime_isendwait;
+timestat wtime_irecvwait;
+
+extern "C" void CarpetLib_printtimestats (CCTK_ARGUMENTS);
+void CarpetLib_printtimestats (CCTK_ARGUMENTS)
+{
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+ if (print_timestats) {
+ cout << "Timing statistics from CarpetLib:" << endl
+ << " wtime_copyfrom_recv: " << wtime_copyfrom_recv << endl
+ << " wtime_copyfrom_send: " << wtime_copyfrom_send << endl
+ << " wtime_copyfrom_wait: " << wtime_copyfrom_wait << endl
+ << endl
+ << " wtime_copyfrom_recv_maketyped: " << wtime_copyfrom_recv_maketyped << endl
+ << " wtime_copyfrom_recv_allocate: " << wtime_copyfrom_recv_allocate << endl
+ << " wtime_copyfrom_recv_changeproc_recv: " << wtime_copyfrom_recv_changeproc_recv << endl
+ << " wtime_copyfrom_send_copyfrom_nocomm1: " << wtime_copyfrom_send_copyfrom_nocomm1 << endl
+ << " wtime_copyfrom_send_copyfrom_nocomm2: " << wtime_copyfrom_send_copyfrom_nocomm2 << endl
+ << " wtime_copyfrom_send_changeproc_send: " << wtime_copyfrom_send_changeproc_send << endl
+ << " wtime_copyfrom_wait_changeproc_wait: " << wtime_copyfrom_wait_changeproc_wait << endl
+ << " wtime_copyfrom_wait_copyfrom_nocomm2: " << wtime_copyfrom_wait_copyfrom_nocomm << endl
+ << " wtime_copyfrom_wait_delete: " << wtime_copyfrom_wait_delete << endl
+ << endl
+ << " wtime_changeproc_recv: " << wtime_changeproc_recv << endl
+ << " wtime_changeproc_send: " << wtime_changeproc_send << endl
+ << " wtime_changeproc_wait: " << wtime_changeproc_wait << endl
+ << endl
+ << " wtime_irecv: " << wtime_irecv << endl
+ << " wtime_isend: " << wtime_isend << endl
+ << " wtime_isendwait: " << wtime_isendwait << endl
+ << " wtime_irecvwait: " << wtime_irecvwait << endl
+ << endl;
+ }
+}
+
+
+
// Constructors
template<int D>
gdata<D>::gdata (const int varindex_, const operator_type transport_operator_)
: varindex(varindex_), transport_operator(transport_operator_),
- wtime_isend(0.0), wtime_isendwait(0.0),
- wtime_irecv(0.0), wtime_irecvwait(0.0),
_has_storage(false),
comm_active(false),
tag(nexttag())
@@ -243,18 +342,28 @@ void gdata<D>::copy_from_recv (comm_state<D>& state,
if (box.empty()) return;
+ wtime_copyfrom_recv.start();
+
if (proc() == src->proc()) {
// copy on same processor
} else {
// copy to different processor
+ wtime_copyfrom_recv_maketyped.start();
gdata<D>* const tmp = make_typed(varindex, transport_operator);
+ wtime_copyfrom_recv_maketyped.stop();
state.tmps1.push (tmp);
+ wtime_copyfrom_recv_allocate.start();
tmp->allocate (box, src->proc());
+ wtime_copyfrom_recv_allocate.stop();
+ wtime_copyfrom_recv_changeproc_recv.start();
tmp->change_processor_recv (state, proc());
+ wtime_copyfrom_recv_changeproc_recv.stop();
}
+
+ wtime_copyfrom_recv.stop();
}
@@ -275,10 +384,14 @@ void gdata<D>::copy_from_send (comm_state<D>& state,
if (box.empty()) return;
+ wtime_copyfrom_send.start();
+
if (proc() == src->proc()) {
// copy on same processor
+ wtime_copyfrom_send_copyfrom_nocomm1.start();
copy_from_nocomm (src, box);
+ wtime_copyfrom_send_copyfrom_nocomm1.stop();
} else {
@@ -287,10 +400,16 @@ void gdata<D>::copy_from_send (comm_state<D>& state,
state.tmps1.pop();
state.tmps2.push (tmp);
assert (tmp);
+ wtime_copyfrom_send_copyfrom_nocomm2.start();
tmp->copy_from_nocomm (src, box);
+ wtime_copyfrom_send_copyfrom_nocomm2.stop();
+ wtime_copyfrom_send_changeproc_send.start();
tmp->change_processor_send (state, proc());
+ wtime_copyfrom_send_changeproc_send.stop();
}
+
+ wtime_copyfrom_send.stop();
}
@@ -311,6 +430,8 @@ void gdata<D>::copy_from_wait (comm_state<D>& state,
if (box.empty()) return;
+ wtime_copyfrom_wait.start();
+
if (proc() == src->proc()) {
// copy on same processor
@@ -320,11 +441,19 @@ void gdata<D>::copy_from_wait (comm_state<D>& state,
gdata<D>* const tmp = state.tmps2.front();
state.tmps2.pop();
assert (tmp);
+ wtime_copyfrom_wait_changeproc_wait.start();
tmp->change_processor_wait (state, proc());
+ wtime_copyfrom_wait_changeproc_wait.stop();
+ wtime_copyfrom_wait_copyfrom_nocomm.start();
copy_from_nocomm (tmp, box);
+ wtime_copyfrom_wait_copyfrom_nocomm.stop();
+ wtime_copyfrom_wait_delete.start();
delete tmp;
+ wtime_copyfrom_wait_delete.stop();
}
+
+ wtime_copyfrom_wait.stop();
}
diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh
index fc65004e6..de4d512c3 100644
--- a/Carpet/CarpetLib/src/gdata.hh
+++ b/Carpet/CarpetLib/src/gdata.hh
@@ -50,6 +50,49 @@ public:
+// Time (in seconds) spend during various operations
+struct timestat {
+ double wtime;
+ double wtime2;
+ double count;
+ timestat ();
+private:
+ bool running;
+ double starttime;
+public:
+ void start();
+ void stop();
+private:
+ void addstat (double const t);
+};
+
+ostream& operator<< (ostream& os, const timestat& wt);
+
+extern timestat wtime_copyfrom_recv;
+extern timestat wtime_copyfrom_send;
+extern timestat wtime_copyfrom_wait;
+
+extern timestat wtime_copyfrom_recv_maketyped;
+extern timestat wtime_copyfrom_recv_allocate;
+extern timestat wtime_copyfrom_recv_changeproc_recv;
+extern timestat wtime_copyfrom_send_copyfrom_nocomm1;
+extern timestat wtime_copyfrom_send_copyfrom_nocomm2;
+extern timestat wtime_copyfrom_send_changeproc_send;
+extern timestat wtime_copyfrom_wait_changeproc_wait;
+extern timestat wtime_copyfrom_wait_copyfrom_nocomm;
+extern timestat wtime_copyfrom_wait_delete;
+
+extern timestat wtime_changeproc_recv;
+extern timestat wtime_changeproc_send;
+extern timestat wtime_changeproc_wait;
+
+extern timestat wtime_irecv;
+extern timestat wtime_isend;
+extern timestat wtime_irecvwait;
+extern timestat wtime_isendwait;
+
+
+
// A generic data storage without type information
template<int D>
class gdata {
@@ -64,9 +107,6 @@ protected: // should be readonly
const int varindex; // Cactus variable index, or -1
operator_type transport_operator;
- double wtime_isend, wtime_isendwait;
- double wtime_irecv, wtime_irecvwait;
-
bool _has_storage; // has storage associated (on some processor)
bool _owns_storage; // owns the storage
// (only valid if there is storage on this processor; it means that