diff options
Diffstat (limited to 'Carpet')
-rw-r--r-- | Carpet/CarpetLib/param.ccl | 4 | ||||
-rw-r--r-- | Carpet/CarpetLib/schedule.ccl | 6 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/data.cc | 53 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/gdata.cc | 133 | ||||
-rw-r--r-- | Carpet/CarpetLib/src/gdata.hh | 46 |
5 files changed, 219 insertions, 23 deletions
diff --git a/Carpet/CarpetLib/param.ccl b/Carpet/CarpetLib/param.ccl index 30b205a2d..cce1c3e21 100644 --- a/Carpet/CarpetLib/param.ccl +++ b/Carpet/CarpetLib/param.ccl @@ -19,6 +19,10 @@ BOOLEAN output_bboxes "Output bounding box information to the screen" STEERABLE= { } "no" +BOOLEAN print_timestats "Print timing statistics at every iteration" STEERABLE=always +{ +} "no" + BOOLEAN save_memory_during_regridding "Save some memory during regridding at the expense of speed" { } "no" diff --git a/Carpet/CarpetLib/schedule.ccl b/Carpet/CarpetLib/schedule.ccl index 7c7935593..e102e6f36 100644 --- a/Carpet/CarpetLib/schedule.ccl +++ b/Carpet/CarpetLib/schedule.ccl @@ -1,2 +1,8 @@ # Schedule definitions for thorn CarpetLib # $Header:$ + +SCHEDULE CarpetLib_printtimestats AT analysis +{ + LANG: C + OPTIONS: global +} "Print timing statistics if desired" diff --git a/Carpet/CarpetLib/src/data.cc b/Carpet/CarpetLib/src/data.cc index f1b4672dd..67a11fd3f 100644 --- a/Carpet/CarpetLib/src/data.cc +++ b/Carpet/CarpetLib/src/data.cc @@ -27,7 +27,10 @@ using namespace std; -static size_t total_allocated_bytes; // total number of allocated bytes +// Total number of currently allocated bytes and objects +static size_t total_allocated_bytes = 0; +static size_t total_allocated_objects = 0; + static const CCTK_REAL eps = 1.0e-10; @@ -128,12 +131,15 @@ void data<T,D>::getmem (const size_t nelems) } catch (...) { T Tdummy; CCTK_VWarn (0, __LINE__, __FILE__, CCTK_THORNSTRING, - "Failed to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated.", - (double)nbytes, nbytes/1.0e6, + "Failed to allocate %.0f bytes (%.3f MB) of memory for type %s. %.0f bytes (%.3f MB) are currently allocated in %d objects.", + double(nbytes), double(nbytes/1.0e6), typestring(Tdummy), - (double)total_allocated_bytes, total_allocated_bytes/1.0e6); + double(total_allocated_bytes), + double(total_allocated_bytes/1.0e6), + int(total_allocated_objects)); } total_allocated_bytes += nbytes; + ++ total_allocated_objects; } @@ -142,7 +148,10 @@ template<class T, int D> void data<T,D>::freemem () { delete [] _storage; + assert (total_allocated_bytes > this->_allocated_bytes); + assert (total_allocated_objects > 0); total_allocated_bytes -= this->_allocated_bytes; + -- total_allocated_objects; this->_allocated_bytes = 0; } @@ -237,6 +246,8 @@ void data<T,D>::change_processor_recv (comm_state<D>& state, return; } + wtime_changeproc_recv.start(); + if (this->_has_storage) { if (this->this_processor_is ( newproc)) { // copy from other processor @@ -249,12 +260,12 @@ void data<T,D>::change_processor_recv (comm_state<D>& state, _storage = (T*)mem; } + wtime_irecv.start(); const double wtime1 = MPI_Wtime(); T dummy; MPI_Irecv (_storage, this->_size, dist::datatype(dummy), this->proc(), this->tag, dist::comm, &this->request); - const double wtime2 = MPI_Wtime(); - this->wtime_irecv += wtime2 - wtime1; + wtime_irecv.stop(); if (use_waitall) { state.requests.push_back (this->request); } @@ -267,6 +278,8 @@ void data<T,D>::change_processor_recv (comm_state<D>& state, assert (!_storage); } } + + wtime_changeproc_recv.stop(); } @@ -285,6 +298,8 @@ void data<T,D>::change_processor_send (comm_state<D>& state, return; } + wtime_changeproc_send.start(); + if (this->_has_storage) { if (this->this_processor_is ( newproc)) { // copy from other processor @@ -295,12 +310,11 @@ void data<T,D>::change_processor_send (comm_state<D>& state, assert (!mem); assert (_storage); - const double wtime1 = MPI_Wtime(); + wtime_isend.start(); T dummy; MPI_Isend (_storage, this->_size, dist::datatype(dummy), newproc, this->tag, dist::comm, &this->request); - const double wtime2 = MPI_Wtime(); - this->wtime_isend += wtime2 - wtime1; + wtime_isend.stop(); if (use_waitall) { state.requests.push_back (this->request); } @@ -310,6 +324,8 @@ void data<T,D>::change_processor_send (comm_state<D>& state, assert (!_storage); } } + + wtime_changeproc_send.stop(); } @@ -328,15 +344,16 @@ void data<T,D>::change_processor_wait (comm_state<D>& state, assert (!mem); return; } + + wtime_changeproc_wait.start(); if (use_waitall) { if (! state.requests.empty()) { // wait for all requests at once - const double wtime1 = MPI_Wtime(); + wtime_irecvwait.start(); MPI_Waitall (state.requests.size(), &state.requests.front(), MPI_STATUSES_IGNORE); - const double wtime2 = MPI_Wtime(); - this->wtime_irecvwait += wtime2 - wtime1; + wtime_irecvwait.stop(); state.requests.clear(); } } @@ -346,11 +363,10 @@ void data<T,D>::change_processor_wait (comm_state<D>& state, // copy from other processor if (! use_waitall) { - const double wtime1 = MPI_Wtime(); + wtime_irecvwait.start(); MPI_Status status; MPI_Wait (&this->request, &status); - const double wtime2 = MPI_Wtime(); - this->wtime_irecvwait += wtime2 - wtime1; + wtime_irecvwait.stop(); } } else if (this->lives_on_this_processor()) { @@ -360,11 +376,10 @@ void data<T,D>::change_processor_wait (comm_state<D>& state, assert (_storage); if (! use_waitall) { - const double wtime1 = MPI_Wtime(); + wtime_isendwait.start(); MPI_Status status; MPI_Wait (&this->request, &status); - const double wtime2 = MPI_Wtime(); - this->wtime_isendwait += wtime2 - wtime1; + wtime_isendwait.stop(); } if (this->_owns_storage) { @@ -379,6 +394,8 @@ void data<T,D>::change_processor_wait (comm_state<D>& state, } this->_proc = newproc; + + wtime_changeproc_wait.stop(); } diff --git a/Carpet/CarpetLib/src/gdata.cc b/Carpet/CarpetLib/src/gdata.cc index f04267508..dacb24115 100644 --- a/Carpet/CarpetLib/src/gdata.cc +++ b/Carpet/CarpetLib/src/gdata.cc @@ -1,10 +1,15 @@ // $Header:$ +#include <algorithm> #include <cassert> +#include <cmath> #include <cstdlib> #include <iostream> +#include <mpi.h> + #include "cctk.h" +#include "cctk_Arguments.h" #include "cctk_Parameters.h" #include "util_ErrorCodes.h" @@ -106,12 +111,106 @@ static int nexttag () +timestat::timestat () : wtime(0.0), wtime2(0.0), count(0.0), running(false) +{ +} + +void timestat::addstat (double const t) +{ + wtime += t; + wtime2 += t*t; + ++count; +} + +void timestat::start () +{ + assert (! running); + running = true; + starttime = MPI_Wtime(); +} + +void timestat::stop () +{ + assert (running); + running = false; + double const endtime = MPI_Wtime(); + addstat (endtime - starttime); +} + +ostream& operator<< (ostream& os, const timestat& wt) +{ + double const avg = wt.wtime / wt.count; + double const stddev = sqrt(max(0.0, wt.wtime2 / wt.count - avg * avg)); + os << "timestat[seconds]:" + << " cnt: " << wt.count + << " sum: " << wt.wtime + << " avg: " << avg + << " stddev: " << stddev; + return os; +} + +timestat wtime_copyfrom_recv; +timestat wtime_copyfrom_send; +timestat wtime_copyfrom_wait; + +timestat wtime_copyfrom_recv_maketyped; +timestat wtime_copyfrom_recv_allocate; +timestat wtime_copyfrom_recv_changeproc_recv; +timestat wtime_copyfrom_send_copyfrom_nocomm1; +timestat wtime_copyfrom_send_copyfrom_nocomm2; +timestat wtime_copyfrom_send_changeproc_send; +timestat wtime_copyfrom_wait_changeproc_wait; +timestat wtime_copyfrom_wait_copyfrom_nocomm; +timestat wtime_copyfrom_wait_delete; + +timestat wtime_changeproc_recv; +timestat wtime_changeproc_send; +timestat wtime_changeproc_wait; + +timestat wtime_irecv; +timestat wtime_isend; +timestat wtime_isendwait; +timestat wtime_irecvwait; + +extern "C" void CarpetLib_printtimestats (CCTK_ARGUMENTS); +void CarpetLib_printtimestats (CCTK_ARGUMENTS) +{ + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + if (print_timestats) { + cout << "Timing statistics from CarpetLib:" << endl + << " wtime_copyfrom_recv: " << wtime_copyfrom_recv << endl + << " wtime_copyfrom_send: " << wtime_copyfrom_send << endl + << " wtime_copyfrom_wait: " << wtime_copyfrom_wait << endl + << endl + << " wtime_copyfrom_recv_maketyped: " << wtime_copyfrom_recv_maketyped << endl + << " wtime_copyfrom_recv_allocate: " << wtime_copyfrom_recv_allocate << endl + << " wtime_copyfrom_recv_changeproc_recv: " << wtime_copyfrom_recv_changeproc_recv << endl + << " wtime_copyfrom_send_copyfrom_nocomm1: " << wtime_copyfrom_send_copyfrom_nocomm1 << endl + << " wtime_copyfrom_send_copyfrom_nocomm2: " << wtime_copyfrom_send_copyfrom_nocomm2 << endl + << " wtime_copyfrom_send_changeproc_send: " << wtime_copyfrom_send_changeproc_send << endl + << " wtime_copyfrom_wait_changeproc_wait: " << wtime_copyfrom_wait_changeproc_wait << endl + << " wtime_copyfrom_wait_copyfrom_nocomm2: " << wtime_copyfrom_wait_copyfrom_nocomm << endl + << " wtime_copyfrom_wait_delete: " << wtime_copyfrom_wait_delete << endl + << endl + << " wtime_changeproc_recv: " << wtime_changeproc_recv << endl + << " wtime_changeproc_send: " << wtime_changeproc_send << endl + << " wtime_changeproc_wait: " << wtime_changeproc_wait << endl + << endl + << " wtime_irecv: " << wtime_irecv << endl + << " wtime_isend: " << wtime_isend << endl + << " wtime_isendwait: " << wtime_isendwait << endl + << " wtime_irecvwait: " << wtime_irecvwait << endl + << endl; + } +} + + + // Constructors template<int D> gdata<D>::gdata (const int varindex_, const operator_type transport_operator_) : varindex(varindex_), transport_operator(transport_operator_), - wtime_isend(0.0), wtime_isendwait(0.0), - wtime_irecv(0.0), wtime_irecvwait(0.0), _has_storage(false), comm_active(false), tag(nexttag()) @@ -243,18 +342,28 @@ void gdata<D>::copy_from_recv (comm_state<D>& state, if (box.empty()) return; + wtime_copyfrom_recv.start(); + if (proc() == src->proc()) { // copy on same processor } else { // copy to different processor + wtime_copyfrom_recv_maketyped.start(); gdata<D>* const tmp = make_typed(varindex, transport_operator); + wtime_copyfrom_recv_maketyped.stop(); state.tmps1.push (tmp); + wtime_copyfrom_recv_allocate.start(); tmp->allocate (box, src->proc()); + wtime_copyfrom_recv_allocate.stop(); + wtime_copyfrom_recv_changeproc_recv.start(); tmp->change_processor_recv (state, proc()); + wtime_copyfrom_recv_changeproc_recv.stop(); } + + wtime_copyfrom_recv.stop(); } @@ -275,10 +384,14 @@ void gdata<D>::copy_from_send (comm_state<D>& state, if (box.empty()) return; + wtime_copyfrom_send.start(); + if (proc() == src->proc()) { // copy on same processor + wtime_copyfrom_send_copyfrom_nocomm1.start(); copy_from_nocomm (src, box); + wtime_copyfrom_send_copyfrom_nocomm1.stop(); } else { @@ -287,10 +400,16 @@ void gdata<D>::copy_from_send (comm_state<D>& state, state.tmps1.pop(); state.tmps2.push (tmp); assert (tmp); + wtime_copyfrom_send_copyfrom_nocomm2.start(); tmp->copy_from_nocomm (src, box); + wtime_copyfrom_send_copyfrom_nocomm2.stop(); + wtime_copyfrom_send_changeproc_send.start(); tmp->change_processor_send (state, proc()); + wtime_copyfrom_send_changeproc_send.stop(); } + + wtime_copyfrom_send.stop(); } @@ -311,6 +430,8 @@ void gdata<D>::copy_from_wait (comm_state<D>& state, if (box.empty()) return; + wtime_copyfrom_wait.start(); + if (proc() == src->proc()) { // copy on same processor @@ -320,11 +441,19 @@ void gdata<D>::copy_from_wait (comm_state<D>& state, gdata<D>* const tmp = state.tmps2.front(); state.tmps2.pop(); assert (tmp); + wtime_copyfrom_wait_changeproc_wait.start(); tmp->change_processor_wait (state, proc()); + wtime_copyfrom_wait_changeproc_wait.stop(); + wtime_copyfrom_wait_copyfrom_nocomm.start(); copy_from_nocomm (tmp, box); + wtime_copyfrom_wait_copyfrom_nocomm.stop(); + wtime_copyfrom_wait_delete.start(); delete tmp; + wtime_copyfrom_wait_delete.stop(); } + + wtime_copyfrom_wait.stop(); } diff --git a/Carpet/CarpetLib/src/gdata.hh b/Carpet/CarpetLib/src/gdata.hh index fc65004e6..de4d512c3 100644 --- a/Carpet/CarpetLib/src/gdata.hh +++ b/Carpet/CarpetLib/src/gdata.hh @@ -50,6 +50,49 @@ public: +// Time (in seconds) spend during various operations +struct timestat { + double wtime; + double wtime2; + double count; + timestat (); +private: + bool running; + double starttime; +public: + void start(); + void stop(); +private: + void addstat (double const t); +}; + +ostream& operator<< (ostream& os, const timestat& wt); + +extern timestat wtime_copyfrom_recv; +extern timestat wtime_copyfrom_send; +extern timestat wtime_copyfrom_wait; + +extern timestat wtime_copyfrom_recv_maketyped; +extern timestat wtime_copyfrom_recv_allocate; +extern timestat wtime_copyfrom_recv_changeproc_recv; +extern timestat wtime_copyfrom_send_copyfrom_nocomm1; +extern timestat wtime_copyfrom_send_copyfrom_nocomm2; +extern timestat wtime_copyfrom_send_changeproc_send; +extern timestat wtime_copyfrom_wait_changeproc_wait; +extern timestat wtime_copyfrom_wait_copyfrom_nocomm; +extern timestat wtime_copyfrom_wait_delete; + +extern timestat wtime_changeproc_recv; +extern timestat wtime_changeproc_send; +extern timestat wtime_changeproc_wait; + +extern timestat wtime_irecv; +extern timestat wtime_isend; +extern timestat wtime_irecvwait; +extern timestat wtime_isendwait; + + + // A generic data storage without type information template<int D> class gdata { @@ -64,9 +107,6 @@ protected: // should be readonly const int varindex; // Cactus variable index, or -1 operator_type transport_operator; - double wtime_isend, wtime_isendwait; - double wtime_irecv, wtime_irecvwait; - bool _has_storage; // has storage associated (on some processor) bool _owns_storage; // owns the storage // (only valid if there is storage on this processor; it means that |