aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorswhite <swhite@e5a5a894-0e4f-0410-be11-d22c8b0a171a>2006-04-26 14:49:06 +0000
committerswhite <swhite@e5a5a894-0e4f-0410-be11-d22c8b0a171a>2006-04-26 14:49:06 +0000
commitd69c71d3983cee0cfd3793734a5c9fa8b1391b21 (patch)
treea00610f272508fd476741c884b9b2ccc16f210f0
parent30eb89d539b87ded1d82298c43f205b5a3b4837e (diff)
Christian's TriggerTerminationManual, renamed and expanded
git-svn-id: http://svn.aei.mpg.de/numrel/AEIThorns/ManualTermination/trunk@2 e5a5a894-0e4f-0410-be11-d22c8b0a171a
-rw-r--r--README10
-rw-r--r--doc/documentation.tex100
-rw-r--r--interface.ccl8
-rw-r--r--param.ccl35
-rw-r--r--schedule.ccl30
-rw-r--r--src/ManualTermination.c140
-rw-r--r--src/ManualTermination.h8
-rw-r--r--src/ManualTerminationFile.c48
-rw-r--r--src/make.code.defn8
9 files changed, 387 insertions, 0 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..d299580
--- /dev/null
+++ b/README
@@ -0,0 +1,10 @@
+CVS info : $Header$
+
+Cactus Code Thorn ManualTermination, based on TriggerTerminationManual
+Thorn Author(s) : Christian D. Ott <cott@aei.mpg.de>
+Thorn Maintainer(s) : Christian D. Ott <cott@aei.mpg.de>
+--------------------------------------------------------------------------
+
+Purpose of the thorn:
+
+Watch the elapsed walltime. Trigger termination when only n minutes remain before a limit set by the user.
diff --git a/doc/documentation.tex b/doc/documentation.tex
new file mode 100644
index 0000000..33668ca
--- /dev/null
+++ b/doc/documentation.tex
@@ -0,0 +1,100 @@
+% $Header$
+
+\documentclass{article}
+
+% Use the Cactus ThornGuide style file
+% (Automatically used from Cactus distribution, if you have a
+% thorn without the Cactus Flesh download this from the Cactus
+% homepage at www.cactuscode.org)
+\usepackage{../../../../doc/latex/cactus}
+\RequirePackage{alltt}
+\RequirePackage{fancyvrb}
+
+\begin{document}
+
+% The author of the documentation
+\author{Steve White \textless swhite@aei.mpg.de\textgreater}
+
+% The title of the document (not necessarily the name of the Thorn)
+\title{ManualTermination\\
+ Manual Termination of Cactus Simulations}
+
+% the date your document was last changed, if your document is in CVS,
+% please use:
+\date{$ $Date$ $}
+
+\maketitle
+
+% Do not delete next line
+% START CACTUS THORNGUIDE
+
+\begin{abstract}
+Thorn \textbf{ManualTermination} safely terminates Cactus
+simulation jobs, and can be configured to allow other users to
+terminate the job.
+
+The thorn can also be configured to terminate a certain number of minutes
+before a given maximum walltime has elapsed. Also, it can be configured
+to periodically check the contents of a given file, and terminate based
+on the contents of that file.
+
+In either case, the job should be checkpointed.
+\end{abstract}
+
+
+
+\section{Requirements}
+
+The program must be set up for checkpointing. (It can be argued that
+checkpointing functionality is common sense and good etiquette for
+long-running programs in a multi-user environment.)
+
+For termination from a file, the PBS batch system is used.
+
+\section{Setup}
+
+
+\begin{Verbatim}[commandchars=\\\{\},frame=single]
+cmd="mpirun \textsl{command...}"
+/opt/NumRelScript/chain\_job "$0" "$cmd"
+\end{Verbatim}
+
+
+\begin{Verbatim}[commandchars=\\\{\},frame=single]
+# # # # # # # # # # # # # # # Checkpointing / Recovery
+ActiveThorns = "IOHDF5Util IOHDF5"
+
+IO::checkpoint_dir = "cpr/"
+IO::checkpoint_file = "chain" # Name to taste
+IO::checkpoint_on_terminate = "yes"
+IO::recover_dir = "cpr/"
+IO::recover_file = "chain" # Same name
+IO::recover = "autoprobe"
+IOHDF5::checkpoint = "yes"
+
+# # # # # # # # # # # # # # # Termination
+ActiveThorns = "ManualTermination"
+
+ManualTermination::on_remaining_walltime=1400 #minutes before termination
+ManualTermination::max_walltime=12 # hours
+ManualTermination::termination_from_file=yes
+ManualTermination::check_file_every=10 #evolution steps
+ManualTermination::output_remtime_every_minutes=2 # how often to remind user
+
+\end{Verbatim}
+
+\section{Use}
+
+\section{Licensing and Support}
+
+Thorn \textbf{ManualTermination} is distributed under the GNU Lesser General
+Public License.
+For details please see the file \texttt{COPYING.LIB} in the top-level
+directory of this thorn.
+
+Please send any suggestions or comments to the maintainer of the thorn.
+
+% Do not delete next line
+% END CACTUS THORNGUIDE
+
+\end{document}
diff --git a/interface.ccl b/interface.ccl
new file mode 100644
index 0000000..b2d0b22
--- /dev/null
+++ b/interface.ccl
@@ -0,0 +1,8 @@
+# Interface definition for thorn ManualTermination
+# $Header$
+
+IMPLEMENTS: ManualTermination
+
+
+
+CCTK_REAL watchminutes TYPE=scalar
diff --git a/param.ccl b/param.ccl
new file mode 100644
index 0000000..d4024ff
--- /dev/null
+++ b/param.ccl
@@ -0,0 +1,35 @@
+# Parameter definitions for thorn ManualTermination
+# $Header$
+
+private:
+
+INT on_remaining_walltime "When to trigger termination in MINUTES" STEERABLE = ALWAYS
+{
+ 0:0 :: "Don't trigger termination"
+ 1:* :: "So many minutes before your job walltime is over"
+} 0
+
+CCTK_REAL max_walltime "Walltime in HOURS allocated for this job" STEERABLE = ALWAYS
+{
+ 0.:* :: "Should be positive, right"
+} 0.0
+
+BOOLEAN termination_from_file "Use termination file; specified by termination_file" STEERABLE = ALWAYS
+{
+} "no"
+
+STRING termination_file "Termination file name (full path)" STEERABLE = ALWAYS
+{
+ .* :: "Termination file"
+} ""
+
+INT check_file_every "Check termination file every n timesteps" STEERABLE = ALWAYS
+{
+ 1: :: "Should be greater than or equal to one"
+} 1
+
+INT output_remtime_every_minutes "Output remaining time every n minutes" STEERABLE = ALWAYS
+{
+ 0:0 :: "No output"
+ 1: :: "Positive..."
+} 60
diff --git a/schedule.ccl b/schedule.ccl
new file mode 100644
index 0000000..99684d3
--- /dev/null
+++ b/schedule.ccl
@@ -0,0 +1,30 @@
+# Schedule definitions for thorn ManualTermination
+# $Header$
+
+STORAGE: watchminutes
+
+if (on_remaining_walltime > 0)
+{
+ schedule ManualTermination_StartTimer at WRAGH
+ {
+ LANG:C
+ } "Start timer"
+
+ schedule ManualTermination_ResetMinutes at POST_RECOVER_VARIABLES
+ {
+ LANG:C
+ } "Reset Watchtime"
+
+ schedule ManualTermination_CheckWalltime at EVOL
+ {
+ LANG:C
+ } "Check elapsed job walltime"
+}
+
+if (termination_from_file)
+{
+ schedule ManualTerminationFile at EVOL
+ {
+ LANG:C
+ } "Check termination file"
+}
diff --git a/src/ManualTermination.c b/src/ManualTermination.c
new file mode 100644
index 0000000..0dce2fe
--- /dev/null
+++ b/src/ManualTermination.c
@@ -0,0 +1,140 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "cctk.h"
+#include "cctk_Arguments.h"
+#include "cctk_Parameters.h"
+#include "cctk_Termination.h"
+#include "cctk_Timers.h"
+#include "ManualTermination.h"
+
+enum{ BUFLEN = 128 };
+
+/* Return the name of the termination file.
+
+   The name is built on the first call and cached in a static buffer;
+   every later call ignores its argument and returns that same buffer.
+
+   terminate_filename: the requested file name.  If it is NULL or empty,
+   a default name in /tmp is derived from the PBS_JOBID environment
+   variable ("unknown" is substituted when PBS_JOBID is not set).
+
+   Names longer than BUFLEN-1 characters are truncated by snprintf.
+*/
+const char * MT_get_terminate_filename( CCTK_STRING terminate_filename )
+{
+  static char buf[BUFLEN];
+
+  /* a non-empty buffer means the name has already been constructed */
+  if( buf[0] != '\0' )
+    return buf;
+
+  /* callers in this thorn pass NULL to mean "no name given", so the
+     pointer must be tested before it is dereferenced */
+  if( terminate_filename == NULL || terminate_filename[0] == '\0' )
+  {
+    const char * pbs_jobid = getenv("PBS_JOBID");
+
+    /* getenv returns NULL when the variable is unset (e.g. outside PBS);
+       passing NULL to %s is undefined, so substitute a fixed tag */
+    snprintf( buf, BUFLEN, "/tmp/cactus_terminate.%s",
+              pbs_jobid != NULL ? pbs_jobid : "unknown" );
+  }
+  else
+  {
+    snprintf( buf, BUFLEN, "%s", terminate_filename );
+  }
+  return buf;
+}
+
+/* Scheduled routine: on processor 0, create and start the
+   "WatchWalltime" timer and set *watchminutes to
+   output_remtime_every_minutes.  If termination_from_file is set, the
+   termination file is (re)created containing "0".
+
+   Aborts through CCTK_VWarn (CCTK_WARN_ABORT) when the termination file
+   cannot be opened for writing.  Always returns 0.
+*/
+int ManualTermination_StartTimer (CCTK_ARGUMENTS)
+{
+  DECLARE_CCTK_ARGUMENTS;
+  DECLARE_CCTK_PARAMETERS;
+
+  /* only one processor needs to query the elapsed runtime */
+  if (CCTK_MyProc (cctkGH) != 0)
+  {
+    return (0);
+  }
+
+  /* the timer is referred to by name everywhere in this thorn, so the
+     index returned by CCTK_TimerCreate is not kept */
+  (void) CCTK_TimerCreate("WatchWalltime");
+  (void) CCTK_TimerStart("WatchWalltime");
+
+  *watchminutes = output_remtime_every_minutes*1.0e0;
+
+  CCTK_VInfo (CCTK_THORNSTRING, "Started Timer");
+  CCTK_VInfo (CCTK_THORNSTRING, "Reminding you every %d "
+              "minutes about remaining walltime.",
+              output_remtime_every_minutes);
+
+  if( termination_from_file )
+  {
+    const char *filename = MT_get_terminate_filename(termination_file);
+    FILE *termfile = fopen( filename, "w" );
+
+    if( termfile != NULL )
+    {
+      /* "0" means: do not terminate */
+      fprintf( termfile, "%d", 0 );
+      fclose( termfile );
+    }
+    else
+    {
+      /* save errno before any other library call can overwrite it */
+      const int saved_errno = errno;
+
+      /* strerror returns a string, so it must be printed with %s */
+      CCTK_VWarn (CCTK_WARN_ABORT, __LINE__, __FILE__, "ManualTermination",
+                  "Could not open termination file '%s'. Error: %s",
+                  filename, strerror(saved_errno));
+    }
+  }
+  return (0);
+}
+
+/* Scheduled routine: set *watchminutes back to the reminder interval
+   output_remtime_every_minutes.  Always returns 0.
+*/
+int ManualTermination_ResetMinutes (CCTK_ARGUMENTS)
+{
+  DECLARE_CCTK_ARGUMENTS;
+  DECLARE_CCTK_PARAMETERS;
+
+  /* integer parameter promoted to floating point before it is stored */
+  const double first_reminder = 1.0e0 * output_remtime_every_minutes;
+
+  *watchminutes = first_reminder;
+  return 0;
+}
+
+/* Scheduled routine: on processor 0, read the "WatchWalltime" timer,
+ print a reminder of the remaining walltime whenever the elapsed minutes
+ exceed *watchminutes, and call CCTK_TerminateNext once the elapsed time
+ reaches max_walltime (hours) minus on_remaining_walltime (minutes).
+ Always returns 0.
+*/
+int ManualTermination_CheckWalltime (CCTK_ARGUMENTS)
+{
+ DECLARE_CCTK_ARGUMENTS;
+ DECLARE_CCTK_PARAMETERS;
+
+ /* ierr receives the timer call results but is never examined */
+ int retval,ierr;
+ cTimerData *info;
+ const cTimerVal *walltime;
+ CCTK_REAL time;
+
+ /* only one processor needs to query the elapsed runtime */
+ if (CCTK_MyProc (cctkGH) != 0)
+ {
+ return (0);
+ }
+
+ /* NOTE(review): the timer is read into info before it is stopped below;
+ whether a running timer reports up-to-date values here should be
+ confirmed against the Cactus timer documentation */
+ info = CCTK_TimerCreateData();
+ ierr = CCTK_Timer("WatchWalltime",info);
+
+ /* stop timer */
+ ierr = CCTK_TimerStop("WatchWalltime");
+ /* get walltime from the "gettimeofday" clock; the value is divided by
+ 60 below and reported as minutes, so it is treated as seconds */
+ walltime = CCTK_GetClockValue("gettimeofday",info);
+ time = CCTK_TimerClockSeconds(walltime);
+ CCTK_TimerDestroyData(info);
+ /* restart the timer so that it keeps running between calls */
+ ierr = CCTK_TimerStart("WatchWalltime");
+
+ /* reminder: *watchminutes is the next reminder time in minutes; it is
+ initialised from output_remtime_every_minutes, so a value of 0 means
+ that reminders are switched off */
+ if ( (time/60.0e0 > *watchminutes) && *watchminutes != 0)
+ {
+ /* schedule the next reminder one interval later */
+ *watchminutes = (*watchminutes)+output_remtime_every_minutes*1.0e0;
+ CCTK_INFO ("***********************************************************");
+ CCTK_VInfo (CCTK_THORNSTRING, "Remaining wallclock time for your job "
+ "is %1.2f minutes. :-)", (max_walltime*60.0-time/60.0));
+ CCTK_INFO ("***********************************************************");
+ }
+
+ /* both sides are in minutes: max_walltime is given in hours,
+ on_remaining_walltime in minutes */
+ if (time/60.0e0 >= (max_walltime*60.0e0 - on_remaining_walltime*1.0e0))
+ {
+ CCTK_VInfo (CCTK_THORNSTRING, "Remaining wallclock time for your job "
+ "is %1.2f minutes. Triggering termination ...",
+ (max_walltime*60.0-time/60.0));
+ CCTK_TerminateNext (cctkGH);
+ }
+
+ retval = 0;
+
+ return (retval);
+}
diff --git a/src/ManualTermination.h b/src/ManualTermination.h
new file mode 100644
index 0000000..bca3f3c
--- /dev/null
+++ b/src/ManualTermination.h
@@ -0,0 +1,8 @@
+#ifndef MANUALTERMINATION_H
+#define MANUALTERMINATION_H
+
+#include "cctk.h"
+
+/* Return the termination file name.  The first call builds the name from
+   terminate_filename (an empty string selects a default in /tmp based on
+   PBS_JOBID); later calls ignore the argument and return the same static
+   buffer, which the caller must not free. */
+const char * MT_get_terminate_filename( CCTK_STRING terminate_filename );
+
+#endif
diff --git a/src/ManualTerminationFile.c b/src/ManualTerminationFile.c
new file mode 100644
index 0000000..342ac3f
--- /dev/null
+++ b/src/ManualTerminationFile.c
@@ -0,0 +1,48 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cctk.h"
+#include "cctk_Arguments.h"
+#include "cctk_Parameters.h"
+#include "cctk_Termination.h"
+#include "cctk_Timers.h"
+#include "ManualTermination.h"
+
+
+/* Scheduled routine: on processor 0, at iterations 1, 1+n, 1+2n, ...
+   (n = check_file_every), read an integer from the termination file and
+   call CCTK_TerminateNext when that integer is 1.
+
+   A file that cannot be opened, or that does not start with an integer,
+   is treated as "do not terminate".  Always returns 0.
+*/
+int ManualTerminationFile (CCTK_ARGUMENTS)
+{
+  int terminate = 0;
+  FILE *terminationfile;
+
+  DECLARE_CCTK_PARAMETERS;
+
+  /* only one processor needs to look at the termination file */
+  if (CCTK_MyProc (cctkGH) != 0)
+  {
+    return (0);
+  }
+
+  /* skip the iterations on which the file is not due to be checked */
+  if ((cctkGH->cctk_iteration - 1) % check_file_every != 0)
+  {
+    return (0);
+  }
+
+  /* Pass the parameter instead of NULL: if the file name has not been
+     cached yet, it is then built from termination_file; if it has, the
+     argument is ignored and the cached name is returned. */
+  terminationfile = fopen(MT_get_terminate_filename(termination_file),"r");
+  if (terminationfile == NULL)
+  {
+    return (0);
+  }
+
+  /* an empty or malformed file must not be mistaken for a request */
+  if (fscanf(terminationfile,"%d",&terminate) != 1)
+  {
+    terminate = 0;
+  }
+  fclose(terminationfile);
+
+  if (terminate==1)
+  {
+    CCTK_VInfo (CCTK_THORNSTRING, "OH MY GOD! Found termination signal "
+                "in termination file! TERMINATION NOW!!!!");
+    CCTK_TerminateNext (cctkGH);
+  }
+
+  return (0);
+}
diff --git a/src/make.code.defn b/src/make.code.defn
new file mode 100644
index 0000000..bf7f9e4
--- /dev/null
+++ b/src/make.code.defn
@@ -0,0 +1,8 @@
+# Main make.code.defn file for thorn ManualTermination
+# $Header$
+
+# Source files in this directory
+SRCS = ManualTermination.c ManualTerminationFile.c
+
+# Subdirectories containing source files
+SUBDIRS =