Source code for drmaa.session

# -----------------------------------------------------------
#  Copyright (C) 2008 StatPro Italia s.r.l.
#
#  StatPro Italia
#  Via G. B. Vico 4
#  I-20123 Milano
#  ITALY
#
#  phone: +39 02 96875 1
#  fax:   +39 02 96875 605
#
#  This program is distributed in the hope that it will be
#  useful, but WITHOUT ANY WARRANTY; without even the
#  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#  PURPOSE. See the license for more details.
# -----------------------------------------------------------
#
#  Author: Enrico Sirola <enrico.sirola@statpro.com>
'''
Everything related to sessions and jobs.
'''

from __future__ import absolute_import, print_function, unicode_literals

import sys
from collections import namedtuple
from ctypes import byref, c_int, create_string_buffer, pointer, POINTER, sizeof

from drmaa.const import (BLOCK_EMAIL, DEADLINE_TIME, DURATION_HLIMIT,
                         DURATION_SLIMIT, ENCODING, ERROR_PATH, INPUT_PATH,
                         JOB_CATEGORY, JOB_IDS_SESSION_ALL, JOB_IDS_SESSION_ANY,
                         JOB_NAME, JobState, JobControlAction,
                         JobSubmissionState, JOIN_FILES, JS_STATE,
                         NATIVE_SPECIFICATION, OUTPUT_PATH, REMOTE_COMMAND,
                         SIGNAL_BUFFER, START_TIME, status_to_string,
                         string_to_control_action, TIMEOUT_NO_WAIT,
                         TIMEOUT_WAIT_FOREVER, TRANSFER_FILES, V_ARGV, V_EMAIL,
                         V_ENV, WCT_HLIMIT, WCT_SLIMIT, WD)
from drmaa.helpers import (adapt_rusage, Attribute, attribute_names_iterator,
                           BoolConverter, c, DictAttribute, IntConverter,
                           run_bulk_job, SessionStringAttribute,
                           SessionVersionAttribute, string_vector,
                           VectorAttribute, Version)
from drmaa.wrappers import (drmaa_allocate_job_template, drmaa_attr_values_t,
                            drmaa_control, drmaa_delete_job_template,
                            drmaa_get_contact, drmaa_get_DRM_system,
                            drmaa_get_DRMAA_implementation, drmaa_job_ps,
                            drmaa_job_template_t, drmaa_run_job,
                            drmaa_synchronize, drmaa_wait, drmaa_wcoredump,
                            drmaa_wexitstatus, drmaa_wifaborted,
                            drmaa_wifexited, drmaa_wifsignaled, drmaa_wtermsig,
                            py_drmaa_exit, py_drmaa_init)


# Python 2/3 compatibility: on Python 2 rebind the text/bytes names so that
# the rest of this module can use ``str`` for text and ``bytes`` for raw
# byte strings regardless of the interpreter version.
if sys.version_info[0] < 3:
    bytes = str
    str = unicode


# Record built by Session.wait() describing how a job terminated.
JobInfo = namedtuple(
    "JobInfo",
    (
        "jobId",
        "hasExited",
        "hasSignal",
        "terminatedSignal",
        "hasCoreDump",
        "wasAborted",
        "exitStatus",
        "resourceUsage",
    ),
)


class JobTemplate(object):

    """
    A job to be submitted to the DRM.

    Instances wrap a native DRMAA job template; they can be passed directly
    to the wrapped DRMAA C functions (through ``_as_parameter_``) and can be
    used as context managers, in which case the native template is deleted
    when the ``with`` block is left.
    """

    HOME_DIRECTORY = '$drmaa_hd_ph$'
    """Home directory placeholder."""
    WORKING_DIRECTORY = '$drmaa_wd_ph$'
    """Working directory placeholder."""
    PARAMETRIC_INDEX = '$drmaa_incr_ph$'
    """Parametric index (for job arrays / bulk jobs) placeholder."""

    @property
    def attributeNames(self):
        """
        The list of supported DRMAA scalar attribute names.

        This is apparently useless now, and should probably be substituted
        by the list of attribute names of the JobTemplate instances.
        """
        return list(attribute_names_iterator())

    # scalar attributes
    remoteCommand = Attribute(REMOTE_COMMAND)
    """The command to be executed."""
    jobSubmissionState = Attribute(JS_STATE)
    """The job status."""
    workingDirectory = Attribute(WD)
    """The job working directory."""
    jobCategory = Attribute(JOB_CATEGORY)
    """The job category."""
    nativeSpecification = Attribute(NATIVE_SPECIFICATION)
    """
    A (DRM-dependent) opaque string to be passed to the DRM representing
    other directives.
    """
    blockEmail = Attribute(
        BLOCK_EMAIL,
        type_converter=BoolConverter(true='1', false='0'))
    """False if this job should send an email, True otherwise."""
    startTime = Attribute(START_TIME)
    """The job start time, a partial timestamp string."""
    jobName = Attribute(JOB_NAME)
    """The job Name."""
    inputPath = Attribute(INPUT_PATH)
    """The path to a file representing job's stdin."""
    outputPath = Attribute(OUTPUT_PATH)
    """The path to a file representing job's stdout."""
    errorPath = Attribute(ERROR_PATH)
    """The path to a file representing job's stderr."""
    joinFiles = Attribute(JOIN_FILES, type_converter=BoolConverter())
    """True if stdout and stderr should be merged, False otherwise."""
    # the following is available on ge6.2 only if enabled via cluster
    # configuration
    transferFiles = Attribute(TRANSFER_FILES)
    """
    Which of the job's standard streams should be transferred.

    No type converter is applied, so the raw DRMAA string is exchanged (in
    DRMAA 1.0 a combination of the characters 'i', 'o' and 'e', selecting
    stdin, stdout and stderr respectively).

    This option might require specific DRM configuration (it does on SGE).
    """
    # the following are apparently not available on ge 6.2
    # it will raise if you try to access these attrs
    deadlineTime = Attribute(DEADLINE_TIME)
    """The job deadline time, a partial timestamp string."""
    hardWallclockTimeLimit = Attribute(WCT_HLIMIT, IntConverter)
    """
    'Hard' Wallclock time limit, in seconds.

    The job will be killed by the DRM if it takes more than
    'hardWallclockTimeLimit' to complete.
    """
    softWallclockTimeLimit = Attribute(WCT_SLIMIT, IntConverter)
    """
    'Soft' Wallclock time limit, in seconds.

    The job will be signaled by the DRM if it takes more than
    'softWallclockTimeLimit' to complete.
    """
    hardRunDurationLimit = Attribute(DURATION_HLIMIT, IntConverter)
    """'Hard' run duration limit (DRMAA ``DURATION_HLIMIT`` attribute)."""
    softRunDurationLimit = Attribute(DURATION_SLIMIT, IntConverter)
    """'Soft' run duration limit (DRMAA ``DURATION_SLIMIT`` attribute)."""

    # vector attributes
    email = VectorAttribute(V_EMAIL)
    """email addresses to whom send job completion info."""
    args = VectorAttribute(V_ARGV)
    """The job's command argument list."""

    # dict attributes
    jobEnvironment = DictAttribute(V_ENV)
    """The job's environment dict."""

    # Set per instance in __init__; ctypes uses this attribute when the
    # instance itself is passed as an argument to a foreign function.
    _as_parameter_ = None

    def __init__(self, **kwargs):
        """
        Builds a JobTemplate instance.

        Attributes can be passed as keyword arguments; each one is assigned
        with ``setattr`` after the native template has been allocated. If
        any assignment fails, the native template is deleted again and the
        original exception is re-raised.
        """
        jt = pointer(POINTER(drmaa_job_template_t)())
        c(drmaa_allocate_job_template, jt)
        self._jt = self._as_parameter_ = jt.contents
        try:
            for name, value in kwargs.items():
                setattr(self, name, value)
        except BaseException:
            # Deliberately broad: whatever interrupted the setup, the
            # freshly allocated native template must not be leaked. The
            # exception is always propagated to the caller.
            self.delete()
            raise

    def delete(self):
        """Deallocate the underlying DRMAA job template."""
        c(drmaa_delete_job_template, self)

    def __enter__(self):
        """Context manager enter routine; returns the template itself."""
        return self

    def __exit__(self, *_):
        """
        Context manager exit routine.

        Deletes the underlying DRMAA job template. Returns False so that
        any exception raised inside the ``with`` block is propagated.
        """
        self.delete()
        return False
class Session(object):

    """
    The DRMAA Session.

    This class is the entry point for communicating with the DRM system.
    It can be used as a context manager: entering the ``with`` block calls
    `initialize()` with the contact string given to the constructor, and
    leaving it calls `exit()`.
    """

    TIMEOUT_WAIT_FOREVER = TIMEOUT_WAIT_FOREVER
    TIMEOUT_NO_WAIT = TIMEOUT_NO_WAIT
    JOB_IDS_SESSION_ANY = JOB_IDS_SESSION_ANY
    JOB_IDS_SESSION_ALL = JOB_IDS_SESSION_ALL

    contact = SessionStringAttribute(drmaa_get_contact)
    """
    a comma delimited string list containing the contact strings available
    from the default DRMAA implementation, one element per DRM system
    available. If called after initialize(), this method returns the
    contact String for the DRM system to which the session is
    attached. The returned strings are implementation dependent.
    """

    drmsInfo = SessionStringAttribute(drmaa_get_DRM_system)
    """
    If called before initialize(), this method returns a comma delimited
    list of DRM systems, one element per DRM system implementation
    provided. If called after initialize(), this method returns the
    selected DRM system. The returned String is implementation dependent.
    """

    drmaaImplementation = SessionStringAttribute(
        drmaa_get_DRMAA_implementation)
    """
    If called before initialize(), this method returns a comma delimited
    list of DRMAA implementations, one element for each DRMAA
    implementation provided. If called after initialize(), this method
    returns the selected DRMAA implementation. The returned String is
    implementation dependent and may contain the DRM system as a
    component.
    """

    version = SessionVersionAttribute()
    """
    a Version object containing the major and minor version numbers of the
    DRMAA library. For DRMAA 1.0, major is 1 and minor is 0.
    """

    def __init__(self, contactString=None):
        """
        Store the contact string used when the session is entered as a
        context manager. No DRMAA call is made here.
        """
        self.contactString = contactString

    # no return value
    @staticmethod
    def initialize(contactString=None):
        """
        Used to initialize a DRMAA session for use.

        :Parameters:
          contactString : string or None
             implementation-dependent string that may be used to specify
             which DRM system to use

        This method must be called before any other DRMAA calls. If
        contactString is None, the default DRM system is used, provided
        there is only one DRMAA implementation available. If there is more
        than one DRMAA implementation available, initialize() throws a
        NoDefaultContactStringSelectedException. initialize() should be
        called only once, by only one of the threads. The main thread is
        recommended. A call to initialize() by another thread or additional
        calls to initialize() by the same thread will throw a
        SessionAlreadyActiveException.
        """
        py_drmaa_init(contactString)

    # no return value
    @staticmethod
    def exit():
        """
        Used to disengage from DRM.

        This routine ends the current DRMAA session but doesn't affect any
        jobs (e.g., queued and running jobs remain queued and
        running). exit() should be called only once, by only one of the
        threads. Additional calls to exit() beyond the first will throw a
        NoActiveSessionException.
        """
        py_drmaa_exit()

    # returns JobTemplate instance
    @staticmethod
    def createJobTemplate():
        """
        Allocates a new job template.

        The job template is used to set the environment for jobs to be
        submitted. Once the job template has been created, it should also
        be deleted (via deleteJobTemplate()) when no longer needed. Failure
        to do so may result in a memory leak.
        """
        return JobTemplate()

    # takes JobTemplate instance, no return value
    @staticmethod
    def deleteJobTemplate(jobTemplate):
        """
        Deallocate a job template.

        :Parameters:
          jobTemplate : JobTemplate
            the job template to be deleted

        This routine has no effect on running jobs.
        """
        jobTemplate.delete()

    # takes JobTemplate instance, returns string
    @staticmethod
    def runJob(jobTemplate):
        """
        Submit a job with attributes defined in the job template.

        :Parameters:
          jobTemplate : JobTemplate
            the template representing the job to be run

        The returned job identifier is a String identical to that returned
        from the underlying DRM system.
        """
        # Output buffer the DRMAA library writes the new job id into.
        job_id_buffer = create_string_buffer(128)
        c(drmaa_run_job, job_id_buffer, sizeof(job_id_buffer), jobTemplate)
        return job_id_buffer.value.decode()

    # takes JobTemplate instance and num values, returns string list
    @staticmethod
    def runBulkJobs(jobTemplate, beginIndex, endIndex, step):
        """
        Submit a set of parametric jobs, each with attributes defined in
        the job template.

        :Parameters:
          jobTemplate : JobTemplate
            the template representing jobs to be run
          beginIndex : int
            index of the first job
          endIndex : int
            index of the last job
          step : int
            the step between job ids

        The returned job identifiers are Strings identical to those
        returned from the underlying DRM system. The JobTemplate class
        defines a `JobTemplate.PARAMETRIC_INDEX` placeholder for use in
        specifying paths. This placeholder is used to represent the
        individual identifiers of the tasks submitted through this method.
        """
        submitted = run_bulk_job(jobTemplate, beginIndex, endIndex, step)
        return list(submitted)

    # takes string and JobControlAction value, no return value
    @staticmethod
    def control(jobId, operation):
        """
        Used to hold, release, suspend, resume, or kill the job identified
        by jobId.

        :Parameters:
          jobId : string
            if jobId is `Session.JOB_IDS_SESSION_ALL` then this routine
            acts on all jobs submitted during this DRMAA session up to
            the moment control() is called.
          operation : string
            the action to perform; possible values and their meanings are:
                `JobControlAction.SUSPEND`
                  stop the job
                `JobControlAction.RESUME`
                  (re)start the job
                `JobControlAction.HOLD`
                  put the job on-hold
                `JobControlAction.RELEASE`
                  release the hold on the job
                `JobControlAction.TERMINATE`
                  kill the job

        To avoid thread races in multithreaded applications, the DRMAA
        implementation user should explicitly synchronize this call with
        any other job submission calls or control calls that may change
        the number of remote jobs.

        This method returns once the action has been acknowledged by the
        DRM system, but does not necessarily wait until the action has
        been completed. Some DRMAA implementations may allow this method
        to be used to control jobs submitted external to the DRMAA
        session, such as jobs submitted by other DRMAA session in other
        DRMAA implementations or jobs submitted via native utilities.
        """
        # The C layer expects a byte string for the job id.
        if isinstance(jobId, str):
            jobId = jobId.encode(ENCODING)
        action = string_to_control_action(operation)
        c(drmaa_control, jobId, action)

    # takes string list, num value and boolean, no return value
    @staticmethod
    def synchronize(jobIds, timeout=-1, dispose=False):
        """
        Waits until all jobs specified by jobIds have finished execution.

        :Parameters:
          jobIds
            If jobIds contains `Session.JOB_IDS_SESSION_ALL`, then this
            method waits for all jobs submitted during this DRMAA session
            up to the moment synchronize() is called
          timeout : int
            maximum time (in seconds) to be waited for the completion of a
            job.

            The value `Session.TIMEOUT_WAIT_FOREVER` may be specified to
            wait indefinitely for a result. The value
            `Session.TIMEOUT_NO_WAIT` may be specified to return
            immediately if no result is available.
          dispose : bool
            specifies how to treat the reaping of the remote job's
            internal data record, which includes a record of the job's
            consumption of system resources during its execution and other
            statistical information. If set to True, the DRM will dispose
            of the job's data record at the end of the synchronize()
            call. If set to False, the data record will be left for future
            access via the wait() method. It is the responsibility of the
            application to make sure that either `synchronize()` or
            `wait()` is called for every job. Not doing so creates a
            memory leak. Note that calling synchronize() with dispose set
            to true flushes all accounting information for all jobs in the
            list.

        To avoid thread race conditions in multithreaded applications, the
        DRMAA implementation user should explicitly synchronize this call
        with any other job submission calls or control calls that may
        change the number of remote jobs.

        If the call exits before the timeout has elapsed, all the jobs
        have been waited on or there was an interrupt. If the invocation
        exits on timeout, an ExitTimeoutException is thrown. The caller
        should check system time before and after this call in order to be
        sure of how much time has passed.
        """
        # The C API takes the dispose flag as an integer (1 or 0).
        dispose_flag = 1 if dispose else 0
        c(drmaa_synchronize, string_vector(jobIds), timeout, dispose_flag)

    # takes string and long, returns JobInfo instance
    @staticmethod
    def wait(jobId, timeout=-1):
        """
        Wait for a job with jobId to finish execution or fail.

        :Parameters:
          `jobId` : str
            The job id to wait completion for.

            If the special string, `Session.JOB_IDS_SESSION_ANY`, is
            provided as the jobId, this routine will wait for any job from
            the session
          `timeout` : int
            The timeout value is used to specify the desired behavior when
            a result is not immediately available.

            The value `Session.TIMEOUT_WAIT_FOREVER` may be specified to
            wait indefinitely for a result. The value
            `Session.TIMEOUT_NO_WAIT` may be specified to return
            immediately if no result is available. Alternatively, a number
            of seconds may be specified to indicate how long to wait for a
            result to become available

        Returns a `JobInfo` named tuple describing the waited-for job.

        This routine is modeled on the wait3 POSIX routine. If the call
        exits before timeout, either the job has been waited on
        successfully or there was an interrupt. If the invocation exits on
        timeout, an `ExitTimeoutException` is thrown. The caller should
        check system time before and after this call in order to be sure
        how much time has passed. The routine reaps job data records on a
        successful call, so any subsequent calls to wait() will fail,
        throwing an `InvalidJobException`, meaning that the job's data
        record has been already reaped. This exception is the same as if
        the job were unknown. (The only case where wait() can be
        successfully called on a single job more than once is when the
        previous call to wait() timed out before the job finished.)
        """
        # Output parameters filled in by drmaa_wait().
        stat = c_int()
        waited_id = create_string_buffer(128)
        rusage = pointer(POINTER(drmaa_attr_values_t)())
        if isinstance(jobId, str):
            jobId = jobId.encode(ENCODING)
        c(drmaa_wait, jobId, waited_id, sizeof(waited_id), byref(stat),
          timeout, rusage)
        resource_usage = adapt_rusage(rusage)

        # Decode the opaque status value with the drmaa_w* helpers.
        has_exited = c_int()
        c(drmaa_wifexited, byref(has_exited), stat)
        was_aborted = c_int()
        c(drmaa_wifaborted, byref(was_aborted), stat)
        has_signal = c_int()
        c(drmaa_wifsignaled, byref(has_signal), stat)

        # The core dump flag is only queried when the job did not exit.
        core_dumped = c_int()
        if not has_exited.value:
            c(drmaa_wcoredump, byref(core_dumped), stat)

        exit_code = c_int()
        c(drmaa_wexitstatus, byref(exit_code), stat)

        # The signal name is only queried when the signaled flag is 1;
        # otherwise the buffer stays empty.
        signal_name = create_string_buffer(SIGNAL_BUFFER)
        if has_signal.value == 1:
            c(drmaa_wtermsig, signal_name, sizeof(signal_name), stat)

        return JobInfo(
            jobId=waited_id.value.decode(),
            hasExited=bool(has_exited),
            hasSignal=bool(has_signal),
            terminatedSignal=signal_name.value.decode(),
            hasCoreDump=bool(core_dumped),
            wasAborted=bool(was_aborted),
            exitStatus=int(exit_code.value),
            resourceUsage=resource_usage,
        )

    # takes string, returns JobState instance
    @staticmethod
    def jobStatus(jobId):
        """
        returns the program status of the job identified by jobId.

        The possible values returned from this method are:

        * `JobState.UNDETERMINED`: process status cannot be determined,
        * `JobState.QUEUED_ACTIVE`: job is queued and active,
        * `JobState.SYSTEM_ON_HOLD`: job is queued and in system hold,
        * `JobState.USER_ON_HOLD`: job is queued and in user hold,
        * `JobState.USER_SYSTEM_ON_HOLD`: job is queued and in user and
          system hold,
        * `JobState.RUNNING`: job is running,
        * `JobState.SYSTEM_SUSPENDED`: job is system suspended,
        * `JobState.USER_SUSPENDED`: job is user suspended,
        * `JobState.DONE`: job finished normally, and
        * `JobState.FAILED`: job finished, but failed.

        The DRMAA implementation should always get the status of the job
        from the DRM system unless the status has already been determined
        to be FAILED or DONE and the status has been successfully
        cached. Terminated jobs return a FAILED status.
        """
        raw_state = c_int()
        if isinstance(jobId, str):
            jobId = jobId.encode(ENCODING)
        c(drmaa_job_ps, jobId, byref(raw_state))
        return status_to_string(raw_state.value)

    def __enter__(self):
        """Context manager enter function: initializes the session."""
        self.initialize(self.contactString)
        return self

    def __exit__(self, *_):
        """
        Context manager exit function: ends the session.

        Returns False so that exceptions raised inside the ``with`` block
        are propagated.
        """
        self.exit()
        return False