Source code for prms_python.simulation

# -*- coding: utf-8 -*-
"""
simulation.py -- Contains ``Simulation`` and ``SimulationSeries`` classes and 
associated functions for managing PRMS simulations at a low level.
"""

from __future__ import print_function
import glob
import multiprocessing as mp
import os
import shutil
import subprocess
import time

from .data import Data
from .parameters import Parameters
from .util import load_statvar

# Module-level shorthand for ``os.path.join``.
OPJ = os.path.join

class SimulationSeries(object):
    '''
    Series of simulations all to be run through a common interface.

    Utilizes :class:`multiprocessing.Pool` class to parallelize the
    execution of series of PRMS simulations. SimulationSeries also
    allows the user to define the PRMS executable command which is
    set to "prms" as default. It is best to add the prms executable to
    your $PATH environment variable. Each simulation that is run through
    ``SimulationSeries`` will follow the strict file structure as defined
    by :func:`Simulation.run()`. This class is useful particularly for
    creating new programmatic workflows not provided yet by PRMS-Python.

    Arguments:
        simulations (list or tuple): list of :class:`Simulation` objects
            to be run.

    Example:
        Let's say you have already created a series of PRMS models by
        modifying the input climatic forcing data, e.g. you have 100 *data*
        files and you want to run each using the same *control* and
        *parameters* file. For simplicity let's say there is a directory that
        contains all 100 *data* files e.g. data1, data2, ... or whatever they
        are named and nothing else. This example also assumes that you want
        each simulation to be run and stored in directories named after the
        *data* files as shown.

        >>> data_dir = 'dir_that_contains_all_data_files'
        >>> params = Parameters('path_to_parameter_file')
        >>> control_path = 'path_to_control'
        >>> # a list comprehension to make multiple simulations with
        >>> # different data files, alternatively you could use a for loop
        >>> sims = [
        ...     Simulation.from_data(
        ...         Data(os.path.join(data_dir, data_file)),
        ...         params,
        ...         control_path,
        ...         simulation_dir='sim_{}'.format(data_file)
        ...     )
        ...     for data_file in os.listdir(data_dir)
        ... ]

        Next we can use ``SimulationSeries`` to run all of these
        simulations in parallel. For example we may use 8 logical cores
        on a common desktop computer.

        >>> sim_series = SimulationSeries(sims)
        >>> sim_series.run(nproc=8)

        The ``SimulationSeries.run()`` method will run all 100 simulations
        where chunks of 8 at a time will be run in parallel. Inputs and
        outputs of each simulation will be sent to each simulation's
        ``simulation_dir`` following the file structure of
        :func:`Simulation.run()`.

    Note:
        The :class:`Simulation` and :class:`SimulationSeries` classes
        are low-level in that they alone do not create metadata for
        PRMS simulation scenarios. In other words they do not produce
        any additional files that may help the user know what differs
        between individual simulations.

    '''

    def __init__(self, simulations):
        # Copy into a list so generators/tuples can be iterated repeatedly.
        self.series = list(simulations)

    def run(self, prms_exec='prms', nproc=None):
        """
        Method to run multiple :class:`Simulation` objects in parallel.

        Keyword Arguments:
            prms_exec (str): name of PRMS executable on $PATH or path to
                executable; forwarded to every simulation's ``run`` method.
            nproc (int or None): number of logical or physical processors
                for parallel execution of PRMS simulations.

        Returns:
            :class:`SimulationSeries`: ``self``, so calls can be chained.

        Example:
            see :class:`SimulationSeries`

        Note:
            If ``nproc`` is not assigned the default action is to use half
            of the available processors on the machine (but never fewer
            than one) using the Python :mod:`multiprocessing` module.

        """
        # Nothing to do: avoid spawning worker processes for an empty series.
        if not self.series:
            return self

        if not nproc:
            # cpu_count() // 2 is 0 on a single-core machine and
            # multiprocessing.Pool rejects a process count below one.
            nproc = max(1, mp.cpu_count() // 2)

        # Pair every simulation with the requested executable so that the
        # module-level (hence picklable) worker can forward it.
        jobs = [(sim, prms_exec) for sim in self.series]

        pool = mp.Pool(processes=nproc)
        try:
            pool.map(_run_simulation_job, jobs)
        finally:
            # Always release the worker processes, even if a simulation
            # raised inside ``map``.
            pool.close()
            pool.join()

        return self

    def outputs_iter(self):
        '''
        Return a :class:`generator` of dictionaries with the path to the
        ``simulation_dir`` as well as paths to the *statvar.dat* output
        file, and *data* and *parameters* input files used in the simulation.

        Yields:
            :obj:`dict`: dictionary of paths to simulation directory,
                input, and output files.

        Example:
            >>> ser = SimulationSeries(simulations)
            >>> ser.run()
            >>> g = ser.outputs_iter()

            Would return something like

            >>> print(next(g))
                {
                   'simulation_dir': 'path/to/sim/',
                   'statvar': 'path/to/statvar',
                   'data': 'path/to/data',
                   'parameters': 'path/to/parameters'
                 }
        '''
        # Snapshot the directories when the method is called so later changes
        # to ``self.series`` do not affect a generator already handed out.
        dirs = [s.simulation_dir for s in self.series]

        return (
            {
                'simulation_dir': d,
                'statvar': os.path.join(d, 'outputs', 'statvar.dat'),
                'data': os.path.join(d, 'inputs', 'data'),
                'parameters': os.path.join(d, 'inputs', 'parameters')
            }
            for d in dirs
        )

    def __len__(self):
        # One output record is produced per simulation in the series.
        return len(self.series)


def _run_simulation_job(job):
    """Run one ``(simulation, prms_exec)`` pair; worker for ``Pool.map``."""
    sim, prms_exec = job
    sim.run(prms_exec=prms_exec)


def _simulation_runner(sim, prms_exec='prms'):
    """
    Run a single simulation; module-level so it can be used as a
    :mod:`multiprocessing` worker function.

    Arguments:
        sim: object exposing a ``run(prms_exec=...)`` method, e.g. a
            :class:`Simulation`.

    Keyword Arguments:
        prms_exec (str): name of PRMS executable on $PATH or path to
            executable, passed through to ``sim.run``. Defaults to "prms".
    """
    sim.run(prms_exec=prms_exec)


class Simulation(object):
    """
    Class that runs and manages file structure for a single PRMS simulation.

    The ``Simulation`` class provides low-level management of a PRMS
    simulation by copying model input files from ``input_dir`` argument to an
    output dir ``simulation_dir``. The file structure for an individual
    simulation after calling the ``run`` method is simple, two subdirectories
    "inputs" and "outputs" are created under ``simulation_dir`` and the
    respective input and output files from the current PRMS simulation are
    transferred there after the ``Simulation.run()`` method is called which
    executes the PRMS model, (see examples below in :func:`Simulation.run`).

    A ``Simulation`` instance checks that all required PRMS inputs (control,
    parameters, data) exist in the expected locations. If simulation_dir is
    provided and does not exist, it will be created. If it does exist it will
    be overwritten, unless it refers to the same directory as ``input_dir``,
    in which case the inputs are used in place.

    Keyword Arguments:
        input_dir (str): path to directory that contains control, parameter,
            and data files for the simulation
        simulation_dir (str): directory path to bundle inputs and outputs;
            ignored when ``input_dir`` is not given

    Example:
        see :func:`Simulation.run()`

    Raises:
        RuntimeError: if input directory does not contain a PRMS *data*,
            *parameters*, and *control* file.

    """
    def __init__(self, input_dir=None, simulation_dir=None):
        self.input_dir = input_dir
        self.simulation_dir = None
        self.control_path = None
        self.parameters_path = None
        self.data_path = None
        self.has_run = False

        # Without an input directory there is nothing to validate or copy;
        # ``from_data`` fills in the remaining attributes itself.
        if input_dir is None:
            return

        self.simulation_dir = simulation_dir
        self.control_path = os.path.join(input_dir, 'control')
        self.parameters_path = os.path.join(input_dir, 'parameters')
        self.data_path = os.path.join(input_dir, 'data')

        # check that all model input paths exist
        if not os.path.exists(self.control_path):
            raise RuntimeError('Control file missing from ' + input_dir)

        if not os.path.exists(self.parameters_path):
            raise RuntimeError('Parameter file missing from ' + input_dir)

        if not os.path.exists(self.data_path):
            raise RuntimeError('Data file missing from ' + input_dir)

        if not simulation_dir:
            return

        # Compare resolved paths rather than raw strings: an equivalent
        # spelling of input_dir (trailing slash, relative vs. absolute,
        # symlink) must not cause the inputs themselves to be deleted below.
        if self._resolved(simulation_dir) == self._resolved(input_dir):
            return

        # build output (simulation_dir) and copy input files there
        if os.path.exists(simulation_dir):
            shutil.rmtree(simulation_dir)

        # makedirs (as in ``from_data``) so nested paths can be created
        os.makedirs(simulation_dir)

        shutil.copy(self.control_path, simulation_dir)
        shutil.copy(self.data_path, simulation_dir)
        shutil.copy(self.parameters_path, simulation_dir)

        self.control_path = os.path.join(simulation_dir, 'control')
        self.parameters_path = os.path.join(simulation_dir, 'parameters')
        self.data_path = os.path.join(simulation_dir, 'data')

    @staticmethod
    def _resolved(path):
        """Return ``path`` in a canonical form suitable for comparison."""
        return os.path.normcase(os.path.realpath(path))

    @classmethod
    def from_data(cls, data, parameters, control_path, simulation_dir):
        '''
        Create a ``Simulation`` from a :class:`Data` and :class:`Parameters`
        object, plus a path to the *control* file, and providing a
        ``simulation_dir`` where the simulation should be run.

        Arguments:
            data (:class:`Data`): ``Data`` object for simulation
            parameters (:class:`Parameters`): ``Parameters`` object for
                simulation
            control_path (str): path to control file
            simulation_dir (str): path to directory where simulations will be
                run and where input and output will be stored. If it exists
                it will be overwritten.

        Returns:
            :class:`Simulation` ready to be run using ``simulation_dir`` for
                inputs and outputs

        Example:

            >>> d = Data('path_to_data_file')
            >>> p = Parameters('path_to_parameters_file')
            >>> c = 'path_to_control_file'
            >>> sim_dir = 'path_to_create_simulation'
            >>> sim = Simulation.from_data(d, p, c, sim_dir)
            >>> sim.run()

        Raises:
            TypeError: if ``data`` and ``parameters`` arguments are not of
                type :class:`Data` and :class:`Parameters`
        '''

        if not isinstance(data, Data):
            raise TypeError('data must be instance of Data')

        if not isinstance(parameters, Parameters):
            raise TypeError('parameters must be instance of Parameters, not '
                            + str(type(parameters)))

        if os.path.exists(simulation_dir):
            shutil.rmtree(simulation_dir)

        os.makedirs(simulation_dir)

        sim = cls()
        sim.simulation_dir = simulation_dir

        # Record where each input lives so the instance exposes the same
        # path attributes as one built through ``__init__``.
        sim.data_path = os.path.join(simulation_dir, 'data')
        sim.parameters_path = os.path.join(simulation_dir, 'parameters')
        sim.control_path = os.path.join(simulation_dir, 'control')

        data.write(sim.data_path)
        parameters.write(sim.parameters_path)
        shutil.copy(control_path, sim.control_path)

        return sim

    def run(self, prms_exec='prms'):
        """
        Run a ``Simulation`` instance using PRMS input files from ``input_dir``
        and copy to the ``Simulation`` file structure under ``simulation_dir``
        if given, otherwise leave PRMS input output unstructured and in
        ``input_dir``

        This method runs a single PRMS simulation from a ``Simulation``
        instance, waits until the process has completed and then transfers
        model input and output files to respective newly created directories.
        See example of the file structure that is created under different
        workflows of the ``run`` method below.

        The process working directory is changed while PRMS runs and is
        restored before the method returns, even if an error occurs.

        Keyword Arguments:
            prms_exec (str): name of PRMS executable on $PATH or path to
                executable

        Examples:
            If we create a :class:`Simulation` instance by only assigning the
            ``input_dir`` argument and call its ``run`` method the model will
            be run in the ``input_dir`` and all model input and output files
            will remain in ``input_dir``,

            >>> import os
            >>> input_dir = os.path.join(
                                          'PRMS-Python',
                                          'prms_python',
                                          'models',
                                          'lbcd'
                                        )
            >>> os.listdir(input_dir)
                ['data',
                 'data_3deg_upshift',
                 'parameters',
                 'parameters_adjusted',
                 'control']
            >>> sim = Simulation(input_dir)
            >>> sim.run()
            >>> os.listdir(input_dir) # all input and outputs in input_dir
                ['data',
                 'data_3deg_upshift',
                 'parameters',
                 'parameters_adjusted',
                 'control',
                 'statvar.dat',
                 'prms_ic.out',
                 'prms.out' ]

            Instead if we assigned a path for ``simulation_dir`` keyword
            argument and then called ``run``, i.e.

            >>> sim = Simulation(input_dir, 'path_simulation')
            >>> sim.run()

        the files structure for the PRMS simulation created by
        ``Simulation.run()`` would be::

             path_simulation
             ├── inputs
             │   ├── control
             │   ├── data
             │   └── parameters
             └── outputs
                 ├── data_3deg_upshift
                 ├── parameters_adjusted
                 ├── prms_ic.out
                 ├── prms.out
                 └── statvar.dat

        Note:
            As shown in the last example, currently the ``Simulation.run``
            routine only recognizes the *data*, *parameters*, and *control*
            file as PRMS inputs, all other files found in the working
            directory after completion of the PRMS simulation will be
            transferred to ``simulation_dir/outputs/``.
        """
        work_dir = self.simulation_dir if self.simulation_dir else self.input_dir

        cwd = os.getcwd()
        os.chdir(work_dir)

        try:
            p = subprocess.Popen(
                prms_exec + ' control', shell=True, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )

            # communicate() drains both pipes, blocks until PRMS exits and
            # closes the pipes, so no polling loop is needed. (Polling for a
            # non-negative return code would spin forever when the process
            # is killed by a signal, since that yields a negative code.)
            p.communicate()

            self.has_run = True

            if self.simulation_dir:
                os.mkdir('inputs')
                os.mkdir('outputs')

                for input_name in ('data', 'parameters', 'control'):
                    shutil.move(input_name, 'inputs')

                # all remaining files are outputs
                for g in glob.glob('*'):
                    if not os.path.isdir(g):
                        shutil.move(g, 'outputs')
        finally:
            # chdir is process-wide; never leave the caller stranded in the
            # simulation directory.
            os.chdir(cwd)
Navigation

Related Topics

Donate/support

Source code for prms_python.simulation