# Copyright (C) 2019 Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""EPSIE is a toolkit for doing MCMCs using embarrasingly parallel Markov
chains.
"""
# get the version number
from ._version import __version__
import os
import sys
import pickle
from io import BytesIO
import logging
import numpy
from numpy.random import (PCG64, SeedSequence)
#
# =============================================================================
#
# Random number generation utilities
#
# =============================================================================
#
# The bit generator used for all random number generation.
# Users may change this, but it has to be something recognized by numpy.random
BIT_GENERATOR = PCG64
[docs]def create_seed(seed=None):
"""Creates a seed for a :py:class:`numpy.random.SeedSequence`.
Parameters
----------
seed : int, optional
If a seed is given, will just return it. Default is None, in which case
a seed is created.
Returns
-------
int :
A seed to use.
"""
if seed is None:
seed = SeedSequence().entropy
logging.debug("Using seed: %i", seed)
return seed
[docs]def create_bit_generator(seed=None, stream=0):
"""Creates an instance of a ``BIT_GENERATOR``.
Parameters
----------
seed : int, optional
The seed to use. If seed is None (the default), will create a seed
using :py:func:`create_seed`.
stream : int, optional
The stream to create the bit generator for. This allows multiple
generators to exist with the same seed, but that produce different sets
of random numbers. Default is 0.
Returns
-------
BIT_GENERATOR :
The bit generator initialized with the given seed and stream.
"""
# create the seed sequence
seedseq = SeedSequence(create_seed(seed))
if stream > 0:
seedseq = seedseq.spawn(stream+1)[stream]
return BIT_GENERATOR(seedseq)
[docs]def create_bit_generators(ngenerators, seed=None):
r"""Creates a collection of random bit generators.
The bit generators are different streams with the same seed. They are all
statistically independent of each other, while still being reproducable.
Parameters
----------
ngenerators : int
The number of generators to create. Must be :math:`\geq` 1.
seed : int, optional
The seed to use. If none provided, will generate one using the system
entropy.
Returns
-------
list :
List of ``BIT_GENERATOR``.
"""
if ngenerators < 1:
raise ValueError("ngenerators must be >= 1")
seeds = SeedSequence(create_seed(seed)).spawn(ngenerators)
return list(map(BIT_GENERATOR, seeds))
#
# =============================================================================
#
# Array utilities
#
# =============================================================================
#
[docs]def array2dict(array):
"""Converts a structured array into a dictionary."""
fields = array.dtype.names # raises an AttributeError if array is None
if fields is None:
# not a structred array, just return
return array
return {f: _getatomic(array[f]) for f in fields}
def _getatomic(val):
"""Checks if a given value is numpy scalar. If so, it returns
the value as its native python type.
"""
if isinstance(val, numpy.ndarray) and val.size == 1 and val.ndim == 0:
val = val.item(0)
return val
#
# =============================================================================
#
# Parallel tempering utilities
#
# =============================================================================
#
[docs]def make_betas_ladder(ntemps, maxtemp):
"""Makes a log spaced ladder of betas."""
minbeta = 1./maxtemp
return numpy.geomspace(minbeta, 1., num=ntemps)
#
# =============================================================================
#
# Checkpointing utilities
#
# =============================================================================
#
[docs]def dump_state(state, fp, path=None, dsetname='sampler_state', protocol=None):
"""Dumps the given state to an hdf5 file handler.
The state is stored as a raw binary array to ``{path}/{dsetname}`` in the
given hdf5 file handler. If a dataset with the same name and path is
already in the file, the dataset will be resized and overwritten with the
new state data.
Parameters
----------
state : any picklable object
The sampler state to dump to file. Can be the object returned by
any of the samplers' `.state` attribute (a dictionary of dictionaries),
or any picklable object.
fp : h5py.File
An open hdf5 file handler. Must have write capability enabled.
path : str, optional
The path (group name) to store the state dataset to. Default (None)
will result in the array being stored to the top level.
dsetname : str, optional
The name of the dataset to store the binary array to. Default is
``sampler_state``.
protocol : int, optional
The protocol version to use for pickling. See the :py:mod:`pickle`
module for more details.
"""
memfp = BytesIO()
pickle.dump(state, memfp, protocol=protocol)
dump_pickle_to_hdf(memfp, fp, path=path, dsetname=dsetname)
[docs]def dump_pickle_to_hdf(memfp, fp, path=None, dsetname='sampler_state'):
"""Dumps pickled data to an hdf5 file object.
Parameters
----------
memfp : file object
Bytes stream of pickled data.
fp : h5py.File
An open hdf5 file handler. Must have write capability enabled.
path : str, optional
The path (group name) to store the state dataset to. Default (None)
will result in the array being stored to the top level.
dsetname : str, optional
The name of the dataset to store the binary array to. Default is
``sampler_state``.
"""
memfp.seek(0)
bdata = numpy.frombuffer(memfp.read(), dtype='S1')
if path is not None:
fp = fp[path]
if dsetname not in fp:
fp.create_dataset(dsetname, shape=bdata.shape, maxshape=(None,),
dtype=bdata.dtype)
elif bdata.size != fp[dsetname].shape[0]:
fp[dsetname].resize((bdata.size,))
fp[dsetname][:] = bdata
[docs]def load_state(fp, path=None, dsetname='sampler_state'):
"""Loads a sampler state from the given hdf5 file object.
The sampler state is expected to be stored as a raw bytes array which can
be loaded by pickle.
Parameters
----------
fp : h5py.File
An open hdf5 file handler.
path : str, optional
The path (group name) that the state data is stored to. Default (None)
is to read from the top level.
dsetname : str, optional
The name of the dataset that the state data is stored to. Default is
``sampler_state``.
"""
if path is not None:
fp = fp[path]
bdata = fp[dsetname][()].tobytes()
return pickle.load(BytesIO(bdata))