Using a chain
The Chain class is central to sampling in epsie. Both the Metropolis-Hastings sampler and the parallel tempered sampler use Chain to represent each of the parallel chains (the parallel tempered sampler uses a parallel tempered chain, which itself is just a collection of Chain instances). A Chain is also the object that is passed to proposals' update functions.
In this notebook we demonstrate setting up and running a chain. Generally, you do not interact with chains directly, but rather with the sampler classes. However, this will give a better sense of what is going on under the hood, and what information is available to proposals when they are updated.
%matplotlib notebook
from matplotlib import pyplot
import numpy
from epsie.proposals import Normal
from epsie.chain import Chain
# simple model for testing
from scipy import stats
class Model(object):

    def __init__(self):
        # we'll use a 2D Gaussian for the likelihood distribution
        self.params = ['x', 'y']
        self.mean = [2., 5.]
        self.cov = [[1., 0.], [0., 2.]]
        self.likelihood_dist = stats.multivariate_normal(mean=self.mean,
                                                         cov=self.cov)
        # we'll just use a uniform prior
        self.prior_bounds = {'x': (-20., 20.),
                             'y': (-20., 20.)}
        xmin = self.prior_bounds['x'][0]
        dx = self.prior_bounds['x'][1] - xmin
        ymin = self.prior_bounds['y'][0]
        dy = self.prior_bounds['y'][1] - ymin
        self.prior_dist = {'x': stats.uniform(xmin, dx),
                           'y': stats.uniform(ymin, dy)}

    def prior_rvs(self, size=None):
        return {p: self.prior_dist[p].rvs(size=size)
                for p in self.params}

    def logprior(self, **kwargs):
        return sum([self.prior_dist[p].logpdf(kwargs[p])
                    for p in self.params])

    def loglikelihood(self, **kwargs):
        return self.likelihood_dist.logpdf([kwargs[p] for p in self.params])

    def __call__(self, **kwargs):
        logp = self.logprior(**kwargs)
        if logp == -numpy.inf:
            logl = None
        else:
            logl = self.loglikelihood(**kwargs)
        return logl, logp
model = Model()
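The chain calls the model with the current parameter values as keyword arguments and expects the tuple (loglikelihood, logprior) in return, as in the __call__ method above. A quick sanity check of that interface (the draw is random, so exact numbers will vary):

# check the model interface: called with keyword arguments, it should
# return a tuple of (loglikelihood, logprior)
test_point = model.prior_rvs()
print(test_point, model(**test_point))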
# just use a normal distribution for the proposals (Metropolis-Hastings algorithm)
proposal = Normal(['x', 'y'])
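By default the Normal proposal uses a unit-variance Gaussian for each parameter. If that scale mixes poorly for a given model, the covariance can be set explicitly; the following sketch assumes Normal's optional cov argument:

# hypothetical tuning: a wider jump distribution, assuming Normal
# accepts an optional covariance matrix via the cov keyword
wide_proposal = Normal(['x', 'y'], cov=[[2., 0.], [0., 4.]])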
# create the chain; we'll use a seed to make the results deterministic
chain = Chain(model.params, model, [proposal], bit_generator=100)
# set the starting positions
chain.start_position = model.prior_rvs()
# evolve the chain for 1000 steps
for _ in range(1000):
chain.step()
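Between steps the chain exposes the same state that a proposal's update function sees when it is handed the chain. For example (attribute names as in epsie's Chain class; check the class docs if your version differs):

# state available to proposals' update functions
print(chain.iteration)         # number of steps taken so far
print(chain.current_position)  # dict of the latest {param: value}
print(chain.current_stats)     # dict with the latest 'logl' and 'logp'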
# plot the samples
fig, (ax1, ax2) = pyplot.subplots(2)
ax1.plot(chain.positions['x'])
ax2.plot(chain.positions['y'])
# the true values
ax1.axhline(model.mean[0], color='r', ls='--')
ax2.axhline(model.mean[1], color='r', ls='--')
ax1.set_ylabel('x')
ax2.set_ylabel('y')
ax2.set_xlabel('iteration')
fig.show()
# plot the acceptance ratios (clipped at 1)
fig, ax = pyplot.subplots()
# take a clipped copy so we don't modify the chain's stored values
ar = numpy.minimum(chain.acceptance['acceptance_ratio'], 1.)
ax.plot(ar)
ax.set_xlabel('iteration')
ax.set_ylabel('acceptance ratio')
fig.show()
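Besides the ratios, the acceptance record tracks whether each jump was actually taken. Assuming the structured array carries a boolean 'accepted' field alongside 'acceptance_ratio' (as in epsie's acceptance dtype), the overall acceptance rate is:

# overall fraction of proposed jumps that were accepted; assumes the
# acceptance record includes a boolean 'accepted' field
print(chain.acceptance['accepted'].mean())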
# plot the positions, colored by their log posterior
fig, ax = pyplot.subplots()
s = ax.scatter(chain.positions['x'], chain.positions['y'], c=chain.stats['logl']+chain.stats['logp'])
c = fig.colorbar(s)
# the true values
ax.axvline(model.mean[0], color='r')
ax.axhline(model.mean[1], color='r')
ax.set_xlabel('x')
ax.set_ylabel('y')
c.set_label(r'$\log p(d|h)p(h)$')
fig.show()
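For long runs you may want to periodically copy the history out and free memory. The sketch below assumes epsie's Chain.clear() method, which empties the stored history while keeping the current position so sampling can continue:

# possible checkpointing pattern; assumes Chain.clear() discards the
# stored history but retains the current position
saved = {p: chain.positions[p].copy() for p in model.params}
chain.clear()
for _ in range(1000):
    chain.step()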