Source code for bumps.dream.varplot

"""
Build layout for histogram plots
"""

__all__ = ['var_plot_size', 'plot_vars', 'plot_var']

from math import ceil, sqrt

import numpy as np
from matplotlib import pyplot as plt

# Set space between plots in horiz and vert.
H_SPACE = 0.2
V_SPACE = 0.2

# Set top, bottom, left margins.
T_MARGIN = 0.2
B_MARGIN = 0.2
L_MARGIN = 0.2
R_MARGIN = 0.4

# Set desired plot sizes.
TILE_W = 3.0
TILE_H = 2.0
CBAR_WIDTH = 0.75


[docs] def var_plot_size(n): ncol, nrow = tile_axes_square(n) # Calculate total width and figure size plots_width = (TILE_W+H_SPACE)*ncol figwidth = plots_width + CBAR_WIDTH + L_MARGIN + R_MARGIN figheight = (TILE_H+V_SPACE)*nrow + T_MARGIN + B_MARGIN return figwidth, figheight
def _make_var_axes(n): """ Build a figure with one axis per parameter, and one axis (the last one) to contain the colorbar. Use to make the vars histogram figure. """ fig = plt.gcf() fig.clf() total_width, total_height = fig.get_size_inches() ncol, nrow = tile_axes_square(n) # Calculate dimensions as a faction of figure size. v_space_f = V_SPACE/total_height h_space_f = H_SPACE/total_width t_margin_f = T_MARGIN/total_height b_margin_f = B_MARGIN/total_height l_margin_f = L_MARGIN/total_width top = 1 - t_margin_f+v_space_f left = l_margin_f tile_h = (total_height - T_MARGIN - B_MARGIN)/nrow - V_SPACE tile_w = (total_width - L_MARGIN - R_MARGIN - CBAR_WIDTH)/ncol - H_SPACE tile_h_f = tile_h/total_height tile_w_f = tile_w/total_width # Calculate colorbar location (left, bottom) and colorbar height. l_cbar_f = l_margin_f + ncol*(tile_w_f+h_space_f) b_cbar_f = b_margin_f + v_space_f cbar_w_f = CBAR_WIDTH/total_width cbar_h_f = 1 - t_margin_f - b_margin_f - v_space_f cbar_box = [l_cbar_f, b_cbar_f, cbar_w_f, cbar_h_f] k = 0 for j in range(1, nrow+1): for i in range(0, ncol): if k >= n: break dims = [left + i*(tile_w_f+h_space_f), top - j*(tile_h_f+v_space_f), tile_w_f, tile_h_f] ax = fig.add_axes(dims) ax.set_facecolor('none') k += 1 fig.add_axes(cbar_box) #fig.set_size_inches(total_width, total_height) return fig def tile_axes_square(n): """ Determine number of columns by finding the next greatest square, then determine number of rows needed. """ cols = int(ceil(sqrt(n))) rows = int(ceil(n/float(cols))) return cols, rows
[docs] def plot_vars(draw, all_vstats, **kw): n = len(all_vstats) fig = _make_var_axes(n) cbar = _make_fig_colorbar(draw.logp) for k, vstats in enumerate(all_vstats): fig.sca(fig.axes[k]) plot_var(draw, vstats, k, cbar, **kw)
[docs] def plot_var(draw, vstats, var, cbar, nbins=30): values = draw.points[:, var].flatten() bin_range = vstats.p95_range #bin_range = np.min(values), np.max(values) _make_logp_histogram(values, draw.logp, nbins, bin_range, draw.weights, cbar) _decorate_histogram(vstats)
def _decorate_histogram(vstats): import pylab from matplotlib.transforms import blended_transform_factory as blend l95, h95 = vstats.p95_range l68, h68 = vstats.p68_range # Shade things inside 1-sigma pylab.axvspan(l68, h68, color='gold', alpha=0.5, zorder=-1) # build transform with x=data, y=axes(0,1) ax = pylab.gca() transform = blend(ax.transData, ax.transAxes) def marker(symbol, position): if position < l95: symbol, position, ha = '<'+symbol, l95, 'left' elif position > h95: symbol, position, ha = '>'+symbol, h95, 'right' else: symbol, position, ha = symbol, position, 'center' pylab.text(position, 0.95, symbol, va='top', ha=ha, transform=transform, zorder=3, color='g') #pylab.axvline(v) marker('|', vstats.median) marker('E', vstats.mean) marker('*', vstats.best) pylab.text(0.01, 0.95, vstats.label, zorder=2, backgroundcolor=(1, 1, 0, 0.2), verticalalignment='top', horizontalalignment='left', transform=pylab.gca().transAxes) ax.set_yticklabels([]) def _make_fig_colorbar(logp): import matplotlib as mpl import pylab # Option 1: min to min + 4 #vmin=-max(logp); vmax=vmin+4 # Option 1b: min to min log10(num samples) #vmin=-max(logp); vmax=vmin+log10(len(logp)) # Option 2: full range of best 98% snllf = pylab.sort(-logp) vmin, vmax = snllf[0], snllf[int(0.98*(len(snllf)-1))] # robust range # Option 3: full range #vmin,vmax = -max(logp),-min(logp) fig = pylab.gcf() ax = fig.axes[-1] cmap = mpl.cm.copper # Set the colormap and norm to correspond to the data for which # the colorbar will be used. norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax) # ColorbarBase derives from ScalarMappable and puts a colorbar # in a specified axes, so it has everything needed for a # standalone colorbar. There are many more kwargs, but the # following gives a basic continuous colorbar with ticks # and labels. class MinDigitsFormatter(mpl.ticker.Formatter): def __init__(self, low, high): self.delta = high - low def __call__(self, x, pos=None): return format_value(x, self.delta) ticks = () #(vmin, vmax) formatter = MinDigitsFormatter(vmin, vmax) cbar = mpl.colorbar.ColorbarBase( ax, cmap=cmap, norm=norm, ticks=ticks, format=formatter, orientation='vertical') #cb.set_ticks(ticks) #cb.set_ticklabels(labels) #cb.set_label('negative log likelihood') cbar_box = ax.get_position().bounds fig.text(cbar_box[0],cbar_box[1], '{:.3G}'.format(vmin), va='top') fig.text(cbar_box[0], cbar_box[1]+cbar_box[3], '{:.3G}'.format(vmax), va='bottom') return cbar def _make_logp_histogram(values, logp, nbins, ci, weights, cbar): from numpy import (ones_like, searchsorted, linspace, cumsum, diff, unique, argsort, array, hstack, exp) if weights is None: weights = ones_like(logp) # TODO: values are being sorted to collect stats and again to plot idx = argsort(values) values, weights, logp = values[idx], weights[idx], logp[idx] #open('/tmp/out','a').write("ci=%s, range=%s\n" # % (ci,(min(values),max(values)))) edges = linspace(ci[0], ci[1], nbins+1) idx = searchsorted(values[1:-1], edges) #weightsum = cumsum(weights) #heights = diff(weightsum[idx])/weightsum[-1] # normalized weights import pylab edgecolors = None cmap = cbar.cmap cmap_edges = linspace(0, 1, cmap.N+1)[1:-1] bins = [] # marginalized maximum likelihood for s, e, xlo, xhi \ in zip(idx[:-1], idx[1:], edges[:-1], edges[1:]): if s == e: continue # parameter interval endpoints x = array([xlo, xhi], 'd') # -logp values within interval, with sort index from low to high pv = -logp[s:e] pidx = argsort(pv) pv = pv[pidx] # weights for samples within interval, sorted pw = weights[s:e][pidx] # vertical colorbar top edges is the cumulative sum of the weights y_top = cumsum(pw) # For debugging compare with one rectangle per sample if False: import matplotlib as mpl cmap = mpl.cm.flag edgecolors = 'k' xmid = (xlo+xhi)/2 x = [xlo, xmid] y = hstack((0, y_top)) z = pv[:, None] pylab.pcolormesh(x, y, z, norm=cbar.norm, cmap=cmap) x = [xmid, xhi] # Possibly millions of samples, so group those which have the # same colour instead of drawing each as its own rectangle. # # Norm the values then look up the colormap edges in the sorted # normed negative log probabilities. Drop duplicates, which # represent zero-width bars. Assign the value for each interval # according to the value at the change point. # # The indexing logic is very ugly. The searchsorted() function # returns 0 if before the first or N if after the last, so the # end points of the range are dropped so that there is and implicit # [-inf, ... interior points ..., inf] range. Similarly, colours # below vmin go to vmin and above vmax go to vmax, so drop those # end points as well. Then put the end points back on in the # found indices [0, ... interior edges ..., N-1]. Use the value # at the end of the boundary to colour the section. # Something is not quite right: with this algorithm the first # block appears to always be one element long and is often the # same colour as the next block. This is only visible if edges # are drawn so ignore it for now. change_point = searchsorted(cbar.norm(pv[1:-1]), cmap_edges) tops = unique(hstack((change_point, len(pv)-1))) y = hstack((0, y_top[tops])) z = pv[tops][:, None] pylab.pcolormesh( x, y, z, norm=cbar.norm, cmap=cmap, edgecolors=edgecolors) # centerpoint, histogram height, maximum likelihood for each bin bins.append(((xlo+xhi)/2, y_top[-1], exp(cbar.norm.vmin-pv[0]))) # Check for broken distribution if not bins: return centers, height, maxlikelihood = array(bins).T # Normalize maximum likelihood plot so it contains the same area as the # histogram, unless it is really spikey, in which case make sure it has # about the same height as the histogram. maxlikelihood *= np.sum(height)/np.sum(maxlikelihood) hist_peak = np.max(height) ml_peak = np.max(maxlikelihood) if ml_peak > hist_peak*1.3: maxlikelihood *= hist_peak*1.3/ml_peak pylab.plot(centers, maxlikelihood, '-g') ## plot marginal gaussian approximation along with histogram #def G(x, mean, std): # return np.exp(-((x-mean)/std)**2/2)/np.sqrt(2*np.pi*std**2) ## TODO: use weighted average for standard deviation #mean, std = np.average(values, weights=weights), np.std(values, ddof=1) #pdf = G(centers, mean, std) #pylab.plot(centers, pdf*np.sum(height)/np.sum(pdf), '-b') def _make_var_histogram(values, logp, nbins, ci, weights): # Produce a histogram hist, bins = np.histogram(values, bins=nbins, range=ci, #new=True, density=True, weights=weights) # Find the max likelihood for values in each bin edges = np.searchsorted(values, bins) histbest = [np.max(logp[edges[i]:edges[i+1]]) if edges[i] < edges[i+1] else -np.inf for i in range(nbins)] # scale to marginalized probability with peak the same height as hist histbest = np.exp(np.asarray(histbest) - max(logp)) * np.max(hist) import pylab # Plot the histogram pylab.bar(bins[:-1], hist, width=bins[1]-bins[0]) # Plot the kernel density estimate #density = KDE1D(values) #x = linspace(bins[0],bins[-1],100) #pylab.plot(x, density(x), '-k') # Plot the marginal maximum likelihood centers = (bins[:-1]+bins[1:])/2 pylab.plot(centers, histbest, '-g')