Source code for bumps.pdfwrapper

"""
Build a bumps model from a function.

The :class:`PDF` class uses introspection to convert a negative log
likelihood function nllf(m1,m2,...) into a :class:`bumps.fitproblem.Fitness`
class that has fittable parameters m1, m2, ....

There is no attempt to manage data or uncertainties, except that an
additional plot function can be provided to display the current value
of the function in whatever way is meaningful.

The note regarding user defined functions in :mod:`bumps.curve` apply
here as well.
"""

import inspect
from dataclasses import dataclass
from typing import Callable, Dict

import numpy as np

from .parameter import Parameter, ValueProtocol
from .fitproblem import Fitness, CovarianceMixin
from .bounds import init_bounds



[docs]
@dataclass(init=False, eq=False)
class PDF(CovarianceMixin):
    """
    Build a model from a function.

    This model can be fitted with any of the bumps optimizers.

    *fn* is a function that returns the negative log likelihood of seeing
    its input parameters.

    The fittable parameters are derived from the parameter names in the
    function definition, with *name* prepended to each parameter.

    The optional *plot* function takes the same arguments as *fn*, with an
    additional *view* argument which may be set from the bumps command
    line.  If provide, it should provide a visual indication of the
    function value and uncertainty on the current matplotlib.pyplot figure.

    Additional keyword arguments are treated as the initial values for
    the parameters, or initial ranges if par=(min,max).  Otherwise, the
    default is taken from the function definition (if the function uses
    par=value to define the parameter) or is set to zero if no default is
    given in the function.
    """

    fn: Callable
    name: str
    plotter: Callable
    dof: int
    pars: Dict[str, Parameter] = None  # Needs to be None initially for getattr/setattr to work

    has_residuals = False  # Don't have true residuals

    def __init__(self, fn, name="", plotter=None, dof=1, pars=None, **kw):
        self.dof = dof
        # Make every name a parameter; initialize the parameters
        # with the default value if function is defined with keyword
        # initializers; override the initializers with any keyword
        # arguments specified in the fit function constructor.
        labels, vararg, varkw, values, *_ = inspect.getfullargspec(fn)

        if vararg or varkw:
            raise TypeError("Function cannot have *args or **kwargs in declaration")
        # Parameters default to zero
        init = dict((p, 0.0) for p in labels)
        # If the function provides default values, use those
        if values:
            init.update(zip(labels[-len(values) :], values))
        # Regardless, use any values specified in the constructor, but first
        # check that they exist as function parameters.
        invalid = set(kw.keys()) - set(labels)
        if invalid:
            raise TypeError("Invalid initializers: %s" % ", ".join(sorted(invalid)))
        init.update(kw)

        # Build parameters out of ranges and initial values
        if pars is None:
            pars = dict((p, Parameter.default(init[p], name=name + p)) for p in labels)
        self.pars = pars

        # Remember the function, parameters, and number of parameters
        self.fn = fn
        self._labels = labels
        self.plotter = plotter
        self.name = name if name else "PDF"

    # Allow dot access to members of the parameter dictionary. Existing attributes
    # of the object take precedence.
    def __setattr__(self, key, value):
        # print(f"setting {key}")
        if self.pars and key in self.pars and key not in self.__dict__:
            if not isinstance(value, ValueProtocol):
                raise TypeError("Can only assign parameter or expression to a parameter slot")
            self.__dict__["pars"][key] = value
        else:
            super().__setattr__(key, value)

    def __getattr__(self, key):
        # print(f"getting {key}")
        if self.pars and key in self.pars:
            return self.pars[key]
        raise AttributeError(f"{type(self)!r} has no attribute {key!r}")


[docs]
    def parameters(self):
        return self.pars


    parameters.__doc__ = Fitness.parameters.__doc__


[docs]
    def nllf(self):
        kw = dict((p, getattr(self, p).value) for p in self._labels)
        return self.fn(**kw)


    nllf.__doc__ = Fitness.__call__.__doc__


[docs]
    def chisq(self):
        return 2 * self.nllf() / self.dof



[docs]
    def residuals(self):
        # A fake residuals vector of length 1.
        return np.sqrt([2 * self.nllf()])


    # chisq.__doc__ = Fitness.chisq.__doc__


[docs]
    def chisq_str(self):
        return "%g" % self.chisq()


    # chisq_str.__doc__ = Fitness.chisq_str.__doc__

    __call__ = chisq


[docs]
    def plot(self, view=None):
        if self.plotter:
            kw = dict((p, getattr(self, p).value) for p in self._labels)
            self.plotter(view=view, **kw)


    plot.__doc__ = Fitness.plot.__doc__


[docs]
    def numpoints(self):
        return len(self._labels) + 1


    numpoints.__doc__ = Fitness.numpoints.__doc__




[docs]
class VectorPDF(CovarianceMixin):
    """
    Build a model from a function.

    This model can be fitted with any of the bumps optimizers.

    *fn* is a function that returns the negative log likelihood of seeing
    its input parameters.

    Vector *p* of length *n* defines the initial value. Unlike :class:`PDF`,
    *VectorPDF* operates on a parameter vector *p* rather than individual
    parameters *p1*, *p2*, etc.  Default parameter values *p* must be
    provided in order to determine the number of parameters.

    *labels* are the names of the individual parameters.  If not present,
    the name for parameter *k* defaults to *pk*.  Each label is prefixed by
    *name*.

    The optional *plot* function takes the same arguments as *fn*, with an
    additional *view* argument which may be set from the bumps command
    line.  If provide, it should provide a visual indication of the
    function value and uncertainty on the current matplotlib.pyplot figure.

    Additional keyword arguments are treated as the initial values for
    the parameters, or initial ranges if par=(min,max).  Otherwise, the
    default is taken from the function definition (if the function uses
    par=value to define the parameter) or is set to zero if no default is
    given in the function.
    """

    has_residuals = False  # Don't have true residuals

    def __init__(self, fn, p, name="", plot=None, dof=1, labels=None, **kw):
        self.dof = dof
        if labels is None:
            labels = ["p" + str(k) for k, _ in enumerate(p)]
        init = dict(zip(labels, p))
        init.update(kw)

        # Build parameters out of ranges and initial values
        pars = dict((k, Parameter.default(init[k], name=name + k)) for k in labels)

        # Make parameters accessible as model attributes
        for k, v in pars.items():
            if hasattr(self, k):
                raise TypeError("Parameter cannot be named %s" % k)
            setattr(self, k, v)

        # Remember the function, parameters, and number of parameters
        self._function = fn
        self._labels = labels
        self._plot = plot
        self.name = name if name else "VectorPDF"


[docs]
    def parameters(self):
        # Note: need to refetch the pars from self in case the user assigned
        # model.par = new_parameter after creating the model.
        return dict((k, getattr(self, k)) for k in self._labels)


    parameters.__doc__ = Fitness.parameters.__doc__


[docs]
    def nllf(self):
        pvec = np.array([getattr(self, k).value for k in self._labels])
        return self._function(pvec)


    nllf.__doc__ = Fitness.__call__.__doc__


[docs]
    def chisq(self):
        return 2 * self.nllf() / self.dof


    # chisq.__doc__ = Fitness.chisq.__doc__


[docs]
    def chisq_str(self):
        return "%g" % self.chisq()


    # chisq_str.__doc__ = Fitness.chisq_str.__doc__

    __call__ = chisq


[docs]
    def plot(self, view=None):
        if self._plot:
            values = np.array([getattr(self, k).value for k in self._labels])
            self._plot(values, view=view)


    plot.__doc__ = Fitness.plot.__doc__


[docs]
    def numpoints(self):
        return len(self._labels) + 1


    numpoints.__doc__ = Fitness.numpoints.__doc__


[docs]
    def residuals(self):
        return np.array([self.chisq()])


    residuals.__doc__ = Fitness.residuals.__doc__




[docs]
class DirectProblem(CovarianceMixin):
    """
    Build model from negative log likelihood function *f(p)*.

    Vector *p* of length *n* defines the initial value.

    *bounds* defines limiting values for *p* as
    *[(p1_low, p1_high), (p2_low, p2_high), ...]*.  If all parameters are
    have the same bounds, use *bounds=np.tile([low,high],[n,1])*.

    Unlike :class:`PDF`, no parameter objects are defined for the elements
    of *p*, so all are fitting parameters.
    """

    has_residuals = False  # Don't have true residuals

    def __init__(self, f, p0, bounds=None, dof=1, labels=None, plot=None):
        self.f = f
        self.n = len(p0)
        self.p = np.asarray(p0, "d")
        self.dof = dof
        if bounds is not None:
            self._bounds = np.asarray(bounds, "d")
        else:
            self._bounds = np.tile((-np.inf, np.inf), (self.n, 1)).T

        self._labels = labels if labels else ["p%d" % i for i, _ in enumerate(p0)]
        self._plot = plot
        self.model_reset()


[docs]
    def nllf(self, pvec=None):
        # Nllf is the primary interface from the fitters.  We are going to
        # make it as cheap as possible by not having to marshall values
        # through parameter boxes.
        return self.f(pvec) if pvec is not None else self.f(self.p)



[docs]
    def model_reset(self):
        self._parameters = [
            Parameter(value=self.p[k], bounds=self._bounds[:, k], labels=self._labels[k]) for k in range(len(self.p))
        ]



[docs]
    def model_update(self):
        self.p = np.array([p.value for p in self._parameters])



[docs]
    def model_parameters(self):
        return self._parameters



[docs]
    def chisq(self):
        return 2 * self.nllf() / self.dof



[docs]
    def chisq_str(self):
        return "%g" % self.chisq()


    __call__ = chisq


[docs]
    def setp(self, p):
        # Note: setp is called
        self.p = p
        for parameter, value in zip(self._parameters, self.p):
            parameter.value = value



[docs]
    def getp(self):
        return self.p



[docs]
    def show(self):
        print("[nllf=%g]" % self.nllf())
        print(self.summarize())



[docs]
    def summarize(self):
        return "\n".join("%40s %g" % (name, value) for name, value in zip(self._labels, self.getp()))



[docs]
    def labels(self):
        return self._labels



[docs]
    def randomize(self, n=None):
        bounds = [init_bounds(b) for b in self._bounds.T]
        if n is not None:
            return np.array([b.random(n) for b in bounds]).T
        else:
            # Need to go through setp when updating model.
            self.setp([b.random(1)[0] for b in bounds])



[docs]
    def bounds(self):
        return self._bounds



[docs]
    def plot(self, p=None, fignum=None, figfile=None, view=None):
        if p is not None:
            self.setp(p)
        if self._plot:
            values = np.array([getattr(self, p).value for p in self._labels])
            self._plot(values, view=view)


    plot.__doc__ = Fitness.plot.__doc__