Source code for bumps.dream.mahal
# This program is public domain
# Author: Paul Kienzle, April 2010
"""
Mahalanobis distance calculator
Compute the
`Mahalanobis distance <https://en.wikipedia.org/wiki/Mahalanobis_distance>`_
between observations and a reference set. The principle components of the
reference set define the basis of the space for the observations. The simple
Euclidean distance is used within this space.
"""
__all__ = ["mahalanobis"]
from numpy import dot, mean, sum
from numpy.linalg import svd
[docs]def mahalanobis(Y, X):
"""
Returns the distances of the observations from a reference set.
Observations are stored in rows *Y* and the reference set in *X*.
"""
M = mean(X, axis=0) # mean
Xc = X - mean(X, axis=0) # center the reference
W = dot(Xc.T, Xc)/(Xc.shape[0] - 1) # covariance of reference
Yc = Y - M # center the observations
# Distance is diag(Yc * inv(W) * Yc.H)
# solve Wb = Yc.H using singular value decomposition because it is
# the most accurate with numpy; QR decomposition does not use pivoting,
# and is less accurate. The built-in solve routine is between the two.
u, s, vh = svd(W, 0)
SVinv = vh.T.conj()/s
Uy = dot(u.T.conj(), Yc.T.conj())
b = dot(SVinv, Uy)
D = sum(Yc.T * b, axis=0) # compute distance
return D
def test():
from numpy import array
from numpy.linalg import norm
d = mahalanobis(array([[2, 3, 4], [2, 3, 4]]),
array([[1, 0, 0], [2, 1, 0], [1, 1, 0], [2, 0, 1]]))
assert norm(d-[290.25, 290.25]) < 1e-12, "diff=%s" % str(d-[290.25, 290.25])
if __name__ == "__main__":
test()