Covariance Estimation

Calculation of Covariance

In [1]:
import numpy as np
A = np.array([[90, 60, 90], [90, 90, 90], [60, 60, 60], [60, 60, 90], [30, 30, 30]])
In [2]:
m = len(A)
One = np.ones((m, 1))
One_Matrix = One.dot(One.T) # m*m ones matrix

Average = One_Matrix.dot(A)/m # calculate the average of each column
Average
Out[2]:
array([[66., 60., 72.],
       [66., 60., 72.],
       [66., 60., 72.],
       [66., 60., 72.],
       [66., 60., 72.]])
In [3]:
a = A - Average # center data
In [4]:
CoV = a.T.dot(a)/m # n*n covariance matrix (normalized by m)
CoV
Out[4]:
array([[504., 360., 468.],
       [360., 360., 360.],
       [468., 360., 576.]])
In [5]:
# bias=False normalizes by (m - 1); bias=True normalizes by m
cov = np.cov(A.T, bias=True)
cov
Out[5]:
array([[504., 360., 468.],
       [360., 360., 360.],
       [468., 360., 576.]])
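
The same maximum-likelihood covariance can be written more compactly by centering with the column means directly; a minimal sketch using only the arrays already defined above:

mean = A.mean(axis=0)            # column means, shape (n,)
centered = A - mean              # broadcasted centering
centered.T.dot(centered) / m     # same n*n covariance matrix as above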

Generate Data from a Covariance Matrix

In [6]:
real_cov = np.array([[.8, .3], [.3, .4]])
rng = np.random.RandomState(0)
X = rng.multivariate_normal(mean=[0, 0], cov=real_cov, size=500)
X_test = np.array([[-3, 1], [0, 0]])
In [8]:
import matplotlib.pyplot as plt
plt.figure(figsize=(9, 3.5))
plt.plot(X[:, 0], X[:, 1], "yo")
plt.plot(X_test[:, 0], X_test[:, 1], "bs")
Out[8]:
[<matplotlib.lines.Line2D at 0x7ff33d8c4c90>]
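
One standard way to generate such data by hand is to transform uncorrelated standard-normal draws with a Cholesky factor of the target covariance; a minimal illustrative sketch (not the notebook's original code):

L = np.linalg.cholesky(real_cov)       # real_cov = L @ L.T
Z = rng.standard_normal((500, 2))      # uncorrelated standard-normal samples
X_manual = Z.dot(L.T)                  # rows now have covariance ≈ real_cov
np.cov(X_manual.T, bias=True)          # should be close to real_cov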
In [9]:
from matplotlib.patches import Ellipse
cov = np.cov(X.T, bias=True)

def plot_ellipse(X, cov):
    # eigenvalues give the variances along the principal axes,
    # eigenvectors give the orientation of those axes
    lambda_, v = np.linalg.eig(cov)
    lambda_ = np.sqrt(lambda_)  # standard deviations along the principal axes
    ax = plt.subplot(111, aspect='equal')
    ell = Ellipse(xy=(np.mean(X[:, 0]), np.mean(X[:, 1])),
                  width=lambda_[0]*2*2, height=lambda_[1]*2*2,  # 2-sigma axes
                  angle=np.rad2deg(np.arccos(v[0, 0])),  # rotation given by the first eigenvector
                  lw=4, edgecolor='r', facecolor='none')
    ax.add_artist(ell)

    plt.plot(X[:, 0], X[:, 1], "yo")
    plt.show()
    
plot_ellipse(X, cov)

Empirical covariance

In [10]:
cov
Out[10]:
array([[0.75693565, 0.28186631],
       [0.28186631, 0.39288421]])
In [15]:
from sklearn.covariance import EmpiricalCovariance
model = EmpiricalCovariance().fit(X)
model.covariance_
Out[15]:
array([[0.75693565, 0.28186631],
       [0.28186631, 0.39288421]])
In [19]:
import warnings
warnings.filterwarnings('ignore')
np.exp(model.score(X_test[[0]])), np.exp(model.score(X_test[[1]])) # Gaussian density (likelihood) at each test point
Out[19]:
(2.816995470132487e-07, 0.34003854237993864)
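
score returns the mean log-likelihood of the given samples under the fitted Gaussian model, so np.exp of it for a single point is just the Gaussian density there. A quick cross-check, assuming SciPy is available:

from scipy.stats import multivariate_normal
density = multivariate_normal(mean=model.location_, cov=model.covariance_)
density.pdf(X_test[0]), density.pdf(X_test[1])  # should match the values above up to numerical precision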
In [20]:
model.location_ # estimated feature means
Out[20]:
array([0.06228968, 0.01937939])
In [21]:
plot_ellipse(X, model.covariance_)

Shrunk Covariance

  • Avoids situations where the empirical covariance matrix cannot be inverted for numerical reasons
In [22]:
from sklearn.covariance import ShrunkCovariance
model = ShrunkCovariance().fit(X)
model.covariance_
Out[22]:
array([[0.73873308, 0.25367968],
       [0.25367968, 0.41108678]])
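
ShrunkCovariance with its default shrinkage of 0.1 is a convex combination of the empirical covariance and a scaled identity; a minimal sketch reproducing the matrix above:

shrinkage = 0.1                                          # ShrunkCovariance's default coefficient
emp_cov = np.cov(X.T, bias=True)                         # ML estimate from above
mu = np.trace(emp_cov) / emp_cov.shape[0]                # average variance
(1 - shrinkage) * emp_cov + shrinkage * mu * np.eye(2)   # ≈ model.covariance_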
In [25]:
np.exp(model.score(X_test[[0]])), np.exp(model.score(X_test[[1]])) # Gaussian density (likelihood) at each test point
Out[25]:
(9.721303897029778e-07, 0.32447281853736265)
In [23]:
model.location_
Out[23]:
array([0.06228968, 0.01937939])
In [24]:
plot_ellipse(X, model.covariance_)

Ledoit-Wolf Shrinkage

  • computes the shrinkage coefficient that minimizes the mean squared error between the estimated and the true covariance matrix
In [30]:
from sklearn.covariance import LedoitWolf
model = LedoitWolf().fit(X)
model.covariance_
Out[30]:
array([[0.7535177 , 0.27657361],
       [0.27657361, 0.39630217]])
In [31]:
np.exp(model.score(X_test[[0]])), np.exp(model.score(X_test[[1]])) # Gaussian density (likelihood) at each test point
Out[31]:
(3.658482640574148e-07, 0.33681519109653796)
In [32]:
model.location_
Out[32]:
array([0.06228968, 0.01937939])
In [33]:
plot_ellipse(X, model.covariance_)
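
The coefficient Ledoit-Wolf selects is stored on the fitted estimator; the ledoit_wolf helper function returns the same shrunk covariance and coefficient directly:

model.shrinkage_                          # data-driven shrinkage coefficient
from sklearn.covariance import ledoit_wolf
shrunk_cov, shrinkage = ledoit_wolf(X)    # same estimate without the estimator object
shrinkage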

Oracle Approximating Shrinkage

  • under the assumption of Gaussian-distributed data, yields a smaller mean squared error than Ledoit and Wolf's formula
In [36]:
from sklearn.covariance import OAS
model = OAS().fit(X)
model.covariance_
Out[36]:
array([[0.75337235, 0.27634855],
       [0.27634855, 0.39644751]])
In [37]:
np.exp(model.score(X_test[[0]])), np.exp(model.score(X_test[[1]])) # Gaussian density (likelihood) at each test point
Out[37]:
(3.6981633331926726e-07, 0.33668147169551416)
In [38]:
model.location_
Out[38]:
array([0.06228968, 0.01937939])
In [147]:
plot_ellipse(X, model.covariance_)
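
OAS also stores its shrinkage coefficient, which can be compared directly with the Ledoit-Wolf one; a small sketch:

from sklearn.covariance import LedoitWolf, OAS
lw = LedoitWolf().fit(X)
oas = OAS().fit(X)
lw.shrinkage_, oas.shrinkage_   # the two data-driven coefficients side by side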

Sparse Inverse Covariance

  • matrix inverse of the covariance matrix, often called the precision matrix
  • if two features are independent conditionally on the others, the corresponding coefficient in the precision matrix will be zero
  • if n_samples is on the order of n_features or smaller, sparse inverse covariance estimators tend to work better than shrunk covariance estimators
In [43]:
from sklearn.covariance import GraphicalLassoCV
model = GraphicalLassoCV()
model.fit(X)
model.covariance_
Out[43]:
array([[0.75693565, 0.28186627],
       [0.28186627, 0.39288421]])
In [44]:
np.exp(model.score(X_test[[0]])), np.exp(model.score(X_test[[1]])) # Gaussian density (likelihood) at each test point
Out[44]:
(2.817001600569494e-07, 0.3400385227512984)
In [45]:
model.location_
Out[45]:
array([0.06228968, 0.01937939])
In [46]:
plot_ellipse(X, model.covariance_)
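
The fitted precision (inverse covariance) matrix and the l1 penalty chosen by cross-validation are also available; entries of the precision matrix that are (near) zero indicate pairs of features that are conditionally independent given the others:

model.precision_   # estimated sparse inverse covariance (precision) matrix
model.alpha_       # l1 penalty selected by cross-validation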

Robust Covariance Estimation

  • the empirical and shrunk covariance estimators are very sensitive to the presence of outliers
  • robust covariance estimators can be used to perform outlier detection and to discard or downweight outlying observations before further processing of the data
  • FastMCD: a fast algorithm for the Minimum Covariance Determinant (MCD) estimator, available in scikit-learn as MinCovDet; see the sketch below
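
A minimal sketch of a robust fit with scikit-learn's MinCovDet (its FastMCD implementation); on this clean dataset the estimate should be close to the empirical one, but it would be far less affected if outliers were added to X:

from sklearn.covariance import MinCovDet
robust = MinCovDet(random_state=0).fit(X)
robust.covariance_      # robust covariance estimate
robust.support_.sum()   # number of observations used in the final estimate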