from scipy import stats, special
import numpy as np
import matplotlib.pyplot as plt
# two independent samples of 100 draws from Uniform(0, 1)
sample_1 = stats.uniform.rvs(size=100)
sample_2 = stats.uniform.rvs(size=100)
# plot both samples against their index
fig, ax = plt.subplots()
ax.plot(range(100), sample_1)
ax.plot(range(100), sample_2)
plt.show()
With p = 2, the Minkowski distance is the Euclidean distance. The lower the distance, the higher the similarity between the two samples.
# Euclidean distance
from scipy.spatial import distance
distance.euclidean(sample_1, sample_2)
# or
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'euclidean')
# or
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'minkowski', p=2.)
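As a sanity check, the Minkowski formula (sum_i |u_i - v_i|^p)^(1/p) with p = 2 can be evaluated directly; this minimal numpy sketch should match the cdist results above:
# Euclidean distance computed from the Minkowski definition with p = 2
np.sum(np.abs(sample_1 - sample_2) ** 2) ** 0.5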
# Standardized Euclidean distance: each dimension is scaled by its variance;
# V=None estimates the variances from the inputs
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'seuclidean', V=None)
array([[14.14213562]])
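With V=None, cdist estimates the per-dimension variance from the two stacked rows (sample variance with ddof=1 in recent scipy versions); a minimal sketch with an explicit variance vector should reproduce the number above:
# estimate the per-dimension variance over both samples, then standardize
V = np.var(np.vstack([sample_1, sample_2]), axis=0, ddof=1)
np.sqrt(np.sum((sample_1 - sample_2) ** 2 / V))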
# Manhattan distance
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'minkowski', p=1.)
# or
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'cityblock')
array([[34.0490069]])
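The same value falls out of the definition directly, as a one-line check:
# Manhattan distance is the sum of absolute coordinate differences
np.sum(np.abs(sample_1 - sample_2))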
# Cosine distance (1 - cosine similarity)
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'cosine')
array([[0.2620535]])
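Cosine distance is one minus the cosine similarity; a minimal numpy check:
# 1 - (u . v) / (||u|| ||v||)
1 - np.dot(sample_1, sample_2) / (np.linalg.norm(sample_1) * np.linalg.norm(sample_2))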
# Correlation distance (1 - Pearson correlation)
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'correlation')
array([[1.04351762]])
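Since correlation distance is one minus the Pearson correlation, np.corrcoef should agree:
# 1 - Pearson r between the two samples
1 - np.corrcoef(sample_1, sample_2)[0, 1]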
# Chebyshev distance (maximum coordinate-wise difference)
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'chebyshev')
array([[0.97902329]])
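Chebyshev distance is just the largest coordinate-wise difference:
# max |u_i - v_i|
np.max(np.abs(sample_1 - sample_2))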
# Mahalanobis distance: VI=None estimates the inverse covariance from the data,
# which needs more observations than dimensions (here, more than 100 rows), so a
# single pair of rows raises a ValueError; pass VI explicitly instead
# (the identity matrix reduces it to plain Euclidean distance)
distance.cdist(sample_1.reshape(1, -1), sample_2.reshape(1, -1), 'mahalanobis', VI=np.eye(100))
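A minimal sketch of the intended usage, with hypothetical data where the covariance is estimable (500 observations of 3 features):
# estimate the inverse covariance from many low-dimensional observations,
# then compare two individual points
rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))                  # 500 observations, 3 dimensions
VI = np.linalg.inv(np.cov(X, rowvar=False))    # inverse covariance matrix
distance.cdist(X[:1], X[1:2], 'mahalanobis', VI=VI)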
# two-sample Kolmogorov-Smirnov test of whether the samples come from the same distribution
stats.kstest(sample_1, sample_2)
KstestResult(statistic=0.1, pvalue=0.7020569828664881)
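Passing a second array to stats.kstest runs the two-sample test; stats.ks_2samp is the explicit form and should return the same result:
# explicit two-sample KS test
stats.ks_2samp(sample_1, sample_2)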
from sklearn import metrics
# mutual_info_score expects discrete labels; with continuous inputs every value
# is unique, so the result degenerates to log(n) = ln(100) regardless of any
# actual dependence between the samples
metrics.mutual_info_score(sample_1, sample_2)
UserWarning: Clustering metrics expects discrete values but received continuous values for label, and continuous values for target
4.605170185988092
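One reasonable fix (a sketch assuming 10 equal-width bins on [0, 1]) is to discretize the samples before computing mutual information:
# bin the continuous values into discrete labels, then score
edges = np.linspace(0, 1, 11)
metrics.mutual_info_score(np.digitize(sample_1, edges), np.digitize(sample_2, edges))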
from scipy.stats import wasserstein_distance
# Wasserstein (earth mover's) distance between the two empirical distributions
wasserstein_distance(sample_1, sample_2)
0.02437976189308293
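For two equal-sized samples with uniform weights, the 1-D Wasserstein distance reduces to the mean absolute difference between the sorted samples, so this one-liner should give the same number:
# area between the two empirical CDFs, via order statistics
np.mean(np.abs(np.sort(sample_1) - np.sort(sample_2)))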