import numpy as np
# generate bimodal random numbers (mixture of two unit-variance Gaussians)
def make_data(N, f=0.3, rseed=1):
    """Draw ``N`` samples from a bimodal mixture of two Gaussians.

    Every sample starts as a standard-normal draw; the samples from index
    ``int(f * N)`` onward are then shifted by +5.  The result is therefore a
    mixture in which roughly a fraction ``f`` of the points is centred on 0
    and the remainder is centred on 5.

    Parameters
    ----------
    N : int
        Number of samples to generate.
    f : float, default 0.3
        Fraction of the samples left in the component centred on 0.
    rseed : int, default 1
        Seed for the private ``RandomState``; the global NumPy random state
        is not touched.

    Returns
    -------
    x : ndarray of shape (N,)
        The generated samples (not shuffled: the unshifted ones come first).
    """
    rand = np.random.RandomState(rseed)
    x = rand.randn(N)
    # Shift the tail of the array to create the second mode.
    x[int(f * N):] += 5
    return x
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt

# Sample set used by the 1-D density-estimation examples.
X = make_data(1000)

# Train a Gaussian KDE; scikit-learn expects a 2-D (n_samples, n_features) array.
kde = KernelDensity(kernel='gaussian', bandwidth=0.1).fit(X.reshape(-1, 1))

# Evaluate the log density of the trained KDE on a regular grid over [-5, 10].
X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
log_density = kde.score_samples(X_plot)

fig, ax = plt.subplots()
# score_samples returns log densities, so exponentiate before plotting.
ax.plot(X_plot, np.exp(log_density))
# Histogram normalised to a probability density.  `density=True` replaces the
# `normed=True` keyword, which was removed from matplotlib.
hist = ax.hist(X, bins=30, density=True)
# Mark every sample just below the x-axis; the marker array is sized from X
# rather than from a hard-coded sample count.
ax.scatter(X, np.full_like(X, -0.01), marker='+', color='r')
# ax.hist returns (bar heights, bin edges, bar artists)
density, bins, patches = hist
density, bins, patches
# The bar areas (height * width) of a density histogram add up to 1
widths = np.diff(bins)
np.sum(density * widths)
# Available kernels: Gaussian, Tophat, Epanechnikov, Exponential, Linear, Cosine
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import LeaveOneOut
import matplotlib.pyplot as plt

# Candidate bandwidths: 100 values, log-spaced between 0.1 and 10.
bandwidths = 10 ** np.linspace(-1, 1, 100)
# Select the bandwidth by leave-one-out cross-validation.
# NOTE: this fits one model per sample per candidate, so it is slow.
grid = GridSearchCV(KernelDensity(kernel='gaussian'), {'bandwidth': bandwidths}, cv=LeaveOneOut())
grid.fit(X[:, None])
grid.best_params_

# Re-evaluate the density on the plotting grid with the selected bandwidth.
kde = grid.best_estimator_
log_density = kde.score_samples(X_plot)

fig, ax = plt.subplots()
ax.plot(X_plot, np.exp(log_density))
# Histogram normalised to a probability density.  `density=True` replaces the
# `normed=True` keyword, which was removed from matplotlib.
hist = ax.hist(X, bins=30, density=True)
# Mark every sample just below the x-axis; the marker array is sized from X
# rather than from a hard-coded sample count.
ax.scatter(X, np.full_like(X, -0.01), marker='+', color='r')
import matplotlib.pyplot as plt

# 2-D sample: first coordinate drawn with the default scale, second with scale 0.5
m1 = np.random.normal(size=1000)
m2 = np.random.normal(scale=0.5, size=1000)
X = np.column_stack((m1, m2))

# Leave-one-out search over 100 log-spaced bandwidths between 0.1 and 10
bandwidths = np.logspace(-1, 1, 100)
grid = GridSearchCV(
    KernelDensity(kernel='gaussian'),
    {'bandwidth': bandwidths},
    cv=LeaveOneOut(),
)
grid.fit(X)

# Density of the selected model at two query points
X_test = np.array([[-3, 1], [0, 0]])
kde = grid.best_estimator_
log_density = kde.score_samples(X_test)
np.exp(log_density)

fig, ax = plt.subplots()
# training samples (red crosses)
ax.scatter(m1, m2, marker='+', color='r')
# query points (blue dots)
ax.scatter(X_test[:, 0], X_test[:, 1], marker='o', color='b')
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA

digits = load_digits()
digits.data.shape  # each sample is an 8*8 digit image

# Reduce the digits to 15 principal components (no whitening)
pca = PCA(n_components=15, whiten=False)
data = pca.fit_transform(digits.data)

# Choose the KDE bandwidth from 20 log-spaced candidates between 0.1 and 10
params = {'bandwidth': np.logspace(-1, 1, 20)}
grid = GridSearchCV(KernelDensity(), params)
grid.fit(data)
kde = grid.best_estimator_

# Sample 44 new points from the fitted density and map them back
# from the PCA space to the original feature space
new_data = pca.inverse_transform(kde.sample(44, random_state=0))
new_data.shape

# Show the second generated sample as an 8x8 image
fig, ax = plt.subplots()
ax.imshow(new_data[1].reshape(8, 8))
from sklearn.datasets import fetch_species_distributions

data = fetch_species_distributions()
species_names = ['Bradypus Variegatus', 'Microryzomys Minutus']

train_records = data['train']
# Training coordinates, one row per record: columns are ('dd lat', 'dd long')
Xtrain = np.column_stack((train_records['dd lat'], train_records['dd long']))
# Label is 1 when the species name starts with 'micro', otherwise 0
is_micro = [name.decode('ascii').startswith('micro') for name in train_records['species']]
ytrain = np.array(is_micro, dtype='int')
# Scale by pi/180 (degrees -> radians), in place
Xtrain *= np.pi / 180.
def construct_grids(batch):
    """Construct the map grid from the batch object

    Parameters
    ----------
    batch : Batch object
        The object returned by :func:`fetch_species_distributions`.
        Only the attributes ``x_left_lower_corner``, ``y_left_lower_corner``,
        ``Nx``, ``Ny`` and ``grid_size`` are read.

    Returns
    -------
    (xgrid, ygrid) : 1-D arrays
        The grid corresponding to the values in batch.coverages.
        ``xgrid`` has exactly ``batch.Nx`` points and ``ygrid`` exactly
        ``batch.Ny`` points, spaced ``batch.grid_size`` apart.
    """
    # x,y coordinates of the first grid cell: one grid step in from the
    # lower-left corner
    xmin = batch.x_left_lower_corner + batch.grid_size
    ymin = batch.y_left_lower_corner + batch.grid_size

    # Build the coordinates from an integer index range.  np.arange with a
    # floating-point step can return one element too many because of
    # rounding, which would no longer match the coverage arrays.
    # x coordinates of the grid cells
    xgrid = xmin + np.arange(batch.Nx) * batch.grid_size
    # y coordinates of the grid cells
    ygrid = ymin + np.arange(batch.Ny) * batch.grid_size

    return (xgrid, ygrid)
# Coordinate grid of the coverage maps, sub-sampled to every 5th cell.
# The y axis is reversed to match the row order of the coverage arrays
# (presumably stored north-to-south -- confirm against the dataset docs).
xgrid, ygrid = construct_grids(data)
X, Y = np.meshgrid(xgrid[::5], ygrid[::5][::-1])

# Cells holding the -9999 marker are excluded; only the remaining cells
# are evaluated.
land_reference = data.coverages[6][::5, ::5]
land_mask = (land_reference > -9999).ravel()

# Query points as (Y, X) columns, matching the ('dd lat', 'dd long') column
# order of Xtrain, scaled by pi/180 (degrees -> radians).
xy = np.vstack([Y.ravel(), X.ravel()]).T
xy = xy[land_mask]
xy *= np.pi / 180.

# Haversine-metric KDE fitted on the training points labelled 0.
kde = KernelDensity(bandwidth=0.04, metric='haversine', kernel='gaussian', algorithm='ball_tree')
kde.fit(Xtrain[ytrain == 0])

# Use a float array: with an integer array the densities assigned below
# would be silently truncated to whole numbers.  Masked-out cells keep -9999.
Z = np.full(land_mask.shape[0], -9999, dtype=float)
Z[land_mask] = np.exp(kde.score_samples(xy))
Z = Z.reshape(X.shape)

# Filled density contours from 0 upward (the -9999 cells fall below every
# level), plus the outline of the masked region at level -9998.
levels = np.linspace(0, Z.max(), 25)
plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds)
plt.contour(X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid")