import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
def make_data(N, f=0.3, rseed=1):
    """Generate a reproducible two-cluster 1-D sample.

    Draws ``N`` standard-normal values from a ``RandomState`` seeded with
    ``rseed`` and then shifts every value from index ``int(f * N)`` onward
    by +5, leaving the first ``int(f * N)`` values untouched.

    Parameters
    ----------
    N : int
        Number of values to draw.
    f : float, optional
        Fraction of the values (taken from the start of the array) that is
        left unshifted.
    rseed : int, optional
        Seed for the private ``numpy.random.RandomState``.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``N``.
    """
    # A dedicated RandomState keeps the result independent of NumPy's
    # global random state.
    rand = np.random.RandomState(rseed)
    x = rand.randn(N)
    # Shift the tail of the sample to create the second cluster.
    x[int(f * N):] += 5
    return x
# Build the 1000-point synthetic sample and show its raw distribution as a
# 30-bin histogram.
x = make_data(N=1000)
hist = plt.hist(x, bins=30)
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import LeaveOneOut
# Candidate bandwidths: 100 values, log-spaced from 10**-1 to 10**1.
bandwidths = 10 ** np.linspace(-1, 1, 100)

# Choose the Gaussian-kernel bandwidth by leave-one-out cross-validation.
# NOTE: leave-one-out refits the estimator once per sample for every
# candidate bandwidth, so the run time grows with len(x) * len(bandwidths).
grid = GridSearchCV(
    KernelDensity(kernel='gaussian'),
    {'bandwidth': bandwidths},
    cv=LeaveOneOut(),
)
# scikit-learn expects a 2-D (n_samples, n_features) array, hence x[:, None].
grid.fit(x[:, None])
grid.best_params_
# Output recorded in the original notebook run:
#   {'bandwidth': 0.35111917342151316}

# Estimator refit on the full data with the selected bandwidth.
model = grid.best_estimator_
# Evaluate the fitted density on 1000 evenly spaced points in [-4, 8].
x_d = np.linspace(-4, 8, 1000)
# score_samples takes a 2-D (n_samples, n_features) array and returns the
# log-density at each point.
logprob = model.score_samples(x_d[:, None])

# Shaded curve: the density itself (exponential of the log-density).
plt.fill_between(x_d, np.exp(logprob), alpha=0.5, color='r')
# Rug plot: one blue tick per observation, placed just below zero.
plt.plot(x, np.full_like(x, -0.01), '|b', markeredgewidth=1)
# Fix the y-range so both the rug at -0.01 and the density curve are visible.
plt.ylim(-0.02, 0.3)
# Output recorded in the original notebook run: (-0.02, 0.3)
# NOTE(review): these calls draw on the current pyplot figure; if this file is
# run as a plain script rather than cell by cell, confirm whether a fresh
# figure (plt.figure()) is wanted before this plot.
# Draw 1000 new points from the fitted density estimate and show them as a
# 30-bin histogram.
x_sample = model.sample(n_samples=1000)
hist = plt.hist(x_sample, bins=30)