import numpy as np

# Two independent zero-mean Gaussian features of 1000 draws each; the second
# feature has half the standard deviation of the first. No seed is set, so
# the sample differs from run to run.
m1 = np.random.normal(loc=0.0, scale=1.0, size=1000)
m2 = np.random.normal(loc=0.0, scale=0.5, size=1000)

# Training matrix of shape (1000, 2): column 0 is m1, column 1 is m2.
X = np.array([m1, m2]).T

# Two hand-picked query points: (-3, 1) sits 3 and 2 standard deviations
# away from the mean along the two axes, while (0, 0) is the mean itself.
X_test = np.array([
    [-3, 1],
    [0, 0],
])
import warnings

# Suppress every warning from here on. The filter is installed before
# matplotlib is imported so that it also covers that import.
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt

# Overview figure: training samples as yellow circles ("yo") and the two
# query points as blue squares ("bs"). X.T / X_test.T unpack into the x and
# y coordinate rows.
plt.figure(figsize=(9, 3.5))
plt.plot(*X.T, "yo")
plt.plot(*X_test.T, "bs")
from sklearn.covariance import EllipticEnvelope

# Outlier detection with a robust Gaussian envelope fitted to X.
# `contamination` is the expected proportion of outliers in the data
# (0.05 = 5%) and determines where the decision threshold is placed.
clf = EllipticEnvelope(random_state=0, contamination=0.05)
clf.fit(X)
# predict() labels each training sample +1 (inlier) or -1 (outlier).
labels = clf.predict(X)

fig = plt.figure(figsize=(9, 6))
ax = fig.add_subplot(111)
# Draw the samples once, coloured by label. A second identical scatter call
# without `cmap` used to be drawn on top of this one, hiding the "Paired"
# colours; it has been removed.
ax.scatter(X[:, 0], X[:, 1], c=labels, marker='o', cmap="Paired")
from sklearn.covariance import MinCovDet

# Minimum Covariance Determinant estimate of location and covariance;
# fit() returns the fitted estimator, so it can be chained.
clf = MinCovDet(random_state=0).fit(X)

# Percentage of samples to flag as outliers (4 means 4%).
contamination = 4
# Cut-off distance: the (100 - contamination)-th percentile of the distances
# stored in clf.dist_ for the training samples.
threshold = np.percentile(clf.dist_, 100 - contamination)
# Keep only the rows of X whose distance exceeds the cut-off.
anomalies = X[clf.dist_ > threshold]

# All samples in the default colour, flagged samples overlaid as crosses.
fig, ax = plt.subplots(figsize=(9, 6))
ax.scatter(*X.T)
ax.scatter(*anomalies.T, marker='x')
from sklearn.ensemble import IsolationForest

# Outlier detection with an isolation forest of 100 trees.
# `contamination` is the expected proportion of outliers in the data
# (0.05 = 5%) and determines where the decision threshold is placed.
# No random_state is given, so the fitted forest varies between runs.
clf = IsolationForest(n_estimators=100, warm_start=True, contamination=0.05)
clf.fit(X)
# predict() labels each training sample +1 (inlier) or -1 (outlier).
labels = clf.predict(X)

fig = plt.figure(figsize=(9, 6))
ax = fig.add_subplot(111)
# Draw the samples once, coloured by label. A second identical scatter call
# without `cmap` used to be drawn on top of this one, hiding the "Paired"
# colours; it has been removed.
ax.scatter(X[:, 0], X[:, 1], c=labels, marker='o', cmap="Paired")
from sklearn.neighbors import LocalOutlierFactor

# Outlier detection with Local Outlier Factor on the training data itself
# (novelty=False), using 10 neighbours per sample.
# `contamination` is the expected proportion of outliers in the data
# (0.05 = 5%) and determines where the decision threshold is placed.
lof = LocalOutlierFactor(novelty=False, n_neighbors=10, contamination=0.05)
# fit_predict() fits on X and labels each sample +1 (inlier) or -1 (outlier).
labels = lof.fit_predict(X)

fig = plt.figure(figsize=(9, 6))
ax = fig.add_subplot(111)
# Draw the samples once, coloured by label. A second identical scatter call
# without `cmap` used to be drawn on top of this one, hiding the "Paired"
# colours; it has been removed.
ax.scatter(X[:, 0], X[:, 1], c=labels, marker='o', cmap="Paired")
from sklearn.svm import OneClassSVM

# Novelty detection: train a One-Class SVM on X, then classify the separate
# query points in X_test (+1 = inlier, -1 = outlier).
clf = OneClassSVM(gamma='auto')
clf.fit(X)
labels = clf.predict(X_test)
# Bare expression with no effect when run as a script; presumably left over
# from a notebook cell that displayed the predictions next to the points.
labels, X_test

# Training samples in the default colour; query points as large crosses
# coloured by their predicted label.
fig, ax = plt.subplots(figsize=(9, 6))
ax.scatter(*X.T)
ax.scatter(*X_test.T, c=labels, marker='x', s=100, cmap="Paired")
from sklearn.neighbors import LocalOutlierFactor

# Novelty detection with Local Outlier Factor: novelty=True allows the model
# fitted on X to score unseen points through predict().
lof = LocalOutlierFactor(novelty=True)
lof.fit(X)
# Predictions for the query points (+1 = inlier, -1 = outlier). The result
# was previously stored under the misspelled name `lables`, so the plot
# below silently reused the One-Class SVM predictions still held in `labels`.
labels = lof.predict(X_test)

fig = plt.figure(figsize=(9, 6))
ax = fig.add_subplot(111)
# Training samples in the default colour; query points as large crosses
# coloured by the LOF prediction.
ax.scatter(X[:, 0], X[:, 1])
ax.scatter(X_test[:, 0], X_test[:, 1], c=labels, marker='x', s=100, cmap="Paired")