# --- MNIST "is this digit a 5?" task: data, target and cross-validated scores ---
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

# Fetch the 'mnist_784' dataset from OpenML and pull out features and labels.
mnist = fetch_openml(name='mnist_784')
data_X = mnist['data']
data_Y = mnist['target']

# The full dataset is used for training; no hold-out split is made here.
train_X = data_X
# Boolean target: True where the label equals the string '5'
# (presumably the labels come back as strings -- confirm for the installed
# scikit-learn version).
train_Y_5 = data_Y == '5'

# Out-of-fold decision-function scores from 3-fold cross-validation. These
# continuous scores (rather than hard predictions) feed the threshold-based
# curves computed further down.
# NOTE(review): default solver settings on unscaled pixel values may emit a
# convergence warning -- confirm whether that is acceptable here.
model = LogisticRegression()
scores = cross_val_predict(
    model,
    train_X,
    train_Y_5,
    method="decision_function",
    cv=3,
)
from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt

# Precision and recall for each candidate decision threshold.
precisions, recalls, thresholds = precision_recall_curve(train_Y_5, scores)

# Figure: precision and recall as functions of the threshold.
fig, ax = plt.subplots(figsize=(9, 6))

# The last element of each metric array is dropped so the arrays line up
# with `thresholds` on the x-axis.
(line1,) = ax.plot(thresholds, precisions[:-1], 'b--', label='Precision')
(line2,) = ax.plot(thresholds, recalls[:-1], 'g-', label='Recall')

legend = ax.legend(
    handles=[line1, line2],
    labels=['Precision', 'Recall'],
    shadow=True,
)
ax.set_xlabel("Threshold", fontsize=14)
# Figure: precision plotted directly against recall (the PR curve).
fig, ax = plt.subplots(figsize=(9, 6))
ax.plot(recalls, precisions)
ax.set_xlabel("Recall", fontsize=14)
ax.set_ylabel("Precision", fontsize=14)
ax.set_xlim(0, 1)
from sklearn.metrics import roc_curve, roc_auc_score

# ROC curve for the same cross-validated scores.
# NOTE: this rebinds `thresholds`, replacing the precision/recall thresholds
# computed earlier with the ROC thresholds.
fpr, tpr, thresholds = roc_curve(train_Y_5, scores)

# Area under the ROC curve. The value used to be computed and thrown away
# (a bare expression only displays in an interactive cell), so keep it in a
# variable and report it explicitly.
roc_auc = roc_auc_score(train_Y_5, scores)
print("ROC AUC:", roc_auc)

# Figure: true positive rate against false positive rate, both axes on [0, 1].
fig, ax = plt.subplots(figsize=(9, 6))
ax.plot(fpr, tpr)
ax.set_xlabel("False Positive Rate", fontsize=14)
ax.set_ylabel("True Positive Rate (Recall)", fontsize=14)
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
# --- Learning curve for Gaussian naive Bayes on the digits dataset ---
from sklearn.datasets import load_digits
from sklearn.model_selection import learning_curve
from sklearn.naive_bayes import GaussianNB

X, y = load_digits(return_X_y=True)
model = GaussianNB()

# Accuracy on train and validation data at five training-set sizes, spaced
# evenly from 10% to 100% of the available training data, with 5-fold CV.
train_sizes, train_scores, test_scores = learning_curve(
    model,
    X,
    y,
    train_sizes=np.linspace(.1, 1.0, 5),
    scoring='accuracy',
    cv=5,
)

# Mean and spread along axis 1 (presumably the CV folds -- confirm against
# the learning_curve documentation), giving one value per training-set size.
train_scores_mean = train_scores.mean(axis=1)
train_scores_std = train_scores.std(axis=1)
test_scores_mean = test_scores.mean(axis=1)
test_scores_std = test_scores.std(axis=1)
import matplotlib.pyplot as plt

# Figure: mean train/validation accuracy versus training-set size, each with
# a translucent band of one standard deviation either side of the mean.
fig, ax = plt.subplots(figsize=(9, 6))

(line1,) = ax.plot(train_sizes, train_scores_mean, 'ro-', label='Train')
(line2,) = ax.plot(train_sizes, test_scores_mean, 'go-', label='Validation')

ax.fill_between(
    train_sizes,
    train_scores_mean - train_scores_std,
    train_scores_mean + train_scores_std,
    color="r",
    alpha=0.1,
)
ax.fill_between(
    train_sizes,
    test_scores_mean - test_scores_std,
    test_scores_mean + test_scores_std,
    color="g",
    alpha=0.1,
)

legend = ax.legend(
    handles=[line1, line2],
    labels=['Train', 'Validation'],
    shadow=True,
)
ax.set_xlabel("Training set size", fontsize=14)
ax.set_ylabel("Accuracy", fontsize=14)