# --- Load the digits dataset and derive a two-column multilabel target ---
import numpy as np

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X = digits.data      # (1797, 64): flattened 8x8 grayscale images
y = digits.target    # digit labels 0-9

# Stratified 80/20 split keeps the class proportions equal in both partitions.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Two binary labels per sample: "is the digit >= 7?" and "is the digit odd?"
y_train_large = (train_Y >= 7)
y_train_odd = (train_Y % 2 == 1)
y_multilabel = np.column_stack((y_train_large, y_train_odd))  # (n_samples, 2)
# RandomForestClassifier natively supports multilabel targets: passing an
# (n_samples, 2) boolean indicator matrix to fit() trains one joint forest.
# NOTE: the original comment here referred to KNeighborsClassifier, but the
# estimator actually trained below is RandomForestClassifier.
from sklearn.neighbors import KNeighborsClassifier  # used later for denoising
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=1)
model.fit(train_X, y_multilabel)
y_pred = model.predict(test_X)  # shape (360, 2): one column per label
# Same task via the explicit meta-estimator: MultiOutputClassifier fits one
# independent clone of the base estimator per target column.
from sklearn.multioutput import MultiOutputClassifier

base_forest = RandomForestClassifier(random_state=1)
model = MultiOutputClassifier(base_forest, n_jobs=-1)  # n_jobs=-1: fit targets in parallel
model.fit(train_X, y_multilabel)
y_pred = model.predict(test_X)  # shape (360, 2)
# MultiOutputClassifier also turns a strictly single-label classifier such as
# LogisticRegression into a multilabel one (one classifier per target column).
from sklearn.linear_model import LogisticRegression

# max_iter raised from the default 100: the lbfgs solver does not converge on
# the raw (unscaled) 64-pixel features and would emit a ConvergenceWarning.
lr = LogisticRegression(random_state=0, max_iter=1000)
model = MultiOutputClassifier(lr, n_jobs=-1)
model.fit(train_X, y_multilabel)
y_pred = model.predict(test_X)  # shape (360, 2)
# ClassifierChain exploits label correlations: each classifier in the chain
# also receives the previous labels' values as extra input features.
from sklearn.multioutput import ClassifierChain

base_rf = RandomForestClassifier(random_state=1)
chain = ClassifierChain(base_rf, order='random', random_state=0)
chain.fit(train_X, y_multilabel)
y_pred = chain.predict(test_X)  # shape (360, 2)
# --- Multioutput regression: a toy problem with 3 continuous targets ---
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor

X, y = make_regression(n_samples=10, n_targets=3, random_state=1)

# MultiOutputRegressor fits one independent regressor per target, so it
# cannot take advantage of correlations between the targets.
model = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
model.fit(X, y)
model.predict(X)  # shape (10, 3)
# RegressorChain: like ClassifierChain but for regression — each regressor in
# the chain receives the earlier targets' values as additional features.
# (Dropped a stray copy-pasted `LogisticRegression` import that this
# regression section never used; the name is already imported above.)
from sklearn.multioutput import RegressorChain

model = GradientBoostingRegressor()
chain = RegressorChain(base_estimator=model)
chain.fit(X, y)
chain.predict(X)  # shape (10, 3)
# --- Multiclass-multioutput classification: denoising digit images ---
# Reload a clean stratified train/test split of the digits data.
digits = load_digits()
X = digits.data
y = digits.target
train_X, test_X, train_Y, test_Y = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Corrupt the images with small random integer noise (0-2 per pixel).
# Seeding makes the experiment reproducible, consistent with the
# random_state=42 used for the split above.
np.random.seed(42)
noise = np.random.randint(0, 3, (len(train_X), 64))
X_train_mod = train_X + noise
noise = np.random.randint(0, 3, (len(test_X), 64))
X_test_mod = test_X + noise
# Targets are the clean images: 64 outputs per sample, each a pixel
# intensity in 0..16 -> a multiclass-multioutput problem.
y_train_mod = train_X
y_test_mod = test_X

import matplotlib
import matplotlib.pyplot as plt

# Show one noisy test image. The colormap is passed by name: the attribute
# spelling `matplotlib.cm.binary` is deprecated in recent matplotlib.
plt.imshow(X_test_mod[100].reshape(8, 8), cmap='binary', interpolation='nearest')

# KNeighborsClassifier supports multiclass-multioutput targets out of the
# box: it predicts all 64 pixel values of the clean image at once.
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train_mod, y_train_mod)
clean_digit = knn_clf.predict([X_test_mod[100]])
plt.imshow(clean_digit.reshape(8, 8), cmap='binary', interpolation='nearest')