import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.svm import SVC
# --- Demo 1: SelfTrainingClassifier -----------------------------------------
# Create a data set containing labeled data and unlabeled data;
# unlabeled samples are marked with the label -1 (sklearn's convention).
rng = np.random.RandomState(42)
iris = datasets.load_iris()
# Randomly hide ~30% of the labels (fixed seed makes the mask reproducible).
random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3
iris.target[random_unlabeled_points] = -1
# iris.target now mixes true labels {0, 1, 2} with -1 markers, e.g.
#   array([ 0,  0,  0,  0, -1, -1, -1,  0, ...,  2,  2, -1, -1])
# (REPL output kept as a comment so the file stays valid Python.)

# Base classifier must implement predict_proba for self-training.
svc = SVC(probability=True, gamma="auto")
# Wrap it in the self-training meta-estimator.
self_training_model = SelfTrainingClassifier(svc)
# Train on the partially-labeled data; -1 entries are treated as unlabeled.
self_training_model.fit(iris.data, iris.target)

# Predict the samples whose labels were hidden, then compare against the
# true labels taken from a fresh, unmodified copy of the data set.
predict = self_training_model.predict(iris.data[random_unlabeled_points])  # predicted labels
labels = datasets.load_iris().target[random_unlabeled_points]  # real labels
from sklearn.metrics import classification_report
print(classification_report(labels, predict))
# Expected report (REPL output kept as a comment):
#               precision    recall  f1-score   support
#            0       1.00      1.00      1.00        19
#            1       1.00      0.80      0.89        15
#            2       0.85      1.00      0.92        17
#     accuracy                           0.94        51
#    macro avg       0.95      0.93      0.94        51
# weighted avg       0.95      0.94      0.94        51
# --- Demo 2: LabelPropagation ------------------------------------------------
# Rebuild the same partially-labeled data set from scratch (same seed, so the
# hidden-label mask is identical to the one used in the self-training demo).
rng = np.random.RandomState(42)
iris = datasets.load_iris()
random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3
# Create a data set containing labeled data and unlabeled data;
# unlabeled data have the label -1.
iris.target[random_unlabeled_points] = -1
# iris.target again mixes {0, 1, 2} with -1 markers, e.g.
#   array([ 0,  0,  0,  0, -1, -1, -1,  0, ...,  2,  2, -1, -1])
# (REPL output kept as a comment so the file stays valid Python.)

from sklearn.semi_supervised import LabelPropagation
# LabelPropagation needs no base estimator — it spreads labels over a graph.
label_prop_model = LabelPropagation()
label_prop_model.fit(iris.data, iris.target)

# Predict the hidden samples and compare against the true labels from a
# fresh, unmodified copy of the data set.
predict = label_prop_model.predict(iris.data[random_unlabeled_points])  # predicted labels
labels = datasets.load_iris().target[random_unlabeled_points]  # real labels
from sklearn.metrics import classification_report
print(classification_report(labels, predict))
# Expected report (REPL output kept as a comment):
#               precision    recall  f1-score   support
#            0       1.00      1.00      1.00        19
#            1       1.00      0.87      0.93        15
#            2       0.89      1.00      0.94        17
#     accuracy                           0.96        51
#    macro avg       0.96      0.96      0.96        51
# weighted avg       0.96      0.96      0.96        51