Semi-supervised Learning¶

  • A small portion of labeled examples
  • A large number of unlabeled examples
  • The model must learn from both and make predictions on new examples
  • Can achieve better performance than a supervised learning algorithm fit only on the labeled training examples

1. Inductive Learning¶

  • Train a model on the labeled training data, then predict the labels of the unlabeled data
  • Builds a predictive model
  • Can predict any point in the input space
  • Lower computational cost

Classifier-based methods¶

  • Start from one or more initial classifiers and iteratively improve them
  • Expectation–maximization (EM) algorithm
  • Co-Training

Self Training¶

  • The data set contains both labeled and unlabeled examples
  1. Train a classifier that implements predict_proba on the labeled data
  2. Predict labels for the unlabeled data with the trained model
  3. Add a subset of the newly labeled examples to the labeled data; the selection criterion is threshold (the default) or k_best (see the sketch below)
  • Repeat steps 1-3 until no new labels are added or max_iter is reached
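
Which pseudo-labels get added in step 3 is controlled by the criterion parameter of sklearn's SelfTrainingClassifier. A minimal sketch of both options (the threshold and k_best values here are illustrative, not tuned):

from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.svm import SVC

# criterion="threshold" (default): each iteration, add every prediction
# whose probability exceeds the threshold
st_threshold = SelfTrainingClassifier(SVC(probability=True, gamma="auto"),
                                      threshold=0.9)

# criterion="k_best": each iteration, add the k most confident predictions
st_k_best = SelfTrainingClassifier(SVC(probability=True, gamma="auto"),
                                   criterion="k_best", k_best=10)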
In [62]:
import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import SelfTrainingClassifier
from sklearn.svm import SVC
In [63]:
# create a data set containing labeled data and unlabeled data
# unlabeled data have the label -1
rng = np.random.RandomState(42)
iris = datasets.load_iris()
random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3
iris.target[random_unlabeled_points] = -1
In [64]:
iris.target
Out[64]:
array([ 0,  0,  0,  0, -1, -1, -1,  0,  0,  0, -1,  0,  0, -1, -1, -1,  0,
        0,  0, -1,  0, -1, -1,  0,  0,  0, -1,  0,  0, -1,  0, -1, -1,  0,
        0,  0,  0, -1,  0,  0, -1,  0, -1,  0, -1,  0,  0,  0,  0, -1,  1,
        1,  1,  1,  1,  1, -1, -1, -1,  1,  1, -1,  1,  1, -1,  1, -1,  1,
       -1,  1,  1, -1, -1,  1,  1,  1,  1, -1,  1, -1,  1,  1,  1, -1,  1,
        1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1,  2,
        2,  2,  2, -1,  2,  2, -1, -1, -1, -1,  2,  2,  2,  2,  2, -1,  2,
        2,  2,  2,  2, -1, -1,  2,  2,  2, -1,  2,  2, -1, -1,  2,  2,  2,
        2,  2,  2,  2,  2, -1,  2,  2, -1, -1,  2,  2, -1, -1])
In [65]:
# create a classifier implementing predict_proba
svc = SVC(probability=True, gamma="auto")

# create a self-training model
self_training_model = SelfTrainingClassifier(svc)

# train the model
self_training_model.fit(iris.data, iris.target)
Out[65]:
SelfTrainingClassifier(base_estimator=SVC(gamma='auto', probability=True))
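
The fitted model records how the pseudo-labeling went. A short sketch of the inspection attributes documented for SelfTrainingClassifier (assuming a recent scikit-learn):

# labels used in the final fit, including pseudo-labels added during self-training
print(self_training_model.transduction_[:10])

# iteration in which each sample was labeled (0 = labeled from the start,
# -1 = never pseudo-labeled)
print(self_training_model.labeled_iter_[:10])

# why the loop stopped: 'max_iter', 'no_change', or 'all_labeled'
print(self_training_model.termination_condition_)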
In [70]:
# predict unlabeled data
predict = self_training_model.predict(iris.data[random_unlabeled_points]) # predicted labels
labels = datasets.load_iris().target[random_unlabeled_points] # real labels

from sklearn.metrics import classification_report
print(classification_report(labels, predict))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.80      0.89        15
           2       0.85      1.00      0.92        17

    accuracy                           0.94        51
   macro avg       0.95      0.93      0.94        51
weighted avg       0.95      0.94      0.94        51
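
To make the claim from the introduction concrete, here is a minimal baseline sketch: the same SVC fit only on the points that kept their labels, evaluated on the same unlabeled points. Comparing its report with the one above shows what, if anything, self-training gained on this split:

# supervised baseline trained on the labeled subset only
labeled_mask = ~random_unlabeled_points
baseline = SVC(probability=True, gamma="auto")
baseline.fit(iris.data[labeled_mask], iris.target[labeled_mask])

baseline_predict = baseline.predict(iris.data[random_unlabeled_points])
print(classification_report(labels, baseline_predict))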

2. Transductive Learning¶

  • Train the model and label the unlabeled data in one step
  • Does not build a predictive model; if new unlabeled data are encountered, the algorithm has to be re-run
  • Can label only the unlabeled points seen during training, based on the observed labeled data
  • Higher computational cost

Data-based methods¶

  • Discover the inherent geometry of the data and exploit it to find a good classifier
  • Manifold Regularization
  • Harmonic Mixtures
  • Information Regularization

Label Propagation¶
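
  • Builds a similarity graph over all samples, labeled and unlabeled, and propagates the known labels along the edges until the assignment converges
  • The graph is defined by a kernel: rbf (the default) or knn in scikit-learn's LabelPropagation
  • Although the method is transductive in spirit, the scikit-learn estimator also exposes predict() for new points via the kernel, which the cells below use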

In [83]:
rng = np.random.RandomState(42)
iris = datasets.load_iris()
random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3

# create a data set containing labeled data and unlabeled data
# unlabeled data have the label -1
iris.target[random_unlabeled_points] = -1
In [84]:
iris.target
Out[84]:
array([ 0,  0,  0,  0, -1, -1, -1,  0,  0,  0, -1,  0,  0, -1, -1, -1,  0,
        0,  0, -1,  0, -1, -1,  0,  0,  0, -1,  0,  0, -1,  0, -1, -1,  0,
        0,  0,  0, -1,  0,  0, -1,  0, -1,  0, -1,  0,  0,  0,  0, -1,  1,
        1,  1,  1,  1,  1, -1, -1, -1,  1,  1, -1,  1,  1, -1,  1, -1,  1,
       -1,  1,  1, -1, -1,  1,  1,  1,  1, -1,  1, -1,  1,  1,  1, -1,  1,
        1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1,  2,
        2,  2,  2, -1,  2,  2, -1, -1, -1, -1,  2,  2,  2,  2,  2, -1,  2,
        2,  2,  2,  2, -1, -1,  2,  2,  2, -1,  2,  2, -1, -1,  2,  2,  2,
        2,  2,  2,  2,  2, -1,  2,  2, -1, -1,  2,  2, -1, -1])
In [85]:
from sklearn.semi_supervised import LabelPropagation
label_prop_model = LabelPropagation()
label_prop_model.fit(iris.data, iris.target)
Out[85]:
LabelPropagation()
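
The transductive output itself lives on the fitted model: the transduction_ attribute holds the label assigned to every training point, including the ones that went in as -1. A small sketch:

# labels the propagation assigned to the points that were unlabeled during fit
print(label_prop_model.transduction_[random_unlabeled_points])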
In [88]:
# predict unlabeled data
predict = label_prop_model.predict(iris.data[random_unlabeled_points]) # predicted labels
labels = datasets.load_iris().target[random_unlabeled_points] # real labels

from sklearn.metrics import classification_report
print(classification_report(labels, predict))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.87      0.93        15
           2       0.89      1.00      0.94        17

    accuracy                           0.96        51
   macro avg       0.96      0.96      0.96        51
weighted avg       0.96      0.96      0.96        51
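
A closely related estimator in the same module is LabelSpreading, which uses a normalized graph Laplacian and an alpha clamping factor that lets the propagated distribution partially override the initial labels. A minimal sketch on the same data (alpha=0.2 is the library default, not a tuned choice):

from sklearn.semi_supervised import LabelSpreading

label_spread_model = LabelSpreading(kernel="rbf", alpha=0.2)
label_spread_model.fit(iris.data, iris.target)

spread_predict = label_spread_model.predict(iris.data[random_unlabeled_points])
print(classification_report(labels, spread_predict))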
