from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
# Load the iris data as a plain (features, labels) pair.
X, y = load_iris(return_X_y=True)

# Hold out half of the samples for evaluation (test_size=0.5);
# random_state=0 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

# Fit Gaussian naive Bayes on the training half and label the held-out half.
# fit() returns the estimator itself, so the call can be chained.
gnb = GaussianNB()
y_pred = gnb.fit(X_train, y_train).predict(X_test)
from sklearn.metrics import confusion_matrix
# Compare held-out labels with the predictions.
# NOTE: the matrix is neither stored nor printed here.
confusion_matrix(y_test, y_pred)
import numpy as np
# Seeded generator so the synthetic data drawn from it is reproducible.
rng = np.random.RandomState(1)
# Synthetic count data: 6 samples (m) by 100 features (n),
# every entry an integer count in the range 0..99.
X = rng.randint(0, 100, size=(6, 100))
# One class label per sample (m = 6), alternating between 1 and 0.
y = np.array([1, 0, 1, 0, 1, 0])
from sklearn.naive_bayes import MultinomialNB
# Multinomial naive Bayes on the count features: train on all six samples
# (fit() returns the estimator), then classify the sample at row index 2.
# The slice X[2:3] keeps the input two-dimensional.
model = MultinomialNB().fit(X, y)
model.predict(X[2:3])
from sklearn.naive_bayes import ComplementNB
# Complement naive Bayes on the same X and y: train on all six samples
# (fit() returns the estimator), then classify the sample at row index 2.
# The slice X[2:3] keeps the input two-dimensional.
model = ComplementNB().fit(X, y)
model.predict(X[2:3])
# create 6 samples, each with 100 binary features
# each element is either 0 or 1 (a presence flag, not a count)
# BernoulliNB assumes that all features are binary, i.e. they take only two values.
# For example, 0 can mean "word does not occur in the document" and 1 "word occurs in the document".
# Replace the data with a 6 x 100 (m x n) matrix whose entries are 0 or 1.
X = rng.randint(0, 2, size=(6, 100))
# One class label per sample (m = 6), alternating between 1 and 0.
y = np.array([1, 0, 1, 0, 1, 0])
from sklearn.naive_bayes import BernoulliNB
# Bernoulli naive Bayes on the current X and y: train on all six samples
# (fit() returns the estimator), then classify the sample at row index 2.
# The slice X[2:3] keeps the input two-dimensional.
model = BernoulliNB().fit(X, y)
model.predict(X[2:3])
# Draw a fresh 6 x 100 (m x n) matrix; each feature has two categories, 0 and 1.
X = rng.randint(0, 2, size=(6, 100))
# One class label per sample (m = 6), alternating between 1 and 0.
y = np.array([1, 0, 1, 0, 1, 0])
from sklearn.naive_bayes import CategoricalNB
# Categorical naive Bayes on the current X and y: train on all six samples
# (fit() returns the estimator), then classify the sample at row index 2.
# The slice X[2:3] keeps the input two-dimensional.
model = CategoricalNB().fit(X, y)
model.predict(X[2:3])