from IPython.display import Image
Image(filename='img/Perceptron.png')
$w_{ij}$: $i$ is the neuron index in layer 0, $j$ is the neuron index in layer 1.
$$ L = \begin{bmatrix} L_{0} & L_{1} & L_{2} \end{bmatrix} $$
$$ L = XW $$
$$ \mathrm{Outputs} = \phi(L) $$
$\phi$ is called the activation function.
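A minimal NumPy sketch (with made-up numbers, not from the original) of the layer computation $L = XW$ followed by a step activation $\phi$, as in the classic perceptron:
import numpy as np
# illustrative inputs: 2 samples, 3 features (layer 0), 2 neurons (layer 1)
X = np.array([[1.0, 0.5, -1.0],
              [0.0, 2.0,  1.0]])
W = np.array([[ 0.2, -0.1],
              [ 0.4,  0.3],
              [-0.5,  0.6]])            # w_ij: row i = input index, column j = neuron index
L = X @ W                               # L = XW
phi = lambda z: (z >= 0).astype(int)    # step activation
outputs = phi(L)                        # Outputs = phi(L)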
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
iris = load_iris() # Bunch
x = iris.data[:, (2, 3)]  # petal length, petal width
y = (iris.target == 0).astype(int)  # binary target: Iris setosa or not (np.int is deprecated)
per_clf = Perceptron()
per_clf.fit(x, y)
y_pred = per_clf.predict([[2, 0.5]])
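As a side note I'm adding (assuming default hyperparameters): Scikit-Learn's Perceptron is equivalent to an SGDClassifier with a perceptron loss, a constant learning rate, and no regularization.
from sklearn.linear_model import SGDClassifier
sgd_clf = SGDClassifier(loss="perceptron", learning_rate="constant", eta0=1, penalty=None)
sgd_clf.fit(x, y)  # same decision rule as Perceptron()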
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
import tensorflow as tf
from tensorflow import keras
model = keras.models.Sequential()
model.add(keras.Input(shape=(8,))) # it is recommended to specify the input shape in the first layer
model.add(keras.layers.Dense(30, activation="relu"))
model.add(keras.layers.Dense(1))
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(lr=1e-3))
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))
mse_test = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test[:3])
import pandas as pd
import matplotlib.pyplot as plt
pd.DataFrame(history.history).plot()
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()
import tensorflow as tf
from tensorflow import keras
fashion_mnist = keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train.shape, y_train.shape, X_test.shape, y_test.shape
((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))
X_valid, X_train = X_train[:5000] / 255., X_train[5000:] / 255.
y_valid, y_train = y_train[:5000], y_train[5000:]
X_test = X_test / 255.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28])) # convert input image into a 1D array
model.add(keras.layers.Dense(300, activation='relu')) # holds the connection weights between the inputs and the neurons, plus a bias vector
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
model.summary() # layer's name, shape, number of parameters
# dense layers: 785 * 300 = 235,500 parameters (784 inputs + 1 bias, times 300 neurons)
# 301 * 100 = 30,100
# 101 * 10 = 1,010
Model: "sequential_2" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_1 (Flatten) (None, 784) 0 _________________________________________________________________ dense_11 (Dense) (None, 300) 235500 _________________________________________________________________ dense_12 (Dense) (None, 100) 30100 _________________________________________________________________ dense_13 (Dense) (None, 10) 1010 ================================================================= Total params: 266,610 Trainable params: 266,610 Non-trainable params: 0 _________________________________________________________________
model.layers[1].name
'dense_11'
weights, biases = model.layers[1].get_weights()
print(weights.shape) # 784*300
print(biases.shape) # 300
(784, 300)
(300,)
# specify the loss function and the optimizer
# sparse_categorical_crossentropy: targets are integer class labels, 1 per record
# categorical_crossentropy: targets are one-hot vectors, e.g. 10 probabilities per record
# binary_crossentropy: binary classification with a sigmoid output
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
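A small added illustration of the difference between the two categorical losses: integer labels can be converted to one-hot vectors with keras.utils.to_categorical (y_example is a made-up name), after which categorical_crossentropy would apply.
y_example = np.array([3, 0, 9])  # sparse labels: 1 integer per record
y_one_hot = keras.utils.to_categorical(y_example, num_classes=10)  # shape (3, 10)
# with one-hot targets, compile with loss='categorical_crossentropy' instead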
# calling fit() again continues training where it left off
# the validation error is computed at the end of each epoch
# the training error is a running mean computed during each epoch
history = model.fit(X_train, y_train, epochs=30, validation_data=(X_valid, y_valid))
history.params
{'verbose': 1, 'epochs': 30, 'steps': 1719}
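history.history holds one list per tracked metric, with one entry per epoch; for this compile that should be loss, accuracy, val_loss, and val_accuracy:
print(history.history.keys())  # expect: loss, accuracy, val_loss, val_accuracy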
import pandas as pd
import matplotlib.pyplot as plt
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()
model.evaluate(X_test, y_test)
313/313 [==============================] - 0s 807us/step - loss: 0.3369 - accuracy: 0.8810
[0.336931973695755, 0.8809999823570251]
y_prob = model.predict(X_test[:3])
y_prob.round(2)
array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.02, 0.  , 0.97],
       [0.  , 0.  , 0.99, 0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]], dtype=float32)
y_pred = np.argmax(model.predict(X_test[:3]), axis=-1)  # model.predict_classes() is deprecated
y_pred
array([9, 2, 1])
import numpy as np
labels = np.array(["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
                   "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"])
labels[y_pred]
array(['Ankle boot', 'Pullover', 'Trouser'], dtype='<U11')
plt.subplot(1, 3, 1)
plt.imshow(X_test[0], cmap="binary", interpolation="nearest")
plt.axis('off')
plt.subplot(1, 3, 2)
plt.imshow(X_test[1], cmap="binary", interpolation="nearest")
plt.axis('off')
plt.subplot(1, 3, 3)
plt.imshow(X_test[2], cmap="binary", interpolation="nearest")
plt.axis('off')
plt.show()
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
np.random.seed(42)
tf.random.set_seed(42)
Image(filename='img/MLP_1.png')
input_ = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation="relu")(input_)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input_], outputs=[output])
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(lr=1e-3))
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))
y_pred = model.predict(X_test[:3])
y_pred
array([[0.4701073],
       [1.8735044],
       [3.379823 ]], dtype=float32)
Image(filename='img/MLP_2.png')
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]
input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="main_output")(concat)
model = keras.models.Model(inputs=[input_A, input_B],outputs=[output])
model.compile(loss="mse", optimizer=keras.optimizers.SGD(lr=1e-3))
history = model.fit((X_train_A, X_train_B), y_train, epochs=20)
model.predict((X_new_A, X_new_B))
array([[0.40118584],
       [1.9036655 ],
       [3.3729222 ]], dtype=float32)
Image(filename='img/MLP_3.png')
input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="main_output")(concat)
aux_output = keras.layers.Dense(1, name="aux_output")(hidden2)
model = keras.models.Model(inputs=[input_A, input_B],outputs=[output, aux_output])
# by default, the final loss used for training is the sum of all losses
# the auxiliary output acts as a regularizer
# the main output gets a higher weight than the auxiliary output
model.compile(loss=["mse", "mse"], loss_weights=[0.9, 0.1], optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit([X_train_A, X_train_B], [y_train, y_train], epochs=20,
                    validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid]))
total_loss, main_loss, aux_loss = model.evaluate([X_test_A, X_test_B], [y_test, y_test])
y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])
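As an added sanity check on the loss weighting: the total loss reported by evaluate() should be roughly the weighted sum of the per-output losses.
print(total_loss, 0.9 * main_loss + 0.1 * aux_loss)  # the two values should be close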
class WideAndDeepModel(keras.models.Model):
    def __init__(self, units=30, activation="relu", **kwargs):
        """Create the layers."""
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(units, activation=activation)
        self.hidden2 = keras.layers.Dense(units, activation=activation)
        self.main_output = keras.layers.Dense(1)
        self.aux_output = keras.layers.Dense(1)

    def call(self, inputs):
        """Define the architecture and perform the forward pass."""
        input_A, input_B = inputs
        hidden1 = self.hidden1(input_B)
        hidden2 = self.hidden2(hidden1)
        concat = keras.layers.concatenate([input_A, hidden2])
        main_output = self.main_output(concat)
        aux_output = self.aux_output(hidden2)
        return main_output, aux_output
model = WideAndDeepModel(30, activation="relu")
model.compile(loss="mse", loss_weights=[0.9, 0.1], optimizer=keras.optimizers.SGD(lr=1e-3))
history = model.fit((X_train_A, X_train_B), (y_train, y_train), epochs=10, validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)))
# save a subclassed model
# a subclassed model cannot be saved in HDF5 format, so use the TF SavedModel format
model.save('temp_model', save_format='tf') # save model
model_load = keras.models.load_model('temp_model')
INFO:tensorflow:Assets written to: temp_model/assets
# a model created with the Sequential or Functional API could instead be saved in HDF5:
# model.save('temp.h5')
# model_load = keras.models.load_model('temp.h5')
total_loss, main_loss, aux_loss = model_load.evaluate((X_test_A, X_test_B), (y_test, y_test))
y_pred_main, y_pred_aux = model_load.predict((X_new_A, X_new_B))
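An alternative worth noting for subclassed models (a sketch I'm adding; 'temp_weights' is an arbitrary path): save only the weights and rebuild the architecture from code before restoring them.
model.save_weights('temp_weights')                    # TF checkpoint format
model_clone = WideAndDeepModel(30, activation="relu") # rebuild the architecture from code
model_clone.compile(loss="mse", loss_weights=[0.9, 0.1], optimizer=keras.optimizers.SGD(learning_rate=1e-3))
model_clone.load_weights('temp_weights')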
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
input_ = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation="relu")(input_)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_, hidden2])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input_], outputs=[output])
class PrintValTrainRatioCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        print(logs['val_loss'] / logs['loss'])  # a ratio well above 1 suggests overfitting
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(lr=1e-3))
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid), callbacks=[PrintValTrainRatioCallback()])
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_valid, y_valid), callbacks=[early_stopping_cb])
checkpoint_cb = keras.callbacks.ModelCheckpoint('my_keras_model.h5', save_best_only=True)
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_valid, y_valid), callbacks=[checkpoint_cb])
model = keras.models.load_model('my_keras_model.h5') # roll back to best model
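The two callbacks are commonly combined, as in this added sketch: checkpointing keeps the best model on disk while early stopping halts training once the validation loss stops improving.
history = model.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb, early_stopping_cb])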
fashion_mnist = keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_valid, X_train = X_train[:5000] / 255., X_train[5000:] / 255.
y_valid, y_train = y_train[:5000], y_train[5000:]
X_test = X_test / 255.
import os
import time
def get_run_logdir():
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join('log', run_id)
run_logdir = get_run_logdir()
# MLP Classification
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28])) # convert input image into a 1D array
model.add(keras.layers.Dense(300, activation='relu')) # holds the connection weights between the inputs and the neurons, plus a bias vector
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
history = model.fit(X_train, y_train, epochs=30, validation_data=(X_valid, y_valid), callbacks=[tensorboard_cb])
%load_ext tensorboard
%tensorboard --logdir=./log --port=6006
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[8]):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss="mse", optimizer=optimizer)
    return model
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(build_model)
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV
param_distribs = {
    "n_hidden": [0, 1, 2, 3],
    "n_neurons": [10, 20],
    "learning_rate": [0.01, 0.02],
}
rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=10, cv=3, verbose=2)
rnd_search_cv.fit(X_train, y_train, epochs=10,
                  validation_data=(X_valid, y_valid),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10)])
print(rnd_search_cv.best_params_)
print(rnd_search_cv.best_score_)
{'n_neurons': 20, 'n_hidden': 2, 'learning_rate': 0.01}
-0.3620850046475728
model = rnd_search_cv.best_estimator_.model
model.predict(X_test)
array([[0.85256857],
       [1.6229372 ],
       [3.9205074 ],
       ...,
       [1.5167751 ],
       [2.5552483 ],
       [3.9521298 ]], dtype=float32)
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_valid, X_train = X_train[:5000] / 255., X_train[5000:] / 255.
y_valid, y_train = y_train[:5000], y_train[5000:]
X_test = X_test / 255.
# create a function to build and compile a Keras model
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3):
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=[28, 28])) # convert input image into a 1D array
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation='relu'))
    model.add(keras.layers.Dense(10, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=keras.optimizers.SGD(learning_rate=learning_rate),  # use the learning_rate argument
                  metrics=['accuracy'])
    return model
# create a sklearn wrapper to convert a Keras model into a Scikit-Learn estimator
keras_clf = keras.wrappers.scikit_learn.KerasClassifier(build_model)
# cross-validation
# scikit-learn versions above 0.21 may raise a clone error with this wrapper
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
param_distribs = {
    'n_hidden': [1],
    'n_neurons': [10, 20],
    'learning_rate': [3e-4]
}
rnd_search_cv = RandomizedSearchCV(keras_clf, param_distribs, n_iter=3, cv=3)
rnd_search_cv.fit(X_train, y_train, epochs=10)
print(rnd_search_cv.best_params_)
{'n_neurons': 20, 'n_hidden': 1, 'learning_rate': 0.0003}
model = rnd_search_cv.best_estimator_.model
model.predict(X_test)
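Since the output layer is a softmax, predict() returns class probabilities; as a final added sketch, argmax recovers the class indices.
y_proba = model.predict(X_test)
y_pred = np.argmax(y_proba, axis=-1)  # class index per test image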