# %% [markdown]
# # Previous Class Definitions
# The previously defined Layer_Dense, Activation_ReLU, and Activation_Softmax classes are reproduced below.
# %%
# Imports
import matplotlib.pyplot as plt
import numpy as np
import nnfs
from nnfs.datasets import spiral_data, vertical_data

nnfs.init()
# %%
class Layer_Dense:

    def __init__(self, n_inputs, n_neurons):
        # Initialize the weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)  # Small values drawn from a normal distribution
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        # Calculate the output values from inputs, weights, and biases
        self.output = np.dot(inputs, self.weights) + self.biases  # Weights are stored as (n_inputs, n_neurons), so no transpose is needed


class Activation_ReLU:

    def forward(self, inputs):
        self.output = np.maximum(0, inputs)


class Activation_Softmax:

    def forward(self, inputs):
        # Get the unnormalized probabilities
        # Subtract the row-wise max so the largest exponent is 0, preventing overflow
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))

        # Normalize the probabilities with element-wise division
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities
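# %% [markdown]
# A quick check of the max-subtraction trick (a minimal sketch with made-up numbers, not part of the classes above): exponentiating large raw outputs directly overflows to infinity, while shifting by the row max keeps every exponent at or below zero and leaves the resulting probabilities unchanged.

# %%
raw = np.array([[998.0, 999.0, 1000.0]])  # hypothetical large layer outputs

# np.exp(raw) would overflow to inf; shifting by the row max stays finite
shifted = raw - np.max(raw, axis=1, keepdims=True)
exp_values = np.exp(shifted)
print(exp_values / np.sum(exp_values, axis=1, keepdims=True))  # same softmax result as the unshifted version would give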
# %% [markdown]
# # Forward Pass with No Loss Consideration
# A 2-input neural network with two layers of 3 neurons each: ReLU activation after the first layer and Softmax after the second to normalize the outputs into probabilities.
# %%
# Create dataset
X, y = spiral_data(samples=100, classes=3)

# Create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# Create ReLU activation (to be used with Dense layer):
activation1 = Activation_ReLU()

# Create second Dense layer with 3 input features (as we take output
# of previous layer here) and 3 output values
dense2 = Layer_Dense(3, 3)

# Create Softmax activation (to be used with Dense layer):
activation2 = Activation_Softmax()

# Make a forward pass of our training data through this layer
dense1.forward(X)

# Make a forward pass through the activation function
# it takes the output of the first dense layer here
activation1.forward(dense1.output)

# Make a forward pass through the second Dense layer
# it takes outputs of the activation function of the first layer as inputs
dense2.forward(activation1.output)

# Make a forward pass through the activation function
# it takes the output of the second dense layer here
activation2.forward(dense2.output)

# Let's see the output of the first few samples:
print(activation2.output[:5])
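# %% [markdown]
# Sanity check (a small sketch, assuming the cell above has been run): with freshly initialized weights the network has no preference yet, so every row of the softmax output should sum to 1 and sit near 1/3 for each of the 3 classes.

# %%
print(np.sum(activation2.output, axis=1)[:5])   # each row of probabilities sums to 1
print(activation2.output.mean(axis=0))          # roughly uniform (~0.33) before any training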
# %% [markdown]
# # Calculating Network Error with Categorical Cross Entropy Loss
# The loss is the negative sum of the expected output times the log of the neural network output:
#
# loss = -sum(expected_i * log(nn_output_i)) for all output indices i
#
# In the classification case the incorrect outputs do not contribute, because expected_i is 0 for every wrong class, so the sum collapses to -log of the confidence assigned to the correct class.
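# %%
# A tiny check of the simplification above (illustrative numbers only): with a one-hot
# target, the full cross-entropy sum equals -log of the correct-class confidence.
softmax_out = np.array([0.7, 0.1, 0.2])
one_hot = np.array([1, 0, 0])
print(-np.sum(one_hot * np.log(softmax_out)))  # full sum
print(-np.log(softmax_out[0]))                 # simplified form, same value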
# %%
|
|
nn_outputs = np.array([
|
|
[0.7, 0.1, 0.2],
|
|
[0.1, 0.5, 0.4],
|
|
[0.02, 0.9, 0.08]])
|
|
class_targets = [0, 1, 1]
|
|
losses = -np.log(nn_outputs[range(len(nn_outputs)), class_targets])
|
|
print(f"Losses: {losses}")
|
|
print(f"Average Loss: {np.average(losses)}")
|
|
|
|
# %% [markdown]
# ## Loss with One Hot Encoding
# In classification the expected output is typically all zeros except for the class the input belongs to. This simplifies the cross entropy loss calculation.
# %%
true_output = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0]
])

nn_output = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]
])

# Element-by-element multiplication "erases" the output terms whose target is 0
A = true_output * nn_output

# Sum along each row (axis=1) because each row is one sample's output
B = np.sum(A, axis=1)

# Get the cross entropy loss
C = -np.log(B)

print(f"Losses: {C}")
print(f"Average Loss: {np.mean(C)}")
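# %% [markdown]
# One practical wrinkle (a minimal sketch of the problem, not part of the calculation above): if the network ever assigns 0 confidence to the correct class, -log(0) is infinite and poisons the average loss. Clipping the predictions into a range such as [1e-7, 1 - 1e-7] keeps the loss finite, which is exactly what the loss class below does.

# %%
with np.errstate(divide='ignore'):
    print(-np.log(0.0))                       # inf -- would break the mean loss
print(-np.log(np.clip(0.0, 1e-7, 1 - 1e-7)))  # large but finite (about 16.12)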
# %% [markdown]
# ## Implementing the Loss Class
|
|
# Base class for Loss functions
|
|
class Loss:
|
|
'''Calculates the data and regularization losses given
|
|
model output and ground truth values'''
|
|
def calculate(self, output, y):
|
|
sample_losses = self.forward(output, y)
|
|
data_loss = np.average(sample_losses)
|
|
return data_loss
|
|
|
|
# %% [markdown]
# ## Implementing the Categorical Cross Entropy Loss Class
# %%
class Loss_CategoricalCrossEntropy(Loss):

    def forward(self, y_pred, y_true):
        '''y_pred is the neural network output
        y_true is the ideal output of the neural network'''
        samples = len(y_pred)
        # Bound the predicted values away from 0 and 1 to avoid log(0)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if len(y_true.shape) == 1:    # Categorically labeled (sparse class indices)
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2:  # One hot encoded
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # Calculate the losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods
# %%
nn_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]])
class_targets = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0]])

loss_function = Loss_CategoricalCrossEntropy()
# calculate() already averages the per-sample losses, so it returns a single value
loss = loss_function.calculate(nn_outputs, class_targets)
print(f"Average Loss: {loss}")
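# %% [markdown]
# The same loss can also be computed from sparse class labels instead of one-hot rows (a small check, assuming the cell above has been run): the two target formats go through different branches of forward() but produce the identical average loss.

# %%
sparse_targets = np.array([0, 1, 1])  # same classes as the one-hot targets above
print(f"Average Loss (sparse targets): {loss_function.calculate(nn_outputs, sparse_targets)}")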
# %% [markdown]
# # Introducing Accuracy
# Accuracy counts a prediction as correct when the highest value in the output aligns with the correct classification. Even if the output were 51% red and 49% blue and the true class is red, that sample would still be counted as fully accurate.
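# %%
# Minimal illustration of the claim above (made-up numbers): a 51/49 split still counts
# as fully correct, because only the argmax is compared against the true class.
print(np.argmax([0.51, 0.49]) == 0)  # True -> contributes 1.0 to the accuracy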
# %%
nn_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]])
class_targets = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0]])

# Calculate the loss (calculate() returns the batch average)
loss_function = Loss_CategoricalCrossEntropy()
loss = loss_function.calculate(nn_outputs, class_targets)
print(f"Average Loss: {loss}")

# Calculate the accuracy
predictions = np.argmax(nn_outputs, axis=1)
# If targets are one-hot encoded - convert them to class indices
if len(class_targets.shape) == 2:
    class_targets = np.argmax(class_targets, axis=1)
# True evaluates to 1; False to 0
accuracy = np.mean(predictions == class_targets)
print(f"Accuracy: {accuracy}")
# %% [markdown]
# # The Need for Optimization
# %%
# Simpler dataset
nnfs.init()
X, y = vertical_data(samples=100, classes=3)
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap='brg')
plt.show()
# %% [markdown]
# # Test Strategy 1: Randomly Select Weights and Biases
# For a large number of iterations, set the weights and biases to new random values and keep whichever set produces the lowest loss (accuracy is printed alongside for reference).
# %%
# Create dataset
X, y = vertical_data(samples=100, classes=3)

# Create model
dense1 = Layer_Dense(2, 3)  # first dense layer, 2 inputs
activation1 = Activation_ReLU()
dense2 = Layer_Dense(3, 3)  # second dense layer, 3 inputs, 3 outputs
activation2 = Activation_Softmax()

# Create loss function
loss_function = Loss_CategoricalCrossEntropy()

# Helper variables
lowest_loss = 9999999  # some initial value
best_dense1_weights = dense1.weights.copy()
best_dense1_biases = dense1.biases.copy()
best_dense2_weights = dense2.weights.copy()
best_dense2_biases = dense2.biases.copy()

for iteration in range(10000):

    # Generate a new set of weights and biases for this iteration
    dense1.weights = 0.05 * np.random.randn(2, 3)
    dense1.biases = 0.05 * np.random.randn(1, 3)
    dense2.weights = 0.05 * np.random.randn(3, 3)
    dense2.biases = 0.05 * np.random.randn(1, 3)

    # Perform a forward pass of the training data through the network
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    activation2.forward(dense2.output)

    # Calculate the loss from the output of activation2 and the targets
    loss = loss_function.calculate(activation2.output, y)

    # Calculate accuracy from output of activation2 and targets
    # take the argmax along axis 1 (one prediction per sample)
    predictions = np.argmax(activation2.output, axis=1)
    accuracy = np.mean(predictions == y)

    # If loss is smaller - print and save the weights and biases aside
    if loss < lowest_loss:
        print('New set of weights found, iteration:', iteration, 'loss:', loss, 'acc:', accuracy)
        best_dense1_weights = dense1.weights.copy()
        best_dense1_biases = dense1.biases.copy()
        best_dense2_weights = dense2.weights.copy()
        best_dense2_biases = dense2.biases.copy()
        lowest_loss = loss
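# %% [markdown]
# To actually use the best parameters found by the random search (a short sketch, assuming the cell above has been run), copy the saved best_* arrays back into the layers and re-run the forward pass; the resulting loss should match the lowest loss reported above.

# %%
dense1.weights = best_dense1_weights.copy()
dense1.biases = best_dense1_biases.copy()
dense2.weights = best_dense2_weights.copy()
dense2.biases = best_dense2_biases.copy()

dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)
print(f"Loss with best weights: {loss_function.calculate(activation2.output, y)}")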
# %% [markdown]
# # Test Strategy 2: Randomly Adjust Weights and Biases
# Starting from the current weights and biases, nudge them by a small random amount each iteration. If the new loss is lower, keep the adjusted weights and biases; otherwise revert to the previous best set.
# %%
# Create dataset
X, y = vertical_data(samples=100, classes=3)

# Create model
dense1 = Layer_Dense(2, 3)  # first dense layer, 2 inputs
activation1 = Activation_ReLU()
dense2 = Layer_Dense(3, 3)  # second dense layer, 3 inputs, 3 outputs
activation2 = Activation_Softmax()

# Create loss function
loss_function = Loss_CategoricalCrossEntropy()

# Helper variables
lowest_loss = 9999999  # some initial value
best_dense1_weights = dense1.weights.copy()
best_dense1_biases = dense1.biases.copy()
best_dense2_weights = dense2.weights.copy()
best_dense2_biases = dense2.biases.copy()

for iteration in range(10000):

    # Update weights and biases with some small random values
    dense1.weights += 0.05 * np.random.randn(2, 3)
    dense1.biases += 0.05 * np.random.randn(1, 3)
    dense2.weights += 0.05 * np.random.randn(3, 3)
    dense2.biases += 0.05 * np.random.randn(1, 3)

    # Perform a forward pass of our training data through the network
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    activation2.forward(dense2.output)

    # Calculate the loss from the output of activation2 and the targets
    loss = loss_function.calculate(activation2.output, y)

    # Calculate accuracy from output of activation2 and targets
    # take the argmax along axis 1 (one prediction per sample)
    predictions = np.argmax(activation2.output, axis=1)
    accuracy = np.mean(predictions == y)

    # If loss is smaller - print and save the weights and biases aside
    if loss < lowest_loss:
        print('New set of weights found, iteration:', iteration, 'loss:', loss, 'acc:', accuracy)
        best_dense1_weights = dense1.weights.copy()
        best_dense1_biases = dense1.biases.copy()
        best_dense2_weights = dense2.weights.copy()
        best_dense2_biases = dense2.biases.copy()
        lowest_loss = loss
    # Revert weights and biases to the best set found so far
    else:
        dense1.weights = best_dense1_weights.copy()
        dense1.biases = best_dense1_biases.copy()
        dense2.weights = best_dense2_weights.copy()
        dense2.biases = best_dense2_biases.copy()
# %% [markdown]
# # Test Strategy 2 on Spiral Dataset
# %%
# Create dataset
X, y = spiral_data(samples=100, classes=3)

# Create model
dense1 = Layer_Dense(2, 3)  # first dense layer, 2 inputs
activation1 = Activation_ReLU()
dense2 = Layer_Dense(3, 3)  # second dense layer, 3 inputs, 3 outputs
activation2 = Activation_Softmax()

# Create loss function
loss_function = Loss_CategoricalCrossEntropy()

# Helper variables
lowest_loss = 9999999  # some initial value
best_dense1_weights = dense1.weights.copy()
best_dense1_biases = dense1.biases.copy()
best_dense2_weights = dense2.weights.copy()
best_dense2_biases = dense2.biases.copy()

for iteration in range(10000):

    # Update weights and biases with some small random values
    dense1.weights += 0.05 * np.random.randn(2, 3)
    dense1.biases += 0.05 * np.random.randn(1, 3)
    dense2.weights += 0.05 * np.random.randn(3, 3)
    dense2.biases += 0.05 * np.random.randn(1, 3)

    # Perform a forward pass of our training data through the network
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    activation2.forward(dense2.output)

    # Calculate the loss from the output of activation2 and the targets
    loss = loss_function.calculate(activation2.output, y)

    # Calculate accuracy from output of activation2 and targets
    # take the argmax along axis 1 (one prediction per sample)
    predictions = np.argmax(activation2.output, axis=1)
    accuracy = np.mean(predictions == y)

    # If loss is smaller - print and save the weights and biases aside
    if loss < lowest_loss:
        print('New set of weights found, iteration:', iteration, 'loss:', loss, 'acc:', accuracy)
        best_dense1_weights = dense1.weights.copy()
        best_dense1_biases = dense1.biases.copy()
        best_dense2_weights = dense2.weights.copy()
        best_dense2_biases = dense2.biases.copy()
        lowest_loss = loss
    # Revert weights and biases to the best set found so far
    else:
        dense1.weights = best_dense1_weights.copy()
        dense1.biases = best_dense1_biases.copy()
        dense2.weights = best_dense2_weights.copy()
        dense2.biases = best_dense2_biases.copy()