# Neural-Networks-From-Scratch/lecture07/notes_07.py

# %% [markdown]
# # Previous Class Definitions
# The previously defined Layer_Dense, Activation_ReLU, and Activation_Softmax
# %%
# imports
import numpy as np
import nnfs
from nnfs.datasets import spiral_data
nnfs.init()
# %%
class Layer_Dense:
    def __init__(self, n_inputs, n_neurons):
        # Initialize the weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)  # Small values from a normal distribution
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        # Calculate the output values from inputs, weights, and biases
        self.output = np.dot(inputs, self.weights) + self.biases  # Weights are already transposed


class Activation_ReLU:
    def forward(self, inputs):
        self.output = np.maximum(0, inputs)


class Activation_Softmax:
    def forward(self, inputs):
        # Get the unnormalized probabilities
        # Subtract the row max before exponentiating to prevent overflow
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Normalize the probabilities with element-wise division
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities
# %% [markdown]
# # Forward Pass with No Loss Consideration
# A 2-input neural network with two layers of 3 neurons each: ReLU activation after the first layer and Softmax after the second layer to normalize the outputs into probabilities.
# %%
# Create dataset
X, y = spiral_data(samples=100, classes=3)
# Create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)
# Create ReLU activation (to be used with Dense layer):
activation1 = Activation_ReLU()
# Create second Dense layer with 3 input features (as we take output
# of previous layer here) and 3 output values
dense2 = Layer_Dense(3, 3)
# Create Softmax activation (to be used with Dense layer):
activation2 = Activation_Softmax()
# Make a forward pass of our training data through this layer
dense1.forward(X)
# Make a forward pass through activation function
# it takes the output of first dense layer here
activation1.forward(dense1.output)
# Make a forward pass through second Dense layer
# it takes outputs of activation function of first layer as inputs
dense2.forward(activation1.output)
# Make a forward pass through activation function
# it takes the output of second dense layer here
activation2.forward(dense2.output)
# Let's see output of the first few samples:
print(activation2.output[:5])
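# %% [markdown]
# ## Sanity Check on the Untrained Output
# A supplementary check (not in the original notes): each softmax row should sum to 1,
# and because the weights start small and the biases at zero, the untrained network
# predicts roughly 1/3 confidence per class.
# %%
print(f"Row sums (first 5): {np.sum(activation2.output[:5], axis=1)}")
print(f"Mean confidence per class: {np.mean(activation2.output, axis=0)}")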
# %% [markdown]
# # Calculating Network Error with Categorical Cross Entropy Loss
# The loss for one sample is the negative sum, over all output classes, of the expected output times the log of the network's output:
#
# loss = -sum(expected_i * log(nn_output_i)) for all outputs i
#
# For classification with one-hot targets, the incorrect outputs do not matter, because expected_i is 0 for every class except the correct one, leaving loss = -log(confidence in the correct class).
#
# %%
nn_outputs = np.array([
[0.7, 0.1, 0.2],
[0.1, 0.5, 0.4],
[0.02, 0.9, 0.08]])
class_targets = [0, 1, 1]
losses = -np.log(nn_outputs[range(len(nn_outputs)), class_targets])
print(f"Losses: {losses}")
print(f"Average Loss: {np.average(losses)}")
# %% [markdown]
# ## Loss with One Hot Encoding
# Classification targets are typically one-hot encoded: all zeros except a 1 for the class the input belongs to. This simplifies the cross entropy loss calculation.
# %%
true_output = np.array([
[1, 0, 0],
[0, 1, 0],
[0, 1, 0]
])
nn_output = np.array([
[0.7, 0.2, 0.1],
[0.1, 0.5, 0.4],
[0.02, 0.9, 0.08]
])
# Element-by-element multiplication "erases" the output terms whose target is 0
A = true_output * nn_output
# Sum along each row (axis=1), since each row is one sample's output
B = np.sum(A, axis=1)
# Get the cross entropy loss
C = -np.log(B)
print(f"Losses: {C}")
print(f"Average Loss: {np.mean(C)}")
# %% [markdown]
# ## Implementing the Loss Class
# %%
# Base class for Loss functions
class Loss:
    '''Calculates the data and regularization losses given
    model output and ground truth values'''

    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.average(sample_losses)
        return data_loss
# %% [markdown]
# ## Implementing the Categorical Cross Entropy Loss Class
# %%
class Loss_CategoricalCrossEntropy(Loss):
    def forward(self, y_pred, y_true):
        '''y_pred is the neural network output
        y_true is the ideal output of the neural network'''
        samples = len(y_pred)
        # Clip the predicted values to avoid log(0); clip both sides to keep the mean unbiased
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
        if len(y_true.shape) == 1:  # Categorically (sparse) labeled
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2:  # One-hot encoded
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        # Calculate the losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods
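# %% [markdown]
# ## Why Clip the Predictions?
# A supplementary note (not in the original notes): a predicted confidence of exactly 0
# would make -log blow up to infinity and ruin the mean loss, so predictions are clipped
# to [1e-7, 1 - 1e-7], capping the worst-case per-sample loss near -log(1e-7) ≈ 16.12.
# %%
with np.errstate(divide='ignore'):
    print(f"Unclipped zero confidence: {-np.log(0.0)}")    # inf
print(f"Worst case after clipping: {-np.log(1e-7):.4f}")   # ~16.1181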
# %%
nn_outputs = np.array([
[0.7, 0.1, 0.2],
[0.1, 0.5, 0.4],
[0.02, 0.9, 0.08]])
class_targets = np.array([
[1, 0, 0],
[0, 1, 0],
[0, 1, 0]])
loss_function = Loss_CategoricalCrossEntropy()
sample_losses = loss_function.forward(nn_outputs, class_targets)
print(f"Losses: {sample_losses}")
print(f"Average Loss: {loss_function.calculate(nn_outputs, class_targets)}")
# %% [markdown]
# # Introducing Accuracy
# A prediction counts as correct when the highest output value corresponds to the true class. Even an output of 51% red and 49% blue counts as fully correct when the true class is red, so accuracy reflects only the argmax, not the confidence.
# %%
nn_outputs = np.array([
[0.7, 0.1, 0.2],
[0.1, 0.5, 0.4],
[0.02, 0.9, 0.08]])
class_targets = np.array([
[1, 0, 0],
[0, 1, 0],
[0, 1, 0]])
# Calculate the losses
loss_function = Loss_CategoricalCrossEntropy()
sample_losses = loss_function.forward(nn_outputs, class_targets)
print(f"Losses: {sample_losses}")
print(f"Average Loss: {loss_function.calculate(nn_outputs, class_targets)}")
# Calculate the accuracy
predictions = np.argmax(nn_outputs, axis=1)
# If targets are one-hot encoded - convert them
if len(class_targets.shape) == 2:
    class_targets = np.argmax(class_targets, axis=1)
# True evaluates to 1; False to 0
accuracy = np.mean(predictions == class_targets)
print(f"Accuracy: {accuracy}")