# %% [markdown]
# # Previous Class Definitions
# The previously defined Layer_Dense, Activation_ReLU, and Activation_Softmax classes.

# %%
# Imports
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()

# %%
class Layer_Dense:
    def __init__(self, n_inputs, n_neurons):
        # Initialize the weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)  # Small weights drawn from a normal distribution
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        # Calculate the output values from inputs, weights, and biases
        self.output = np.dot(inputs, self.weights) + self.biases  # Weights are stored as (n_inputs, n_neurons), so no transpose is needed


class Activation_ReLU:
    def forward(self, inputs):
        self.output = np.maximum(0, inputs)


class Activation_Softmax:
    def forward(self, inputs):
        # Get the unnormalized probabilities
        # Subtract the row max so the exponentials cannot overflow
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Normalize the probabilities with element-wise division
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities

# %% [markdown]
# # Forward Pass with No Loss Consideration
# A network with 2 input features and 2 dense layers of 3 neurons each: ReLU activation after the first layer and Softmax after the second to normalize the outputs into probabilities.

# %%
# Create dataset
X, y = spiral_data(samples=100, classes=3)

# Create Dense layer with 2 input features and 3 output values
dense1 = Layer_Dense(2, 3)

# Create ReLU activation (to be used with Dense layer):
activation1 = Activation_ReLU()

# Create second Dense layer with 3 input features (as we take output
# of previous layer here) and 3 output values
dense2 = Layer_Dense(3, 3)

# Create Softmax activation (to be used with Dense layer):
activation2 = Activation_Softmax()

# Make a forward pass of our training data through this layer
dense1.forward(X)

# Make a forward pass through the activation function
# it takes the output of the first dense layer here
activation1.forward(dense1.output)

# Make a forward pass through the second Dense layer
# it takes the output of the first layer's activation function as input
dense2.forward(activation1.output)

# Make a forward pass through the activation function
# it takes the output of the second dense layer here
activation2.forward(dense2.output)

# Let's see the output of the first few samples:
print(activation2.output[:5])

# %% [markdown]
# # Calculating Network Error with Categorical Cross Entropy Loss
# The loss is the negative sum of the expected output times the log of the neural network output:
#
# loss = -sum(expected_i * log(nn_output_i)) for all i in outputs
#
# In the classification case, incorrect outputs do not end up mattering, because expected_i is 0 for every wrong class.

# %%
nn_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]])
class_targets = [0, 1, 1]

# Pick each sample's predicted confidence for its correct class, then take the negative log
losses = -np.log(nn_outputs[range(len(nn_outputs)), class_targets])
print(f"Losses: {losses}")
print(f"Average Loss: {np.average(losses)}")

# %% [markdown]
# ## Loss with One Hot Encoding
# In classification, the expected output is typically all zeros except for a one at the class the inputs belong to. This one-hot encoding simplifies the cross entropy loss calculation.
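# %% [markdown]
# As a quick aside (a sketch, not part of the original walkthrough): integer class labels can be
# turned into one-hot rows by indexing an identity matrix with `np.eye`. This is one common way to
# build targets shaped like the `true_output` array used below; the variable names here are
# illustrative only.

# %%
# Sketch: convert integer class targets into one-hot rows (assumes 3 classes)
int_targets = np.array([0, 1, 1])         # illustrative integer labels
one_hot_targets = np.eye(3)[int_targets]  # row i of the identity matrix is the one-hot vector for class i
print(one_hot_targets)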
# %%
true_output = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0]
])
nn_output = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]
])

# Element-by-element multiplication "erases" the output terms corresponding with 0
A = true_output * nn_output
# Sum across each row (axis=1) because each row is one sample's output in the batch
B = np.sum(A, axis=1)
# Get the cross entropy loss
C = -np.log(B)

print(f"Losses: {C}")
print(f"Average Loss: {np.mean(C)}")

# %% [markdown]
# ## Implementing the Loss Class

# %%
# Base class for Loss functions
class Loss:
    '''Calculates the data and regularization losses given model output and ground truth values'''

    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.average(sample_losses)
        return data_loss

# %% [markdown]
# ## Implementing the Categorical Cross Entropy Loss Class

# %%
class Loss_CategoricalCrossEntropy(Loss):
    def forward(self, y_pred, y_true):
        '''y_pred is the neural network output
        y_true is the ideal output of the neural network'''
        samples = len(y_pred)

        # Bound the predicted values away from 0 and 1 so the log never blows up
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if len(y_true.shape) == 1:
            # Categorically labeled (integer class indices)
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2:
            # One-hot encoded
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # Calculate the losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

# %%
nn_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]])
class_targets = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0]])

loss_function = Loss_CategoricalCrossEntropy()
# forward() returns the per-sample losses; calculate() returns their average
losses = loss_function.forward(nn_outputs, class_targets)
print(f"Losses: {losses}")
print(f"Average Loss: {loss_function.calculate(nn_outputs, class_targets)}")

# %% [markdown]
# # Introducing Accuracy
# In this simple scheme, a prediction is fully correct whenever the highest output value lands on the correct class, so its accuracy is 1. Even if the output were 51% red and 49% blue and the true class is red, the prediction would still count as fully accurate.

# %%
nn_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]])
class_targets = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0]])

# Calculate the losses
loss_function = Loss_CategoricalCrossEntropy()
losses = loss_function.forward(nn_outputs, class_targets)
print(f"Losses: {losses}")
print(f"Average Loss: {np.average(losses)}")

# Calculate the accuracy
predictions = np.argmax(nn_outputs, axis=1)
# If targets are one-hot encoded - convert them to class indices
if len(class_targets.shape) == 2:
    class_targets = np.argmax(class_targets, axis=1)
# True evaluates to 1; False to 0
accuracy = np.mean(predictions == class_targets)
print(f"Accuracy: {accuracy}")
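# %% [markdown]
# # Putting It Together
# The cell below is a minimal sketch (not part of the original walkthrough) that runs the spiral
# data through the two-layer network defined above, then reports the average categorical cross
# entropy loss and the accuracy using the integer labels `y` returned by `spiral_data`.

# %%
# Fresh dataset and model objects, reusing the classes defined earlier
X, y = spiral_data(samples=100, classes=3)

dense1 = Layer_Dense(2, 3)
activation1 = Activation_ReLU()
dense2 = Layer_Dense(3, 3)
activation2 = Activation_Softmax()
loss_function = Loss_CategoricalCrossEntropy()

# Forward pass through both dense layers and their activations
dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Average loss of the softmax probabilities against the integer labels
loss = loss_function.calculate(activation2.output, y)

# Accuracy: fraction of samples whose argmax matches the label
predictions = np.argmax(activation2.output, axis=1)
accuracy = np.mean(predictions == y)

print(f"Average Loss: {loss}")
print(f"Accuracy: {accuracy}")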