# %% [markdown]
# # Previous Class Definitions
# The previously defined Layer_Dense, Activation_ReLU, Activation_Softmax, Loss, and Loss_CategoricalCrossEntropy classes.

# %%
# Imports
import matplotlib.pyplot as plt
import numpy as np
import nnfs
from nnfs.datasets import spiral_data, vertical_data

nnfs.init()

# %%
class Layer_Dense:
    def __init__(self, n_inputs, n_neurons):
        # Initialize the weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)  # Normally distributed weights
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        # Calculate the output values from inputs, weights, and biases
        # Weights are stored as (n_inputs, n_neurons), so no transpose is needed
        self.output = np.dot(inputs, self.weights) + self.biases


class Activation_ReLU:
    def forward(self, inputs):
        self.output = np.maximum(0, inputs)


class Activation_Softmax:
    def forward(self, inputs):
        # Get the unnormalized probabilities
        # Subtract the row-wise max before exponentiating to prevent overflow
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Normalize row-wise so each row sums to 1
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities


# Base class for loss functions
class Loss:
    '''Calculates the data and regularization losses given model output and ground truth values'''
    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.average(sample_losses)
        return data_loss


class Loss_CategoricalCrossEntropy(Loss):
    def forward(self, y_pred, y_true):
        '''y_pred is the neural network output
        y_true is the ideal output of the neural network'''
        samples = len(y_pred)

        # Bound the predicted values to avoid log(0)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if len(y_true.shape) == 1:
            # Categorically labeled
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2:
            # One-hot encoded
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # Calculate the losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods
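# %% [markdown]
# As a quick sanity check (not part of the original chapter code), the classes above can be chained
# into a forward pass on the spiral dataset. The layer sizes (2 inputs, 3 hidden neurons, 3 output
# classes) and variable names below are illustrative assumptions, not fixed requirements.

# %%
# Build a small network: Dense(2 -> 3) -> ReLU -> Dense(3 -> 3) -> Softmax
X, y = spiral_data(samples=100, classes=3)

dense1 = Layer_Dense(2, 3)
activation1 = Activation_ReLU()
dense2 = Layer_Dense(3, 3)
activation2 = Activation_Softmax()
loss_function = Loss_CategoricalCrossEntropy()

# Forward pass through each layer in order
dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Average cross-entropy loss over the batch
loss = loss_function.calculate(activation2.output, y)
print("First few softmax outputs:\n", activation2.output[:5])
print("Loss:", loss)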
# %% [markdown]
# # Backpropagation of a Single Neuron
# Backpropagation finds the gradient of the loss with respect to each parameter (weight and bias) of each neuron.
#
# Imagine a layer with 3 inputs and 1 neuron: three inputs (x0, x1, x2), three weights (w0, w1, w2), one bias (b0), and one output (z). A ReLU activation follows the neuron output, feeding a squared loss (loss = ReLU(z)^2).
#
# Loss = (ReLU(sum(mul(x0, w0), mul(x1, w1), mul(x2, w2), b0)))^2
#
# $\frac{\partial Loss()}{\partial w0} = \frac{\partial Loss()}{\partial ReLU()} * \frac{\partial ReLU()}{\partial sum()} * \frac{\partial sum()}{\partial mul(x0, w0)} * \frac{\partial mul(x0, w0)}{\partial w0}$
#
# $\frac{\partial Loss()}{\partial ReLU()} = 2 * ReLU(sum(...))$
#
# $\frac{\partial ReLU()}{\partial sum()}$ = 0 if sum(...) is less than 0 and 1 if sum(...) is greater than 0
#
# $\frac{\partial sum()}{\partial mul(x0, w0)} = 1$
#
# $\frac{\partial mul(x0, w0)}{\partial w0} = x0$
#
# The same chain is repeated for w1, w2, and b0.
#
# The chain rule gives the gradient directly (numerical differentiation can be used to double-check it; see the check after the training loop below).
# Then, we update each parameter with a small step, such that $w0[i+1] = w0[i] - step*\frac{\partial Loss()}{\partial w0}$

# %%
import numpy as np

# Initial parameters
weights = np.array([-3.0, -1.0, 2.0])
bias = 1.0
inputs = np.array([1.0, -2.0, 3.0])
target_output = 0.0
learning_rate = 0.001


def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    return np.where(x > 0, 1.0, 0.0)


for iteration in range(200):
    # Forward pass
    linear_output = np.dot(weights, inputs) + bias
    output = relu(linear_output)
    loss = (output - target_output) ** 2

    # Backward pass: calculate the gradient via the chain rule
    dloss_doutput = 2 * (output - target_output)
    doutput_dlinear = relu_derivative(linear_output)
    dlinear_dweights = inputs
    dlinear_dbias = 1.0

    dloss_dlinear = dloss_doutput * doutput_dlinear
    dloss_dweights = dloss_dlinear * dlinear_dweights
    dloss_dbias = dloss_dlinear * dlinear_dbias

    # Update weights and bias
    weights -= learning_rate * dloss_dweights
    bias -= learning_rate * dloss_dbias

    # Print the loss for this iteration
    print(f"Iteration {iteration + 1}, Loss: {loss}")

print("Final weights:", weights)
print("Final bias:", bias)
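# %% [markdown]
# As a sketch (not from the original text), the analytic gradient above can be cross-checked with
# numerical differentiation: nudge one parameter by a small `eps`, recompute the loss, and compare
# the central-difference slope to the chain-rule result. The helper `single_neuron_loss` and
# `eps = 1e-6` are illustrative choices, not part of the chapter code.

# %%
def single_neuron_loss(w, b, x, target):
    # Forward pass: linear combination -> ReLU -> squared error
    out = relu(np.dot(w, x) + b)
    return (out - target) ** 2

# Analytic gradient at the current (final) parameters, via the chain rule
linear_output = np.dot(weights, inputs) + bias
output = relu(linear_output)
analytic_dweights = 2 * (output - target_output) * relu_derivative(linear_output) * inputs

# Numerical gradient via central differences, one weight at a time
eps = 1e-6
numerical_dweights = np.zeros_like(weights)
for i in range(len(weights)):
    w_plus, w_minus = weights.copy(), weights.copy()
    w_plus[i] += eps
    w_minus[i] -= eps
    numerical_dweights[i] = (
        single_neuron_loss(w_plus, bias, inputs, target_output)
        - single_neuron_loss(w_minus, bias, inputs, target_output)
    ) / (2 * eps)

print("Analytic gradient: ", analytic_dweights)
print("Numerical gradient:", numerical_dweights)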