# Neural-Networks-From-Scratch/lecture12/notes_12.py
# %% [markdown]
# # Previous Class Definitions
# The previously defined Layer_Dense, Activation_ReLU, Activation_Softmax, Loss, and Loss_CategoricalCrossEntropy classes.
# %%
# imports
import matplotlib.pyplot as plt
import numpy as np
import nnfs
from nnfs.datasets import spiral_data, vertical_data
nnfs.init()
# %%
class Layer_Dense:
    def __init__(self, n_inputs, n_neurons):
        # Initialize the weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)  # Normal distribution of weights
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        # Calculate the output values from inputs, weights, and biases
        self.output = np.dot(inputs, self.weights) + self.biases  # Weights are already transposed

class Activation_ReLU:
    def forward(self, inputs):
        self.output = np.maximum(0, inputs)

class Activation_Softmax:
    def forward(self, inputs):
        # Get the unnormalized probabilities
        # Subtract the row max to prevent overflow from large exponentials
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Normalize the probabilities with element-wise division
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities

# Base class for Loss functions
class Loss:
    '''Calculates the data and regularization losses given
    model output and ground truth values'''
    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.average(sample_losses)
        return data_loss

class Loss_CategoricalCrossEntropy(Loss):
    def forward(self, y_pred, y_true):
        '''y_pred is the neural network output
        y_true is the ideal output of the neural network'''
        samples = len(y_pred)
        # Bound the predicted values to avoid log(0)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
        if len(y_true.shape) == 1:  # Categorically labeled
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2:  # One-hot encoded
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        # Calculate the losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods
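# %% [markdown]
# As a quick sanity check (a sketch added here, not from the original lecture code), the classes above can be chained into a single forward pass on the spiral dataset; the layer sizes below are illustrative choices.
# %%
X, y = spiral_data(samples=100, classes=3)
dense1 = Layer_Dense(2, 3)        # 2 input features -> 3 neurons
activation1 = Activation_ReLU()
dense2 = Layer_Dense(3, 3)        # 3 inputs -> 3 class scores
activation2 = Activation_Softmax()
loss_function = Loss_CategoricalCrossEntropy()
# Forward pass through the network and compute the loss
dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)
print("Loss on untrained network:", loss_function.calculate(activation2.output, y))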
# %% [markdown]
# # Backpropagation of a Single Neuron
# Backpropagation helps us find the gradient of the neural network with respect to each of the parameters (weights and biases) of each neuron.
#
# Imagine a layer with 3 inputs and 1 neuron: 3 inputs (x0, x1, x2), 3 weights (w0, w1, w2), 1 bias (b0), and 1 output (z). A ReLU activation follows the neuron output, and its result feeds into a squared loss.
#
# Loss = (ReLU(sum(mul(x0, w0), mul(x1, w1), mul(x2, w2), b0)))^2
#
# $\frac{\partial Loss()}{\partial w0} = \frac{\partial Loss()}{\partial ReLU()} \cdot \frac{\partial ReLU()}{\partial sum()} \cdot \frac{\partial sum()}{\partial mul(x0, w0)} \cdot \frac{\partial mul(x0, w0)}{\partial w0}$
#
# $\frac{\partial Loss()}{\partial ReLU()} = 2 \cdot ReLU(sum(...))$
#
# $\frac{\partial ReLU()}{\partial sum()} = 1$ if sum(...) is greater than 0, and 0 if it is less than 0
#
# $\frac{\partial sum()}{\partial mul(x0, w0)} = 1$
#
# $\frac{\partial mul(x0, w0)}{\partial w0} = x0$
#
# This is repeated for each parameter: w0, w1, w2, and b0.
#
# With the gradient in hand (computed analytically via the chain rule above, or approximated with numerical differentiation as a check), we update each parameter using small step sizes, such that $w0[i+1] = w0[i] - step \cdot \frac{\partial Loss()}{\partial w0}$
#
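#
# For concreteness (a worked check, not part of the original notes), take the initial values used in the code below, x = (1, -2, 3), w = (-3, -1, 2), b0 = 1:
#
# sum(...) = (1)(-3) + (-2)(-1) + (3)(2) + 1 = 6, so ReLU(sum(...)) = 6 and Loss = 6^2 = 36.
#
# The chain rule then gives $\frac{\partial Loss()}{\partial w0} = (2 \cdot 6) \cdot 1 \cdot 1 \cdot x0 = 12$, and similarly $\frac{\partial Loss()}{\partial w1} = -24$, $\frac{\partial Loss()}{\partial w2} = 36$, and $\frac{\partial Loss()}{\partial b0} = 12$.
#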
# %%
import numpy as np
# Initial parameters
weights = np.array([-3.0, -1.0, 2.0])
bias = 1.0
inputs = np.array([1.0, -2.0, 3.0])
target_output = 0.0
learning_rate = 0.001
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1.0, 0.0)

for iteration in range(200):
    # Forward pass
    linear_output = np.dot(weights, inputs) + bias
    output = relu(linear_output)
    loss = (output - target_output) ** 2
    # Backward pass to calculate gradient
    dloss_doutput = 2 * (output - target_output)
    doutput_dlinear = relu_derivative(linear_output)
    dlinear_dweights = inputs
    dlinear_dbias = 1.0
    dloss_dlinear = dloss_doutput * doutput_dlinear
    dloss_dweights = dloss_dlinear * dlinear_dweights
    dloss_dbias = dloss_dlinear * dlinear_dbias
    # Update weights and bias
    weights -= learning_rate * dloss_dweights
    bias -= learning_rate * dloss_dbias
    # Print the loss for this iteration
    print(f"Iteration {iteration + 1}, Loss: {loss}")
print("Final weights:", weights)
print("Final bias:", bias)