# %% [markdown]
# # Previous Class Definitions
# The previously defined Layer_Dense, Activation_ReLU, Activation_Softmax, Loss, and Loss_CategoricalCrossEntropy classes.

# %%
# Imports
import matplotlib.pyplot as plt
import numpy as np
import nnfs
from nnfs.datasets import spiral_data, vertical_data

nnfs.init()

# %%
class Layer_Dense:

    def __init__(self, n_inputs, n_neurons):
        # Initialize the weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)  # Normally distributed weights
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        # Calculate the output values from inputs, weights, and biases
        self.output = np.dot(inputs, self.weights) + self.biases  # Weights are already transposed


class Activation_ReLU:

    def forward(self, inputs):
        self.output = np.maximum(0, inputs)


class Activation_Softmax:

    def forward(self, inputs):
        # Get the unnormalized probabilities
        # Subtract the row-wise max to prevent overflow in the exponential
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))

        # Normalize the probabilities with element-wise division
        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        self.output = probabilities


# Base class for loss functions
class Loss:
    '''Calculates the data and regularization losses given
    model output and ground truth values'''

    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.average(sample_losses)
        return data_loss


class Loss_CategoricalCrossEntropy(Loss):

    def forward(self, y_pred, y_true):
        '''y_pred is the neural network output
        y_true is the ideal output of the neural network'''
        samples = len(y_pred)

        # Bound the predicted values away from 0 and 1 to avoid log(0)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if len(y_true.shape) == 1:  # Categorically labeled
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2:  # One-hot encoded
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        # Calculate the losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

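# %% [markdown]
# A minimal sketch of how these classes chain together on the `spiral_data` set. This cell is only an illustration, not part of the original notebook: the dataset size (`samples=100, classes=3`) and layer sizes (2 inputs, 3 hidden neurons, 3 output classes) are assumptions chosen to match the spiral dataset's shape.

# %%
# Illustrative forward pass: dense -> ReLU -> dense -> softmax -> cross-entropy loss
X, y = spiral_data(samples=100, classes=3)

dense1 = Layer_Dense(2, 3)        # 2 input features (x, y coordinates), 3 neurons
activation1 = Activation_ReLU()
dense2 = Layer_Dense(3, 3)        # 3 inputs from the hidden layer, 3 output classes
activation2 = Activation_Softmax()
loss_function = Loss_CategoricalCrossEntropy()

dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# y holds integer class labels, so the categorically-labeled branch of the loss is used
loss = loss_function.calculate(activation2.output, y)
print("Softmax output (first 5 samples):\n", activation2.output[:5])
print("Loss:", loss)
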
# %% [markdown]
# # Backpropagation of a Single Neuron
# Backpropagation gives us the gradient of the loss with respect to each parameter (the weights and biases) of each neuron.
#
# Imagine a layer with 3 inputs and 1 neuron: 3 inputs (x0, x1, x2), 3 weights (w0, w1, w2), 1 bias (b0), and 1 output (z). A ReLU activation follows the neuron output, and its result feeds a squared loss with a target of 0, so the loss is ReLU(z)^2:
#
# Loss = (ReLU(sum(mul(x0, w0), mul(x1, w1), mul(x2, w2), b0)))^2
#
# By the chain rule:
#
# $\frac{\partial Loss()}{\partial w0} = \frac{\partial Loss()}{\partial ReLU()} * \frac{\partial ReLU()}{\partial sum()} * \frac{\partial sum()}{\partial mul(x0, w0)} * \frac{\partial mul(x0, w0)}{\partial w0}$
#
# $\frac{\partial Loss()}{\partial ReLU()} = 2 * ReLU(sum(...))$
#
# $\frac{\partial ReLU()}{\partial sum()} = 1$ if sum(...) is greater than 0, and $0$ if sum(...) is less than 0
#
# $\frac{\partial sum()}{\partial mul(x0, w0)} = 1$
#
# $\frac{\partial mul(x0, w0)}{\partial w0} = x0$
#
# This is repeated for w0, w1, w2, and b0.
#
# Multiplying these terms gives the analytical gradient for each parameter. We then update the parameters with small step sizes, such that $w0[i+1] = w0[i] - step*\frac{\partial Loss()}{\partial w0}$. The analytical gradient can also be sanity-checked against numerical differentiation; see the sketch after the training loop below.
#
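# %% [markdown]
# As a concrete check, using the same numbers as the code cell below (x = [1, -2, 3], w = [-3, -1, 2], b = 1, target 0):
#
# $z = 1*(-3) + (-2)*(-1) + 3*2 + 1 = 6$, so $ReLU(z) = 6$ and $Loss = 6^2 = 36$.
#
# $\frac{\partial Loss}{\partial w0} = 2*6 * 1 * 1 * x0 = 12 * 1 = 12$, and likewise $\frac{\partial Loss}{\partial w1} = 12*(-2) = -24$, $\frac{\partial Loss}{\partial w2} = 12*3 = 36$, $\frac{\partial Loss}{\partial b0} = 12$.
#
# This arithmetic is only a worked illustration of the formulas above; the loop below performs the same computation with NumPy.
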
# %%
import numpy as np

# Initial parameters
weights = np.array([-3.0, -1.0, 2.0])
bias = 1.0
inputs = np.array([1.0, -2.0, 3.0])
target_output = 0.0
learning_rate = 0.001


def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    return np.where(x > 0, 1.0, 0.0)


for iteration in range(200):
    # Forward pass
    linear_output = np.dot(weights, inputs) + bias
    output = relu(linear_output)
    loss = (output - target_output) ** 2

    # Backward pass to calculate the gradient
    dloss_doutput = 2 * (output - target_output)
    doutput_dlinear = relu_derivative(linear_output)
    dlinear_dweights = inputs
    dlinear_dbias = 1.0

    dloss_dlinear = dloss_doutput * doutput_dlinear
    dloss_dweights = dloss_dlinear * dlinear_dweights
    dloss_dbias = dloss_dlinear * dlinear_dbias

    # Update weights and bias
    weights -= learning_rate * dloss_dweights
    bias -= learning_rate * dloss_dbias

    # Print the loss for this iteration
    print(f"Iteration {iteration + 1}, Loss: {loss}")

print("Final weights:", weights)
print("Final bias:", bias)

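# %% [markdown]
# A small sanity check, not part of the original loop: a central-difference numerical approximation of the gradient should closely match the analytical gradient derived above. The `single_neuron_loss` and `numerical_gradient` helpers are hypothetical names introduced only for this sketch; they reuse `relu`, `relu_derivative`, and the trained `weights`/`bias` from the previous cell.

# %%
def single_neuron_loss(w, b, x, target):
    # Forward pass only: squared error of the ReLU-activated neuron output
    return (relu(np.dot(w, x) + b) - target) ** 2


def numerical_gradient(w, b, x, target, eps=1e-6):
    # Central-difference approximation of dLoss/dw and dLoss/db (hypothetical helper)
    grad_w = np.zeros_like(w)
    for i in range(len(w)):
        w_plus, w_minus = w.copy(), w.copy()
        w_plus[i] += eps
        w_minus[i] -= eps
        grad_w[i] = (single_neuron_loss(w_plus, b, x, target)
                     - single_neuron_loss(w_minus, b, x, target)) / (2 * eps)
    grad_b = (single_neuron_loss(w, b + eps, x, target)
              - single_neuron_loss(w, b - eps, x, target)) / (2 * eps)
    return grad_w, grad_b


# Analytical gradient at the current (trained) parameters, same chain rule as the loop above
linear_output = np.dot(weights, inputs) + bias
dloss_dlinear = 2 * (relu(linear_output) - target_output) * relu_derivative(linear_output)
analytic_w = dloss_dlinear * inputs
analytic_b = dloss_dlinear * 1.0

numeric_w, numeric_b = numerical_gradient(weights, bias, inputs, target_output)
print("Analytical weight gradient:", analytic_w)
print("Numerical weight gradient: ", numeric_w)
print("Analytical bias gradient:", analytic_b, " Numerical:", numeric_b)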