{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Previous Class Definitions\n",
"The previously defined Layer_Dense, Activation_ReLU, and Activation_Softmax classes are redefined below so this notebook can run on its own."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"import numpy as np\n",
"import nnfs\n",
"from nnfs.datasets import spiral_data\n",
"nnfs.init()"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"class Layer_Dense:\n",
"    def __init__(self, n_inputs, n_neurons):\n",
"        # Initialize the weights and biases\n",
"        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons) # Normal distribution of weights\n",
"        self.biases = np.zeros((1, n_neurons))\n",
"\n",
"    def forward(self, inputs):\n",
"        # Calculate the output values from inputs, weights, and biases\n",
"        self.output = np.dot(inputs, self.weights) + self.biases # Weights are already transposed\n",
"\n",
"class Activation_ReLU:\n",
"    def forward(self, inputs):\n",
"        self.output = np.maximum(0, inputs)\n",
"\n",
"class Activation_Softmax:\n",
"    def forward(self, inputs):\n",
"        # Get the unnormalized probabilities\n",
"        # Subtract max from the row to prevent larger numbers\n",
"        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))\n",
"\n",
"        # Normalize the probabilities with element wise division\n",
"        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)\n",
"        self.output = probabilities"
]
},
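{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Aside: Why Softmax Subtracts the Row Maximum\n",
"A minimal sketch (not part of the classes above) of why Activation_Softmax subtracts each row's maximum before exponentiating: the subtraction leaves the resulting probabilities unchanged but keeps np.exp from overflowing on large inputs. The logit values below are made up purely for illustration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative logits; np.exp(1000.0) would overflow without the max subtraction\n",
"logits = np.array([[1.0, 2.0, 1000.0]])\n",
"\n",
"# Stable softmax: shift each row so its maximum becomes 0 before exponentiating\n",
"shifted = logits - np.max(logits, axis=1, keepdims=True)\n",
"exp_values = np.exp(shifted)\n",
"probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)\n",
"print(probabilities)  # Same probabilities as an unshifted softmax, but no overflow"
]
},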
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Forward Pass with No Loss Consideration\n",
"A 2-input network with two dense layers of 3 neurons each: ReLU activation after the first layer, and Softmax after the second layer to normalize the outputs into probabilities."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.33333334 0.33333334 0.33333334]\n",
" [0.33333316 0.3333332 0.33333364]\n",
" [0.33333287 0.3333329 0.33333418]\n",
" [0.3333326 0.33333263 0.33333477]\n",
" [0.33333233 0.3333324 0.33333528]]\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"# Create Dense layer with 2 input features and 3 output values\n",
"dense1 = Layer_Dense(2, 3)\n",
"# Create ReLU activation (to be used with Dense layer):\n",
"activation1 = Activation_ReLU()\n",
"# Create second Dense layer with 3 input features (as we take output\n",
"# of previous layer here) and 3 output values\n",
"dense2 = Layer_Dense(3, 3)\n",
"# Create Softmax activation (to be used with Dense layer):\n",
"activation2 = Activation_Softmax()\n",
"\n",
"# Make a forward pass of our training data through this layer\n",
"dense1.forward(X)\n",
"\n",
"# Make a forward pass through activation function\n",
"# it takes the output of first dense layer here\n",
"activation1.forward(dense1.output)\n",
"# Make a forward pass through second Dense layer\n",
"# it takes outputs of activation function of first layer as inputs\n",
"dense2.forward(activation1.output)\n",
"# Make a forward pass through activation function\n",
"# it takes the output of second dense layer here\n",
"activation2.forward(dense2.output)\n",
"# Let's see output of the first few samples:\n",
"print(activation2.output[:5])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Calculating Network Error with Categorical Cross Entropy Loss\n",
"The categorical cross-entropy loss of a sample is the negative sum, over every output index i, of the expected output times the log of the network's output:\n",
"\n",
"loss = -sum(expected_i * log(nn_output_i)) for all i in outputs\n",
"\n",
"In the classification case the incorrect outputs end up not mattering: expected_i is 0 for every class except the true one, so the loss reduces to -log of the predicted confidence for the correct class.\n"
]
},
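{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Aside: The Loss Formula Without NumPy\n",
"A minimal, non-vectorized sketch of the formula above for a single sample, using made-up prediction values, before the batched NumPy version in the next cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"\n",
"# One sample: network output and its one-hot expected output (illustrative values)\n",
"nn_output = [0.7, 0.1, 0.2]\n",
"expected = [1, 0, 0]\n",
"\n",
"# loss = -sum(expected_i * log(nn_output_i)) over all outputs i\n",
"loss = -sum(e * math.log(p) for e, p in zip(expected, nn_output))\n",
"print(loss)  # -log(0.7), about 0.357; only the correct class term survives"
]
},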
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Losses: [0.35667494 0.69314718 0.10536052]\n",
"Average Loss: 0.38506088005216804\n"
]
}
],
"source": [
"nn_outputs = np.array([\n",
"    [0.7, 0.1, 0.2],\n",
"    [0.1, 0.5, 0.4],\n",
"    [0.02, 0.9, 0.08]])\n",
"class_targets = [0, 1, 1]\n",
"losses = -np.log(nn_outputs[range(len(nn_outputs)), class_targets])\n",
"print(f\"Losses: {losses}\")\n",
"print(f\"Average Loss: {np.average(losses)}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loss with One Hot Encoding\n",
"In classification the expected output is typically all zeros except for the class the inputs belong to (one-hot encoding). This simplifies the cross-entropy loss calculation."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Losses: [0.35667494 0.69314718 0.10536052]\n",
"Average Loss: 0.38506088005216804\n"
]
}
],
"source": [
"true_output = np.array([\n",
"    [1, 0, 0],\n",
"    [0, 1, 0],\n",
"    [0, 1, 0]\n",
"])\n",
"\n",
"nn_output = np.array([\n",
"    [0.7, 0.2, 0.1],\n",
"    [0.1, 0.5, 0.4],\n",
"    [0.02, 0.9, 0.08]\n",
"])\n",
"\n",
"# Element-by-element multiplication zeroes out the output terms whose target is 0\n",
"A = true_output*nn_output\n",
"\n",
"# Sum along axis=1 (across each row), since each row is one sample's output\n",
"B = np.sum(A, axis=1)\n",
"\n",
"# Get the cross entropy loss\n",
"C = -np.log(B)\n",
"\n",
"print(f\"Losses: {C}\")\n",
"print(f\"Average Loss: {np.mean(C)}\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Implementing the Loss Class"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Base class for Loss functions\n",
"class Loss:\n",
"    '''Calculates the data and regularization losses given\n",
"    model output and ground truth values'''\n",
"    def calculate(self, output, y):\n",
"        sample_losses = self.forward(output, y)\n",
"        data_loss = np.average(sample_losses)\n",
"        return data_loss"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Implementing the Categorical Cross Entropy Loss Class"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"class Loss_CategoricalCrossEntropy(Loss):\n",
"    def forward(self, y_pred, y_true):\n",
"        '''y_pred is the neural network output\n",
"        y_true is the ideal output of the neural network'''\n",
"        samples = len(y_pred)\n",
"        # Clip the predicted values away from 0 and 1 so -log(0) can never occur\n",
"        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)\n",
"\n",
"        if len(y_true.shape) == 1: # Categorically labeled\n",
"            correct_confidences = y_pred_clipped[range(samples), y_true]\n",
"        elif len(y_true.shape) == 2: # One hot encoded\n",
"            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)\n",
"\n",
"        # Calculate the losses\n",
"        negative_log_likelihoods = -np.log(correct_confidences)\n",
"        return negative_log_likelihoods"
]
},
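{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Aside: Why the Predictions Are Clipped\n",
"A small sketch (with made-up confidence values, separate from the class above) of why forward() clips y_pred to the range [1e-7, 1 - 1e-7]: a predicted confidence of exactly 0 would send -log to infinity, and clipping both ends keeps the adjustment symmetric."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# An extreme Softmax output can underflow to exactly 0, which breaks the log\n",
"confidences = np.array([0.0, 0.3, 1.0])\n",
"print(-np.log(confidences))  # inf at 0.0 (NumPy also emits a divide-by-zero warning)\n",
"\n",
"# Clipping keeps every value strictly inside (0, 1), so the loss stays finite\n",
"clipped = np.clip(confidences, 1e-7, 1 - 1e-7)\n",
"print(-np.log(clipped))"
]
},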
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loss: 0.38506088005216804\n"
]
}
],
"source": [
"nn_outputs = np.array([\n",
"    [0.7, 0.1, 0.2],\n",
"    [0.1, 0.5, 0.4],\n",
"    [0.02, 0.9, 0.08]])\n",
"class_targets = np.array([\n",
"    [1, 0, 0],\n",
"    [0, 1, 0],\n",
"    [0, 1, 0]])\n",
"\n",
"loss_function = Loss_CategoricalCrossEntropy()\n",
"# calculate() already averages the per-sample losses over the batch\n",
"loss = loss_function.calculate(nn_outputs, class_targets)\n",
"print(f\"Loss: {loss}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Introducing Accuracy\n",
"In this simple scheme, a prediction counts as accurate whenever the highest output value aligns with the correct class. Even an output of 51% red and 49% blue is scored as fully accurate when the true class is red."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loss: 0.38506088005216804\n",
"Accuracy: 1.0\n"
]
}
],
"source": [
"nn_outputs = np.array([\n",
"    [0.7, 0.1, 0.2],\n",
"    [0.1, 0.5, 0.4],\n",
"    [0.02, 0.9, 0.08]])\n",
"class_targets = np.array([\n",
"    [1, 0, 0],\n",
"    [0, 1, 0],\n",
"    [0, 1, 0]])\n",
"\n",
"# Calculate the loss (calculate() returns the batch average)\n",
"loss_function = Loss_CategoricalCrossEntropy()\n",
"loss = loss_function.calculate(nn_outputs, class_targets)\n",
"print(f\"Loss: {loss}\")\n",
"\n",
"# Calculate the accuracy\n",
"predictions = np.argmax(nn_outputs, axis=1)\n",
"# If targets are one-hot encoded - convert them\n",
"if len(class_targets.shape) == 2:\n",
"    class_targets = np.argmax(class_targets, axis=1)\n",
"# True evaluates to 1; False to 0\n",
"accuracy = np.mean(predictions == class_targets)\n",
"print(f\"Accuracy: {accuracy}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}