Lecture 8

2024-11-12 00:35:57 +00:00 · 2024-11-12 00:35:57 +00:00 · 13087d2a0b
commit 13087d2a0b
parent 40fd334e0e
2 changed files with 253 additions and 22 deletions
--- a/lecture07/handout_7.ipynb
+++ b/lecture07/handout_7.ipynb
@ -984,7 +984,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1012,7 +1012,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@ -1045,7 +1045,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
--- a/lecture07/notes_07.ipynb
+++ b/lecture07/notes_07.ipynb
@ -10,12 +10,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "vscode": {
-     "languageId": "plaintext"
-    }
-   },
+   "execution_count": 3,
+   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
@ -27,12 +23,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "vscode": {
-     "languageId": "plaintext"
-    }
-   },
+   "execution_count": 1,
+   "metadata": {},
   "outputs": [],
   "source": [
    "class Layer_Dense:\n",
@ -70,13 +62,21 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "vscode": {
-     "languageId": "plaintext"
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0.33333334 0.33333334 0.33333334]\n",
+      " [0.33333316 0.3333332  0.33333364]\n",
+      " [0.33333287 0.3333329  0.33333418]\n",
+      " [0.3333326  0.33333263 0.33333477]\n",
+      " [0.33333233 0.3333324  0.33333528]]\n"
+     ]
    }
-   },
-   "outputs": [],
+   ],
   "source": [
    "# Create dataset\n",
    "X, y = spiral_data(samples=100, classes=3)\n",
@ -105,11 +105,242 @@
    "# Let's see output of the first few samples:\n",
    "print(activation2.output[:5])"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Calculating Network Error with Categorical Cross Entropy Loss\n",
+    "The loss for one sample is the negative sum, over all outputs $i$, of the expected output times the log of the neural network output:\n",
+    "\n",
+    "$$L = -\\sum_i y_i \\log(\\hat{y}_i)$$\n",
+    "\n",
+    "where $y_i$ is the expected output and $\\hat{y}_i$ is the neural network output for class $i$.\n",
+    "\n",
+    "In the classification case, the outputs for incorrect classes do not end up mattering, because the expected output $y_i$ for every wrong class is 0.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Losses: [0.35667494 0.69314718 0.10536052]\n",
+      "Average Loss: 0.38506088005216804\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example network outputs: one row per sample, one column per class\n",
+    "nn_outputs = np.array([\n",
+    "    [0.7, 0.1, 0.2],\n",
+    "    [0.1, 0.5, 0.4],\n",
+    "    [0.02, 0.9, 0.08],\n",
+    "])\n",
+    "# Index of the correct class for each sample\n",
+    "class_targets = [0, 1, 1]\n",
+    "\n",
+    "# Pick each row's output for its correct class, then take the negative log\n",
+    "sample_rows = np.arange(len(nn_outputs))\n",
+    "correct_confidences = nn_outputs[sample_rows, class_targets]\n",
+    "losses = -np.log(correct_confidences)\n",
+    "\n",
+    "print(f\"Losses: {losses}\")\n",
+    "print(f\"Average Loss: {np.mean(losses)}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loss with One Hot Encoding\n",
+    "In classification, the expected output is typically all zeros except for a one at the class the inputs belong to. This simplifies the cross entropy loss calculation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Losses: [0.35667494 0.69314718 0.10536052]\n",
+      "Average Loss: 0.38506088005216804\n"
+     ]
+    }
+   ],
+   "source": [
+    "# One-hot targets: the single 1 in each row marks the correct class\n",
+    "true_output = np.array([\n",
+    "    [1, 0, 0],\n",
+    "    [0, 1, 0],\n",
+    "    [0, 1, 0]\n",
+    "])\n",
+    "\n",
+    "# Network outputs: one row per sample\n",
+    "nn_output = np.array([\n",
+    "    [0.7, 0.2, 0.1],\n",
+    "    [0.1, 0.5, 0.4],\n",
+    "    [0.02, 0.9, 0.08]\n",
+    "])\n",
+    "\n",
+    "# Multiplying element-wise by the one-hot targets zeroes every wrong-class output\n",
+    "A = np.multiply(true_output, nn_output)\n",
+    "\n",
+    "# Sum across each row (axis=1): only the correct-class output survives per sample\n",
+    "B = A.sum(axis=1)\n",
+    "\n",
+    "# Cross entropy loss of each sample\n",
+    "C = np.negative(np.log(B))\n",
+    "\n",
+    "print(f\"Losses: {C}\")\n",
+    "print(f\"Average Loss: {C.mean()}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Implementing the Loss Class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Common base class for loss functions\n",
+    "class Loss:\n",
+    "    '''Reduces the losses produced by a subclass to a single number.\n",
+    "\n",
+    "    Subclasses supply forward(output, y), which is expected to return\n",
+    "    the loss of each sample.'''\n",
+    "\n",
+    "    def calculate(self, output, y):\n",
+    "        '''Return the average of the losses given by forward.\n",
+    "\n",
+    "        output -- model output, handed on to forward\n",
+    "        y      -- ground truth values, handed on to forward'''\n",
+    "        return np.mean(self.forward(output, y))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Implementing the Categorical Cross Entropy Loss Class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Loss_CategoricalCrossEntropy(Loss):\n",
+    "    '''Categorical cross entropy loss for classification outputs.'''\n",
+    "\n",
+    "    def forward(self, y_pred, y_true):\n",
+    "        '''Return the negative log likelihood of each sample.\n",
+    "\n",
+    "        y_pred is the neural network output, one row per sample.\n",
+    "        y_true is the ideal output of the neural network, given either\n",
+    "        as class indices (1-D) or as one-hot rows (2-D); lists are accepted.\n",
+    "        Raises ValueError if y_true has any other number of dimensions.'''\n",
+    "        # Accept plain Python lists as well as arrays\n",
+    "        y_true = np.asarray(y_true)\n",
+    "        samples = len(y_pred)\n",
+    "        # Bound the predicted values so that log(0) can never be taken\n",
+    "        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)\n",
+    "\n",
+    "        if y_true.ndim == 1:      # Categorically labeled\n",
+    "            correct_confidences = y_pred_clipped[range(samples), y_true]\n",
+    "        elif y_true.ndim == 2:    # One hot encoded\n",
+    "            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)\n",
+    "        else:\n",
+    "            # Without this branch correct_confidences would be unbound below\n",
+    "            raise ValueError(\n",
+    "                f'y_true must be 1-D or 2-D, got {y_true.ndim} dimensions')\n",
+    "\n",
+    "        # Calculate the losses\n",
+    "        negative_log_likelihoods = -np.log(correct_confidences)\n",
+    "        return negative_log_likelihoods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Losses: [0.35667494 0.69314718 0.10536052]\n",
+      "Average Loss: 0.38506088005216804\n"
+     ]
+    }
+   ],
+   "source": [
+    "nn_outputs = np.array([\n",
+    "    [0.7, 0.1, 0.2],\n",
+    "    [0.1, 0.5, 0.4],\n",
+    "    [0.02, 0.9, 0.08]])\n",
+    "class_targets = np.array([\n",
+    "    [1, 0, 0],\n",
+    "    [0, 1, 0],\n",
+    "    [0, 1, 0]])\n",
+    "\n",
+    "loss_function = Loss_CategoricalCrossEntropy()\n",
+    "# forward gives one loss per sample; calculate reduces them to their average\n",
+    "losses = loss_function.forward(nn_outputs, class_targets)\n",
+    "average_loss = loss_function.calculate(nn_outputs, class_targets)\n",
+    "print(f\"Losses: {losses}\")\n",
+    "print(f\"Average Loss: {average_loss}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Introducing Accuracy\n",
+    "Accuracy is the fraction of samples whose highest output value lines up with the correct classification. In the simple example below every prediction matches, so the accuracy is 1. Confidence does not matter: even if an output were 51% red and 49% blue, and the true output is red, that sample would be considered fully accurate."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Losses: [0.35667494 0.69314718 0.10536052]\n",
+      "Average Loss: 0.38506088005216804\n",
+      "Accuracy: 1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "nn_outputs = np.array([\n",
+    "    [0.7, 0.1, 0.2],\n",
+    "    [0.1, 0.5, 0.4],\n",
+    "    [0.02, 0.9, 0.08]])\n",
+    "class_targets = np.array([\n",
+    "    [1, 0, 0],\n",
+    "    [0, 1, 0],\n",
+    "    [0, 1, 0]])\n",
+    "\n",
+    "# Calculate the losses: forward gives one loss per sample,\n",
+    "# calculate reduces them to their average\n",
+    "loss_function = Loss_CategoricalCrossEntropy()\n",
+    "losses = loss_function.forward(nn_outputs, class_targets)\n",
+    "average_loss = loss_function.calculate(nn_outputs, class_targets)\n",
+    "print(f\"Losses: {losses}\")\n",
+    "print(f\"Average Loss: {average_loss}\")\n",
+    "\n",
+    "# Calculate the accuracy: the predicted class is the column with the highest output\n",
+    "predictions = np.argmax(nn_outputs, axis=1)\n",
+    "# If targets are one-hot encoded - convert them to class indices\n",
+    "# (stored under a new name so class_targets itself is left untouched)\n",
+    "true_classes = class_targets\n",
+    "if true_classes.ndim == 2:\n",
+    "    true_classes = np.argmax(true_classes, axis=1)\n",
+    "# True evaluates to 1; False to 0\n",
+    "accuracy = np.mean(predictions == true_classes)\n",
+    "print(f\"Accuracy: {accuracy}\")"
+   ]
  }
 ],
 "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
  "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,