Lectures 18-20

commit 748500dc53
parent 0d4ea9b80f
@@ -14,9 +14,7 @@ Lecture 12 uses same handout.
 
 Lectures 13-17 use same handout.
 
-Lecture 18 uses same handout.
+Lectures 18-21 use same handout.
 
-Lectures 19-21 use same handout.
-
 Lecture 22 uses same handout.
 
File diff suppressed because one or more lines are too long
@@ -9,7 +9,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -23,7 +23,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 2,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -40,9 +40,6 @@
 "class Activation_ReLU:\n",
 "    def forward(self, inputs):\n",
 "        self.output = np.maximum(0, inputs)\n",
-"    \n",
-"    def derivative(self, inputs):\n",
-"        return np.where(inputs > 0, 1, 0)\n",
 "    \n",
 "class Activation_Softmax:\n",
 "    def forward(self, inputs):\n",
@@ -154,6 +151,162 @@
 "## Note With Layer_Dense class\n",
 "The Layer_Dense class stores its weights already transposed for forward propagation. Therefore, the weight matrix must be transposed for backpropagation."
 ]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Adding the Backward Propagation Method to Layer_Dense and ReLU Activation Classes"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 3,
+"metadata": {},
+"outputs": [],
+"source": [
+"class Layer_Dense:\n",
+"    def __init__(self, n_inputs, n_neurons):\n",
+"        # Initialize the weights and biases\n",
+"        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons) # Normal distribution of weights\n",
+"        self.biases = np.zeros((1, n_neurons))\n",
+"\n",
+"    def forward(self, inputs):\n",
+"        self.inputs = inputs # Remember the inputs for backward()\n",
+"        # Calculate the output values from inputs, weights, and biases\n",
+"        self.output = np.dot(inputs, self.weights) + self.biases # Weights are already transposed\n",
+"    \n",
+"    def backward(self, dvalues):\n",
+"        '''Calculate the gradient of the loss with respect to the weights and biases of this layer.\n",
+"        dvalues is equivalent to a transposed dl_dZ. It is the gradient \n",
+"        of the loss with respect to the outputs of this layer.'''\n",
+"        self.dweights = np.dot(self.inputs.T, dvalues)\n",
+"        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)\n",
+"        self.dinputs = np.dot(dvalues, self.weights.T)\n",
+"\n",
+"class Activation_ReLU:\n",
+"    def forward(self, inputs):\n",
+"        self.inputs = inputs # Remember the inputs for backward()\n",
+"        self.output = np.maximum(0, inputs)\n",
+"    \n",
+"    def backward(self, dvalues):\n",
+"        '''Calculate the gradient of the loss with respect to this layer's activation function.\n",
+"        dvalues is equivalent to a transposed dl_dZ. It is the gradient \n",
+"        of the loss with respect to the outputs of this layer.'''\n",
+"        self.dinputs = dvalues.copy()\n",
+"        self.dinputs[self.inputs <= 0] = 0"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Adding the Backward Propagation Method to the Loss_CategoricalCrossEntropy Class"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 4,
+"metadata": {},
+"outputs": [],
+"source": [
+"class Loss_CategoricalCrossEntropy(Loss):\n",
+"    def forward(self, y_pred, y_true):\n",
+"        '''y_pred is the neural network output\n",
+"        y_true is the ideal output of the neural network'''\n",
+"        samples = len(y_pred)\n",
+"        # Bound the predicted values \n",
+"        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)\n",
+"    \n",
+"        if len(y_true.shape) == 1: # Categorically labeled\n",
+"            correct_confidences = y_pred_clipped[range(samples), y_true]\n",
+"        elif len(y_true.shape) == 2: # One hot encoded\n",
+"            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)\n",
+"\n",
+"        # Calculate the losses\n",
+"        negative_log_likelihoods = -np.log(correct_confidences)\n",
+"        return negative_log_likelihoods\n",
+"    \n",
+"    def backward(self, dvalues, y_true):\n",
+"        samples = len(dvalues)\n",
+"\n",
+"        # Number of labels in each sample\n",
+"        labels = len(dvalues[0])\n",
+"\n",
+"        # if the labels are sparse, turn them into a one-hot vector\n",
+"        if len(y_true.shape) == 1:\n",
+"            y_true = np.eye(labels)[y_true]\n",
+"\n",
+"        # Calculate the gradient then normalize\n",
+"        self.dinputs = -y_true / dvalues\n",
+"        self.dinputs = self.dinputs / samples"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Combined Softmax Activation and Cross Entropy Loss"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 7,
+"metadata": {},
+"outputs": [],
+"source": [
+"class Activation_Softmax_Loss_CategoricalCrossentropy():\n",
+"    def __init__(self):\n",
+"        self.activation = Activation_Softmax()\n",
+"        self.loss = Loss_CategoricalCrossEntropy()\n",
+"\n",
+"    def forward(self, inputs, y_true):\n",
+"        self.activation.forward(inputs)\n",
+"        self.output = self.activation.output\n",
+"        return self.loss.calculate(self.output, y_true)\n",
+"    \n",
+"    def backward(self, dvalues, y_true):\n",
+"        samples = len(dvalues)\n",
+"\n",
+"        # if the samples are one-hot encoded, turn them into discrete values\n",
+"        if len(y_true.shape) == 2:\n",
+"            y_true = np.argmax(y_true, axis=1)\n",
+"        \n",
+"        # Copy so we can safely modify\n",
+"        self.dinputs = dvalues.copy()\n",
+"        \n",
+"        # Calculate and normalize gradient \n",
+"        self.dinputs[range(samples), y_true] -= 1\n",
+"        self.dinputs = self.dinputs / samples"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 8,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Gradients: combined loss and activation:\n",
+"[[-0.1         0.03333333  0.06666667]\n",
+" [ 0.03333333 -0.16666667  0.13333333]\n",
+" [ 0.00666667 -0.03333333  0.02666667]]\n"
+]
+}
+],
+"source": [
+"softmax_outputs = np.array([[0.7, 0.1, 0.2],\n",
+"                            [0.1, 0.5, 0.4],\n",
+"                            [0.02, 0.9, 0.08]])\n",
+"class_targets = np.array([0, 1, 1])\n",
+"softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
+"softmax_loss.backward(softmax_outputs, class_targets)\n",
+"dvalues1 = softmax_loss.dinputs\n",
+"print('Gradients: combined loss and activation:')\n",
+"print(dvalues1)"
 }
 ],
 "metadata": {
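
The notes below are illustrative sketches, not part of the commit. First, a minimal numpy check of the shape bookkeeping behind Layer_Dense.backward(): because the weights are stored with shape (n_inputs, n_neurons), the forward pass needs no transpose, while the backward pass transposes the inputs (for dweights) and the weights (for dinputs). The sample/input/neuron counts below are made up for illustration; numpy is assumed to be imported as np, as in the notebook.

import numpy as np

n_samples, n_inputs, n_neurons = 4, 3, 2
X = np.random.randn(n_samples, n_inputs)            # layer inputs
W = 0.01 * np.random.randn(n_inputs, n_neurons)     # stored "already transposed"
b = np.zeros((1, n_neurons))

Z = np.dot(X, W) + b                      # forward: (4, 3) @ (3, 2) -> (4, 2)
dZ = np.ones_like(Z)                      # stand-in for dvalues (dL/dZ)

dW = np.dot(X.T, dZ)                      # (3, 4) @ (4, 2) -> (3, 2), same shape as W
db = np.sum(dZ, axis=0, keepdims=True)    # (1, 2), same shape as b
dX = np.dot(dZ, W.T)                      # (4, 2) @ (2, 3) -> (4, 3), same shape as X

print(dW.shape, db.shape, dX.shape)       # (3, 2) (1, 2) (4, 3)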
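
A similar sketch for Activation_ReLU.backward(): masking a copy of dvalues wherever the stored inputs were <= 0 is equivalent to multiplying by the 0/1 derivative that the removed derivative() method used to return. The small input and gradient arrays are made up for illustration.

import numpy as np

inputs = np.array([[-1.0,  0.0, 2.0],
                   [ 3.0, -2.0, 0.5]])
dvalues = np.array([[1.0, 2.0, 3.0],
                    [4.0, 5.0, 6.0]])

dinputs = dvalues.copy()
dinputs[inputs <= 0] = 0                   # as in Activation_ReLU.backward()

# same result as scaling by the 0/1 derivative
assert np.array_equal(dinputs, dvalues * np.where(inputs > 0, 1, 0))
print(dinputs)                             # [[0. 0. 3.]
                                           #  [4. 0. 6.]]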
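
A finite-difference sanity check of Loss_CategoricalCrossEntropy.backward(), which computes -y_true / dvalues and then divides by the number of samples. This assumes the Loss base class (defined in an earlier cell, not shown in this diff) averages the per-sample losses, which is why the gradient is normalized by samples. The two-sample probabilities and one-hot targets below are made up.

import numpy as np

p = np.array([[0.7, 0.1, 0.2],
              [0.1, 0.5, 0.4]])
y_true = np.array([[1, 0, 0],
                   [0, 1, 0]])             # one-hot targets
samples = len(p)

def mean_loss(p):
    # mean categorical cross-entropy over the batch
    return np.mean(-np.log(np.sum(p * y_true, axis=1)))

analytic = (-y_true / p) / samples         # same expression as backward()

eps = 1e-6
numeric = np.zeros_like(p)
for i in range(p.shape[0]):
    for j in range(p.shape[1]):
        p_plus = p.copy()
        p_plus[i, j] += eps
        p_minus = p.copy()
        p_minus[i, j] -= eps
        numeric[i, j] = (mean_loss(p_plus) - mean_loss(p_minus)) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-5))   # True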
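
Finally, a quick check of the shortcut used in Activation_Softmax_Loss_CategoricalCrossentropy.backward(): the combined softmax plus cross-entropy gradient is simply (softmax output - one-hot targets) / samples. It reuses the softmax_outputs and class_targets from the last cell of the diff, so the result should match the printed gradients above.

import numpy as np

softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])
class_targets = np.array([0, 1, 1])
samples = len(softmax_outputs)

one_hot = np.eye(softmax_outputs.shape[1])[class_targets]
dinputs = (softmax_outputs - one_hot) / samples

print(dinputs)
# [[-0.1         0.03333333  0.06666667]
#  [ 0.03333333 -0.16666667  0.13333333]
#  [ 0.00666667 -0.03333333  0.02666667]]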