From a2267e71c8acc051c3f12603e09b9f886efeb9b8 Mon Sep 17 00:00:00 2001 From: judsonupchurch Date: Sun, 19 Jan 2025 19:03:38 +0000 Subject: [PATCH] Lecture 21-22 --- lecture18_21/notes_18.ipynb | 216 +++++++++++++++++++++++++++++++++++- lecture22/handout_22.ipynb | 14 --- 2 files changed, 210 insertions(+), 20 deletions(-) diff --git a/lecture18_21/notes_18.ipynb b/lecture18_21/notes_18.ipynb index e32fb95..bd404d5 100644 --- a/lecture18_21/notes_18.ipynb +++ b/lecture18_21/notes_18.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -161,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -173,6 +173,7 @@ "\n", " def forward(self, inputs):\n", " # Calculate the output values from inputs, weights, and biases\n", + " self.inputs = inputs\n", " self.output = np.dot(inputs, self.weights) + self.biases # Weights are already transposed\n", " \n", " def backward(self, dvalues):\n", @@ -185,6 +186,7 @@ "\n", "class Activation_ReLU:\n", " def forward(self, inputs):\n", + " self.inputs = inputs\n", " self.output = np.maximum(0, inputs)\n", " \n", " def backward(self, dvalues):\n", @@ -204,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -249,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -280,7 +282,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -305,6 +307,208 @@ "print('Gradients: combined loss and activation:')\n", "print(dvalues1)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Optimizer_SGD Class" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "class Optimizer_SGD():\n", + " def __init__(self, learning_rate=0.5):\n", + " self.learning_rate = learning_rate\n", + "\n", + " def update_params(self, layer):\n", + " layer.weights += -self.learning_rate * layer.dweights\n", + " layer.biases += -self.learning_rate * layer.dbiases" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optimizer_SGD Class on Spiral Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch: 0, acc: 0.350, loss: 1.099\n", + "epoch: 100, acc: 0.457, loss: 1.090\n", + "epoch: 200, acc: 0.460, loss: 1.058\n", + "epoch: 300, acc: 0.460, loss: 1.055\n", + "epoch: 400, acc: 0.463, loss: 1.053\n", + "epoch: 500, acc: 0.463, loss: 1.052\n", + "epoch: 600, acc: 0.463, loss: 1.052\n", + "epoch: 700, acc: 0.460, loss: 1.052\n", + "epoch: 800, acc: 0.463, loss: 1.051\n", + "epoch: 900, acc: 0.450, loss: 1.051\n", + "epoch: 1000, acc: 0.447, loss: 1.051\n", + "epoch: 1100, acc: 0.450, loss: 1.050\n", + "epoch: 1200, acc: 0.453, loss: 1.049\n", + "epoch: 1300, acc: 0.453, loss: 1.048\n", + "epoch: 1400, acc: 0.453, loss: 1.046\n", + "epoch: 1500, acc: 0.457, loss: 1.044\n", + "epoch: 1600, acc: 0.457, loss: 1.040\n", + "epoch: 1700, acc: 0.463, loss: 1.036\n", + "epoch: 1800, acc: 0.457, loss: 1.030\n", + "epoch: 1900, acc: 0.467, loss: 1.025\n", + "epoch: 2000, acc: 
0.477, loss: 1.019\n", + "epoch: 2100, acc: 0.500, loss: 1.013\n", + "epoch: 2200, acc: 0.513, loss: 1.007\n", + "epoch: 2300, acc: 0.523, loss: 0.999\n", + "epoch: 2400, acc: 0.533, loss: 0.990\n", + "epoch: 2500, acc: 0.540, loss: 0.981\n", + "epoch: 2600, acc: 0.537, loss: 0.973\n", + "epoch: 2700, acc: 0.543, loss: 0.964\n", + "epoch: 2800, acc: 0.537, loss: 0.957\n", + "epoch: 2900, acc: 0.547, loss: 0.947\n", + "epoch: 3000, acc: 0.553, loss: 0.938\n", + "epoch: 3100, acc: 0.507, loss: 0.937\n", + "epoch: 3200, acc: 0.523, loss: 0.950\n", + "epoch: 3300, acc: 0.523, loss: 0.931\n", + "epoch: 3400, acc: 0.537, loss: 0.950\n", + "epoch: 3500, acc: 0.480, loss: 0.929\n", + "epoch: 3600, acc: 0.490, loss: 0.925\n", + "epoch: 3700, acc: 0.540, loss: 0.932\n", + "epoch: 3800, acc: 0.530, loss: 0.916\n", + "epoch: 3900, acc: 0.507, loss: 0.932\n", + "epoch: 4000, acc: 0.487, loss: 0.939\n", + "epoch: 4100, acc: 0.590, loss: 0.950\n", + "epoch: 4200, acc: 0.530, loss: 0.932\n", + "epoch: 4300, acc: 0.523, loss: 0.921\n", + "epoch: 4400, acc: 0.450, loss: 0.907\n", + "epoch: 4500, acc: 0.487, loss: 0.932\n", + "epoch: 4600, acc: 0.507, loss: 0.906\n", + "epoch: 4700, acc: 0.493, loss: 0.908\n", + "epoch: 4800, acc: 0.487, loss: 0.911\n", + "epoch: 4900, acc: 0.547, loss: 0.908\n", + "epoch: 5000, acc: 0.520, loss: 0.913\n", + "epoch: 5100, acc: 0.537, loss: 0.909\n", + "epoch: 5200, acc: 0.560, loss: 0.907\n", + "epoch: 5300, acc: 0.553, loss: 0.914\n", + "epoch: 5400, acc: 0.580, loss: 0.893\n", + "epoch: 5500, acc: 0.573, loss: 0.927\n", + "epoch: 5600, acc: 0.573, loss: 0.910\n", + "epoch: 5700, acc: 0.523, loss: 0.886\n", + "epoch: 5800, acc: 0.593, loss: 0.894\n", + "epoch: 5900, acc: 0.530, loss: 0.904\n", + "epoch: 6000, acc: 0.540, loss: 0.876\n", + "epoch: 6100, acc: 0.510, loss: 0.900\n", + "epoch: 6200, acc: 0.567, loss: 0.881\n", + "epoch: 6300, acc: 0.583, loss: 0.920\n", + "epoch: 6400, acc: 0.573, loss: 0.896\n", + "epoch: 6500, acc: 0.550, loss: 0.865\n", + "epoch: 6600, acc: 0.500, loss: 0.899\n", + "epoch: 6700, acc: 0.590, loss: 0.874\n", + "epoch: 6800, acc: 0.590, loss: 0.936\n", + "epoch: 6900, acc: 0.547, loss: 0.894\n", + "epoch: 7000, acc: 0.570, loss: 0.890\n", + "epoch: 7100, acc: 0.560, loss: 0.876\n", + "epoch: 7200, acc: 0.583, loss: 0.866\n", + "epoch: 7300, acc: 0.550, loss: 0.876\n", + "epoch: 7400, acc: 0.527, loss: 0.874\n", + "epoch: 7500, acc: 0.533, loss: 0.854\n", + "epoch: 7600, acc: 0.540, loss: 0.847\n", + "epoch: 7700, acc: 0.540, loss: 0.842\n", + "epoch: 7800, acc: 0.540, loss: 0.841\n", + "epoch: 7900, acc: 0.553, loss: 0.882\n", + "epoch: 8000, acc: 0.577, loss: 0.961\n", + "epoch: 8100, acc: 0.500, loss: 0.914\n", + "epoch: 8200, acc: 0.510, loss: 0.872\n", + "epoch: 8300, acc: 0.513, loss: 0.869\n", + "epoch: 8400, acc: 0.597, loss: 0.898\n", + "epoch: 8500, acc: 0.553, loss: 0.927\n", + "epoch: 8600, acc: 0.520, loss: 0.884\n", + "epoch: 8700, acc: 0.533, loss: 0.852\n", + "epoch: 8800, acc: 0.540, loss: 0.844\n", + "epoch: 8900, acc: 0.540, loss: 0.840\n", + "epoch: 9000, acc: 0.537, loss: 0.840\n", + "epoch: 9100, acc: 0.543, loss: 0.851\n", + "epoch: 9200, acc: 0.560, loss: 0.880\n", + "epoch: 9300, acc: 0.590, loss: 0.914\n", + "epoch: 9400, acc: 0.610, loss: 0.926\n", + "epoch: 9500, acc: 0.533, loss: 0.932\n", + "epoch: 9600, acc: 0.503, loss: 0.915\n", + "epoch: 9700, acc: 0.503, loss: 0.894\n", + "epoch: 9800, acc: 0.510, loss: 0.882\n", + "epoch: 9900, acc: 0.513, loss: 0.873\n", + "epoch: 10000, acc: 0.513, loss: 0.872\n" + ] + } + 
],
+   "source": [
+    "# Create dataset\n",
+    "X, y = spiral_data(samples=100, classes=3)\n",
+    "\n",
+    "# Create Dense layer with 2 input features and 64 output values\n",
+    "dense1 = Layer_Dense(2, 64)\n",
+    "\n",
+    "# Create ReLU activation (to be used with Dense layer)\n",
+    "activation1 = Activation_ReLU()\n",
+    "\n",
+    "# Create second Dense layer with 64 input features (as we take output\n",
+    "# of previous layer here) and 3 output values (one per class)\n",
+    "dense2 = Layer_Dense(64, 3)\n",
+    "\n",
+    "# Create Softmax classifier's combined loss and activation\n",
+    "loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
+    "\n",
+    "# Create optimizer\n",
+    "optimizer = Optimizer_SGD()\n",
+    "\n",
+    "# Train in loop\n",
+    "for epoch in range(10001):\n",
+    "    # Perform a forward pass of our training data through this layer\n",
+    "    dense1.forward(X)\n",
+    "    \n",
+    "    # Perform a forward pass through activation function\n",
+    "    # takes the output of first dense layer here\n",
+    "    activation1.forward(dense1.output)\n",
+    "    \n",
+    "    # Perform a forward pass through second Dense layer\n",
+    "    # takes outputs of activation function of first layer as inputs\n",
+    "    dense2.forward(activation1.output)\n",
+    "    \n",
+    "    # Perform a forward pass through the activation/loss function\n",
+    "    # takes the output of second dense layer here and returns loss\n",
+    "    loss = loss_activation.forward(dense2.output, y)\n",
+    "    \n",
+    "    # Calculate accuracy from output of loss_activation and targets\n",
+    "    # take the argmax along the class axis (axis=1)\n",
+    "    predictions = np.argmax(loss_activation.output, axis=1)\n",
+    "    if len(y.shape) == 2:\n",
+    "        y = np.argmax(y, axis=1)\n",
+    "    accuracy = np.mean(predictions == y)\n",
+    "    \n",
+    "    if not epoch % 100:\n",
+    "        print(f'epoch: {epoch}, ' +\n",
+    "              f'acc: {accuracy:.3f}, ' +\n",
+    "              f'loss: {loss:.3f}')\n",
+    "    \n",
+    "    # Backward pass\n",
+    "    loss_activation.backward(loss_activation.output, y)\n",
+    "    dense2.backward(loss_activation.dinputs)\n",
+    "    activation1.backward(dense2.dinputs)\n",
+    "    dense1.backward(activation1.dinputs)\n",
+    "    \n",
+    "    # Update weights and biases\n",
+    "    optimizer.update_params(dense1)\n",
+    "    optimizer.update_params(dense2)\n"
+   ]
+  }
 ],
 "metadata": {
diff --git a/lecture22/handout_22.ipynb b/lecture22/handout_22.ipynb
index 6b537ae..b90ff56 100644
--- a/lecture22/handout_22.ipynb
+++ b/lecture22/handout_22.ipynb
@@ -2980,20 +2980,6 @@
     "    optimizer.update_params(dense1)\n",
     "    optimizer.update_params(dense2)\n"
    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": null,
-    "metadata": {},
-    "outputs": [],
-    "source": []
-   },
-   {
-    "cell_type": "code",
-    "execution_count": null,
-    "metadata": {},
-    "outputs": [],
-    "source": []
    }
  ],
  "metadata": {
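
As a quick sanity check on the update rule that Optimizer_SGD.update_params applies, here is a minimal standalone sketch (not part of the notebooks; the TinyLayer values and gradients are made up for illustration). Vanilla SGD steps each parameter opposite its gradient, scaled by the learning rate:

    import numpy as np

    class TinyLayer:
        def __init__(self):
            self.weights = np.array([[1.0, -2.0]])
            self.biases = np.array([[0.5]])
            # Pretend these gradients came from a backward pass
            self.dweights = np.array([[0.2, -0.4]])
            self.dbiases = np.array([[0.1]])

    layer = TinyLayer()
    learning_rate = 0.5
    # Same rule as Optimizer_SGD.update_params: param += -lr * dparam
    layer.weights += -learning_rate * layer.dweights
    layer.biases += -learning_rate * layer.dbiases
    print(layer.weights)  # [[ 0.9 -1.8]]
    print(layer.biases)   # [[0.45]]

Because the learning rate here is fixed, every epoch takes the same size step regardless of how close the network is to a minimum, which fits the noisy accuracy/loss trace in the output above; one common remedy is to decay the learning rate over training.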