diff --git a/lecture23_24/notes_23.ipynb b/lecture23_24/notes_23.ipynb
new file mode 100644
index 0000000..b52bb9e
--- /dev/null
+++ b/lecture23_24/notes_23.ipynb
@@ -0,0 +1,1365 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Previous Class Definitions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import nnfs\n",
+ "from nnfs.datasets import spiral_data, vertical_data\n",
+ "nnfs.init()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class Layer_Dense:\n",
+ "    def __init__(self, n_inputs, n_neurons):\n",
+ "        # Initialize the weights and biases\n",
+ "        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons) # Normal distribution of weights\n",
+ "        self.biases = np.zeros((1, n_neurons))\n",
+ "\n",
+ "    def forward(self, inputs):\n",
+ "        # Calculate the output values from inputs, weights, and biases\n",
+ "        self.inputs = inputs\n",
+ "        self.output = np.dot(inputs, self.weights) + self.biases # Weights are already transposed\n",
+ "\n",
+ "    def backward(self, dvalues):\n",
+ "        '''Calculates the gradient of the loss with respect to the weights and biases of this layer.\n",
+ "        dvalues is equivalent to dl_dZ with one row per sample: the gradient\n",
+ "        of the loss with respect to the outputs of this layer.'''\n",
+ "        self.dweights = np.dot(self.inputs.T, dvalues)\n",
+ "        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)\n",
+ "        self.dinputs = np.dot(dvalues, self.weights.T)\n",
+ "\n",
+ "class Activation_ReLU:\n",
+ "    def forward(self, inputs):\n",
+ "        self.inputs = inputs\n",
+ "        self.output = np.maximum(0, inputs)\n",
+ "\n",
+ "    def backward(self, dvalues):\n",
+ "        '''Calculates the gradient of the loss through this layer's activation function.\n",
+ "        dvalues is equivalent to dl_dZ with one row per sample: the gradient\n",
+ "        of the loss with respect to the outputs of this layer.'''\n",
+ "        self.dinputs = dvalues.copy()\n",
+ "        self.dinputs[self.inputs <= 0] = 0\n",
+ "\n",
+ "class Activation_Softmax:\n",
+ "    def forward(self, inputs):\n",
+ "        # Get the unnormalized probabilities\n",
+ "        # Subtract the row max to prevent overflow in exp\n",
+ "        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))\n",
+ "\n",
+ "        # Normalize the probabilities with element-wise division\n",
+ "        probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)\n",
+ "        self.output = probabilities\n",
+ "\n",
+ "# Base class for Loss functions\n",
+ "class Loss:\n",
+ "    '''Calculates the data and regularization losses given\n",
+ "    model output and ground truth values'''\n",
+ "    def calculate(self, output, y):\n",
+ "        sample_losses = self.forward(output, y)\n",
+ "        data_loss = np.average(sample_losses)\n",
+ "        return data_loss\n",
+ "\n",
+ "class Loss_CategoricalCrossEntropy(Loss):\n",
+ "    def forward(self, y_pred, y_true):\n",
+ "        '''y_pred is the neural network output\n",
+ "        y_true is the ideal output of the neural network'''\n",
+ "        samples = len(y_pred)\n",
+ "        # Bound the predicted values away from 0 and 1\n",
+ "        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)\n",
+ "\n",
+ "        if len(y_true.shape) == 1: # Categorically labeled\n",
+ "            correct_confidences = y_pred_clipped[range(samples), y_true]\n",
+ "        elif len(y_true.shape) == 2: # One-hot encoded\n",
+ "            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)\n",
+ "\n",
+ "        # Calculate the losses\n",
+ "        negative_log_likelihoods = -np.log(correct_confidences)\n",
+ "        return negative_log_likelihoods\n",
+ "\n",
+ "    def backward(self, dvalues, y_true):\n",
+ "        samples = len(dvalues)\n",
+ "\n",
+ "        # Number of labels in each sample\n",
+ "        labels = len(dvalues[0])\n",
+ "\n",
+ "        # If the labels are sparse, turn them into a one-hot vector\n",
+ "        if len(y_true.shape) == 1:\n",
+ "            y_true = np.eye(labels)[y_true]\n",
+ "\n",
+ "        # Calculate the gradient, then normalize\n",
+ "        self.dinputs = -y_true / dvalues\n",
+ "        self.dinputs = self.dinputs / samples\n",
+ "\n",
+ "class Activation_Softmax_Loss_CategoricalCrossentropy():\n",
+ "    def __init__(self):\n",
+ "        self.activation = Activation_Softmax()\n",
+ "        self.loss = Loss_CategoricalCrossEntropy()\n",
+ "\n",
+ "    def forward(self, inputs, y_true):\n",
+ "        self.activation.forward(inputs)\n",
+ "        self.output = self.activation.output\n",
+ "        return self.loss.calculate(self.output, y_true)\n",
+ "\n",
+ "    def backward(self, dvalues, y_true):\n",
+ "        samples = len(dvalues)\n",
+ "\n",
+ "        # If the samples are one-hot encoded, turn them into discrete values\n",
+ "        if len(y_true.shape) == 2:\n",
+ "            y_true = np.argmax(y_true, axis=1)\n",
+ "\n",
+ "        # Copy so we can safely modify\n",
+ "        self.dinputs = dvalues.copy()\n",
+ "\n",
+ "        # Calculate and normalize the gradient\n",
+ "        self.dinputs[range(samples), y_true] -= 1\n",
+ "        self.dinputs = self.dinputs / samples\n",
+ "\n",
+ "class Optimizer_SGD():\n",
+ "    def __init__(self, learning_rate=0.5):\n",
+ "        self.learning_rate = learning_rate\n",
+ "\n",
+ "    def update_params(self, layer):\n",
+ "        layer.weights += -self.learning_rate * layer.dweights\n",
+ "        layer.biases += -self.learning_rate * layer.dbiases"
+ ]
+ },
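+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick smoke test of the classes above on a tiny made-up batch (the values here are arbitrary, just to exercise the code): one forward pass through a dense layer, ReLU, and the combined softmax/cross-entropy loss, followed by one backward pass to populate the gradients."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Smoke test: tiny arbitrary batch with 2 features and 3 classes\n",
+ "X_test = np.array([[1.0, 2.0], [0.5, -1.0], [-1.5, 2.5]])\n",
+ "y_test = np.array([0, 1, 1])\n",
+ "\n",
+ "dense = Layer_Dense(2, 3)\n",
+ "relu = Activation_ReLU()\n",
+ "softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
+ "\n",
+ "# Forward pass\n",
+ "dense.forward(X_test)\n",
+ "relu.forward(dense.output)\n",
+ "loss = softmax_loss.forward(relu.output, y_test)\n",
+ "\n",
+ "# Backward pass fills in .dinputs, .dweights, and .dbiases\n",
+ "softmax_loss.backward(softmax_loss.output, y_test)\n",
+ "relu.backward(softmax_loss.dinputs)\n",
+ "dense.backward(relu.dinputs)\n",
+ "\n",
+ "print('loss:', loss)\n",
+ "print('dweights shape:', dense.dweights.shape) # (2, 3) = (n_inputs, n_neurons)\n",
+ "print('dbiases shape:', dense.dbiases.shape) # (1, 3)\n",
+ "print('dinputs shape:', dense.dinputs.shape) # (3, 2), same shape as X_test"
+ ]
+ },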
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Previous Notes and Notation\n",
+ "The previous notation is clunky and long. From here forward, we will use the following notation for a layer with $n$ inputs and $i$ neurons. The neuron layer is followed by an activation layer and then fed into a final value $y$ with a computed loss $l$. There can be $j$ batches of data.\n",
+ "\n",
+ "$\\vec{X_j} = \\begin{bmatrix} x_{1j} & x_{2j} & \\cdots & x_{nj} \\end{bmatrix}$ -> Row vector of the layer inputs for the $j$ batch of data.\n",
+ "\n",
+ "$\\overline{\\overline{W}} = \\begin{bmatrix} \\vec{w_{1}} \\\\ \\vec{w_{2}} \\\\ \\vdots \\\\ \\vec{w_{i}} \\end{bmatrix} = \\begin{bmatrix} w_{11} & w_{12} & \\cdots & w_{1n} \\\\ w_{21} & w_{22} & \\cdots & w_{2n} \\\\ \\vdots & \\vdots & \\ddots & \\vdots \\\\ w_{i1} & w_{i2} & \\cdots & w_{in}\\end{bmatrix}$ -> Matrix of weight values. Each row is one neuron's weights and each column holds the weights for a given input.\n",
+ "\n",
+ "$\\vec{B} = \\begin{bmatrix} b_1 & b_2 & \\cdots & b_i \\end{bmatrix}$ -> Row vector of the neuron biases.\n",
+ "\n",
+ "$\\vec{Z_j} = \\begin{bmatrix} z_{1j} & z_{2j} & \\cdots & z_{ij} \\end{bmatrix}$ -> Row vector of the neuron outputs for the $j$ batch of data.\n",
+ "\n",
+ "$\\vec{A_j} = \\begin{bmatrix} a_{1j} & a_{2j} & \\cdots & a_{ij} \\end{bmatrix}$ -> Row vector of the activation layer outputs for the $j$ batch of data.\n",
+ "\n",
+ "$y_j$ -> Final layer output for the $j$ batch of data if the layer is the final layer (could be a summation, a probability, etc.).\n",
+ "\n",
+ "$l_j$ -> Loss for the $j$ batch of data.\n",
+ "\n",
+ "The $j$ is often dropped because we typically only need to think about one set of input data.\n",
+ "\n",
+ "## Gradient Descent Using New Notation\n",
+ "We will look at the weight that the $i$ neuron applies to the $n$ input.\n",
+ "\n",
+ "$\\frac{\\delta l}{\\delta w_{in}} = \\frac{\\delta l}{\\delta y} \\frac{\\delta y}{\\delta a_i} \\frac{\\delta a_i}{\\delta z_i} \\frac{\\delta z_i}{\\delta w_{in}}$\n",
+ "\n",
+ "Similarly, for the bias of the $i$ neuron, there is\n",
+ "\n",
+ "$\\frac{\\delta l}{\\delta b_{i}} = \\frac{\\delta l}{\\delta y} \\frac{\\delta y}{\\delta a_i} \\frac{\\delta a_i}{\\delta z_i} \\frac{\\delta z_i}{\\delta b_{i}}$\n",
+ "\n",
+ "For the system we are using, where $l = (y-0)^2$ and the activation layer is ReLU, we have\n",
+ "\n",
+ "$\\frac{\\delta l}{\\delta y} = 2y$\n",
+ "\n",
+ "$\\frac{\\delta y}{\\delta a_i} = 1$\n",
+ "\n",
+ "$\\frac{\\delta a_i}{\\delta z_i} = 1$ if $z_i > 0$ else $0$\n",
+ "\n",
+ "$\\frac{\\delta z_i}{\\delta w_{in}} = x_n$\n",
+ "\n",
+ "$\\frac{\\delta z_i}{\\delta b_{i}} = 1$\n",
+ "\n",
+ "## Matrix Representation of Gradient Descent\n",
+ "We can simplify by seeing that $\\frac{\\delta l}{\\delta y} \\frac{\\delta y}{\\delta a_i} \\frac{\\delta a_i}{\\delta z_i} = \\frac{\\delta l}{\\delta z_i}$ is a common term.\n",
+ "\n",
+ "We collect the $\\frac{\\delta l}{\\delta z_i}$ terms into a $1 \\times i$ vector such that\n",
+ "\n",
+ "$\\frac{\\delta l}{\\delta \\vec{Z}} = \\begin{bmatrix} \\frac{\\delta l}{\\delta z_1} & \\frac{\\delta l}{\\delta z_2} & \\cdots & \\frac{\\delta l}{\\delta z_i} \\end{bmatrix}$\n",
+ "\n",
+ "We then get that the gradient matrix for all weights is an $i \\times n$ matrix. Since $\\frac{\\delta z_i}{\\delta w_{in}} = x_n$ for every neuron, it is the outer product\n",
+ "\n",
+ "$\\frac{\\delta l}{\\delta \\overline{\\overline{W}}} = \\begin{bmatrix} \\frac{\\delta l}{\\delta w_{11}} & \\frac{\\delta l}{\\delta w_{12}} & \\cdots & \\frac{\\delta l}{\\delta w_{1n}} \\\\ \\frac{\\delta l}{\\delta w_{21}} & \\frac{\\delta l}{\\delta w_{22}} & \\cdots & \\frac{\\delta l}{\\delta w_{2n}} \\\\ \\vdots & \\vdots & \\ddots & \\vdots \\\\ \\frac{\\delta l}{\\delta w_{i1}} & \\frac{\\delta l}{\\delta w_{i2}} & \\cdots & \\frac{\\delta l}{\\delta w_{in}} \\end{bmatrix} = \\begin{bmatrix} \\frac{\\delta l}{\\delta z_1} \\\\ \\frac{\\delta l}{\\delta z_2} \\\\ \\vdots \\\\ \\frac{\\delta l}{\\delta z_i} \\end{bmatrix} \\begin{bmatrix} x_1 & x_2 & \\cdots & x_n \\end{bmatrix} = \\left(\\frac{\\delta l}{\\delta \\vec{Z}}\\right)^T \\vec{X}$\n",
+ "\n",
+ "Similarly, since $\\frac{\\delta z_i}{\\delta b_i} = 1$, the gradient vector for the biases is given by\n",
+ "\n",
+ "$\\frac{\\delta l}{\\delta \\vec{B}} = \\frac{\\delta l}{\\delta \\vec{Z}} \\frac{\\delta \\vec{Z}}{\\delta \\vec{B}} = \\begin{bmatrix} \\frac{\\delta l}{\\delta z_1} & \\frac{\\delta l}{\\delta z_2} & \\cdots & \\frac{\\delta l}{\\delta z_i} \\end{bmatrix} = \\frac{\\delta l}{\\delta \\vec{Z}}$\n",
+ "\n",
+ "## Gradients of the Loss with Respect to Inputs\n",
+ "When chaining multiple layers together, we also need the partial derivatives of the loss with respect to a layer's inputs (i.e., the outputs of the previous layer). This involves an extra summation because each input is fed into every neuron of the layer, so the contributions through all of the neurons must be added up.\n",
+ "\n",
+ "The gradient of the loss with respect to the $n$ input fed into $i$ neurons is\n",
+ "\n",
+ "$\\frac{\\delta l}{\\delta x_n} = \\frac{\\delta l}{\\delta z_1} \\frac{\\delta z_1}{\\delta x_n} + \\frac{\\delta l}{\\delta z_2} \\frac{\\delta z_2}{\\delta x_n} + \\cdots + \\frac{\\delta l}{\\delta z_i} \\frac{\\delta z_i}{\\delta x_n}$\n",
+ "\n",
+ "Noting that $\\frac{\\delta z_i}{\\delta x_n} = w_{in}$ allows us to write\n",
+ "\n",
+ "$\\frac{\\delta l}{\\delta \\vec{X}} = \\begin{bmatrix} \\frac{\\delta l}{\\delta x_1} & \\frac{\\delta l}{\\delta x_2} & \\cdots & \\frac{\\delta l}{\\delta x_n} \\end{bmatrix} = \\begin{bmatrix} \\frac{\\delta l}{\\delta z_1} & \\frac{\\delta l}{\\delta z_2} & \\cdots & \\frac{\\delta l}{\\delta z_i} \\end{bmatrix} \\begin{bmatrix} w_{11} & w_{12} & \\cdots & w_{1n} \\\\ w_{21} & w_{22} & \\cdots & w_{2n} \\\\ \\vdots & \\vdots & \\ddots & \\vdots \\\\ w_{i1} & w_{i2} & \\cdots & w_{in} \\end{bmatrix} = \\frac{\\delta l}{\\delta \\vec{Z}} \\overline{\\overline{W}}$\n",
+ "\n",
+ "## Note on the Layer_Dense Class\n",
+ "The Layer_Dense class stores the weights transposed relative to this notation for forward propagation. Therefore, the weight matrix must be transposed for backpropagation.\n",
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Learning Rate Decay\n",
+ "Start with a larger learning rate and gradually decrease it:\n",
+ "\n",
+ "$\\alpha = \\frac{\\alpha_0}{1 + \\text{decay} \\cdot t}$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class Optimizer_SGD():\n",
+ "    def __init__(self, learning_rate=0.5, decay=0.0):\n",
+ "        self.initial_rate = learning_rate\n",
+ "        self.current_learning_rate = self.initial_rate\n",
+ "        self.decay = decay\n",
+ "        self.iterations = 0\n",
+ "\n",
+ "    def pre_update_params(self):\n",
+ "        # Update the current_learning_rate before updating params\n",
+ "        if self.decay:\n",
+ "            self.current_learning_rate = self.initial_rate / (1 + self.decay * self.iterations)\n",
+ "\n",
+ "    def update_params(self, layer):\n",
+ "        layer.weights += -self.current_learning_rate * layer.dweights\n",
+ "        layer.biases += -self.current_learning_rate * layer.dbiases\n",
+ "\n",
+ "    def post_update_params(self):\n",
+ "        # Increment self.iterations for use with decay\n",
+ "        self.iterations += 1"
+ ]
+ },
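+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To see what this schedule does before training with it, here is a small sketch (using the same hyperparameters as the training run below, $\\alpha_0 = 1.0$ and decay $= 10^{-3}$): the learning rate halves by iteration $1/\\text{decay} = 1000$ and keeps shrinking from there."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrate the decay schedule without training anything\n",
+ "demo_optimizer = Optimizer_SGD(learning_rate=1.0, decay=1e-3)\n",
+ "for iteration in [0, 1, 10, 100, 1000, 10000]:\n",
+ "    demo_optimizer.iterations = iteration\n",
+ "    demo_optimizer.pre_update_params()\n",
+ "    print(f'iteration {iteration}: learning rate = {demo_optimizer.current_learning_rate:.4f}')"
+ ]
+ },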
"epoch: 4800, acc: 0.537, loss: 0.878\n", + "epoch: 4900, acc: 0.537, loss: 0.875\n", + "epoch: 5000, acc: 0.547, loss: 0.871\n", + "epoch: 5100, acc: 0.543, loss: 0.868\n", + "epoch: 5200, acc: 0.547, loss: 0.865\n", + "epoch: 5300, acc: 0.550, loss: 0.861\n", + "epoch: 5400, acc: 0.553, loss: 0.857\n", + "epoch: 5500, acc: 0.557, loss: 0.854\n", + "epoch: 5600, acc: 0.567, loss: 0.850\n", + "epoch: 5700, acc: 0.563, loss: 0.846\n", + "epoch: 5800, acc: 0.567, loss: 0.843\n", + "epoch: 5900, acc: 0.567, loss: 0.840\n", + "epoch: 6000, acc: 0.567, loss: 0.837\n", + "epoch: 6100, acc: 0.583, loss: 0.833\n", + "epoch: 6200, acc: 0.587, loss: 0.830\n", + "epoch: 6300, acc: 0.593, loss: 0.827\n", + "epoch: 6400, acc: 0.600, loss: 0.824\n", + "epoch: 6500, acc: 0.600, loss: 0.821\n", + "epoch: 6600, acc: 0.597, loss: 0.818\n", + "epoch: 6700, acc: 0.597, loss: 0.815\n", + "epoch: 6800, acc: 0.597, loss: 0.812\n", + "epoch: 6900, acc: 0.597, loss: 0.809\n", + "epoch: 7000, acc: 0.603, loss: 0.805\n", + "epoch: 7100, acc: 0.603, loss: 0.803\n", + "epoch: 7200, acc: 0.610, loss: 0.800\n", + "epoch: 7300, acc: 0.607, loss: 0.797\n", + "epoch: 7400, acc: 0.613, loss: 0.794\n", + "epoch: 7500, acc: 0.617, loss: 0.792\n", + "epoch: 7600, acc: 0.620, loss: 0.788\n", + "epoch: 7700, acc: 0.620, loss: 0.785\n", + "epoch: 7800, acc: 0.620, loss: 0.783\n", + "epoch: 7900, acc: 0.627, loss: 0.780\n", + "epoch: 8000, acc: 0.627, loss: 0.779\n", + "epoch: 8100, acc: 0.627, loss: 0.775\n", + "epoch: 8200, acc: 0.627, loss: 0.772\n", + "epoch: 8300, acc: 0.630, loss: 0.769\n", + "epoch: 8400, acc: 0.630, loss: 0.765\n", + "epoch: 8500, acc: 0.633, loss: 0.762\n", + "epoch: 8600, acc: 0.630, loss: 0.759\n", + "epoch: 8700, acc: 0.643, loss: 0.755\n", + "epoch: 8800, acc: 0.647, loss: 0.751\n", + "epoch: 8900, acc: 0.647, loss: 0.748\n", + "epoch: 9000, acc: 0.647, loss: 0.743\n", + "epoch: 9100, acc: 0.647, loss: 0.740\n", + "epoch: 9200, acc: 0.647, loss: 0.736\n", + "epoch: 9300, acc: 0.650, loss: 0.732\n", + "epoch: 9400, acc: 0.650, loss: 0.729\n", + "epoch: 9500, acc: 0.647, loss: 0.725\n", + "epoch: 9600, acc: 0.650, loss: 0.722\n", + "epoch: 9700, acc: 0.653, loss: 0.718\n", + "epoch: 9800, acc: 0.657, loss: 0.714\n", + "epoch: 9900, acc: 0.660, loss: 0.711\n", + "epoch: 10000, acc: 0.660, loss: 0.708\n", + "epoch: 10100, acc: 0.663, loss: 0.705\n", + "epoch: 10200, acc: 0.670, loss: 0.701\n", + "epoch: 10300, acc: 0.677, loss: 0.698\n", + "epoch: 10400, acc: 0.680, loss: 0.695\n", + "epoch: 10500, acc: 0.683, loss: 0.692\n", + "epoch: 10600, acc: 0.683, loss: 0.689\n", + "epoch: 10700, acc: 0.680, loss: 0.686\n", + "epoch: 10800, acc: 0.690, loss: 0.683\n", + "epoch: 10900, acc: 0.690, loss: 0.680\n", + "epoch: 11000, acc: 0.690, loss: 0.677\n", + "epoch: 11100, acc: 0.697, loss: 0.674\n", + "epoch: 11200, acc: 0.707, loss: 0.671\n", + "epoch: 11300, acc: 0.707, loss: 0.668\n", + "epoch: 11400, acc: 0.713, loss: 0.665\n", + "epoch: 11500, acc: 0.713, loss: 0.662\n", + "epoch: 11600, acc: 0.717, loss: 0.658\n", + "epoch: 11700, acc: 0.720, loss: 0.655\n", + "epoch: 11800, acc: 0.720, loss: 0.654\n", + "epoch: 11900, acc: 0.727, loss: 0.650\n", + "epoch: 12000, acc: 0.733, loss: 0.647\n", + "epoch: 12100, acc: 0.733, loss: 0.644\n", + "epoch: 12200, acc: 0.733, loss: 0.641\n", + "epoch: 12300, acc: 0.733, loss: 0.639\n", + "epoch: 12400, acc: 0.733, loss: 0.637\n", + "epoch: 12500, acc: 0.727, loss: 0.634\n", + "epoch: 12600, acc: 0.723, loss: 0.630\n", + "epoch: 12700, acc: 0.733, loss: 0.627\n", + 
"epoch: 12800, acc: 0.733, loss: 0.626\n", + "epoch: 12900, acc: 0.737, loss: 0.622\n", + "epoch: 13000, acc: 0.737, loss: 0.619\n", + "epoch: 13100, acc: 0.740, loss: 0.617\n", + "epoch: 13200, acc: 0.743, loss: 0.614\n", + "epoch: 13300, acc: 0.743, loss: 0.611\n", + "epoch: 13400, acc: 0.740, loss: 0.608\n", + "epoch: 13500, acc: 0.740, loss: 0.606\n", + "epoch: 13600, acc: 0.737, loss: 0.604\n", + "epoch: 13700, acc: 0.743, loss: 0.602\n", + "epoch: 13800, acc: 0.747, loss: 0.600\n", + "epoch: 13900, acc: 0.747, loss: 0.598\n", + "epoch: 14000, acc: 0.757, loss: 0.596\n", + "epoch: 14100, acc: 0.753, loss: 0.594\n", + "epoch: 14200, acc: 0.760, loss: 0.592\n", + "epoch: 14300, acc: 0.760, loss: 0.590\n", + "epoch: 14400, acc: 0.757, loss: 0.589\n", + "epoch: 14500, acc: 0.763, loss: 0.587\n", + "epoch: 14600, acc: 0.763, loss: 0.585\n", + "epoch: 14700, acc: 0.773, loss: 0.584\n", + "epoch: 14800, acc: 0.777, loss: 0.582\n", + "epoch: 14900, acc: 0.777, loss: 0.580\n", + "epoch: 15000, acc: 0.783, loss: 0.579\n", + "epoch: 15100, acc: 0.783, loss: 0.577\n", + "epoch: 15200, acc: 0.783, loss: 0.576\n", + "epoch: 15300, acc: 0.790, loss: 0.575\n", + "epoch: 15400, acc: 0.787, loss: 0.573\n", + "epoch: 15500, acc: 0.790, loss: 0.572\n", + "epoch: 15600, acc: 0.790, loss: 0.570\n", + "epoch: 15700, acc: 0.790, loss: 0.569\n", + "epoch: 15800, acc: 0.793, loss: 0.568\n", + "epoch: 15900, acc: 0.793, loss: 0.566\n", + "epoch: 16000, acc: 0.793, loss: 0.565\n", + "epoch: 16100, acc: 0.793, loss: 0.564\n", + "epoch: 16200, acc: 0.790, loss: 0.562\n", + "epoch: 16300, acc: 0.790, loss: 0.561\n", + "epoch: 16400, acc: 0.790, loss: 0.560\n", + "epoch: 16500, acc: 0.790, loss: 0.559\n", + "epoch: 16600, acc: 0.793, loss: 0.558\n", + "epoch: 16700, acc: 0.793, loss: 0.557\n", + "epoch: 16800, acc: 0.793, loss: 0.555\n", + "epoch: 16900, acc: 0.797, loss: 0.554\n", + "epoch: 17000, acc: 0.797, loss: 0.553\n", + "epoch: 17100, acc: 0.797, loss: 0.552\n", + "epoch: 17200, acc: 0.797, loss: 0.551\n", + "epoch: 17300, acc: 0.797, loss: 0.550\n", + "epoch: 17400, acc: 0.797, loss: 0.549\n", + "epoch: 17500, acc: 0.797, loss: 0.548\n", + "epoch: 17600, acc: 0.797, loss: 0.547\n", + "epoch: 17700, acc: 0.797, loss: 0.546\n", + "epoch: 17800, acc: 0.797, loss: 0.545\n", + "epoch: 17900, acc: 0.793, loss: 0.544\n", + "epoch: 18000, acc: 0.790, loss: 0.543\n", + "epoch: 18100, acc: 0.793, loss: 0.542\n", + "epoch: 18200, acc: 0.793, loss: 0.542\n", + "epoch: 18300, acc: 0.793, loss: 0.541\n", + "epoch: 18400, acc: 0.793, loss: 0.540\n", + "epoch: 18500, acc: 0.793, loss: 0.539\n", + "epoch: 18600, acc: 0.793, loss: 0.538\n", + "epoch: 18700, acc: 0.790, loss: 0.537\n", + "epoch: 18800, acc: 0.793, loss: 0.536\n", + "epoch: 18900, acc: 0.793, loss: 0.535\n", + "epoch: 19000, acc: 0.793, loss: 0.535\n", + "epoch: 19100, acc: 0.793, loss: 0.534\n", + "epoch: 19200, acc: 0.793, loss: 0.533\n", + "epoch: 19300, acc: 0.793, loss: 0.532\n", + "epoch: 19400, acc: 0.793, loss: 0.531\n", + "epoch: 19500, acc: 0.793, loss: 0.531\n", + "epoch: 19600, acc: 0.793, loss: 0.530\n", + "epoch: 19700, acc: 0.793, loss: 0.529\n", + "epoch: 19800, acc: 0.793, loss: 0.528\n", + "epoch: 19900, acc: 0.793, loss: 0.528\n", + "epoch: 20000, acc: 0.793, loss: 0.527\n", + "epoch: 20100, acc: 0.797, loss: 0.526\n", + "epoch: 20200, acc: 0.797, loss: 0.525\n", + "epoch: 20300, acc: 0.797, loss: 0.525\n", + "epoch: 20400, acc: 0.797, loss: 0.524\n", + "epoch: 20500, acc: 0.797, loss: 0.523\n", + "epoch: 20600, acc: 0.797, loss: 0.523\n", + 
"epoch: 20700, acc: 0.797, loss: 0.522\n", + "epoch: 20800, acc: 0.800, loss: 0.521\n", + "epoch: 20900, acc: 0.800, loss: 0.521\n", + "epoch: 21000, acc: 0.800, loss: 0.520\n", + "epoch: 21100, acc: 0.800, loss: 0.519\n", + "epoch: 21200, acc: 0.800, loss: 0.519\n", + "epoch: 21300, acc: 0.800, loss: 0.518\n", + "epoch: 21400, acc: 0.800, loss: 0.518\n", + "epoch: 21500, acc: 0.800, loss: 0.517\n", + "epoch: 21600, acc: 0.800, loss: 0.516\n", + "epoch: 21700, acc: 0.800, loss: 0.516\n", + "epoch: 21800, acc: 0.800, loss: 0.515\n", + "epoch: 21900, acc: 0.800, loss: 0.515\n", + "epoch: 22000, acc: 0.800, loss: 0.514\n", + "epoch: 22100, acc: 0.797, loss: 0.513\n", + "epoch: 22200, acc: 0.800, loss: 0.513\n", + "epoch: 22300, acc: 0.797, loss: 0.512\n", + "epoch: 22400, acc: 0.797, loss: 0.512\n", + "epoch: 22500, acc: 0.797, loss: 0.511\n", + "epoch: 22600, acc: 0.797, loss: 0.510\n", + "epoch: 22700, acc: 0.797, loss: 0.510\n", + "epoch: 22800, acc: 0.797, loss: 0.509\n", + "epoch: 22900, acc: 0.797, loss: 0.509\n", + "epoch: 23000, acc: 0.797, loss: 0.508\n", + "epoch: 23100, acc: 0.800, loss: 0.508\n", + "epoch: 23200, acc: 0.797, loss: 0.507\n", + "epoch: 23300, acc: 0.797, loss: 0.507\n", + "epoch: 23400, acc: 0.797, loss: 0.506\n", + "epoch: 23500, acc: 0.797, loss: 0.506\n", + "epoch: 23600, acc: 0.800, loss: 0.505\n", + "epoch: 23700, acc: 0.803, loss: 0.505\n", + "epoch: 23800, acc: 0.803, loss: 0.504\n", + "epoch: 23900, acc: 0.800, loss: 0.504\n", + "epoch: 24000, acc: 0.803, loss: 0.503\n", + "epoch: 24100, acc: 0.807, loss: 0.503\n", + "epoch: 24200, acc: 0.807, loss: 0.502\n", + "epoch: 24300, acc: 0.807, loss: 0.502\n", + "epoch: 24400, acc: 0.807, loss: 0.501\n", + "epoch: 24500, acc: 0.807, loss: 0.501\n", + "epoch: 24600, acc: 0.807, loss: 0.500\n", + "epoch: 24700, acc: 0.807, loss: 0.500\n", + "epoch: 24800, acc: 0.807, loss: 0.499\n", + "epoch: 24900, acc: 0.807, loss: 0.499\n", + "epoch: 25000, acc: 0.807, loss: 0.498\n", + "epoch: 25100, acc: 0.807, loss: 0.498\n", + "epoch: 25200, acc: 0.807, loss: 0.497\n", + "epoch: 25300, acc: 0.807, loss: 0.497\n", + "epoch: 25400, acc: 0.807, loss: 0.497\n", + "epoch: 25500, acc: 0.803, loss: 0.496\n", + "epoch: 25600, acc: 0.803, loss: 0.496\n", + "epoch: 25700, acc: 0.803, loss: 0.495\n", + "epoch: 25800, acc: 0.803, loss: 0.495\n", + "epoch: 25900, acc: 0.803, loss: 0.494\n", + "epoch: 26000, acc: 0.800, loss: 0.494\n", + "epoch: 26100, acc: 0.803, loss: 0.493\n", + "epoch: 26200, acc: 0.800, loss: 0.493\n", + "epoch: 26300, acc: 0.800, loss: 0.492\n", + "epoch: 26400, acc: 0.800, loss: 0.492\n", + "epoch: 26500, acc: 0.800, loss: 0.492\n", + "epoch: 26600, acc: 0.800, loss: 0.491\n", + "epoch: 26700, acc: 0.803, loss: 0.491\n", + "epoch: 26800, acc: 0.800, loss: 0.490\n", + "epoch: 26900, acc: 0.800, loss: 0.490\n", + "epoch: 27000, acc: 0.800, loss: 0.490\n", + "epoch: 27100, acc: 0.803, loss: 0.489\n", + "epoch: 27200, acc: 0.803, loss: 0.489\n", + "epoch: 27300, acc: 0.803, loss: 0.488\n", + "epoch: 27400, acc: 0.803, loss: 0.488\n", + "epoch: 27500, acc: 0.803, loss: 0.488\n", + "epoch: 27600, acc: 0.803, loss: 0.487\n", + "epoch: 27700, acc: 0.803, loss: 0.487\n", + "epoch: 27800, acc: 0.803, loss: 0.487\n", + "epoch: 27900, acc: 0.803, loss: 0.486\n", + "epoch: 28000, acc: 0.803, loss: 0.486\n", + "epoch: 28100, acc: 0.803, loss: 0.485\n", + "epoch: 28200, acc: 0.803, loss: 0.485\n", + "epoch: 28300, acc: 0.803, loss: 0.485\n", + "epoch: 28400, acc: 0.803, loss: 0.484\n", + "epoch: 28500, acc: 0.803, loss: 0.484\n", + 
"epoch: 28600, acc: 0.803, loss: 0.484\n", + "epoch: 28700, acc: 0.800, loss: 0.483\n", + "epoch: 28800, acc: 0.803, loss: 0.483\n", + "epoch: 28900, acc: 0.800, loss: 0.483\n", + "epoch: 29000, acc: 0.800, loss: 0.482\n", + "epoch: 29100, acc: 0.800, loss: 0.482\n", + "epoch: 29200, acc: 0.800, loss: 0.482\n", + "epoch: 29300, acc: 0.800, loss: 0.481\n", + "epoch: 29400, acc: 0.800, loss: 0.481\n", + "epoch: 29500, acc: 0.800, loss: 0.481\n", + "epoch: 29600, acc: 0.800, loss: 0.480\n", + "epoch: 29700, acc: 0.800, loss: 0.480\n", + "epoch: 29800, acc: 0.800, loss: 0.480\n", + "epoch: 29900, acc: 0.800, loss: 0.479\n", + "epoch: 30000, acc: 0.800, loss: 0.479\n", + "epoch: 30100, acc: 0.800, loss: 0.479\n", + "epoch: 30200, acc: 0.800, loss: 0.478\n", + "epoch: 30300, acc: 0.800, loss: 0.478\n", + "epoch: 30400, acc: 0.800, loss: 0.478\n", + "epoch: 30500, acc: 0.800, loss: 0.477\n", + "epoch: 30600, acc: 0.800, loss: 0.477\n", + "epoch: 30700, acc: 0.800, loss: 0.477\n", + "epoch: 30800, acc: 0.800, loss: 0.476\n", + "epoch: 30900, acc: 0.800, loss: 0.476\n", + "epoch: 31000, acc: 0.800, loss: 0.476\n", + "epoch: 31100, acc: 0.800, loss: 0.476\n", + "epoch: 31200, acc: 0.800, loss: 0.475\n", + "epoch: 31300, acc: 0.800, loss: 0.475\n", + "epoch: 31400, acc: 0.800, loss: 0.475\n", + "epoch: 31500, acc: 0.800, loss: 0.474\n", + "epoch: 31600, acc: 0.800, loss: 0.474\n", + "epoch: 31700, acc: 0.800, loss: 0.474\n", + "epoch: 31800, acc: 0.800, loss: 0.473\n", + "epoch: 31900, acc: 0.800, loss: 0.473\n", + "epoch: 32000, acc: 0.800, loss: 0.473\n", + "epoch: 32100, acc: 0.800, loss: 0.473\n", + "epoch: 32200, acc: 0.800, loss: 0.472\n", + "epoch: 32300, acc: 0.800, loss: 0.472\n", + "epoch: 32400, acc: 0.800, loss: 0.472\n", + "epoch: 32500, acc: 0.800, loss: 0.471\n", + "epoch: 32600, acc: 0.800, loss: 0.471\n", + "epoch: 32700, acc: 0.800, loss: 0.471\n", + "epoch: 32800, acc: 0.800, loss: 0.471\n", + "epoch: 32900, acc: 0.800, loss: 0.470\n", + "epoch: 33000, acc: 0.800, loss: 0.470\n", + "epoch: 33100, acc: 0.800, loss: 0.470\n", + "epoch: 33200, acc: 0.800, loss: 0.469\n", + "epoch: 33300, acc: 0.800, loss: 0.469\n", + "epoch: 33400, acc: 0.800, loss: 0.469\n", + "epoch: 33500, acc: 0.800, loss: 0.469\n", + "epoch: 33600, acc: 0.800, loss: 0.468\n", + "epoch: 33700, acc: 0.800, loss: 0.468\n", + "epoch: 33800, acc: 0.800, loss: 0.468\n", + "epoch: 33900, acc: 0.800, loss: 0.468\n", + "epoch: 34000, acc: 0.800, loss: 0.467\n", + "epoch: 34100, acc: 0.800, loss: 0.467\n", + "epoch: 34200, acc: 0.800, loss: 0.467\n", + "epoch: 34300, acc: 0.800, loss: 0.467\n", + "epoch: 34400, acc: 0.800, loss: 0.466\n", + "epoch: 34500, acc: 0.800, loss: 0.466\n", + "epoch: 34600, acc: 0.800, loss: 0.466\n", + "epoch: 34700, acc: 0.800, loss: 0.465\n", + "epoch: 34800, acc: 0.800, loss: 0.465\n", + "epoch: 34900, acc: 0.800, loss: 0.465\n", + "epoch: 35000, acc: 0.800, loss: 0.464\n", + "epoch: 35100, acc: 0.800, loss: 0.464\n", + "epoch: 35200, acc: 0.800, loss: 0.464\n", + "epoch: 35300, acc: 0.800, loss: 0.464\n", + "epoch: 35400, acc: 0.800, loss: 0.463\n", + "epoch: 35500, acc: 0.800, loss: 0.463\n", + "epoch: 35600, acc: 0.800, loss: 0.463\n", + "epoch: 35700, acc: 0.800, loss: 0.462\n", + "epoch: 35800, acc: 0.800, loss: 0.462\n", + "epoch: 35900, acc: 0.800, loss: 0.462\n", + "epoch: 36000, acc: 0.800, loss: 0.462\n", + "epoch: 36100, acc: 0.800, loss: 0.461\n", + "epoch: 36200, acc: 0.800, loss: 0.461\n", + "epoch: 36300, acc: 0.800, loss: 0.461\n", + "epoch: 36400, acc: 0.800, loss: 0.460\n", + 
"epoch: 36500, acc: 0.800, loss: 0.460\n", + "epoch: 36600, acc: 0.800, loss: 0.460\n", + "epoch: 36700, acc: 0.800, loss: 0.460\n", + "epoch: 36800, acc: 0.800, loss: 0.459\n", + "epoch: 36900, acc: 0.800, loss: 0.459\n", + "epoch: 37000, acc: 0.800, loss: 0.459\n", + "epoch: 37100, acc: 0.800, loss: 0.458\n", + "epoch: 37200, acc: 0.797, loss: 0.458\n", + "epoch: 37300, acc: 0.800, loss: 0.458\n", + "epoch: 37400, acc: 0.800, loss: 0.457\n", + "epoch: 37500, acc: 0.800, loss: 0.457\n", + "epoch: 37600, acc: 0.800, loss: 0.457\n", + "epoch: 37700, acc: 0.800, loss: 0.456\n", + "epoch: 37800, acc: 0.800, loss: 0.456\n", + "epoch: 37900, acc: 0.800, loss: 0.456\n", + "epoch: 38000, acc: 0.800, loss: 0.455\n", + "epoch: 38100, acc: 0.800, loss: 0.455\n", + "epoch: 38200, acc: 0.800, loss: 0.455\n", + "epoch: 38300, acc: 0.800, loss: 0.454\n", + "epoch: 38400, acc: 0.800, loss: 0.454\n", + "epoch: 38500, acc: 0.800, loss: 0.454\n", + "epoch: 38600, acc: 0.800, loss: 0.454\n", + "epoch: 38700, acc: 0.800, loss: 0.453\n", + "epoch: 38800, acc: 0.800, loss: 0.453\n", + "epoch: 38900, acc: 0.800, loss: 0.453\n", + "epoch: 39000, acc: 0.800, loss: 0.453\n", + "epoch: 39100, acc: 0.800, loss: 0.452\n", + "epoch: 39200, acc: 0.800, loss: 0.452\n", + "epoch: 39300, acc: 0.800, loss: 0.452\n", + "epoch: 39400, acc: 0.800, loss: 0.451\n", + "epoch: 39500, acc: 0.800, loss: 0.451\n", + "epoch: 39600, acc: 0.800, loss: 0.451\n", + "epoch: 39700, acc: 0.803, loss: 0.451\n", + "epoch: 39800, acc: 0.807, loss: 0.450\n", + "epoch: 39900, acc: 0.807, loss: 0.450\n", + "epoch: 40000, acc: 0.807, loss: 0.450\n", + "epoch: 40100, acc: 0.807, loss: 0.450\n", + "epoch: 40200, acc: 0.807, loss: 0.449\n", + "epoch: 40300, acc: 0.807, loss: 0.449\n", + "epoch: 40400, acc: 0.807, loss: 0.449\n", + "epoch: 40500, acc: 0.803, loss: 0.449\n", + "epoch: 40600, acc: 0.803, loss: 0.448\n", + "epoch: 40700, acc: 0.803, loss: 0.448\n", + "epoch: 40800, acc: 0.803, loss: 0.448\n", + "epoch: 40900, acc: 0.807, loss: 0.448\n", + "epoch: 41000, acc: 0.807, loss: 0.447\n", + "epoch: 41100, acc: 0.807, loss: 0.447\n", + "epoch: 41200, acc: 0.807, loss: 0.447\n", + "epoch: 41300, acc: 0.807, loss: 0.447\n", + "epoch: 41400, acc: 0.807, loss: 0.446\n", + "epoch: 41500, acc: 0.807, loss: 0.446\n", + "epoch: 41600, acc: 0.810, loss: 0.446\n", + "epoch: 41700, acc: 0.807, loss: 0.446\n", + "epoch: 41800, acc: 0.807, loss: 0.446\n", + "epoch: 41900, acc: 0.807, loss: 0.445\n", + "epoch: 42000, acc: 0.807, loss: 0.445\n", + "epoch: 42100, acc: 0.807, loss: 0.445\n", + "epoch: 42200, acc: 0.807, loss: 0.445\n", + "epoch: 42300, acc: 0.807, loss: 0.444\n", + "epoch: 42400, acc: 0.807, loss: 0.444\n", + "epoch: 42500, acc: 0.807, loss: 0.444\n", + "epoch: 42600, acc: 0.807, loss: 0.444\n", + "epoch: 42700, acc: 0.807, loss: 0.444\n", + "epoch: 42800, acc: 0.807, loss: 0.443\n", + "epoch: 42900, acc: 0.807, loss: 0.443\n", + "epoch: 43000, acc: 0.807, loss: 0.443\n", + "epoch: 43100, acc: 0.807, loss: 0.443\n", + "epoch: 43200, acc: 0.807, loss: 0.443\n", + "epoch: 43300, acc: 0.807, loss: 0.442\n", + "epoch: 43400, acc: 0.807, loss: 0.442\n", + "epoch: 43500, acc: 0.807, loss: 0.442\n", + "epoch: 43600, acc: 0.807, loss: 0.442\n", + "epoch: 43700, acc: 0.807, loss: 0.442\n", + "epoch: 43800, acc: 0.807, loss: 0.441\n", + "epoch: 43900, acc: 0.807, loss: 0.441\n", + "epoch: 44000, acc: 0.807, loss: 0.441\n", + "epoch: 44100, acc: 0.807, loss: 0.441\n", + "epoch: 44200, acc: 0.807, loss: 0.441\n", + "epoch: 44300, acc: 0.807, loss: 0.440\n", + 
"epoch: 44400, acc: 0.807, loss: 0.440\n", + "epoch: 44500, acc: 0.807, loss: 0.440\n", + "epoch: 44600, acc: 0.807, loss: 0.440\n", + "epoch: 44700, acc: 0.807, loss: 0.440\n", + "epoch: 44800, acc: 0.807, loss: 0.439\n", + "epoch: 44900, acc: 0.810, loss: 0.439\n", + "epoch: 45000, acc: 0.807, loss: 0.439\n", + "epoch: 45100, acc: 0.807, loss: 0.439\n", + "epoch: 45200, acc: 0.810, loss: 0.439\n", + "epoch: 45300, acc: 0.810, loss: 0.438\n", + "epoch: 45400, acc: 0.810, loss: 0.438\n", + "epoch: 45500, acc: 0.810, loss: 0.438\n", + "epoch: 45600, acc: 0.810, loss: 0.438\n", + "epoch: 45700, acc: 0.810, loss: 0.438\n", + "epoch: 45800, acc: 0.810, loss: 0.437\n", + "epoch: 45900, acc: 0.810, loss: 0.437\n", + "epoch: 46000, acc: 0.810, loss: 0.437\n", + "epoch: 46100, acc: 0.810, loss: 0.437\n", + "epoch: 46200, acc: 0.810, loss: 0.437\n", + "epoch: 46300, acc: 0.810, loss: 0.437\n", + "epoch: 46400, acc: 0.810, loss: 0.436\n", + "epoch: 46500, acc: 0.810, loss: 0.436\n", + "epoch: 46600, acc: 0.810, loss: 0.436\n", + "epoch: 46700, acc: 0.810, loss: 0.436\n", + "epoch: 46800, acc: 0.810, loss: 0.436\n", + "epoch: 46900, acc: 0.810, loss: 0.435\n", + "epoch: 47000, acc: 0.810, loss: 0.435\n", + "epoch: 47100, acc: 0.810, loss: 0.435\n", + "epoch: 47200, acc: 0.813, loss: 0.435\n", + "epoch: 47300, acc: 0.813, loss: 0.435\n", + "epoch: 47400, acc: 0.813, loss: 0.435\n", + "epoch: 47500, acc: 0.813, loss: 0.434\n", + "epoch: 47600, acc: 0.813, loss: 0.434\n", + "epoch: 47700, acc: 0.813, loss: 0.434\n", + "epoch: 47800, acc: 0.813, loss: 0.434\n", + "epoch: 47900, acc: 0.817, loss: 0.434\n", + "epoch: 48000, acc: 0.817, loss: 0.433\n", + "epoch: 48100, acc: 0.817, loss: 0.433\n", + "epoch: 48200, acc: 0.813, loss: 0.433\n", + "epoch: 48300, acc: 0.817, loss: 0.433\n", + "epoch: 48400, acc: 0.817, loss: 0.433\n", + "epoch: 48500, acc: 0.813, loss: 0.433\n", + "epoch: 48600, acc: 0.817, loss: 0.432\n", + "epoch: 48700, acc: 0.817, loss: 0.432\n", + "epoch: 48800, acc: 0.817, loss: 0.432\n", + "epoch: 48900, acc: 0.817, loss: 0.432\n", + "epoch: 49000, acc: 0.817, loss: 0.432\n", + "epoch: 49100, acc: 0.817, loss: 0.432\n", + "epoch: 49200, acc: 0.817, loss: 0.431\n", + "epoch: 49300, acc: 0.817, loss: 0.431\n", + "epoch: 49400, acc: 0.817, loss: 0.431\n", + "epoch: 49500, acc: 0.817, loss: 0.431\n", + "epoch: 49600, acc: 0.817, loss: 0.431\n", + "epoch: 49700, acc: 0.817, loss: 0.431\n", + "epoch: 49800, acc: 0.817, loss: 0.430\n", + "epoch: 49900, acc: 0.817, loss: 0.430\n", + "epoch: 50000, acc: 0.817, loss: 0.430\n", + "epoch: 50100, acc: 0.820, loss: 0.430\n", + "epoch: 50200, acc: 0.820, loss: 0.430\n", + "epoch: 50300, acc: 0.820, loss: 0.429\n", + "epoch: 50400, acc: 0.820, loss: 0.429\n", + "epoch: 50500, acc: 0.820, loss: 0.429\n", + "epoch: 50600, acc: 0.820, loss: 0.429\n", + "epoch: 50700, acc: 0.820, loss: 0.429\n", + "epoch: 50800, acc: 0.820, loss: 0.429\n", + "epoch: 50900, acc: 0.820, loss: 0.428\n", + "epoch: 51000, acc: 0.820, loss: 0.428\n", + "epoch: 51100, acc: 0.820, loss: 0.428\n", + "epoch: 51200, acc: 0.820, loss: 0.428\n", + "epoch: 51300, acc: 0.820, loss: 0.428\n", + "epoch: 51400, acc: 0.820, loss: 0.428\n", + "epoch: 51500, acc: 0.820, loss: 0.427\n", + "epoch: 51600, acc: 0.820, loss: 0.427\n", + "epoch: 51700, acc: 0.820, loss: 0.427\n", + "epoch: 51800, acc: 0.820, loss: 0.427\n", + "epoch: 51900, acc: 0.820, loss: 0.427\n", + "epoch: 52000, acc: 0.820, loss: 0.427\n", + "epoch: 52100, acc: 0.820, loss: 0.426\n", + "epoch: 52200, acc: 0.820, loss: 0.426\n", + 
"epoch: 52300, acc: 0.820, loss: 0.426\n", + "epoch: 52400, acc: 0.820, loss: 0.426\n", + "epoch: 52500, acc: 0.820, loss: 0.426\n", + "epoch: 52600, acc: 0.820, loss: 0.426\n", + "epoch: 52700, acc: 0.820, loss: 0.426\n", + "epoch: 52800, acc: 0.820, loss: 0.425\n", + "epoch: 52900, acc: 0.820, loss: 0.425\n", + "epoch: 53000, acc: 0.820, loss: 0.425\n", + "epoch: 53100, acc: 0.820, loss: 0.425\n", + "epoch: 53200, acc: 0.820, loss: 0.425\n", + "epoch: 53300, acc: 0.820, loss: 0.425\n", + "epoch: 53400, acc: 0.820, loss: 0.424\n", + "epoch: 53500, acc: 0.820, loss: 0.424\n", + "epoch: 53600, acc: 0.820, loss: 0.424\n", + "epoch: 53700, acc: 0.820, loss: 0.424\n", + "epoch: 53800, acc: 0.820, loss: 0.424\n", + "epoch: 53900, acc: 0.820, loss: 0.424\n", + "epoch: 54000, acc: 0.820, loss: 0.424\n", + "epoch: 54100, acc: 0.820, loss: 0.423\n", + "epoch: 54200, acc: 0.820, loss: 0.423\n", + "epoch: 54300, acc: 0.820, loss: 0.423\n", + "epoch: 54400, acc: 0.820, loss: 0.423\n", + "epoch: 54500, acc: 0.820, loss: 0.423\n", + "epoch: 54600, acc: 0.823, loss: 0.423\n", + "epoch: 54700, acc: 0.823, loss: 0.422\n", + "epoch: 54800, acc: 0.823, loss: 0.422\n", + "epoch: 54900, acc: 0.823, loss: 0.422\n", + "epoch: 55000, acc: 0.823, loss: 0.422\n", + "epoch: 55100, acc: 0.823, loss: 0.422\n", + "epoch: 55200, acc: 0.823, loss: 0.422\n", + "epoch: 55300, acc: 0.823, loss: 0.422\n", + "epoch: 55400, acc: 0.823, loss: 0.421\n", + "epoch: 55500, acc: 0.823, loss: 0.421\n", + "epoch: 55600, acc: 0.823, loss: 0.421\n", + "epoch: 55700, acc: 0.823, loss: 0.421\n", + "epoch: 55800, acc: 0.823, loss: 0.421\n", + "epoch: 55900, acc: 0.823, loss: 0.421\n", + "epoch: 56000, acc: 0.823, loss: 0.421\n", + "epoch: 56100, acc: 0.823, loss: 0.420\n", + "epoch: 56200, acc: 0.823, loss: 0.420\n", + "epoch: 56300, acc: 0.823, loss: 0.420\n", + "epoch: 56400, acc: 0.823, loss: 0.420\n", + "epoch: 56500, acc: 0.823, loss: 0.420\n", + "epoch: 56600, acc: 0.823, loss: 0.420\n", + "epoch: 56700, acc: 0.823, loss: 0.420\n", + "epoch: 56800, acc: 0.823, loss: 0.419\n", + "epoch: 56900, acc: 0.820, loss: 0.419\n", + "epoch: 57000, acc: 0.820, loss: 0.419\n", + "epoch: 57100, acc: 0.820, loss: 0.419\n", + "epoch: 57200, acc: 0.820, loss: 0.419\n", + "epoch: 57300, acc: 0.820, loss: 0.419\n", + "epoch: 57400, acc: 0.820, loss: 0.419\n", + "epoch: 57500, acc: 0.820, loss: 0.418\n", + "epoch: 57600, acc: 0.820, loss: 0.418\n", + "epoch: 57700, acc: 0.820, loss: 0.418\n", + "epoch: 57800, acc: 0.820, loss: 0.418\n", + "epoch: 57900, acc: 0.820, loss: 0.418\n", + "epoch: 58000, acc: 0.820, loss: 0.418\n", + "epoch: 58100, acc: 0.820, loss: 0.418\n", + "epoch: 58200, acc: 0.820, loss: 0.417\n", + "epoch: 58300, acc: 0.820, loss: 0.417\n", + "epoch: 58400, acc: 0.820, loss: 0.417\n", + "epoch: 58500, acc: 0.820, loss: 0.417\n", + "epoch: 58600, acc: 0.820, loss: 0.417\n", + "epoch: 58700, acc: 0.823, loss: 0.417\n", + "epoch: 58800, acc: 0.823, loss: 0.417\n", + "epoch: 58900, acc: 0.823, loss: 0.417\n", + "epoch: 59000, acc: 0.823, loss: 0.416\n", + "epoch: 59100, acc: 0.823, loss: 0.416\n", + "epoch: 59200, acc: 0.823, loss: 0.416\n", + "epoch: 59300, acc: 0.823, loss: 0.416\n", + "epoch: 59400, acc: 0.823, loss: 0.416\n", + "epoch: 59500, acc: 0.823, loss: 0.416\n", + "epoch: 59600, acc: 0.823, loss: 0.416\n", + "epoch: 59700, acc: 0.823, loss: 0.415\n", + "epoch: 59800, acc: 0.823, loss: 0.415\n", + "epoch: 59900, acc: 0.823, loss: 0.415\n", + "epoch: 60000, acc: 0.823, loss: 0.415\n", + "epoch: 60100, acc: 0.823, loss: 0.415\n", + 
"epoch: 60200, acc: 0.823, loss: 0.415\n", + "epoch: 60300, acc: 0.820, loss: 0.415\n", + "epoch: 60400, acc: 0.820, loss: 0.415\n", + "epoch: 60500, acc: 0.820, loss: 0.414\n", + "epoch: 60600, acc: 0.820, loss: 0.414\n", + "epoch: 60700, acc: 0.823, loss: 0.414\n", + "epoch: 60800, acc: 0.820, loss: 0.414\n", + "epoch: 60900, acc: 0.823, loss: 0.414\n", + "epoch: 61000, acc: 0.823, loss: 0.414\n", + "epoch: 61100, acc: 0.823, loss: 0.414\n", + "epoch: 61200, acc: 0.823, loss: 0.414\n", + "epoch: 61300, acc: 0.827, loss: 0.413\n", + "epoch: 61400, acc: 0.827, loss: 0.413\n", + "epoch: 61500, acc: 0.827, loss: 0.413\n", + "epoch: 61600, acc: 0.827, loss: 0.413\n", + "epoch: 61700, acc: 0.827, loss: 0.413\n", + "epoch: 61800, acc: 0.827, loss: 0.413\n", + "epoch: 61900, acc: 0.827, loss: 0.413\n", + "epoch: 62000, acc: 0.827, loss: 0.413\n", + "epoch: 62100, acc: 0.827, loss: 0.412\n", + "epoch: 62200, acc: 0.827, loss: 0.412\n", + "epoch: 62300, acc: 0.827, loss: 0.412\n", + "epoch: 62400, acc: 0.827, loss: 0.412\n", + "epoch: 62500, acc: 0.827, loss: 0.412\n", + "epoch: 62600, acc: 0.827, loss: 0.412\n", + "epoch: 62700, acc: 0.827, loss: 0.412\n", + "epoch: 62800, acc: 0.827, loss: 0.412\n", + "epoch: 62900, acc: 0.827, loss: 0.412\n", + "epoch: 63000, acc: 0.827, loss: 0.411\n", + "epoch: 63100, acc: 0.827, loss: 0.411\n", + "epoch: 63200, acc: 0.827, loss: 0.411\n", + "epoch: 63300, acc: 0.827, loss: 0.411\n", + "epoch: 63400, acc: 0.827, loss: 0.411\n", + "epoch: 63500, acc: 0.830, loss: 0.411\n", + "epoch: 63600, acc: 0.830, loss: 0.411\n", + "epoch: 63700, acc: 0.830, loss: 0.411\n", + "epoch: 63800, acc: 0.830, loss: 0.410\n", + "epoch: 63900, acc: 0.830, loss: 0.410\n", + "epoch: 64000, acc: 0.830, loss: 0.410\n", + "epoch: 64100, acc: 0.830, loss: 0.410\n", + "epoch: 64200, acc: 0.830, loss: 0.410\n", + "epoch: 64300, acc: 0.830, loss: 0.410\n", + "epoch: 64400, acc: 0.830, loss: 0.410\n", + "epoch: 64500, acc: 0.830, loss: 0.410\n", + "epoch: 64600, acc: 0.830, loss: 0.410\n", + "epoch: 64700, acc: 0.833, loss: 0.409\n", + "epoch: 64800, acc: 0.833, loss: 0.409\n", + "epoch: 64900, acc: 0.833, loss: 0.409\n", + "epoch: 65000, acc: 0.833, loss: 0.409\n", + "epoch: 65100, acc: 0.833, loss: 0.409\n", + "epoch: 65200, acc: 0.833, loss: 0.409\n", + "epoch: 65300, acc: 0.833, loss: 0.409\n", + "epoch: 65400, acc: 0.833, loss: 0.409\n", + "epoch: 65500, acc: 0.837, loss: 0.409\n", + "epoch: 65600, acc: 0.837, loss: 0.408\n", + "epoch: 65700, acc: 0.837, loss: 0.408\n", + "epoch: 65800, acc: 0.837, loss: 0.408\n", + "epoch: 65900, acc: 0.837, loss: 0.408\n", + "epoch: 66000, acc: 0.837, loss: 0.408\n", + "epoch: 66100, acc: 0.837, loss: 0.408\n", + "epoch: 66200, acc: 0.833, loss: 0.408\n", + "epoch: 66300, acc: 0.833, loss: 0.408\n", + "epoch: 66400, acc: 0.833, loss: 0.408\n", + "epoch: 66500, acc: 0.833, loss: 0.408\n", + "epoch: 66600, acc: 0.833, loss: 0.407\n", + "epoch: 66700, acc: 0.833, loss: 0.407\n", + "epoch: 66800, acc: 0.833, loss: 0.407\n", + "epoch: 66900, acc: 0.833, loss: 0.407\n", + "epoch: 67000, acc: 0.830, loss: 0.407\n", + "epoch: 67100, acc: 0.833, loss: 0.407\n", + "epoch: 67200, acc: 0.830, loss: 0.407\n", + "epoch: 67300, acc: 0.830, loss: 0.407\n", + "epoch: 67400, acc: 0.830, loss: 0.407\n", + "epoch: 67500, acc: 0.830, loss: 0.406\n", + "epoch: 67600, acc: 0.830, loss: 0.406\n", + "epoch: 67700, acc: 0.830, loss: 0.406\n", + "epoch: 67800, acc: 0.830, loss: 0.406\n", + "epoch: 67900, acc: 0.830, loss: 0.406\n", + "epoch: 68000, acc: 0.830, loss: 0.406\n", + 
"epoch: 68100, acc: 0.830, loss: 0.406\n", + "epoch: 68200, acc: 0.830, loss: 0.406\n", + "epoch: 68300, acc: 0.830, loss: 0.406\n", + "epoch: 68400, acc: 0.833, loss: 0.406\n", + "epoch: 68500, acc: 0.833, loss: 0.405\n", + "epoch: 68600, acc: 0.830, loss: 0.405\n", + "epoch: 68700, acc: 0.833, loss: 0.405\n", + "epoch: 68800, acc: 0.837, loss: 0.405\n", + "epoch: 68900, acc: 0.837, loss: 0.405\n", + "epoch: 69000, acc: 0.837, loss: 0.405\n", + "epoch: 69100, acc: 0.837, loss: 0.405\n", + "epoch: 69200, acc: 0.837, loss: 0.405\n", + "epoch: 69300, acc: 0.837, loss: 0.405\n", + "epoch: 69400, acc: 0.833, loss: 0.405\n", + "epoch: 69500, acc: 0.837, loss: 0.404\n", + "epoch: 69600, acc: 0.837, loss: 0.404\n", + "epoch: 69700, acc: 0.837, loss: 0.404\n", + "epoch: 69800, acc: 0.837, loss: 0.404\n", + "epoch: 69900, acc: 0.837, loss: 0.404\n", + "epoch: 70000, acc: 0.837, loss: 0.404\n", + "epoch: 70100, acc: 0.837, loss: 0.404\n", + "epoch: 70200, acc: 0.837, loss: 0.404\n", + "epoch: 70300, acc: 0.840, loss: 0.404\n", + "epoch: 70400, acc: 0.840, loss: 0.404\n", + "epoch: 70500, acc: 0.840, loss: 0.404\n", + "epoch: 70600, acc: 0.840, loss: 0.403\n", + "epoch: 70700, acc: 0.840, loss: 0.403\n", + "epoch: 70800, acc: 0.840, loss: 0.403\n", + "epoch: 70900, acc: 0.840, loss: 0.403\n", + "epoch: 71000, acc: 0.840, loss: 0.403\n", + "epoch: 71100, acc: 0.840, loss: 0.403\n", + "epoch: 71200, acc: 0.840, loss: 0.403\n", + "epoch: 71300, acc: 0.840, loss: 0.403\n", + "epoch: 71400, acc: 0.840, loss: 0.403\n", + "epoch: 71500, acc: 0.840, loss: 0.403\n", + "epoch: 71600, acc: 0.840, loss: 0.402\n", + "epoch: 71700, acc: 0.840, loss: 0.402\n", + "epoch: 71800, acc: 0.840, loss: 0.402\n", + "epoch: 71900, acc: 0.840, loss: 0.402\n", + "epoch: 72000, acc: 0.840, loss: 0.402\n", + "epoch: 72100, acc: 0.840, loss: 0.402\n", + "epoch: 72200, acc: 0.840, loss: 0.402\n", + "epoch: 72300, acc: 0.840, loss: 0.402\n", + "epoch: 72400, acc: 0.840, loss: 0.402\n", + "epoch: 72500, acc: 0.840, loss: 0.402\n", + "epoch: 72600, acc: 0.840, loss: 0.401\n", + "epoch: 72700, acc: 0.840, loss: 0.401\n", + "epoch: 72800, acc: 0.840, loss: 0.401\n", + "epoch: 72900, acc: 0.840, loss: 0.401\n", + "epoch: 73000, acc: 0.840, loss: 0.401\n", + "epoch: 73100, acc: 0.840, loss: 0.401\n", + "epoch: 73200, acc: 0.840, loss: 0.401\n", + "epoch: 73300, acc: 0.840, loss: 0.401\n", + "epoch: 73400, acc: 0.840, loss: 0.401\n", + "epoch: 73500, acc: 0.840, loss: 0.401\n", + "epoch: 73600, acc: 0.840, loss: 0.401\n", + "epoch: 73700, acc: 0.840, loss: 0.400\n", + "epoch: 73800, acc: 0.840, loss: 0.400\n", + "epoch: 73900, acc: 0.840, loss: 0.400\n", + "epoch: 74000, acc: 0.837, loss: 0.400\n", + "epoch: 74100, acc: 0.837, loss: 0.400\n", + "epoch: 74200, acc: 0.837, loss: 0.400\n", + "epoch: 74300, acc: 0.837, loss: 0.400\n", + "epoch: 74400, acc: 0.837, loss: 0.400\n", + "epoch: 74500, acc: 0.837, loss: 0.400\n", + "epoch: 74600, acc: 0.833, loss: 0.399\n", + "epoch: 74700, acc: 0.833, loss: 0.399\n", + "epoch: 74800, acc: 0.833, loss: 0.399\n", + "epoch: 74900, acc: 0.837, loss: 0.399\n", + "epoch: 75000, acc: 0.837, loss: 0.399\n", + "epoch: 75100, acc: 0.837, loss: 0.399\n", + "epoch: 75200, acc: 0.837, loss: 0.399\n", + "epoch: 75300, acc: 0.837, loss: 0.399\n", + "epoch: 75400, acc: 0.837, loss: 0.399\n", + "epoch: 75500, acc: 0.837, loss: 0.399\n", + "epoch: 75600, acc: 0.837, loss: 0.398\n", + "epoch: 75700, acc: 0.837, loss: 0.398\n", + "epoch: 75800, acc: 0.837, loss: 0.398\n", + "epoch: 75900, acc: 0.837, loss: 0.398\n", + 
"epoch: 76000, acc: 0.837, loss: 0.398\n", + "epoch: 76100, acc: 0.837, loss: 0.398\n", + "epoch: 76200, acc: 0.837, loss: 0.398\n", + "epoch: 76300, acc: 0.837, loss: 0.398\n", + "epoch: 76400, acc: 0.837, loss: 0.398\n", + "epoch: 76500, acc: 0.837, loss: 0.398\n", + "epoch: 76600, acc: 0.837, loss: 0.397\n", + "epoch: 76700, acc: 0.837, loss: 0.397\n", + "epoch: 76800, acc: 0.837, loss: 0.397\n", + "epoch: 76900, acc: 0.837, loss: 0.397\n", + "epoch: 77000, acc: 0.837, loss: 0.397\n", + "epoch: 77100, acc: 0.837, loss: 0.397\n", + "epoch: 77200, acc: 0.837, loss: 0.397\n", + "epoch: 77300, acc: 0.837, loss: 0.397\n", + "epoch: 77400, acc: 0.837, loss: 0.397\n", + "epoch: 77500, acc: 0.837, loss: 0.397\n", + "epoch: 77600, acc: 0.837, loss: 0.397\n", + "epoch: 77700, acc: 0.837, loss: 0.396\n", + "epoch: 77800, acc: 0.837, loss: 0.396\n", + "epoch: 77900, acc: 0.837, loss: 0.396\n", + "epoch: 78000, acc: 0.837, loss: 0.396\n", + "epoch: 78100, acc: 0.837, loss: 0.396\n", + "epoch: 78200, acc: 0.837, loss: 0.396\n", + "epoch: 78300, acc: 0.837, loss: 0.396\n", + "epoch: 78400, acc: 0.837, loss: 0.396\n", + "epoch: 78500, acc: 0.837, loss: 0.396\n", + "epoch: 78600, acc: 0.837, loss: 0.396\n", + "epoch: 78700, acc: 0.837, loss: 0.396\n", + "epoch: 78800, acc: 0.837, loss: 0.396\n", + "epoch: 78900, acc: 0.837, loss: 0.395\n", + "epoch: 79000, acc: 0.837, loss: 0.395\n", + "epoch: 79100, acc: 0.837, loss: 0.395\n", + "epoch: 79200, acc: 0.837, loss: 0.395\n", + "epoch: 79300, acc: 0.837, loss: 0.395\n", + "epoch: 79400, acc: 0.837, loss: 0.395\n", + "epoch: 79500, acc: 0.837, loss: 0.395\n", + "epoch: 79600, acc: 0.837, loss: 0.395\n", + "epoch: 79700, acc: 0.837, loss: 0.395\n", + "epoch: 79800, acc: 0.837, loss: 0.395\n", + "epoch: 79900, acc: 0.837, loss: 0.395\n", + "epoch: 80000, acc: 0.837, loss: 0.394\n", + "epoch: 80100, acc: 0.837, loss: 0.394\n", + "epoch: 80200, acc: 0.837, loss: 0.394\n", + "epoch: 80300, acc: 0.837, loss: 0.394\n", + "epoch: 80400, acc: 0.837, loss: 0.394\n", + "epoch: 80500, acc: 0.837, loss: 0.394\n", + "epoch: 80600, acc: 0.837, loss: 0.394\n", + "epoch: 80700, acc: 0.837, loss: 0.394\n", + "epoch: 80800, acc: 0.837, loss: 0.394\n", + "epoch: 80900, acc: 0.837, loss: 0.394\n", + "epoch: 81000, acc: 0.837, loss: 0.394\n", + "epoch: 81100, acc: 0.837, loss: 0.394\n", + "epoch: 81200, acc: 0.837, loss: 0.393\n", + "epoch: 81300, acc: 0.837, loss: 0.393\n", + "epoch: 81400, acc: 0.837, loss: 0.393\n", + "epoch: 81500, acc: 0.837, loss: 0.393\n", + "epoch: 81600, acc: 0.837, loss: 0.393\n", + "epoch: 81700, acc: 0.837, loss: 0.393\n", + "epoch: 81800, acc: 0.837, loss: 0.393\n", + "epoch: 81900, acc: 0.837, loss: 0.393\n", + "epoch: 82000, acc: 0.837, loss: 0.393\n", + "epoch: 82100, acc: 0.837, loss: 0.393\n", + "epoch: 82200, acc: 0.837, loss: 0.393\n", + "epoch: 82300, acc: 0.837, loss: 0.393\n", + "epoch: 82400, acc: 0.837, loss: 0.392\n", + "epoch: 82500, acc: 0.837, loss: 0.392\n", + "epoch: 82600, acc: 0.837, loss: 0.392\n", + "epoch: 82700, acc: 0.837, loss: 0.392\n", + "epoch: 82800, acc: 0.837, loss: 0.392\n", + "epoch: 82900, acc: 0.837, loss: 0.392\n", + "epoch: 83000, acc: 0.837, loss: 0.392\n", + "epoch: 83100, acc: 0.837, loss: 0.392\n", + "epoch: 83200, acc: 0.837, loss: 0.392\n", + "epoch: 83300, acc: 0.837, loss: 0.392\n", + "epoch: 83400, acc: 0.837, loss: 0.392\n", + "epoch: 83500, acc: 0.837, loss: 0.392\n", + "epoch: 83600, acc: 0.837, loss: 0.392\n", + "epoch: 83700, acc: 0.837, loss: 0.391\n", + "epoch: 83800, acc: 0.837, loss: 0.391\n", + 
"epoch: 83900, acc: 0.837, loss: 0.391\n", + "epoch: 84000, acc: 0.837, loss: 0.391\n", + "epoch: 84100, acc: 0.837, loss: 0.391\n", + "epoch: 84200, acc: 0.837, loss: 0.391\n", + "epoch: 84300, acc: 0.837, loss: 0.391\n", + "epoch: 84400, acc: 0.837, loss: 0.391\n", + "epoch: 84500, acc: 0.837, loss: 0.391\n", + "epoch: 84600, acc: 0.837, loss: 0.391\n", + "epoch: 84700, acc: 0.837, loss: 0.391\n", + "epoch: 84800, acc: 0.837, loss: 0.391\n", + "epoch: 84900, acc: 0.837, loss: 0.391\n", + "epoch: 85000, acc: 0.837, loss: 0.390\n", + "epoch: 85100, acc: 0.837, loss: 0.390\n", + "epoch: 85200, acc: 0.837, loss: 0.390\n", + "epoch: 85300, acc: 0.837, loss: 0.390\n", + "epoch: 85400, acc: 0.837, loss: 0.390\n", + "epoch: 85500, acc: 0.837, loss: 0.390\n", + "epoch: 85600, acc: 0.837, loss: 0.390\n", + "epoch: 85700, acc: 0.837, loss: 0.390\n", + "epoch: 85800, acc: 0.837, loss: 0.390\n", + "epoch: 85900, acc: 0.837, loss: 0.390\n", + "epoch: 86000, acc: 0.837, loss: 0.390\n", + "epoch: 86100, acc: 0.837, loss: 0.390\n", + "epoch: 86200, acc: 0.837, loss: 0.390\n", + "epoch: 86300, acc: 0.837, loss: 0.390\n", + "epoch: 86400, acc: 0.837, loss: 0.389\n", + "epoch: 86500, acc: 0.837, loss: 0.389\n", + "epoch: 86600, acc: 0.837, loss: 0.389\n", + "epoch: 86700, acc: 0.837, loss: 0.389\n", + "epoch: 86800, acc: 0.837, loss: 0.389\n", + "epoch: 86900, acc: 0.837, loss: 0.389\n", + "epoch: 87000, acc: 0.837, loss: 0.389\n", + "epoch: 87100, acc: 0.837, loss: 0.389\n", + "epoch: 87200, acc: 0.837, loss: 0.389\n", + "epoch: 87300, acc: 0.837, loss: 0.389\n", + "epoch: 87400, acc: 0.837, loss: 0.389\n", + "epoch: 87500, acc: 0.837, loss: 0.389\n", + "epoch: 87600, acc: 0.837, loss: 0.389\n", + "epoch: 87700, acc: 0.837, loss: 0.389\n", + "epoch: 87800, acc: 0.837, loss: 0.388\n", + "epoch: 87900, acc: 0.837, loss: 0.388\n", + "epoch: 88000, acc: 0.837, loss: 0.388\n", + "epoch: 88100, acc: 0.837, loss: 0.388\n", + "epoch: 88200, acc: 0.837, loss: 0.388\n", + "epoch: 88300, acc: 0.837, loss: 0.388\n", + "epoch: 88400, acc: 0.837, loss: 0.388\n", + "epoch: 88500, acc: 0.837, loss: 0.388\n", + "epoch: 88600, acc: 0.837, loss: 0.388\n", + "epoch: 88700, acc: 0.837, loss: 0.388\n", + "epoch: 88800, acc: 0.837, loss: 0.388\n", + "epoch: 88900, acc: 0.837, loss: 0.388\n", + "epoch: 89000, acc: 0.837, loss: 0.388\n", + "epoch: 89100, acc: 0.837, loss: 0.388\n", + "epoch: 89200, acc: 0.837, loss: 0.388\n", + "epoch: 89300, acc: 0.837, loss: 0.387\n", + "epoch: 89400, acc: 0.837, loss: 0.387\n", + "epoch: 89500, acc: 0.837, loss: 0.387\n", + "epoch: 89600, acc: 0.837, loss: 0.387\n", + "epoch: 89700, acc: 0.837, loss: 0.387\n", + "epoch: 89800, acc: 0.837, loss: 0.387\n", + "epoch: 89900, acc: 0.837, loss: 0.387\n", + "epoch: 90000, acc: 0.837, loss: 0.387\n", + "epoch: 90100, acc: 0.837, loss: 0.387\n", + "epoch: 90200, acc: 0.837, loss: 0.387\n", + "epoch: 90300, acc: 0.837, loss: 0.387\n", + "epoch: 90400, acc: 0.840, loss: 0.387\n", + "epoch: 90500, acc: 0.840, loss: 0.387\n", + "epoch: 90600, acc: 0.840, loss: 0.387\n", + "epoch: 90700, acc: 0.843, loss: 0.387\n", + "epoch: 90800, acc: 0.843, loss: 0.386\n", + "epoch: 90900, acc: 0.843, loss: 0.386\n", + "epoch: 91000, acc: 0.843, loss: 0.386\n", + "epoch: 91100, acc: 0.843, loss: 0.386\n", + "epoch: 91200, acc: 0.843, loss: 0.386\n", + "epoch: 91300, acc: 0.843, loss: 0.386\n", + "epoch: 91400, acc: 0.843, loss: 0.386\n", + "epoch: 91500, acc: 0.843, loss: 0.386\n", + "epoch: 91600, acc: 0.843, loss: 0.386\n", + "epoch: 91700, acc: 0.843, loss: 0.386\n", + 
"epoch: 91800, acc: 0.843, loss: 0.386\n", + "epoch: 91900, acc: 0.843, loss: 0.386\n", + "epoch: 92000, acc: 0.843, loss: 0.386\n", + "epoch: 92100, acc: 0.843, loss: 0.386\n", + "epoch: 92200, acc: 0.840, loss: 0.386\n", + "epoch: 92300, acc: 0.840, loss: 0.386\n", + "epoch: 92400, acc: 0.840, loss: 0.385\n", + "epoch: 92500, acc: 0.840, loss: 0.385\n", + "epoch: 92600, acc: 0.840, loss: 0.385\n", + "epoch: 92700, acc: 0.840, loss: 0.385\n", + "epoch: 92800, acc: 0.840, loss: 0.385\n", + "epoch: 92900, acc: 0.840, loss: 0.385\n", + "epoch: 93000, acc: 0.840, loss: 0.385\n", + "epoch: 93100, acc: 0.840, loss: 0.385\n", + "epoch: 93200, acc: 0.840, loss: 0.385\n", + "epoch: 93300, acc: 0.840, loss: 0.385\n", + "epoch: 93400, acc: 0.840, loss: 0.385\n", + "epoch: 93500, acc: 0.840, loss: 0.385\n", + "epoch: 93600, acc: 0.840, loss: 0.385\n", + "epoch: 93700, acc: 0.840, loss: 0.385\n", + "epoch: 93800, acc: 0.840, loss: 0.385\n", + "epoch: 93900, acc: 0.840, loss: 0.385\n", + "epoch: 94000, acc: 0.840, loss: 0.384\n", + "epoch: 94100, acc: 0.840, loss: 0.384\n", + "epoch: 94200, acc: 0.840, loss: 0.384\n", + "epoch: 94300, acc: 0.840, loss: 0.384\n", + "epoch: 94400, acc: 0.840, loss: 0.384\n", + "epoch: 94500, acc: 0.840, loss: 0.384\n", + "epoch: 94600, acc: 0.840, loss: 0.384\n", + "epoch: 94700, acc: 0.840, loss: 0.384\n", + "epoch: 94800, acc: 0.840, loss: 0.384\n", + "epoch: 94900, acc: 0.840, loss: 0.384\n", + "epoch: 95000, acc: 0.840, loss: 0.384\n", + "epoch: 95100, acc: 0.840, loss: 0.384\n", + "epoch: 95200, acc: 0.840, loss: 0.384\n", + "epoch: 95300, acc: 0.840, loss: 0.384\n", + "epoch: 95400, acc: 0.840, loss: 0.384\n", + "epoch: 95500, acc: 0.840, loss: 0.384\n", + "epoch: 95600, acc: 0.840, loss: 0.384\n", + "epoch: 95700, acc: 0.840, loss: 0.383\n", + "epoch: 95800, acc: 0.840, loss: 0.383\n", + "epoch: 95900, acc: 0.840, loss: 0.383\n", + "epoch: 96000, acc: 0.840, loss: 0.383\n", + "epoch: 96100, acc: 0.840, loss: 0.383\n", + "epoch: 96200, acc: 0.840, loss: 0.383\n", + "epoch: 96300, acc: 0.840, loss: 0.383\n", + "epoch: 96400, acc: 0.840, loss: 0.383\n", + "epoch: 96500, acc: 0.840, loss: 0.383\n", + "epoch: 96600, acc: 0.840, loss: 0.383\n", + "epoch: 96700, acc: 0.840, loss: 0.383\n", + "epoch: 96800, acc: 0.840, loss: 0.383\n", + "epoch: 96900, acc: 0.840, loss: 0.383\n", + "epoch: 97000, acc: 0.840, loss: 0.383\n", + "epoch: 97100, acc: 0.840, loss: 0.383\n", + "epoch: 97200, acc: 0.840, loss: 0.383\n", + "epoch: 97300, acc: 0.840, loss: 0.383\n", + "epoch: 97400, acc: 0.840, loss: 0.382\n", + "epoch: 97500, acc: 0.840, loss: 0.382\n", + "epoch: 97600, acc: 0.840, loss: 0.382\n", + "epoch: 97700, acc: 0.840, loss: 0.382\n", + "epoch: 97800, acc: 0.840, loss: 0.382\n", + "epoch: 97900, acc: 0.840, loss: 0.382\n", + "epoch: 98000, acc: 0.840, loss: 0.382\n", + "epoch: 98100, acc: 0.843, loss: 0.382\n", + "epoch: 98200, acc: 0.843, loss: 0.382\n", + "epoch: 98300, acc: 0.843, loss: 0.382\n", + "epoch: 98400, acc: 0.843, loss: 0.382\n", + "epoch: 98500, acc: 0.843, loss: 0.382\n", + "epoch: 98600, acc: 0.843, loss: 0.382\n", + "epoch: 98700, acc: 0.843, loss: 0.382\n", + "epoch: 98800, acc: 0.843, loss: 0.382\n", + "epoch: 98900, acc: 0.843, loss: 0.382\n", + "epoch: 99000, acc: 0.843, loss: 0.382\n", + "epoch: 99100, acc: 0.843, loss: 0.382\n", + "epoch: 99200, acc: 0.843, loss: 0.381\n", + "epoch: 99300, acc: 0.843, loss: 0.381\n", + "epoch: 99400, acc: 0.843, loss: 0.381\n", + "epoch: 99500, acc: 0.843, loss: 0.381\n", + "epoch: 99600, acc: 0.843, loss: 0.381\n", + 
"epoch: 99700, acc: 0.843, loss: 0.381\n", + "epoch: 99800, acc: 0.843, loss: 0.381\n", + "epoch: 99900, acc: 0.843, loss: 0.381\n", + "epoch: 100000, acc: 0.843, loss: 0.381\n" + ] + } + ], + "source": [ + "# Create dataset\n", + "X, y = spiral_data(samples=100, classes=3)\n", + "\n", + "# Create Dense layer with 2 input features and 64 output values\n", + "dense1 = Layer_Dense(2, 64)\n", + "\n", + "# Create ReLU activation (to be used with Dense layer)\n", + "activation1 = Activation_ReLU()\n", + "\n", + "# Create second Dense layer with 64 input features (as we take output\n", + "# of previous layer here) and 3 output values (output values)\n", + "dense2 = Layer_Dense(64, 3)\n", + "\n", + "# Create Softmax classifier's combined loss and activation\n", + "loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n", + "\n", + "# Create optimizer\n", + "optimizer = Optimizer_SGD(learning_rate=1.0, decay=1e-3)\n", + "\n", + "# Train in loop\n", + "for epoch in range(10001):\n", + " # Perform a forward pass of our training data through this layer\n", + " dense1.forward(X)\n", + " \n", + " # Perform a forward pass through activation function\n", + " # takes the output of first dense layer here\n", + " activation1.forward(dense1.output)\n", + " \n", + " # Perform a forward pass through second Dense layer\n", + " # takes outputs of activation function of first layer as inputs\n", + " dense2.forward(activation1.output)\n", + " \n", + " # Perform a forward pass through the activation/loss function\n", + " # takes the output of second dense layer here and returns loss\n", + " loss = loss_activation.forward(dense2.output, y)\n", + " \n", + " # Calculate accuracy from output of activation2 and targets\n", + " # calculate values along first axis\n", + " predictions = np.argmax(loss_activation.output, axis=1)\n", + " if len(y.shape) == 2:\n", + " y = np.argmax(y, axis=1)\n", + " accuracy = np.mean(predictions == y)\n", + " \n", + " if not epoch % 100:\n", + " print(f'epoch: {epoch}, ' +\n", + " f'acc: {accuracy:.3f}, ' +\n", + " f'loss: {loss:.3f}')\n", + " \n", + " # Backward pass\n", + " loss_activation.backward(loss_activation.output, y)\n", + " dense2.backward(loss_activation.dinputs)\n", + " activation1.backward(dense2.dinputs)\n", + " dense1.backward(activation1.dinputs)\n", + " \n", + " # Update weights and biases\n", + " optimizer.pre_update_params()\n", + " optimizer.update_params(dense1)\n", + " optimizer.update_params(dense2)\n", + " optimizer.post_update_params()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}