"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(X[:, 0], X[:, 1], c=y, cmap='brg')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "9xpEuZM9Fjme"
},
"source": [
"\n",
"\n",
"The neural network will not be aware of the color differences as the data have no class encodings
"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "m9vYNDymFjme"
},
"source": [
"\n",
"DENSE LAYER CLASS \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_bHtuDc1Fjme",
"outputId": "0e533278-4141-4e66-cb18-7e6c49a338dc"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.0000000e+00 0.0000000e+00 0.0000000e+00]\n",
" [-1.0475188e-04 1.1395361e-04 -4.7983500e-05]\n",
" [-2.7414842e-04 3.1729150e-04 -8.6921798e-05]\n",
" [-4.2188365e-04 5.2666257e-04 -5.5912682e-05]\n",
" [-5.7707680e-04 7.1401405e-04 -8.9430439e-05]]\n"
]
}
],
"source": [
"import numpy as np\n",
"import nnfs\n",
"from nnfs.datasets import spiral_data\n",
"nnfs.init()\n",
"# Dense layer\n",
"class Layer_Dense:\n",
" # Layer initialization\n",
" def __init__(self, n_inputs, n_neurons):\n",
" # Initialize weights and biases\n",
" self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)\n",
" self.biases = np.zeros((1, n_neurons))\n",
"\n",
" # Forward pass\n",
" def forward(self, inputs):\n",
" # Calculate output values from inputs, weights and biases\n",
" self.output = np.dot(inputs, self.weights) + self.biases\n",
"\n",
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"# Create Dense layer with 2 input features and 3 output values\n",
"dense1 = Layer_Dense(2, 3)\n",
"# Perform a forward pass of our training data through this layer\n",
"dense1.forward(X)\n",
"\n",
"\n",
"# Let's see output of the first few samples:\n",
"print(dense1.output[:5])\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gFxBpF1ZFjme"
},
"source": [
"\n",
"ACTIVATION FUNCTION: RELU \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rGjvDHswFjme",
"outputId": "13209c8a-ec91-456b-b459-d02c32fce102"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0. 2. 0. 3.3 0. 1.1 2.2 0. ]\n"
]
}
],
"source": [
"import numpy as np\n",
"inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]\n",
"output = np.maximum(0, inputs)\n",
"print(output)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "niT8Hh_5Fjme"
},
"outputs": [],
"source": [
"# ReLU activation\n",
"class Activation_ReLU:\n",
" # Forward pass\n",
" def forward(self, inputs):\n",
" # Calculate output values from input\n",
" self.output = np.maximum(0, inputs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZzQlDPPfFjme",
"outputId": "023f5b81-cba9-41f0-e1cc-fdc486d63b99"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0. 0. 0.]\n",
" [0. 0. 0.]\n",
" [0. 0. 0.]\n",
" [0. 0. 0.]\n",
" [0. 0. 0.]]\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"# Create Dense layer with 2 input features and 3 output values\n",
"dense1 = Layer_Dense(2, 3)\n",
"# Create ReLU activation (to be used with Dense layer):\n",
"activation1 = Activation_ReLU()\n",
"# Make a forward pass of our training data through this layer\n",
"dense1.forward(X)\n",
"# Forward pass through activation func.\n",
"# Takes in output from previous layer\n",
"activation1.forward(dense1.output)\n",
"# Let's see output of the first few samples:\n",
"print(activation1.output[:5])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vT3iHO6rFjme"
},
"source": [
"\n",
"ACTIVATION FUNCTION: SOFTMAX \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fvhMk3zGFjme",
"outputId": "868e3bd4-6153-445c-ac23-1ded36133c00"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"45\n",
"[12 15 18]\n",
"(3,)\n",
"[ 6 15 24]\n",
"(3,)\n",
"[[12 15 18]]\n",
"(1, 3)\n",
"[[ 6]\n",
" [15]\n",
" [24]]\n",
"(3, 1)\n",
"[7 8 9]\n",
"[3 6 9]\n"
]
}
],
"source": [
"### TRY THESE EXERCISES FOR YOURSELF!\n",
"\n",
"A = [[1, 2, 3], [4, 5, 6], [7, 8,9]]\n",
"print(np.sum(A))\n",
"\n",
"print(np.sum(A, axis = 0))\n",
"print(np.sum(A, axis = 0).shape)\n",
"\n",
"print(np.sum(A, axis = 1))\n",
"print(np.sum(A, axis = 1).shape)\n",
"\n",
"print(np.sum(A, axis = 0,keepdims = True))\n",
"print(np.sum(A, axis = 0,keepdims = True).shape)\n",
"\n",
"print(np.sum(A, axis = 1,keepdims = True))\n",
"print(np.sum(A, axis = 1,keepdims = True).shape)\n",
"\n",
"print(np.max(A, axis = 0))\n",
"print(np.max(A, axis = 1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4Ux1-5ZUFjmi",
"outputId": "7de739d1-deca-48ed-fe79-3fe5f8e32565"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.06414769 0.17437149 0.47399085 0.28748998]\n",
" [0.04517666 0.90739747 0.00224921 0.04517666]\n",
" [0.00522984 0.34875873 0.63547983 0.0105316 ]]\n"
]
},
{
"data": {
"text/plain": [
"array([1., 1., 1.])"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"inputs = [[1, 2, 3, 2.5],\n",
" [2., 5., -1., 2],\n",
" [-1.5, 2.7, 3.3, -0.8]]\n",
"\n",
"# Get unnormalized probabilities\n",
"exp_values = np.exp(inputs - np.max(inputs, axis=1,keepdims=True))\n",
" # Normalize them for each sample\n",
"probabilities = exp_values / np.sum(exp_values, axis=1,keepdims=True)\n",
"print(probabilities)\n",
"np.sum(probabilities, axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kRdUvsWAFjml"
},
"outputs": [],
"source": [
"# Softmax activation\n",
"class Activation_Softmax:\n",
" # Forward pass\n",
" def forward(self, inputs):\n",
" # Get unnormalized probabilities\n",
" exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))\n",
" # Normalize them for each sample\n",
" probabilities = exp_values / np.sum(exp_values, axis=1,keepdims=True)\n",
" self.output = probabilities"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "RzQG-tdnFjml"
},
"source": [
"\n",
"ONE FORWARD PASS (WITHOUT LOSS) \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Sha3z7fPFjml",
"outputId": "a38ee32e-abad-4752-8ed7-a6614931aca7"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.33333334 0.33333334 0.33333334]\n",
" [0.33333364 0.3333334 0.3333329 ]\n",
" [0.33333385 0.3333335 0.33333266]\n",
" [0.33333433 0.3333336 0.33333206]\n",
" [0.33333462 0.33333373 0.33333164]]\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"# Create Dense layer with 2 input features and 3 output values\n",
"dense1 = Layer_Dense(2, 3)\n",
"# Create ReLU activation (to be used with Dense layer):\n",
"activation1 = Activation_ReLU()\n",
"# Create second Dense layer with 3 input features (as we take output\n",
"# of previous layer here) and 3 output values\n",
"dense2 = Layer_Dense(3, 3)\n",
"# Create Softmax activation (to be used with Dense layer):\n",
"activation2 = Activation_Softmax()\n",
"\n",
"# Make a forward pass of our training data through this layer\n",
"dense1.forward(X)\n",
"\n",
"# Make a forward pass through activation function\n",
"# it takes the output of first dense layer here\n",
"activation1.forward(dense1.output)\n",
"# Make a forward pass through second Dense layer\n",
"# it takes outputs of activation function of first layer as inputs\n",
"dense2.forward(activation1.output)\n",
"# Make a forward pass through activation function\n",
"# it takes the output of second dense layer here\n",
"activation2.forward(dense2.output)\n",
"# Let's see output of the first few samples:\n",
"print(activation2.output[:5])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "In5zGYb4Fjml"
},
"source": [
"\n",
"CALCULATING NETWORK ERROR WITH LOSS \n",
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "aQnZKLOOFjml"
},
"source": [
"\n",
"CROSS ENTROPY LOSS BUILDING BLOCKS IN PYTHON\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "D8AgvY5hFjml",
"outputId": "2e395228-3098-4f22-d753-2b8b176a095b"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.7 0.5 0.9]\n"
]
}
],
"source": [
"softmax_outputs = np.array([[0.7, 0.1, 0.2],\n",
" [0.1, 0.5, 0.4],\n",
" [0.02, 0.9, 0.08]])\n",
"class_targets = [0, 1, 1]\n",
"print(softmax_outputs[range(len(softmax_outputs)), class_targets])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Blvy02fjFjmm",
"outputId": "4d35fb6e-eb49-43ef-af0f-c1bc4faf2216"
},
"outputs": [
{
"data": {
"text/plain": [
"range(0, 3)"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"range(len(softmax_outputs))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "nyIehvEcFjmm",
"outputId": "d0b7eec5-55ea-4409-b867-9d8ae4f64065"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.35667494 0.69314718 0.10536052]\n",
"0.38506088005216804\n"
]
}
],
"source": [
"print(-np.log(softmax_outputs[\n",
" range(len(softmax_outputs)), class_targets\n",
"]))\n",
"neg_log = -np.log(softmax_outputs[\n",
" range(len(softmax_outputs)), class_targets\n",
" ])\n",
"average_loss = np.mean(neg_log)\n",
"print(average_loss)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NxZXtcNdFjmm"
},
"source": [
"\n",
"IF DATA IS ONE HOT ENCODED, HOW TO EXTRACT THE RELEVANT PREDICTIONS\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oIHjRQnpFjmm",
"outputId": "bac7a3a7-8796-47cc-f75e-c4a46f88ef8e"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.35667494 0.69314718 0.10536052]\n",
"0.38506088005216804\n"
]
}
],
"source": [
"y_true_check = np.array([\n",
" [1, 0, 0],\n",
" [0, 1, 0],\n",
" [0, 1, 0]\n",
"])\n",
"\n",
"y_pred_clipped_check = np.array([\n",
" [0.7, 0.2, 0.1],\n",
" [0.1, 0.5, 0.4],\n",
" [0.02, 0.9, 0.08]\n",
"])\n",
"\n",
"A = y_true_check*y_pred_clipped_check\n",
"B = np.sum(A, axis = 1)\n",
"C = - np.log(B)\n",
"\n",
"print(C)\n",
"print(np.mean(C))\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nCWwEIPZFjmm"
},
"source": [
"\n",
"IMPLEMENTING THE LOSS CLASS\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Nw_ZQP13Fjmm"
},
"outputs": [],
"source": [
"# Common loss class\n",
"class Loss:\n",
" # Calculates the data and regularization losses\n",
" # given model output and ground truth values\n",
" def calculate(self, output, y):\n",
" # Calculate sample losses\n",
" sample_losses = self.forward(output, y)\n",
" # Calculate mean loss\n",
" data_loss = np.mean(sample_losses)\n",
" # Return loss\n",
" return data_loss"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vJ1fW8VLFjmm"
},
"source": [
"\n",
"IMPLEMENTING THE CATEGORICAL CROSS ENTROPY CLASS\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "bkfGuRE0Fjmm"
},
"outputs": [],
"source": [
"# Cross-entropy loss\n",
"class Loss_CategoricalCrossentropy(Loss):\n",
" # Forward pass\n",
" def forward(self, y_pred, y_true):\n",
" # Number of samples in a batch\n",
" samples = len(y_pred)\n",
" # Clip data to prevent division by 0\n",
" # Clip both sides to not drag mean towards any value\n",
" y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)\n",
" # Probabilities for target values -\n",
" # only if categorical labels\n",
" if len(y_true.shape) == 1:\n",
" correct_confidences = y_pred_clipped[\n",
" range(samples),\n",
" y_true\n",
" ]\n",
" # Mask values - only for one-hot encoded labels\n",
" elif len(y_true.shape) == 2:\n",
" correct_confidences = np.sum(\n",
" y_pred_clipped*y_true,\n",
" axis=1\n",
" )\n",
" # Losses\n",
" negative_log_likelihoods = -np.log(correct_confidences)\n",
" return negative_log_likelihoods"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Q9YEVg4dFjmm",
"outputId": "3fa4ae1f-ffbf-480e-a1f0-61bd4482febb"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.38506088005216804\n"
]
}
],
"source": [
"softmax_outputs = np.array([[0.7, 0.1, 0.2],\n",
" [0.1, 0.5, 0.4],\n",
" [0.02, 0.9, 0.08]])\n",
"class_targets = np.array([[1, 0, 0],\n",
" [0, 1, 0],\n",
" [0, 1, 0]])\n",
"loss_function = Loss_CategoricalCrossentropy()\n",
"loss = loss_function.calculate(softmax_outputs, class_targets)\n",
"print(loss)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "llVv_pJDFjmm"
},
"source": [
"\n",
"FULL CODE UPTO THIS POINT\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ocqJJcMsFjmm",
"outputId": "9f6418fe-8a42-4b1f-d5d7-efdd017815da"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.33333334 0.33333334 0.33333334]\n",
" [0.3333341 0.33333302 0.3333329 ]\n",
" [0.3333341 0.33333302 0.33333296]\n",
" [0.3333341 0.333333 0.33333293]\n",
" [0.3333364 0.33333203 0.33333158]]\n",
"loss: 1.0986193\n",
"acc: 0.28\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"# Create Dense layer with 2 input features and 3 output values\n",
"dense1 = Layer_Dense(2, 3)\n",
"# Create ReLU activation (to be used with Dense layer):\n",
"activation1 = Activation_ReLU()\n",
"# Create second Dense layer with 3 input features (as we take output\n",
"# of previous layer here) and 3 output values\n",
"dense2 = Layer_Dense(3, 3)\n",
"# Create Softmax activation (to be used with Dense layer):\n",
"activation2 = Activation_Softmax()\n",
"# Create loss function\n",
"loss_function = Loss_CategoricalCrossentropy()\n",
"\n",
"\n",
"# Perform a forward pass of our training data through this layer\n",
"dense1.forward(X)\n",
"# Perform a forward pass through activation function\n",
"# it takes the output of first dense layer here\n",
"activation1.forward(dense1.output)\n",
"\n",
"# Perform a forward pass through second Dense layer\n",
"# it takes outputs of activation function of first layer as inputs\n",
"dense2.forward(activation1.output)\n",
"# Perform a forward pass through activation function\n",
"# it takes the output of second dense layer here\n",
"activation2.forward(dense2.output)\n",
"# Let's see output of the first few samples:\n",
"print(activation2.output[:5])\n",
"# Perform a forward pass through activation function\n",
"# it takes the output of second dense layer here and returns loss\n",
"loss = loss_function.calculate(activation2.output, y)\n",
"# Print loss value\n",
"print('loss:', loss)\n",
"\n",
"# Calculate accuracy from output of activation2 and targets\n",
"# calculate values along first axis\n",
"predictions = np.argmax(activation2.output, axis=1)\n",
"if len(y.shape) == 2:\n",
" y = np.argmax(y, axis=1)\n",
"accuracy = np.mean(predictions == y)\n",
"# Print accuracy\n",
"print('acc:', accuracy)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NC3lHEW0Fjmn"
},
"source": [
"\n",
"INTRODUCING ACCURACY \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Hxxm7ePwFjmn",
"outputId": "32f1df40-341a-497d-fa9a-a4a4f283b516"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"acc: 1.0\n"
]
}
],
"source": [
"import numpy as np\n",
"# Probabilities of 3 samples\n",
"softmax_outputs = np.array([[0.7, 0.2, 0.1],\n",
" [0.1, 0.5, 0.4],\n",
" [0.02, 0.9, 0.08]])\n",
"# Target (ground-truth) labels for 3 samples\n",
"class_targets = np.array([0, 1, 1])\n",
"# Calculate values along second axis (axis of index 1)\n",
"predictions = np.argmax(softmax_outputs, axis=1)\n",
"# If targets are one-hot encoded - convert them\n",
"if len(class_targets.shape) == 2:\n",
" class_targets = np.argmax(class_targets, axis=1)\n",
"# True evaluates to 1; False to 0\n",
"accuracy = np.mean(predictions == class_targets)\n",
"print('acc:', accuracy)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "28USj38zFjmn"
},
"source": [
"\n",
"THE NEED FOR OPTIMIZATION \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YlIycjcjFjmn",
"outputId": "bf133cf2-dea4-49f5-cb61-3122dd15cf5b"
},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#SIMPLER DATASET\n",
"import matplotlib.pyplot as plt\n",
"import nnfs\n",
"from nnfs.datasets import vertical_data\n",
"nnfs.init()\n",
"X, y = vertical_data(samples=100, classes=3)\n",
"plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap='brg')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dPzWOMqSFjmn"
},
"source": [
"\n",
"STRATEGY 1: RANDOMLY SELECT WEIGHTS AND BIASES - DOES NOT WORK!\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_klS09rqFjmn",
"outputId": "a012ca91-3393-4ab1-9443-3e81828ba998"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New set of weights found, iteration: 0 loss: 1.1016203 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 1 loss: 1.1002508 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 2 loss: 1.0992025 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 3 loss: 1.0986239 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 10 loss: 1.0984299 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 22 loss: 1.0976521 acc: 0.36333333333333334\n",
"New set of weights found, iteration: 150 loss: 1.0974255 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 874 loss: 1.0972673 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 894 loss: 1.096895 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 1036 loss: 1.095428 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 88633 loss: 1.0952065 acc: 0.3333333333333333\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = vertical_data(samples=100, classes=3)\n",
"# Create model\n",
"dense1 = Layer_Dense(2, 3) # first dense layer, 2 inputs\n",
"activation1 = Activation_ReLU()\n",
"dense2 = Layer_Dense(3, 3) # second dense layer, 3 inputs, 3 outputs\n",
"activation2 = Activation_Softmax()\n",
"# Create loss function\n",
"loss_function = Loss_CategoricalCrossentropy()\n",
"\n",
"# Helper variables\n",
"lowest_loss = 9999999 # some initial value\n",
"best_dense1_weights = dense1.weights.copy()\n",
"best_dense1_biases = dense1.biases.copy()\n",
"best_dense2_weights = dense2.weights.copy()\n",
"best_dense2_biases = dense2.biases.copy()\n",
"\n",
"for iteration in range(100000):\n",
" # Generate a new set of weights for iteration\n",
" dense1.weights = 0.05 * np.random.randn(2, 3)\n",
" dense1.biases = 0.05 * np.random.randn(1, 3)\n",
" dense2.weights = 0.05 * np.random.randn(3, 3)\n",
" dense2.biases = 0.05 * np.random.randn(1, 3)\n",
" # Perform a forward pass of the training data through this layer\n",
" dense1.forward(X)\n",
" activation1.forward(dense1.output)\n",
" dense2.forward(activation1.output)\n",
" activation2.forward(dense2.output)\n",
" # Perform a forward pass through activation function\n",
" # it takes the output of second dense layer here and returns loss\n",
" loss = loss_function.calculate(activation2.output, y)\n",
" # Calculate accuracy from output of activation2 and targets\n",
" # calculate values along first axis\n",
" predictions = np.argmax(activation2.output, axis=1)\n",
" accuracy = np.mean(predictions == y)\n",
" # If loss is smaller - print and save weights and biases aside\n",
" if loss < lowest_loss:\n",
" print('New set of weights found, iteration:', iteration,'loss:', loss, 'acc:', accuracy)\n",
" best_dense1_weights = dense1.weights.copy()\n",
" best_dense1_biases = dense1.biases.copy()\n",
" best_dense2_weights = dense2.weights.copy()\n",
" best_dense2_biases = dense2.biases.copy()\n",
" lowest_loss = loss"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "pSEJ6av5Fjmn"
},
"source": [
"\n",
"STRATEGY 2: RANDOMLY ADJUST WEIGHTS AND BIASES - WORKS!\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "3_gxk2XdFjmn",
"outputId": "e2a285c4-d44d-4c3a-c61b-a7e7d533f85b"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New set of weights found, iteration: 0 loss: 1.1008747 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 3 loss: 1.1005714 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 4 loss: 1.099462 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 9 loss: 1.0994359 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 10 loss: 1.09855 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 13 loss: 1.098517 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 14 loss: 1.0938607 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 15 loss: 1.0920315 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 17 loss: 1.091391 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 19 loss: 1.0910357 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 20 loss: 1.0898421 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 21 loss: 1.0843327 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 26 loss: 1.0835577 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 27 loss: 1.0823517 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 28 loss: 1.0778279 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 31 loss: 1.076321 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 35 loss: 1.0729524 acc: 0.3\n",
"New set of weights found, iteration: 36 loss: 1.0699975 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 47 loss: 1.0631136 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 52 loss: 1.062574 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 53 loss: 1.0624933 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 54 loss: 1.0592291 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 57 loss: 1.0574998 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 58 loss: 1.0493913 acc: 0.38\n",
"New set of weights found, iteration: 64 loss: 1.0450443 acc: 0.65\n",
"New set of weights found, iteration: 68 loss: 1.0377563 acc: 0.65\n",
"New set of weights found, iteration: 70 loss: 1.0333902 acc: 0.66\n",
"New set of weights found, iteration: 71 loss: 1.0315365 acc: 0.63\n",
"New set of weights found, iteration: 74 loss: 1.0283976 acc: 0.6033333333333334\n",
"New set of weights found, iteration: 75 loss: 1.0214951 acc: 0.6133333333333333\n",
"New set of weights found, iteration: 82 loss: 1.0101981 acc: 0.6466666666666666\n",
"New set of weights found, iteration: 84 loss: 1.0100108 acc: 0.5733333333333334\n",
"New set of weights found, iteration: 90 loss: 1.0035288 acc: 0.7133333333333334\n",
"New set of weights found, iteration: 92 loss: 1.0008268 acc: 0.65\n",
"New set of weights found, iteration: 95 loss: 0.99175525 acc: 0.6\n",
"New set of weights found, iteration: 98 loss: 0.9812336 acc: 0.66\n",
"New set of weights found, iteration: 101 loss: 0.97777444 acc: 0.58\n",
"New set of weights found, iteration: 106 loss: 0.977026 acc: 0.67\n",
"New set of weights found, iteration: 110 loss: 0.97546613 acc: 0.6766666666666666\n",
"New set of weights found, iteration: 111 loss: 0.9656759 acc: 0.9066666666666666\n",
"New set of weights found, iteration: 115 loss: 0.96165353 acc: 0.8733333333333333\n",
"New set of weights found, iteration: 116 loss: 0.94877195 acc: 0.9\n",
"New set of weights found, iteration: 122 loss: 0.93169373 acc: 0.8766666666666667\n",
"New set of weights found, iteration: 127 loss: 0.9187146 acc: 0.81\n",
"New set of weights found, iteration: 131 loss: 0.91708404 acc: 0.6666666666666666\n",
"New set of weights found, iteration: 136 loss: 0.9127953 acc: 0.6933333333333334\n",
"New set of weights found, iteration: 139 loss: 0.90787965 acc: 0.6666666666666666\n",
"New set of weights found, iteration: 141 loss: 0.9043988 acc: 0.66\n",
"New set of weights found, iteration: 146 loss: 0.90065634 acc: 0.6666666666666666\n",
"New set of weights found, iteration: 147 loss: 0.89855623 acc: 0.6533333333333333\n",
"New set of weights found, iteration: 149 loss: 0.8932796 acc: 0.69\n",
"New set of weights found, iteration: 152 loss: 0.87403554 acc: 0.84\n",
"New set of weights found, iteration: 153 loss: 0.8707889 acc: 0.7666666666666667\n",
"New set of weights found, iteration: 156 loss: 0.8667261 acc: 0.88\n",
"New set of weights found, iteration: 158 loss: 0.8490861 acc: 0.87\n",
"New set of weights found, iteration: 162 loss: 0.84764665 acc: 0.8333333333333334\n",
"New set of weights found, iteration: 165 loss: 0.8428589 acc: 0.8033333333333333\n",
"New set of weights found, iteration: 166 loss: 0.8239612 acc: 0.66\n",
"New set of weights found, iteration: 167 loss: 0.7897708 acc: 0.7\n",
"New set of weights found, iteration: 171 loss: 0.7849403 acc: 0.6733333333333333\n",
"New set of weights found, iteration: 172 loss: 0.782472 acc: 0.6633333333333333\n",
"New set of weights found, iteration: 173 loss: 0.7803537 acc: 0.6633333333333333\n",
"New set of weights found, iteration: 175 loss: 0.7747049 acc: 0.67\n",
"New set of weights found, iteration: 178 loss: 0.77419984 acc: 0.6666666666666666\n",
"New set of weights found, iteration: 179 loss: 0.75924987 acc: 0.7233333333333334\n",
"New set of weights found, iteration: 183 loss: 0.7520049 acc: 0.67\n",
"New set of weights found, iteration: 187 loss: 0.7399888 acc: 0.7366666666666667\n",
"New set of weights found, iteration: 188 loss: 0.73904145 acc: 0.8333333333333334\n",
"New set of weights found, iteration: 190 loss: 0.7320137 acc: 0.8266666666666667\n",
"New set of weights found, iteration: 195 loss: 0.712903 acc: 0.69\n",
"New set of weights found, iteration: 198 loss: 0.70528257 acc: 0.7266666666666667\n",
"New set of weights found, iteration: 199 loss: 0.70251924 acc: 0.6966666666666667\n",
"New set of weights found, iteration: 200 loss: 0.69334394 acc: 0.72\n",
"New set of weights found, iteration: 201 loss: 0.6803275 acc: 0.79\n",
"New set of weights found, iteration: 205 loss: 0.6770639 acc: 0.7666666666666667\n",
"New set of weights found, iteration: 206 loss: 0.66997415 acc: 0.8733333333333333\n",
"New set of weights found, iteration: 207 loss: 0.6599941 acc: 0.8066666666666666\n",
"New set of weights found, iteration: 208 loss: 0.65543926 acc: 0.7766666666666666\n",
"New set of weights found, iteration: 209 loss: 0.62252766 acc: 0.8033333333333333\n",
"New set of weights found, iteration: 210 loss: 0.61185175 acc: 0.7766666666666666\n",
"New set of weights found, iteration: 211 loss: 0.599142 acc: 0.8766666666666667\n",
"New set of weights found, iteration: 213 loss: 0.5920534 acc: 0.8766666666666667\n",
"New set of weights found, iteration: 222 loss: 0.5918576 acc: 0.8533333333333334\n",
"New set of weights found, iteration: 229 loss: 0.57633215 acc: 0.8533333333333334\n",
"New set of weights found, iteration: 234 loss: 0.55926883 acc: 0.8933333333333333\n",
"New set of weights found, iteration: 239 loss: 0.5579059 acc: 0.83\n",
"New set of weights found, iteration: 243 loss: 0.55486155 acc: 0.8466666666666667\n",
"New set of weights found, iteration: 246 loss: 0.5507333 acc: 0.8866666666666667\n",
"New set of weights found, iteration: 247 loss: 0.545519 acc: 0.8833333333333333\n",
"New set of weights found, iteration: 248 loss: 0.5346363 acc: 0.86\n",
"New set of weights found, iteration: 264 loss: 0.5342746 acc: 0.83\n",
"New set of weights found, iteration: 265 loss: 0.5253426 acc: 0.8866666666666667\n",
"New set of weights found, iteration: 267 loss: 0.51280546 acc: 0.9066666666666666\n",
"New set of weights found, iteration: 269 loss: 0.50337905 acc: 0.9233333333333333\n",
"New set of weights found, iteration: 276 loss: 0.5024293 acc: 0.89\n",
"New set of weights found, iteration: 278 loss: 0.491296 acc: 0.9133333333333333\n",
"New set of weights found, iteration: 283 loss: 0.48956776 acc: 0.9033333333333333\n",
"New set of weights found, iteration: 284 loss: 0.48599878 acc: 0.93\n",
"New set of weights found, iteration: 289 loss: 0.48009065 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 290 loss: 0.47792414 acc: 0.9233333333333333\n",
"New set of weights found, iteration: 291 loss: 0.46914592 acc: 0.9133333333333333\n",
"New set of weights found, iteration: 298 loss: 0.46764258 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 300 loss: 0.45708776 acc: 0.8933333333333333\n",
"New set of weights found, iteration: 301 loss: 0.45304757 acc: 0.8966666666666666\n",
"New set of weights found, iteration: 304 loss: 0.45023417 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 308 loss: 0.44732147 acc: 0.8833333333333333\n",
"New set of weights found, iteration: 309 loss: 0.44072458 acc: 0.88\n",
"New set of weights found, iteration: 313 loss: 0.4319237 acc: 0.8933333333333333\n",
"New set of weights found, iteration: 318 loss: 0.42440805 acc: 0.92\n",
"New set of weights found, iteration: 320 loss: 0.41862 acc: 0.92\n",
"New set of weights found, iteration: 324 loss: 0.41297048 acc: 0.9166666666666666\n",
"New set of weights found, iteration: 326 loss: 0.41245985 acc: 0.9\n",
"New set of weights found, iteration: 328 loss: 0.40676734 acc: 0.9233333333333333\n",
"New set of weights found, iteration: 329 loss: 0.40640786 acc: 0.93\n",
"New set of weights found, iteration: 335 loss: 0.39842996 acc: 0.93\n",
"New set of weights found, iteration: 338 loss: 0.39804557 acc: 0.94\n",
"New set of weights found, iteration: 344 loss: 0.39646825 acc: 0.9233333333333333\n",
"New set of weights found, iteration: 347 loss: 0.38945505 acc: 0.93\n",
"New set of weights found, iteration: 348 loss: 0.3871968 acc: 0.9166666666666666\n",
"New set of weights found, iteration: 351 loss: 0.3779128 acc: 0.9133333333333333\n",
"New set of weights found, iteration: 354 loss: 0.37311223 acc: 0.9233333333333333\n",
"New set of weights found, iteration: 355 loss: 0.37276617 acc: 0.9166666666666666\n",
"New set of weights found, iteration: 356 loss: 0.36852098 acc: 0.9166666666666666\n",
"New set of weights found, iteration: 360 loss: 0.35868368 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 362 loss: 0.35691482 acc: 0.93\n",
"New set of weights found, iteration: 367 loss: 0.3429358 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 368 loss: 0.33983532 acc: 0.93\n",
"New set of weights found, iteration: 375 loss: 0.33676398 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 378 loss: 0.33293056 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 379 loss: 0.32861033 acc: 0.93\n",
"New set of weights found, iteration: 380 loss: 0.32313567 acc: 0.93\n",
"New set of weights found, iteration: 383 loss: 0.32154855 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 386 loss: 0.32056552 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 392 loss: 0.32019016 acc: 0.93\n",
"New set of weights found, iteration: 394 loss: 0.31832498 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 400 loss: 0.315471 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 401 loss: 0.3055538 acc: 0.93\n",
"New set of weights found, iteration: 407 loss: 0.3045038 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 410 loss: 0.30153093 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 417 loss: 0.29838136 acc: 0.92\n",
"New set of weights found, iteration: 420 loss: 0.29762506 acc: 0.93\n",
"New set of weights found, iteration: 428 loss: 0.29648978 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 434 loss: 0.29306 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 435 loss: 0.29192674 acc: 0.93\n",
"New set of weights found, iteration: 440 loss: 0.28488693 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 444 loss: 0.28334972 acc: 0.93\n",
"New set of weights found, iteration: 448 loss: 0.28309777 acc: 0.93\n",
"New set of weights found, iteration: 449 loss: 0.28108674 acc: 0.93\n",
"New set of weights found, iteration: 453 loss: 0.27773702 acc: 0.94\n",
"New set of weights found, iteration: 461 loss: 0.27312914 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 467 loss: 0.2729932 acc: 0.93\n",
"New set of weights found, iteration: 472 loss: 0.27115387 acc: 0.93\n",
"New set of weights found, iteration: 473 loss: 0.26995963 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 475 loss: 0.26944205 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 477 loss: 0.268688 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 479 loss: 0.26242334 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 493 loss: 0.26207444 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 495 loss: 0.25908068 acc: 0.93\n",
"New set of weights found, iteration: 496 loss: 0.2590734 acc: 0.93\n",
"New set of weights found, iteration: 497 loss: 0.2582146 acc: 0.94\n",
"New set of weights found, iteration: 503 loss: 0.25407296 acc: 0.93\n",
"New set of weights found, iteration: 505 loss: 0.25202662 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 506 loss: 0.24973544 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 512 loss: 0.24969201 acc: 0.93\n",
"New set of weights found, iteration: 517 loss: 0.24860601 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 519 loss: 0.24686712 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 529 loss: 0.24602742 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 535 loss: 0.2445193 acc: 0.93\n",
"New set of weights found, iteration: 536 loss: 0.24081425 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 538 loss: 0.24043368 acc: 0.93\n",
"New set of weights found, iteration: 546 loss: 0.23582341 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 550 loss: 0.23467208 acc: 0.93\n",
"New set of weights found, iteration: 552 loss: 0.23393926 acc: 0.93\n",
"New set of weights found, iteration: 557 loss: 0.22943695 acc: 0.93\n",
"New set of weights found, iteration: 564 loss: 0.22614151 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 570 loss: 0.22351934 acc: 0.93\n",
"New set of weights found, iteration: 582 loss: 0.2227324 acc: 0.93\n",
"New set of weights found, iteration: 585 loss: 0.21830775 acc: 0.93\n",
"New set of weights found, iteration: 597 loss: 0.2167703 acc: 0.93\n",
"New set of weights found, iteration: 610 loss: 0.21555844 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 625 loss: 0.21515213 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 629 loss: 0.21122937 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 631 loss: 0.21094893 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 637 loss: 0.21082413 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 659 loss: 0.21066985 acc: 0.93\n",
"New set of weights found, iteration: 661 loss: 0.20929192 acc: 0.93\n",
"New set of weights found, iteration: 684 loss: 0.20894809 acc: 0.93\n",
"New set of weights found, iteration: 686 loss: 0.20748574 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 689 loss: 0.205267 acc: 0.93\n",
"New set of weights found, iteration: 708 loss: 0.20465927 acc: 0.93\n",
"New set of weights found, iteration: 712 loss: 0.20393105 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 729 loss: 0.20358147 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 733 loss: 0.20317748 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 739 loss: 0.20279907 acc: 0.94\n",
"New set of weights found, iteration: 740 loss: 0.20236613 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 746 loss: 0.2021528 acc: 0.93\n",
"New set of weights found, iteration: 748 loss: 0.20100321 acc: 0.93\n",
"New set of weights found, iteration: 754 loss: 0.1993275 acc: 0.93\n",
"New set of weights found, iteration: 757 loss: 0.19792211 acc: 0.93\n",
"New set of weights found, iteration: 773 loss: 0.19783711 acc: 0.93\n",
"New set of weights found, iteration: 775 loss: 0.19740187 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 776 loss: 0.19721994 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 777 loss: 0.1961473 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 779 loss: 0.19598241 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 784 loss: 0.19526182 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 785 loss: 0.19456321 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 791 loss: 0.1943641 acc: 0.93\n",
"New set of weights found, iteration: 799 loss: 0.19334234 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 806 loss: 0.19311096 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 809 loss: 0.19277118 acc: 0.93\n",
"New set of weights found, iteration: 811 loss: 0.19264112 acc: 0.93\n",
"New set of weights found, iteration: 815 loss: 0.190967 acc: 0.93\n",
"New set of weights found, iteration: 890 loss: 0.18890005 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 892 loss: 0.18846218 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 893 loss: 0.18713883 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 903 loss: 0.1869782 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 909 loss: 0.18646298 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 917 loss: 0.18598829 acc: 0.93\n",
"New set of weights found, iteration: 927 loss: 0.18594755 acc: 0.93\n",
"New set of weights found, iteration: 932 loss: 0.18573302 acc: 0.93\n",
"New set of weights found, iteration: 942 loss: 0.18538505 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 946 loss: 0.18424016 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 959 loss: 0.1835666 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 969 loss: 0.1830834 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 974 loss: 0.18286093 acc: 0.93\n",
"New set of weights found, iteration: 983 loss: 0.1820501 acc: 0.93\n",
"New set of weights found, iteration: 985 loss: 0.18183175 acc: 0.93\n",
"New set of weights found, iteration: 993 loss: 0.18173474 acc: 0.93\n",
"New set of weights found, iteration: 995 loss: 0.18133913 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1001 loss: 0.18080577 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1006 loss: 0.1802216 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1025 loss: 0.17995203 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1042 loss: 0.17973644 acc: 0.93\n",
"New set of weights found, iteration: 1061 loss: 0.17944387 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1082 loss: 0.17943431 acc: 0.93\n",
"New set of weights found, iteration: 1083 loss: 0.17871827 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1093 loss: 0.17833588 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1123 loss: 0.1782659 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1144 loss: 0.17753273 acc: 0.93\n",
"New set of weights found, iteration: 1154 loss: 0.17727007 acc: 0.93\n",
"New set of weights found, iteration: 1165 loss: 0.17724434 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1166 loss: 0.17719936 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1171 loss: 0.17672187 acc: 0.9366666666666666\n",
"New set of weights found, iteration: 1176 loss: 0.17671774 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1212 loss: 0.17666677 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1223 loss: 0.17643958 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1275 loss: 0.1762153 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1303 loss: 0.17542142 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1378 loss: 0.17535225 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1382 loss: 0.17529377 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1404 loss: 0.17457567 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1457 loss: 0.17447841 acc: 0.9333333333333333\n",
"New set of weights found, iteration: 1461 loss: 0.17424846 acc: 0.93\n",
"New set of weights found, iteration: 1477 loss: 0.1737377 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1548 loss: 0.17335981 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1580 loss: 0.1733058 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1587 loss: 0.17321306 acc: 0.93\n",
"New set of weights found, iteration: 1596 loss: 0.17308939 acc: 0.93\n",
"New set of weights found, iteration: 1614 loss: 0.17301595 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1644 loss: 0.17284796 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1647 loss: 0.17258313 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1662 loss: 0.1723816 acc: 0.93\n",
"New set of weights found, iteration: 1706 loss: 0.17223743 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 1719 loss: 0.1719875 acc: 0.93\n",
"New set of weights found, iteration: 1978 loss: 0.17198084 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 2001 loss: 0.17195481 acc: 0.93\n",
"New set of weights found, iteration: 2026 loss: 0.1717552 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 2031 loss: 0.17165588 acc: 0.93\n",
"New set of weights found, iteration: 2066 loss: 0.17161626 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 2074 loss: 0.17140518 acc: 0.93\n",
"New set of weights found, iteration: 2441 loss: 0.17138883 acc: 0.93\n",
"New set of weights found, iteration: 2636 loss: 0.17134853 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 2718 loss: 0.17130204 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 2775 loss: 0.17122428 acc: 0.93\n",
"New set of weights found, iteration: 2829 loss: 0.1711681 acc: 0.93\n",
"New set of weights found, iteration: 2874 loss: 0.17114341 acc: 0.93\n",
"New set of weights found, iteration: 2978 loss: 0.17100853 acc: 0.93\n",
"New set of weights found, iteration: 3138 loss: 0.17093514 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 3293 loss: 0.17080545 acc: 0.9266666666666666\n",
"New set of weights found, iteration: 3486 loss: 0.1707664 acc: 0.93\n",
"New set of weights found, iteration: 3681 loss: 0.17070974 acc: 0.93\n",
"New set of weights found, iteration: 4038 loss: 0.17066137 acc: 0.93\n",
"New set of weights found, iteration: 4090 loss: 0.17065905 acc: 0.93\n",
"New set of weights found, iteration: 4116 loss: 0.17063397 acc: 0.93\n",
"New set of weights found, iteration: 4258 loss: 0.17062972 acc: 0.93\n",
"New set of weights found, iteration: 5960 loss: 0.17062148 acc: 0.93\n",
"New set of weights found, iteration: 6352 loss: 0.17058212 acc: 0.93\n",
"New set of weights found, iteration: 8192 loss: 0.17057395 acc: 0.93\n",
"New set of weights found, iteration: 8254 loss: 0.17056267 acc: 0.93\n",
"New set of weights found, iteration: 9463 loss: 0.17056097 acc: 0.93\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = vertical_data(samples=100, classes=3)\n",
"# Create model\n",
"dense1 = Layer_Dense(2, 3) # first dense layer, 2 inputs\n",
"activation1 = Activation_ReLU()\n",
"dense2 = Layer_Dense(3, 3) # second dense layer, 3 inputs, 3 outputs\n",
"activation2 = Activation_Softmax()\n",
"# Create loss function\n",
"loss_function = Loss_CategoricalCrossentropy()\n",
"# Helper variables\n",
"lowest_loss = 9999999 # some initial value\n",
"best_dense1_weights = dense1.weights.copy()\n",
"best_dense1_biases = dense1.biases.copy()\n",
"best_dense2_weights = dense2.weights.copy()\n",
"best_dense2_biases = dense2.biases.copy()\n",
"for iteration in range(10000):\n",
" # Update weights with some small random values\n",
" dense1.weights += 0.05 * np.random.randn(2, 3)\n",
" dense1.biases += 0.05 * np.random.randn(1, 3)\n",
" dense2.weights += 0.05 * np.random.randn(3, 3)\n",
" dense2.biases += 0.05 * np.random.randn(1, 3)\n",
" # Perform a forward pass of our training data through this layer\n",
" dense1.forward(X)\n",
" activation1.forward(dense1.output)\n",
" dense2.forward(activation1.output)\n",
" activation2.forward(dense2.output)\n",
" # Perform a forward pass through activation function\n",
" # it takes the output of second dense layer here and returns loss\n",
" loss = loss_function.calculate(activation2.output, y)\n",
" # Calculate accuracy from output of activation2 and targets\n",
" # calculate values along first axis\n",
" predictions = np.argmax(activation2.output, axis=1)\n",
" accuracy = np.mean(predictions == y)\n",
" # If loss is smaller - print and save weights and biases aside\n",
" if loss < lowest_loss:\n",
" print('New set of weights found, iteration:', iteration,'loss:', loss, 'acc:', accuracy)\n",
" best_dense1_weights = dense1.weights.copy()\n",
" best_dense1_biases = dense1.biases.copy()\n",
" best_dense2_weights = dense2.weights.copy()\n",
" best_dense2_biases = dense2.biases.copy()\n",
" lowest_loss = loss\n",
" # Revert weights and biases\n",
" else:\n",
" dense1.weights = best_dense1_weights.copy()\n",
" dense1.biases = best_dense1_biases.copy()\n",
" dense2.weights = best_dense2_weights.copy()\n",
" dense2.biases = best_dense2_biases.copy()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2hJxmRLrFjmo"
},
"source": [
"\n",
"STRATEGY 2: FOR SPIRAL DATASET - DOES NOT WORK!\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TtnQHR0PFjmo",
"outputId": "9fb9bb12-2c59-4f08-e419-b19fad1e595e"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"New set of weights found, iteration: 0 loss: 1.0991902 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 1 loss: 1.0988214 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 6 loss: 1.0982754 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 14 loss: 1.0978643 acc: 0.3333333333333333\n",
"New set of weights found, iteration: 23 loss: 1.0972557 acc: 0.35\n",
"New set of weights found, iteration: 37 loss: 1.0970367 acc: 0.35\n",
"New set of weights found, iteration: 40 loss: 1.0958394 acc: 0.37\n",
"New set of weights found, iteration: 41 loss: 1.0950066 acc: 0.4066666666666667\n",
"New set of weights found, iteration: 44 loss: 1.0948946 acc: 0.37333333333333335\n",
"New set of weights found, iteration: 53 loss: 1.0943341 acc: 0.3933333333333333\n",
"New set of weights found, iteration: 58 loss: 1.0942849 acc: 0.36666666666666664\n",
"New set of weights found, iteration: 59 loss: 1.0941792 acc: 0.35\n",
"New set of weights found, iteration: 60 loss: 1.0926698 acc: 0.38666666666666666\n",
"New set of weights found, iteration: 61 loss: 1.0913428 acc: 0.3933333333333333\n",
"New set of weights found, iteration: 63 loss: 1.0910325 acc: 0.4066666666666667\n",
"New set of weights found, iteration: 65 loss: 1.0902771 acc: 0.3933333333333333\n",
"New set of weights found, iteration: 75 loss: 1.0892566 acc: 0.39\n",
"New set of weights found, iteration: 88 loss: 1.0892477 acc: 0.39666666666666667\n",
"New set of weights found, iteration: 90 loss: 1.0862132 acc: 0.39\n",
"New set of weights found, iteration: 95 loss: 1.0858525 acc: 0.38333333333333336\n",
"New set of weights found, iteration: 99 loss: 1.0850619 acc: 0.44\n",
"New set of weights found, iteration: 103 loss: 1.0833515 acc: 0.4166666666666667\n",
"New set of weights found, iteration: 108 loss: 1.0818787 acc: 0.41333333333333333\n",
"New set of weights found, iteration: 111 loss: 1.0813941 acc: 0.39\n",
"New set of weights found, iteration: 113 loss: 1.0787892 acc: 0.38666666666666666\n",
"New set of weights found, iteration: 114 loss: 1.0777413 acc: 0.36333333333333334\n",
"New set of weights found, iteration: 116 loss: 1.0772293 acc: 0.38\n",
"New set of weights found, iteration: 121 loss: 1.0759072 acc: 0.3933333333333333\n",
"New set of weights found, iteration: 132 loss: 1.075869 acc: 0.43\n",
"New set of weights found, iteration: 134 loss: 1.075571 acc: 0.39666666666666667\n",
"New set of weights found, iteration: 143 loss: 1.0749155 acc: 0.4066666666666667\n",
"New set of weights found, iteration: 148 loss: 1.0747138 acc: 0.4066666666666667\n",
"New set of weights found, iteration: 156 loss: 1.0740647 acc: 0.42\n",
"New set of weights found, iteration: 183 loss: 1.0736305 acc: 0.42333333333333334\n",
"New set of weights found, iteration: 184 loss: 1.0735968 acc: 0.4266666666666667\n",
"New set of weights found, iteration: 196 loss: 1.0734725 acc: 0.4266666666666667\n",
"New set of weights found, iteration: 197 loss: 1.0726037 acc: 0.44\n",
"New set of weights found, iteration: 198 loss: 1.0724471 acc: 0.4533333333333333\n",
"New set of weights found, iteration: 213 loss: 1.0718489 acc: 0.38666666666666666\n",
"New set of weights found, iteration: 225 loss: 1.07182 acc: 0.4166666666666667\n",
"New set of weights found, iteration: 228 loss: 1.0711939 acc: 0.41\n",
"New set of weights found, iteration: 276 loss: 1.0709915 acc: 0.42\n",
"New set of weights found, iteration: 395 loss: 1.0707066 acc: 0.42333333333333334\n",
"New set of weights found, iteration: 471 loss: 1.070367 acc: 0.42\n",
"New set of weights found, iteration: 1317 loss: 1.0702449 acc: 0.43333333333333335\n",
"New set of weights found, iteration: 1857 loss: 1.0702258 acc: 0.43333333333333335\n",
"New set of weights found, iteration: 2198 loss: 1.070218 acc: 0.4266666666666667\n",
"New set of weights found, iteration: 2315 loss: 1.0701423 acc: 0.44333333333333336\n",
"New set of weights found, iteration: 5115 loss: 1.0700669 acc: 0.43333333333333335\n",
"New set of weights found, iteration: 5942 loss: 1.069947 acc: 0.44333333333333336\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)# Create model\n",
"dense1 = Layer_Dense(2, 3) # first dense layer, 2 inputs\n",
"activation1 = Activation_ReLU()\n",
"dense2 = Layer_Dense(3, 3) # second dense layer, 3 inputs, 3 outputs\n",
"activation2 = Activation_Softmax()\n",
"# Create loss function\n",
"loss_function = Loss_CategoricalCrossentropy()\n",
"# Helper variables\n",
"lowest_loss = 9999999 # some initial value\n",
"best_dense1_weights = dense1.weights.copy()\n",
"best_dense1_biases = dense1.biases.copy()\n",
"best_dense2_weights = dense2.weights.copy()\n",
"best_dense2_biases = dense2.biases.copy()\n",
"for iteration in range(10000):\n",
" # Update weights with some small random values\n",
" dense1.weights += 0.05 * np.random.randn(2, 3)\n",
" dense1.biases += 0.05 * np.random.randn(1, 3)\n",
" dense2.weights += 0.05 * np.random.randn(3, 3)\n",
" dense2.biases += 0.05 * np.random.randn(1, 3)\n",
" # Perform a forward pass of our training data through this layer\n",
" dense1.forward(X)\n",
" activation1.forward(dense1.output)\n",
" dense2.forward(activation1.output)\n",
" activation2.forward(dense2.output)\n",
" # Perform a forward pass through activation function\n",
" # it takes the output of second dense layer here and returns loss\n",
" loss = loss_function.calculate(activation2.output, y)\n",
" # Calculate accuracy from output of activation2 and targets\n",
" # calculate values along first axis\n",
" predictions = np.argmax(activation2.output, axis=1)\n",
" accuracy = np.mean(predictions == y)\n",
" # If loss is smaller - print and save weights and biases aside\n",
" if loss < lowest_loss:\n",
" print('New set of weights found, iteration:', iteration,'loss:', loss, 'acc:', accuracy)\n",
" best_dense1_weights = dense1.weights.copy()\n",
" best_dense1_biases = dense1.biases.copy()\n",
" best_dense2_weights = dense2.weights.copy()\n",
" best_dense2_biases = dense2.biases.copy()\n",
" lowest_loss = loss\n",
" # Revert weights and biases\n",
" else:\n",
" dense1.weights = best_dense1_weights.copy()\n",
" dense1.biases = best_dense1_biases.copy()\n",
" dense2.weights = best_dense2_weights.copy()\n",
" dense2.biases = best_dense2_biases.copy()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "shaawiNMFjmo"
},
"source": [
"\n",
"BACKPROPAGATION \n",
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QOQ4rGwpFjmo"
},
"source": [
"\n",
"GRADIENTS OF THE LOSS WITH RESPECT TO WEIGHTS\n",
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OltKYL03Fjmo"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PX3ZDMbJFjmo",
"outputId": "a9bbfedd-430e-4aa0-cf2e-a1871cfb8cc2"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.5 0.5 0.5]\n",
" [20.1 20.1 20.1]\n",
" [10.9 10.9 10.9]\n",
" [ 4.1 4.1 4.1]]\n"
]
}
],
"source": [
"import numpy as np\n",
"# Passed-in gradient from the next layer\n",
"# for the purpose of this example we're going to use\n",
"# an array of an incremental gradient values\n",
"dvalues = np.array([[1., 1., 1.],\n",
" [2., 2., 2.],\n",
" [3., 3., 3.]])\n",
"# We have 3 sets of inputs - samples\n",
"inputs = np.array([[1, 2, 3, 2.5],\n",
" [2., 5., -1., 2],\n",
" [-1.5, 2.7, 3.3, -0.8]])\n",
"# sum weights of given input\n",
"# and multiply by the passed-in gradient for this neuron\n",
"dweights = np.dot(inputs.T, dvalues)\n",
"print(dweights)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "11Jzrm4-Fjmo"
},
"source": [
"\n",
"GRADIENTS OF THE LOSS WITH RESPECT TO BIASES\n",
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OT8ehIYWFjmo"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "nhGHvU8HFjmo",
"outputId": "cdef2cae-dfe6-4470-c06e-39abb3442fe1"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[6. 6. 6.]]\n"
]
}
],
"source": [
"import numpy as np\n",
"# Passed-in gradient from the next layer\n",
"# for the purpose of this example we're going to use\n",
"# an array of an incremental gradient values\n",
"dvalues = np.array([[1., 1., 1.],\n",
" [2., 2., 2.],\n",
" [3., 3., 3.]])\n",
"# One bias for each neuron\n",
"# biases are the row vector with a shape (1, neurons)\n",
"biases = np.array([[2, 3, 0.5]])\n",
"# dbiases - sum values, do this over samples (first axis), keepdims\n",
"# since this by default will produce a plain list -\n",
"# we explained this in the chapter 4\n",
"dbiases = np.sum(dvalues, axis=0, keepdims=True)\n",
"print(dbiases)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1dBu2U50Fjmo"
},
"source": [
"\n",
"GRADIENTS OF THE LOSS WITH RESPECT TO INPUTS\n",
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "S6CoYaxNFjmr"
},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "K1hy-OyrFjms",
"outputId": "7427ef16-a3d0-4ea4-cb56-657c6f55c471"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0.44 -0.38 -0.07 1.37]\n",
" [ 0.88 -0.76 -0.14 2.74]\n",
" [ 1.32 -1.14 -0.21 4.11]]\n"
]
}
],
"source": [
"import numpy as np\n",
"# Passed-in gradient from the next layer\n",
"# for the purpose of this example we're going to use\n",
"# an array of an incremental gradient values\n",
"dvalues = np.array([[1., 1., 1.],\n",
" [2., 2., 2.],\n",
" [3., 3., 3.]])\n",
"# We have 3 sets of weights - one set for each neuron\n",
"# we have 4 inputs, thus 4 weights\n",
"# recall that we keep weights transposed\n",
"weights = np.array([[0.2, 0.8, -0.5, 1],\n",
" [0.5, -0.91, 0.26, -0.5],\n",
" [-0.26, -0.27, 0.17, 0.87]]).T\n",
"# sum weights of given input\n",
"# and multiply by the passed-in gradient for this neuron\n",
"dinputs = np.dot(dvalues, weights.T)\n",
"print(dinputs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wd-g9Sg6Fjms"
},
"source": [
"\n",
"ADDING THE \"BACKWARD\" METHOD IN THE LAYER-DENSE CLASS\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "obOwiDVWFjms"
},
"outputs": [],
"source": [
"# class Layer_Dense:\n",
"# ...\n",
"# # Backward pass\n",
"# def backward(self, dvalues):\n",
"# # Gradients on parameters\n",
"# self.dweights = np.dot(self.inputs.T, dvalues)\n",
"# self.dbiases = np.sum(dvalues, axis=0, keepdims=True)\n",
"# # Gradient on values\n",
"# self.dinputs = np.dot(dvalues, self.weights.T)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UcDJcQHiFjms"
},
"source": [
"\n",
"ADDING THE \"BACKWARD\" METHOD IN THE RELU ACTIVATION CLASS\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "D6Th9Qw-Fjms"
},
"outputs": [],
"source": [
"# # ReLU activation\n",
"# class Activation_ReLU:\n",
"# # Forward pass\n",
"# def forward(self, inputs):\n",
"# # Remember input values\n",
"# self.inputs = inputs\n",
"# self.output = np.maximum(0, inputs)\n",
"# # Backward pass\n",
"# def backward(self, dvalues):\n",
"# # Since we need to modify the original variable,\n",
"# # let's make a copy of the values first\n",
"# self.dinputs = dvalues.copy()\n",
"# # Zero gradient where input values were negative\n",
"# self.dinputs[self.inputs <= 0] = 0"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VXsxCajzFjms"
},
"source": [
"\n",
"LOSS FUNCTION BACKPROPAGATION\n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "piEwnKVRFjms"
},
"outputs": [],
"source": [
"# Common loss class\n",
"class Loss:\n",
" # Calculates the data and regularization losses\n",
" # given model output and ground truth values\n",
" def calculate(self, output, y):\n",
" # Calculate sample losses\n",
" sample_losses = self.forward(output, y)\n",
" # Calculate mean loss\n",
" data_loss = np.mean(sample_losses)\n",
" # Return loss\n",
" return data_loss"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DxVLILbuFjms"
},
"outputs": [],
"source": [
"class Loss_CategoricalCrossentropy(Loss):\n",
" # Forward pass\n",
" def forward(self, y_pred, y_true):\n",
" # Number of samples in a batch\n",
" samples = len(y_pred)\n",
"\n",
" # Clip data to prevent division by 0\n",
" # Clip both sides to not drag mean towards any value\n",
" y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)\n",
"\n",
" # Probabilities for target values -\n",
" # only if categorical labels\n",
" if len(y_true.shape) == 1:\n",
" correct_confidences = y_pred_clipped[\n",
" range(samples),\n",
" y_true\n",
" ]\n",
" # Mask values - only for one-hot encoded labels\n",
" elif len(y_true.shape) == 2:\n",
" correct_confidences = np.sum(\n",
" y_pred_clipped * y_true,\n",
" axis=1\n",
" )\n",
"\n",
" # Losses\n",
" negative_log_likelihoods = -np.log(correct_confidences)\n",
" return negative_log_likelihoods\n",
"\n",
" # Backward pass\n",
" def backward(self, dvalues, y_true):\n",
" # Number of samples\n",
" samples = len(dvalues)\n",
" # Number of labels in every sample\n",
" # We'll use the first sample to count them\n",
" labels = len(dvalues[0])\n",
"\n",
" # If labels are sparse, turn them into one-hot vector\n",
" if len(y_true.shape) == 1:\n",
" y_true = np.eye(labels)[y_true]\n",
"\n",
" # Calculate gradient\n",
" self.dinputs = -y_true / dvalues\n",
" # Normalize gradient\n",
" self.dinputs = self.dinputs / samples\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YygD9joxFjms"
},
"source": [
"\n",
"Softmax classifier - combined Softmax activation and cross-entropy loss for faster backward step\n",
"
\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2k1LfT_3Fjms"
},
"outputs": [],
"source": [
"# Softmax classifier - combined Softmax activation\n",
"# and cross-entropy loss for faster backward step\n",
"class Activation_Softmax_Loss_CategoricalCrossentropy:\n",
" # Creates activation and loss function objects\n",
" def __init__(self):\n",
" self.activation = Activation_Softmax()\n",
" self.loss = Loss_CategoricalCrossentropy()\n",
"\n",
" # Forward pass\n",
" def forward(self, inputs, y_true):\n",
" # Output layer's activation function\n",
" self.activation.forward(inputs)\n",
" # Set the output\n",
" self.output = self.activation.output\n",
" # Calculate and return loss value\n",
" return self.loss.calculate(self.output, y_true)\n",
"\n",
" # Backward pass\n",
" def backward(self, dvalues, y_true):\n",
" # Number of samples\n",
" samples = len(dvalues)\n",
" # If labels are one-hot encoded,\n",
" # turn them into discrete values\n",
" if len(y_true.shape) == 2:\n",
" y_true = np.argmax(y_true, axis=1)\n",
" # Copy so we can safely modify\n",
" self.dinputs = dvalues.copy()\n",
" # Calculate gradient\n",
" self.dinputs[range(samples), y_true] -= 1\n",
" # Normalize gradient\n",
" self.dinputs = self.dinputs / samples\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ftANOmqyFjms",
"outputId": "6f8bc7ac-3ee9-4e36-d3a7-4f5f2cc00bdd"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Gradients: combined loss and activation:\n",
"[[-0.1 0.03333333 0.06666667]\n",
" [ 0.03333333 -0.16666667 0.13333333]\n",
" [ 0.00666667 -0.03333333 0.02666667]]\n"
]
}
],
"source": [
"softmax_outputs = np.array([[0.7, 0.1, 0.2],\n",
" [0.1, 0.5, 0.4],\n",
" [0.02, 0.9, 0.08]])\n",
"class_targets = np.array([0, 1, 1])\n",
"softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"softmax_loss.backward(softmax_outputs, class_targets)\n",
"dvalues1 = softmax_loss.dinputs\n",
"print('Gradients: combined loss and activation:')\n",
"print(dvalues1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6MZYl0XMFjmt"
},
"source": [
"\n",
"ALL CLASSES TOGETHER \n",
"
"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hmRI3JiSFjmt"
},
"source": [
"\n",
"\n",
"CREATING LAYERS: FORWARD AND BACKWARD PASS
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4KkiOXfWFjmt"
},
"outputs": [],
"source": [
"# Dense layer\n",
"class Layer_Dense:\n",
" # Layer initialization\n",
" def __init__(self, n_inputs, n_neurons):\n",
" # Initialize weights and biases\n",
" self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)\n",
" self.biases = np.zeros((1, n_neurons))\n",
"\n",
" # Forward pass\n",
" def forward(self, inputs):\n",
" # Remember input values\n",
" self.inputs = inputs\n",
" # Calculate output values from input ones, weights and biases\n",
" self.output = np.dot(inputs, self.weights) + self.biases\n",
"\n",
" # Backward pass\n",
" def backward(self, dvalues):\n",
" # Gradients on parameters\n",
" self.dweights = np.dot(self.inputs.T, dvalues)\n",
" self.dbiases = np.sum(dvalues, axis=0, keepdims=True)\n",
" # Gradient on values\n",
" self.dinputs = np.dot(dvalues, self.weights.T)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "DB42nEQnFjmt"
},
"source": [
"\n",
"\n",
"RELU ACTIVATION: FORWARD AND BACKWARD PASS
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oJJrADlcFjmt"
},
"outputs": [],
"source": [
"# ReLU activation\n",
"class Activation_ReLU:\n",
" # Forward pass\n",
" def forward(self, inputs):\n",
" # Remember input values\n",
" self.inputs = inputs\n",
" # Calculate output values from inputs\n",
" self.output = np.maximum(0, inputs)\n",
"\n",
" # Backward pass\n",
" def backward(self, dvalues):\n",
" # Since we need to modify the original variable,\n",
" # let’s make a copy of values first\n",
" self.dinputs = dvalues.copy()\n",
" # Zero gradient where input values were negative\n",
" self.dinputs[self.inputs <= 0] = 0\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "82a7lCIQFjmt"
},
"source": [
"\n",
"\n",
"SOFTMAX ACTIVATION: FORWARD PASS
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4-kY3NeDFjmt"
},
"outputs": [],
"source": [
"# Softmax activation\n",
"class Activation_Softmax:\n",
" # Forward pass\n",
" def forward(self, inputs):\n",
" # Get unnormalized probabilities\n",
" exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))\n",
" # Normalize them for each sample\n",
" probabilities = exp_values / np.sum(exp_values, axis=1,keepdims=True)\n",
" self.output = probabilities"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NROH2PwUFjmt"
},
"source": [
"\n",
"LOSS CLASS
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "3pxeeZjbFjmt"
},
"outputs": [],
"source": [
"# Common loss class\n",
"class Loss:\n",
" # Calculates the data and regularization losses\n",
" # given model output and ground truth values\n",
" def calculate(self, output, y):\n",
" # Calculate sample losses\n",
" sample_losses = self.forward(output, y)\n",
" # Calculate mean loss\n",
" data_loss = np.mean(sample_losses)\n",
" # Return loss\n",
" return data_loss"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JUnaSdwBFjmt"
},
"source": [
"\n",
"\n",
"CATEGORICAL CROSS ENTROPY LOSS: FORWARD AND BACKWARD PASS
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "u8Q55WE_Fjmt"
},
"outputs": [],
"source": [
"class Loss_CategoricalCrossentropy(Loss):\n",
" # Forward pass\n",
" def forward(self, y_pred, y_true):\n",
" # Number of samples in a batch\n",
" samples = len(y_pred)\n",
"\n",
" # Clip data to prevent division by 0\n",
" # Clip both sides to not drag mean towards any value\n",
" y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)\n",
"\n",
" # Probabilities for target values -\n",
" # only if categorical labels\n",
" if len(y_true.shape) == 1:\n",
" correct_confidences = y_pred_clipped[\n",
" range(samples),\n",
" y_true\n",
" ]\n",
" # Mask values - only for one-hot encoded labels\n",
" elif len(y_true.shape) == 2:\n",
" correct_confidences = np.sum(\n",
" y_pred_clipped * y_true,\n",
" axis=1\n",
" )\n",
"\n",
" # Losses\n",
" negative_log_likelihoods = -np.log(correct_confidences)\n",
" return negative_log_likelihoods\n",
"\n",
" # Backward pass\n",
" def backward(self, dvalues, y_true):\n",
" # Number of samples\n",
" samples = len(dvalues)\n",
" # Number of labels in every sample\n",
" # We'll use the first sample to count them\n",
" labels = len(dvalues[0])\n",
"\n",
" # If labels are sparse, turn them into one-hot vector\n",
" if len(y_true.shape) == 1:\n",
" y_true = np.eye(labels)[y_true]\n",
"\n",
" # Calculate gradient\n",
" self.dinputs = -y_true / dvalues\n",
" # Normalize gradient\n",
" self.dinputs = self.dinputs / samples"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hbib1JllFjmt"
},
"source": [
"\n",
"\n",
"COMBINED SOFTMAX ACTIVATION AND CATEGORICAL CROSS ENTROPY FOR LAST LAYER: FORWARD AND BACKWARD PASS
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "wTdI_gu9Fjmt"
},
"outputs": [],
"source": [
"# Softmax classifier - combined Softmax activation\n",
"# and cross-entropy loss for faster backward step\n",
"class Activation_Softmax_Loss_CategoricalCrossentropy:\n",
" # Creates activation and loss function objects\n",
" def __init__(self):\n",
" self.activation = Activation_Softmax()\n",
" self.loss = Loss_CategoricalCrossentropy()\n",
"\n",
" # Forward pass\n",
" def forward(self, inputs, y_true):\n",
" # Output layer's activation function\n",
" self.activation.forward(inputs)\n",
" # Set the output\n",
" self.output = self.activation.output\n",
" # Calculate and return loss value\n",
" return self.loss.calculate(self.output, y_true)\n",
"\n",
" # Backward pass\n",
" def backward(self, dvalues, y_true):\n",
" # Number of samples\n",
" samples = len(dvalues)\n",
" # If labels are one-hot encoded,\n",
" # turn them into discrete values\n",
" if len(y_true.shape) == 2:\n",
" y_true = np.argmax(y_true, axis=1)\n",
" # Copy so we can safely modify\n",
" self.dinputs = dvalues.copy()\n",
" # Calculate gradient\n",
" self.dinputs[range(samples), y_true] -= 1\n",
" # Normalize gradient\n",
" self.dinputs = self.dinputs / samples"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "STEbc7_DFjmt"
},
"source": [
"\n",
"\n",
"DATASET
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7tHQAI-bFjmu",
"outputId": "1e3b7a17-42a8-4faa-8eb7-b72572046230"
},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from nnfs.datasets import spiral_data\n",
"import numpy as np\n",
"import nnfs\n",
"nnfs.init()\n",
"import matplotlib.pyplot as plt\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"plt.scatter(X[:, 0], X[:, 1], c=y, cmap='brg')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KOZ15RsfFjmu"
},
"source": [
"\n",
"FULL CODE UPTO THIS POINT: FORWARD AND BACKWARD PASS \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "MvVmaqA1Fjmu",
"outputId": "5d6b274f-adaa-44e5-f248-bd2f74d4ee1c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.33333334 0.33333334 0.33333334]\n",
" [0.33333334 0.33333334 0.33333334]\n",
" [0.33333334 0.3333333 0.33333334]\n",
" [0.3333335 0.33333302 0.33333355]\n",
" [0.33333334 0.33333334 0.33333334]]\n",
"loss: 1.0986118\n",
"acc: 0.33666666666666667\n",
"[[ 7.7209341e-05 -1.0590541e-04 -8.3512554e-05]\n",
" [ 2.8525142e-04 6.1521467e-05 -9.9994701e-05]]\n",
"[[ 0.00036935 -0.00025332 0.00021116]]\n",
"[[ 1.04255480e-04 1.59160336e-05 -1.20171506e-04]\n",
" [-4.91499777e-05 1.94195833e-04 -1.45045851e-04]\n",
" [ 3.61476232e-05 1.08254273e-04 -1.44401885e-04]]\n",
"[[ 1.0388438e-05 -1.0533840e-05 3.9814040e-08]]\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"# Create Dense layer with 2 input features and 3 output values\n",
"dense1 = Layer_Dense(2, 3)\n",
"# Create ReLU activation (to be used with Dense layer):\n",
"activation1 = Activation_ReLU()\n",
"# Create second Dense layer with 3 input features (as we take output\n",
"# of previous layer here) and 3 output values (output values)\n",
"dense2 = Layer_Dense(3, 3)\n",
"# Create Softmax classifier’s combined loss and activation\n",
"loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"# Perform a forward pass of our training data through this layer\n",
"dense1.forward(X)\n",
"# Perform a forward pass through activation function\n",
"# takes the output of first dense layer here\n",
"activation1.forward(dense1.output)\n",
"# Perform a forward pass through second Dense layer\n",
"# takes outputs of activation function of first layer as inputs\n",
"dense2.forward(activation1.output)\n",
"# Perform a forward pass through the activation/loss function\n",
"# takes the output of second dense layer here and returns loss\n",
"loss = loss_activation.forward(dense2.output, y)\n",
"\n",
"# Let’s see output of the first few samples:\n",
"print(loss_activation.output[:5])\n",
"# Print loss value\n",
"print('loss:', loss)\n",
"# Calculate accuracy from output of activation2 and targets\n",
"# calculate values along first axis\n",
"predictions = np.argmax(loss_activation.output, axis=1)\n",
"if len(y.shape) == 2:\n",
" y = np.argmax(y, axis=1)\n",
"accuracy = np.mean(predictions == y)\n",
"# Print accuracy\n",
"print('acc:', accuracy)\n",
"# Backward pass\n",
"loss_activation.backward(loss_activation.output, y)\n",
"dense2.backward(loss_activation.dinputs)\n",
"activation1.backward(dense2.dinputs)\n",
"dense1.backward(activation1.dinputs)\n",
"# Print gradients\n",
"print(dense1.dweights)\n",
"print(dense1.dbiases)\n",
"print(dense2.dweights)\n",
"print(dense2.dbiases)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "yDgOzov1Fjmu",
"outputId": "7d32d783-2485-4ecf-e4ce-cae23205e516"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Iteration 1, Loss: 36.0\n",
"Iteration 2, Loss: 33.872397424621624\n",
"Iteration 3, Loss: 31.87054345809546\n",
"Iteration 4, Loss: 29.98699091998773\n",
"Iteration 5, Loss: 28.214761511794592\n",
"Iteration 6, Loss: 26.54726775906168\n",
"Iteration 7, Loss: 24.978326552541866\n",
"Iteration 8, Loss: 23.5021050739742\n",
"Iteration 9, Loss: 22.11313179151597\n",
"Iteration 10, Loss: 20.806246424284897\n",
"Iteration 11, Loss: 19.576596334671486\n",
"Iteration 12, Loss: 18.41961908608719\n",
"Iteration 13, Loss: 17.33101994032309\n",
"Iteration 14, Loss: 16.306757070164853\n",
"Iteration 15, Loss: 15.343027506224132\n",
"Iteration 16, Loss: 14.436253786815284\n",
"Iteration 17, Loss: 13.583071280700132\n",
"Iteration 18, Loss: 12.780312744165439\n",
"Iteration 19, Loss: 12.024995767388878\n",
"Iteration 20, Loss: 11.314319082257104\n",
"Iteration 21, Loss: 10.64564263994962\n",
"Iteration 22, Loss: 10.016485041642266\n",
"Iteration 23, Loss: 9.424510031713222\n",
"Iteration 24, Loss: 8.867521365009814\n",
"Iteration 25, Loss: 8.34345204094211\n",
"Iteration 26, Loss: 7.850353118483743\n",
"Iteration 27, Loss: 7.386397874602818\n",
"Iteration 28, Loss: 6.94986173712617\n",
"Iteration 29, Loss: 6.539124434950737\n",
"Iteration 30, Loss: 6.1526621719118015\n",
"Iteration 31, Loss: 5.789039869058961\n",
"Iteration 32, Loss: 5.446907999417336\n",
"Iteration 33, Loss: 5.124995576577539\n",
"Iteration 34, Loss: 4.822108497170647\n",
"Iteration 35, Loss: 4.537121521071987\n",
"Iteration 36, Loss: 4.268978030723312\n",
"Iteration 37, Loss: 4.01668121563854\n",
"Iteration 38, Loss: 3.7792956126389763\n",
"Iteration 39, Loss: 3.5559389510643094\n",
"Iteration 40, Loss: 3.345782865003274\n",
"Iteration 41, Loss: 3.1480471758404285\n",
"Iteration 42, Loss: 2.961997679823884\n",
"Iteration 43, Loss: 2.78694359065541\n",
"Iteration 44, Loss: 2.622235303237792\n",
"Iteration 45, Loss: 2.467261121418954\n",
"Iteration 46, Loss: 2.321446092335641\n",
"Iteration 47, Loss: 2.184248486806066\n",
"Iteration 48, Loss: 2.0551593804914616\n",
"Iteration 49, Loss: 1.9336995852420789\n",
"Iteration 50, Loss: 1.8194178573235094\n",
"Iteration 51, Loss: 1.7118903069357754\n",
"Iteration 52, Loss: 1.6107175940030252\n",
"Iteration 53, Loss: 1.5155241897377694\n",
"Iteration 54, Loss: 1.4259567411109748\n",
"Iteration 55, Loss: 1.3416826255281136\n",
"Iteration 56, Loss: 1.262389208248047\n",
"Iteration 57, Loss: 1.1877819791340551\n",
"Iteration 58, Loss: 1.1175840765571434\n",
"Iteration 59, Loss: 1.0515348500680068\n",
"Iteration 60, Loss: 0.9893891461492582\n",
"Iteration 61, Loss: 0.930916260625565\n",
"Iteration 62, Loss: 0.875899078709395\n",
"Iteration 63, Loss: 0.8241334819517507\n",
"Iteration 64, Loss: 0.7754271861095672\n",
"Iteration 65, Loss: 0.7295994320679934\n",
"Iteration 66, Loss: 0.6864801042040583\n",
"Iteration 67, Loss: 0.6459091389617334\n",
"Iteration 68, Loss: 0.6077358933180028\n",
"Iteration 69, Loss: 0.5718187120029812\n",
"Iteration 70, Loss: 0.5380242202642829\n",
"Iteration 71, Loss: 0.5062269967452033\n",
"Iteration 72, Loss: 0.4763089781884024\n",
"Iteration 73, Loss: 0.4481591180173807\n",
"Iteration 74, Loss: 0.42167291418136477\n",
"Iteration 75, Loss: 0.3967520449790852\n",
"Iteration 76, Loss: 0.3733039992368791\n",
"Iteration 77, Loss: 0.3512417316144445\n",
"Iteration 78, Loss: 0.33048334753976116\n",
"Iteration 79, Loss: 0.31095177724411444\n",
"Iteration 80, Loss: 0.2925745286179104\n",
"Iteration 81, Loss: 0.2752833763568879\n",
"Iteration 82, Loss: 0.25901412505149535\n",
"Iteration 83, Loss: 0.2437063914735247\n",
"Iteration 84, Loss: 0.22930333977371198\n",
"Iteration 85, Loss: 0.21575151284725816\n",
"Iteration 86, Loss: 0.2030006012946216\n",
"Iteration 87, Loss: 0.19100326852350488\n",
"Iteration 88, Loss: 0.17971497196649536\n",
"Iteration 89, Loss: 0.1690938194815031\n",
"Iteration 90, Loss: 0.1591003719214838\n",
"Iteration 91, Loss: 0.14969754273736763\n",
"Iteration 92, Loss: 0.14085041966208015\n",
"Iteration 93, Loss: 0.13252615564761738\n",
"Iteration 94, Loss: 0.1246938532452423\n",
"Iteration 95, Loss: 0.11732446503349986\n",
"Iteration 96, Loss: 0.11039058885430607\n",
"Iteration 97, Loss: 0.10386649785129919\n",
"Iteration 98, Loss: 0.09772798570124883\n",
"Iteration 99, Loss: 0.09195226348280558\n",
"Iteration 100, Loss: 0.0865178816583512\n",
"Iteration 101, Loss: 0.08140467291758889\n",
"Iteration 102, Loss: 0.07659366262828358\n",
"Iteration 103, Loss: 0.07206697005843195\n",
"Iteration 104, Loss: 0.06780781192053903\n",
"Iteration 105, Loss: 0.06380037696069592\n",
"Iteration 106, Loss: 0.06002977345222309\n",
"Iteration 107, Loss: 0.0564820075507719\n",
"Iteration 108, Loss: 0.05314393144118542\n",
"Iteration 109, Loss: 0.050003114234231524\n",
"Iteration 110, Loss: 0.04704793686603195\n",
"Iteration 111, Loss: 0.04426740148833972\n",
"Iteration 112, Loss: 0.04165120020443161\n",
"Iteration 113, Loss: 0.03918961375201954\n",
"Iteration 114, Loss: 0.0368735034129829\n",
"Iteration 115, Loss: 0.034694277992582755\n",
"Iteration 116, Loss: 0.032643851730490094\n",
"Iteration 117, Loss: 0.03071459534999028\n",
"Iteration 118, Loss: 0.028899363239415818\n",
"Iteration 119, Loss: 0.027191414181739672\n",
"Iteration 120, Loss: 0.02558439994540113\n",
"Iteration 121, Loss: 0.024072362337913877\n",
"Iteration 122, Loss: 0.022649683089386127\n",
"Iteration 123, Loss: 0.021311092099735786\n",
"Iteration 124, Loss: 0.02005160424149179\n",
"Iteration 125, Loss: 0.01886655505507656\n",
"Iteration 126, Loss: 0.017751540667355833\n",
"Iteration 127, Loss: 0.016702427744061103\n",
"Iteration 128, Loss: 0.01571531497821091\n",
"Iteration 129, Loss: 0.014786535770396103\n",
"Iteration 130, Loss: 0.013912651762769943\n",
"Iteration 131, Loss: 0.013090418519936803\n",
"Iteration 132, Loss: 0.012316768931710837\n",
"Iteration 133, Loss: 0.011588849600126475\n",
"Iteration 134, Loss: 0.010903943586632107\n",
"Iteration 135, Loss: 0.010259526183227799\n",
"Iteration 136, Loss: 0.009653186757193668\n",
"Iteration 137, Loss: 0.009082688171817357\n",
"Iteration 138, Loss: 0.008545899068542421\n",
"Iteration 139, Loss: 0.00804083320361364\n",
"Iteration 140, Loss: 0.007565618804557518\n",
"Iteration 141, Loss: 0.007118492429622391\n",
"Iteration 142, Loss: 0.006697793120481266\n",
"Iteration 143, Loss: 0.0063019473730584336\n",
"Iteration 144, Loss: 0.005929501997799936\n",
"Iteration 145, Loss: 0.005579070290327091\n",
"Iteration 146, Loss: 0.005249347396309216\n",
"Iteration 147, Loss: 0.004939114136252681\n",
"Iteration 148, Loss: 0.004647215154254898\n",
"Iteration 149, Loss: 0.00437256400626425\n",
"Iteration 150, Loss: 0.004114139259196158\n",
"Iteration 151, Loss: 0.0038709956233987848\n",
"Iteration 152, Loss: 0.0036422222163822442\n",
"Iteration 153, Loss: 0.0034269635873455254\n",
"Iteration 154, Loss: 0.0032244300300798123\n",
"Iteration 155, Loss: 0.003033866206344064\n",
"Iteration 156, Loss: 0.0028545694817259646\n",
"Iteration 157, Loss: 0.0026858615040063873\n",
"Iteration 158, Loss: 0.002527124440860861\n",
"Iteration 159, Loss: 0.002377772426750458\n",
"Iteration 160, Loss: 0.0022372501846465924\n",
"Iteration 161, Loss: 0.002105026221950533\n",
"Iteration 162, Loss: 0.0019806188966821317\n",
"Iteration 163, Loss: 0.001863566163059441\n",
"Iteration 164, Loss: 0.0017534302886055876\n",
"Iteration 165, Loss: 0.0016498016244949178\n",
"Iteration 166, Loss: 0.0015522968336895225\n",
"Iteration 167, Loss: 0.0014605572212372654\n",
"Iteration 168, Loss: 0.0013742383231737623\n",
"Iteration 169, Loss: 0.0012930183418168389\n",
"Iteration 170, Loss: 0.0012166008279945002\n",
"Iteration 171, Loss: 0.0011447005613673634\n",
"Iteration 172, Loss: 0.0010770513341135804\n",
"Iteration 173, Loss: 0.001013397095948145\n",
"Iteration 174, Loss: 0.0009535029620325111\n",
"Iteration 175, Loss: 0.0008971534673183893\n",
"Iteration 176, Loss: 0.0008441301639000644\n",
"Iteration 177, Loss: 0.0007942435095401501\n",
"Iteration 178, Loss: 0.0007473036766382048\n",
"Iteration 179, Loss: 0.0007031374518087182\n",
"Iteration 180, Loss: 0.0006615806720993984\n",
"Iteration 181, Loss: 0.0006224808039162045\n",
"Iteration 182, Loss: 0.0005856932236775429\n",
"Iteration 183, Loss: 0.0005510780772974099\n",
"Iteration 184, Loss: 0.0005185112321657664\n",
"Iteration 185, Loss: 0.00048786689510026934\n",
"Iteration 186, Loss: 0.00045903387854597503\n",
"Iteration 187, Loss: 0.00043190420223823955\n",
"Iteration 188, Loss: 0.000406378034681195\n",
"Iteration 189, Loss: 0.00038236074013664776\n",
"Iteration 190, Loss: 0.0003597649139507893\n",
"Iteration 191, Loss: 0.0003385032407062897\n",
"Iteration 192, Loss: 0.00031849748027454767\n",
"Iteration 193, Loss: 0.00029967346881992795\n",
"Iteration 194, Loss: 0.0002819629431575354\n",
"Iteration 195, Loss: 0.0002652991815966534\n",
"Iteration 196, Loss: 0.00024961903501571355\n",
"Iteration 197, Loss: 0.00023486641976601822\n",
"Iteration 198, Loss: 0.00022098629075865584\n",
"Iteration 199, Loss: 0.00020792651372860275\n",
"Iteration 200, Loss: 0.00019563773612380077\n",
"Final weights: [-3.3990955 -0.20180899 0.80271349]\n",
"Final bias: 0.6009044964517248\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"# Initial parameters\n",
"weights = np.array([-3.0, -1.0, 2.0])\n",
"bias = 1.0\n",
"inputs = np.array([1.0, -2.0, 3.0])\n",
"target_output = 0.0\n",
"learning_rate = 0.001\n",
"\n",
"def relu(x):\n",
" return np.maximum(0, x)\n",
"\n",
"def relu_derivative(x):\n",
" return np.where(x > 0, 1.0, 0.0)\n",
"\n",
"for iteration in range(200):\n",
" # Forward pass\n",
" linear_output = np.dot(weights, inputs) + bias\n",
" output = relu(linear_output)\n",
" loss = (output - target_output) ** 2\n",
"\n",
" # Backward pass\n",
" dloss_doutput = 2 * (output - target_output)\n",
" doutput_dlinear = relu_derivative(linear_output)\n",
" dlinear_dweights = inputs\n",
" dlinear_dbias = 1.0\n",
"\n",
" dloss_dlinear = dloss_doutput * doutput_dlinear\n",
" dloss_dweights = dloss_dlinear * dlinear_dweights\n",
" dloss_dbias = dloss_dlinear * dlinear_dbias\n",
"\n",
" # Update weights and bias\n",
" weights -= learning_rate * dloss_dweights\n",
" bias -= learning_rate * dloss_dbias\n",
"\n",
" # Print the loss for this iteration\n",
" print(f\"Iteration {iteration + 1}, Loss: {loss}\")\n",
"\n",
"print(\"Final weights:\", weights)\n",
"print(\"Final bias:\", bias)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZV_fcx05Fjmu"
},
"source": [
"\n",
"OPTIMIZERS GRADIENT DESCENT \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lCy3lHCsFjmu"
},
"outputs": [],
"source": [
"# SGD optimizer\n",
"class Optimizer_SGD:\n",
" # Initialize optimizer - set settings,\n",
" # learning rate of 1. is default for this optimizer\n",
" def __init__(self, learning_rate=0.5):\n",
" self.learning_rate = learning_rate\n",
" # Update parameters\n",
" def update_params(self, layer):\n",
" layer.weights += -self.learning_rate * layer.dweights\n",
" layer.biases += -self.learning_rate * layer.dbiases"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oXmtcdZeFjmu",
"outputId": "daa697a4-64b9-4833-d38b-82bd7848d970"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch: 0, acc: 0.337, loss: 1.099\n",
"epoch: 100, acc: 0.403, loss: 1.097\n",
"epoch: 200, acc: 0.407, loss: 1.084\n",
"epoch: 300, acc: 0.403, loss: 1.077\n",
"epoch: 400, acc: 0.403, loss: 1.075\n",
"epoch: 500, acc: 0.403, loss: 1.074\n",
"epoch: 600, acc: 0.397, loss: 1.073\n",
"epoch: 700, acc: 0.397, loss: 1.073\n",
"epoch: 800, acc: 0.400, loss: 1.072\n",
"epoch: 900, acc: 0.410, loss: 1.071\n",
"epoch: 1000, acc: 0.410, loss: 1.070\n",
"epoch: 1100, acc: 0.410, loss: 1.068\n",
"epoch: 1200, acc: 0.410, loss: 1.066\n",
"epoch: 1300, acc: 0.413, loss: 1.063\n",
"epoch: 1400, acc: 0.410, loss: 1.058\n",
"epoch: 1500, acc: 0.430, loss: 1.053\n",
"epoch: 1600, acc: 0.443, loss: 1.046\n",
"epoch: 1700, acc: 0.447, loss: 1.037\n",
"epoch: 1800, acc: 0.427, loss: 1.052\n",
"epoch: 1900, acc: 0.417, loss: 1.053\n",
"epoch: 2000, acc: 0.407, loss: 1.051\n",
"epoch: 2100, acc: 0.410, loss: 1.048\n",
"epoch: 2200, acc: 0.413, loss: 1.044\n",
"epoch: 2300, acc: 0.420, loss: 1.041\n",
"epoch: 2400, acc: 0.413, loss: 1.037\n",
"epoch: 2500, acc: 0.417, loss: 1.033\n",
"epoch: 2600, acc: 0.427, loss: 1.027\n",
"epoch: 2700, acc: 0.437, loss: 1.026\n",
"epoch: 2800, acc: 0.440, loss: 1.020\n",
"epoch: 2900, acc: 0.460, loss: 1.016\n",
"epoch: 3000, acc: 0.470, loss: 1.013\n",
"epoch: 3100, acc: 0.477, loss: 1.009\n",
"epoch: 3200, acc: 0.477, loss: 1.010\n",
"epoch: 3300, acc: 0.463, loss: 1.002\n",
"epoch: 3400, acc: 0.437, loss: 0.996\n",
"epoch: 3500, acc: 0.420, loss: 1.026\n",
"epoch: 3600, acc: 0.473, loss: 0.982\n",
"epoch: 3700, acc: 0.450, loss: 0.975\n",
"epoch: 3800, acc: 0.480, loss: 0.993\n",
"epoch: 3900, acc: 0.493, loss: 0.965\n",
"epoch: 4000, acc: 0.447, loss: 0.953\n",
"epoch: 4100, acc: 0.440, loss: 0.963\n",
"epoch: 4200, acc: 0.530, loss: 0.947\n",
"epoch: 4300, acc: 0.490, loss: 0.929\n",
"epoch: 4400, acc: 0.470, loss: 0.948\n",
"epoch: 4500, acc: 0.537, loss: 0.917\n",
"epoch: 4600, acc: 0.453, loss: 0.904\n",
"epoch: 4700, acc: 0.477, loss: 0.934\n",
"epoch: 4800, acc: 0.507, loss: 0.903\n",
"epoch: 4900, acc: 0.483, loss: 0.922\n",
"epoch: 5000, acc: 0.467, loss: 0.877\n",
"epoch: 5100, acc: 0.453, loss: 0.911\n",
"epoch: 5200, acc: 0.523, loss: 0.885\n",
"epoch: 5300, acc: 0.510, loss: 0.918\n",
"epoch: 5400, acc: 0.480, loss: 0.874\n",
"epoch: 5500, acc: 0.510, loss: 0.877\n",
"epoch: 5600, acc: 0.480, loss: 0.897\n",
"epoch: 5700, acc: 0.490, loss: 0.860\n",
"epoch: 5800, acc: 0.557, loss: 0.886\n",
"epoch: 5900, acc: 0.500, loss: 0.879\n",
"epoch: 6000, acc: 0.493, loss: 0.861\n",
"epoch: 6100, acc: 0.470, loss: 0.917\n",
"epoch: 6200, acc: 0.503, loss: 0.872\n",
"epoch: 6300, acc: 0.563, loss: 0.870\n",
"epoch: 6400, acc: 0.470, loss: 0.894\n",
"epoch: 6500, acc: 0.500, loss: 0.864\n",
"epoch: 6600, acc: 0.567, loss: 0.877\n",
"epoch: 6700, acc: 0.480, loss: 0.872\n",
"epoch: 6800, acc: 0.513, loss: 0.848\n",
"epoch: 6900, acc: 0.550, loss: 0.849\n",
"epoch: 7000, acc: 0.573, loss: 0.856\n",
"epoch: 7100, acc: 0.520, loss: 0.840\n",
"epoch: 7200, acc: 0.540, loss: 0.827\n",
"epoch: 7300, acc: 0.610, loss: 0.820\n",
"epoch: 7400, acc: 0.550, loss: 0.810\n",
"epoch: 7500, acc: 0.547, loss: 0.809\n",
"epoch: 7600, acc: 0.627, loss: 0.825\n",
"epoch: 7700, acc: 0.553, loss: 0.809\n",
"epoch: 7800, acc: 0.580, loss: 0.801\n",
"epoch: 7900, acc: 0.607, loss: 0.798\n",
"epoch: 8000, acc: 0.533, loss: 0.832\n",
"epoch: 8100, acc: 0.577, loss: 0.834\n",
"epoch: 8200, acc: 0.513, loss: 0.848\n",
"epoch: 8300, acc: 0.567, loss: 0.799\n",
"epoch: 8400, acc: 0.570, loss: 0.790\n",
"epoch: 8500, acc: 0.603, loss: 0.789\n",
"epoch: 8600, acc: 0.613, loss: 0.806\n",
"epoch: 8700, acc: 0.573, loss: 0.788\n",
"epoch: 8800, acc: 0.603, loss: 0.783\n",
"epoch: 8900, acc: 0.587, loss: 0.827\n",
"epoch: 9000, acc: 0.537, loss: 0.841\n",
"epoch: 9100, acc: 0.610, loss: 0.788\n",
"epoch: 9200, acc: 0.580, loss: 0.767\n",
"epoch: 9300, acc: 0.597, loss: 0.776\n",
"epoch: 9400, acc: 0.613, loss: 0.794\n",
"epoch: 9500, acc: 0.553, loss: 0.799\n",
"epoch: 9600, acc: 0.610, loss: 0.783\n",
"epoch: 9700, acc: 0.583, loss: 0.755\n",
"epoch: 9800, acc: 0.593, loss: 0.779\n",
"epoch: 9900, acc: 0.613, loss: 0.794\n",
"epoch: 10000, acc: 0.573, loss: 0.768\n"
]
}
],
"source": [
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"\n",
"# Create Dense layer with 2 input features and 64 output values\n",
"dense1 = Layer_Dense(2, 64)\n",
"\n",
"# Create ReLU activation (to be used with Dense layer)\n",
"activation1 = Activation_ReLU()\n",
"\n",
"# Create second Dense layer with 64 input features (as we take output\n",
"# of previous layer here) and 3 output values (output values)\n",
"dense2 = Layer_Dense(64, 3)\n",
"\n",
"# Create Softmax classifier's combined loss and activation\n",
"loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"\n",
"# Create optimizer\n",
"optimizer = Optimizer_SGD()\n",
"\n",
"# Train in loop\n",
"for epoch in range(10001):\n",
" # Perform a forward pass of our training data through this layer\n",
" dense1.forward(X)\n",
"\n",
" # Perform a forward pass through activation function\n",
" # takes the output of first dense layer here\n",
" activation1.forward(dense1.output)\n",
"\n",
" # Perform a forward pass through second Dense layer\n",
" # takes outputs of activation function of first layer as inputs\n",
" dense2.forward(activation1.output)\n",
"\n",
" # Perform a forward pass through the activation/loss function\n",
" # takes the output of second dense layer here and returns loss\n",
" loss = loss_activation.forward(dense2.output, y)\n",
"\n",
" # Calculate accuracy from output of activation2 and targets\n",
" # calculate values along first axis\n",
" predictions = np.argmax(loss_activation.output, axis=1)\n",
" if len(y.shape) == 2:\n",
" y = np.argmax(y, axis=1)\n",
" accuracy = np.mean(predictions == y)\n",
"\n",
" if not epoch % 100:\n",
" print(f'epoch: {epoch}, ' +\n",
" f'acc: {accuracy:.3f}, ' +\n",
" f'loss: {loss:.3f}')\n",
"\n",
" # Backward pass\n",
" loss_activation.backward(loss_activation.output, y)\n",
" dense2.backward(loss_activation.dinputs)\n",
" activation1.backward(dense2.dinputs)\n",
" dense1.backward(activation1.dinputs)\n",
"\n",
" # Update weights and biases\n",
" optimizer.update_params(dense1)\n",
" optimizer.update_params(dense2)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lXLjUFnmFjmx"
},
"source": [
"\n",
"OPTIMIZERS: LEARNING RATE DECAY \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "y99yd9jrFjmx"
},
"outputs": [],
"source": [
"class Optimizer_SGD:\n",
" # Initialize optimizer - set settings,\n",
" # learning rate of 1. is default for this optimizer\n",
" def __init__(self, learning_rate=1., decay=0.):\n",
" self.learning_rate = learning_rate\n",
" self.current_learning_rate = learning_rate\n",
" self.decay = decay\n",
" self.iterations = 0\n",
"\n",
" # Call once before any parameter updates\n",
" def pre_update_params(self):\n",
" if self.decay:\n",
" self.current_learning_rate = self.learning_rate * \\\n",
" (1. / (1. + self.decay * self.iterations))\n",
"\n",
" # Update parameters\n",
" def update_params(self, layer):\n",
" layer.weights += -self.current_learning_rate * layer.dweights\n",
" layer.biases += -self.current_learning_rate * layer.dbiases\n",
"\n",
" # Call once after any parameter updates\n",
" def post_update_params(self):\n",
" self.iterations += 1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fXewScxsFjmx",
"outputId": "f55251a7-a85b-4fd3-b75e-c314e4a9e01a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch: 0, acc: 0.287, loss: 1.099, lr: 1.0\n",
"epoch: 100, acc: 0.410, loss: 1.083, lr: 0.9099181073703367\n",
"epoch: 200, acc: 0.447, loss: 1.067, lr: 0.8340283569641367\n",
"epoch: 300, acc: 0.443, loss: 1.065, lr: 0.7698229407236336\n",
"epoch: 400, acc: 0.433, loss: 1.064, lr: 0.7147962830593281\n",
"epoch: 500, acc: 0.437, loss: 1.063, lr: 0.66711140760507\n",
"epoch: 600, acc: 0.423, loss: 1.062, lr: 0.6253908692933083\n",
"epoch: 700, acc: 0.430, loss: 1.061, lr: 0.5885815185403178\n",
"epoch: 800, acc: 0.427, loss: 1.060, lr: 0.5558643690939411\n",
"epoch: 900, acc: 0.430, loss: 1.058, lr: 0.526592943654555\n",
"epoch: 1000, acc: 0.443, loss: 1.056, lr: 0.5002501250625312\n",
"epoch: 1100, acc: 0.447, loss: 1.054, lr: 0.4764173415912339\n",
"epoch: 1200, acc: 0.450, loss: 1.052, lr: 0.45475216007276037\n",
"epoch: 1300, acc: 0.450, loss: 1.050, lr: 0.43497172683775553\n",
"epoch: 1400, acc: 0.460, loss: 1.048, lr: 0.4168403501458941\n",
"epoch: 1500, acc: 0.453, loss: 1.045, lr: 0.4001600640256102\n",
"epoch: 1600, acc: 0.447, loss: 1.042, lr: 0.3847633705271258\n",
"epoch: 1700, acc: 0.447, loss: 1.038, lr: 0.3705075954057058\n",
"epoch: 1800, acc: 0.447, loss: 1.034, lr: 0.35727045373347627\n",
"epoch: 1900, acc: 0.467, loss: 1.028, lr: 0.3449465332873405\n",
"epoch: 2000, acc: 0.477, loss: 1.022, lr: 0.33344448149383127\n",
"epoch: 2100, acc: 0.500, loss: 1.015, lr: 0.32268473701193934\n",
"epoch: 2200, acc: 0.510, loss: 1.007, lr: 0.31259768677711786\n",
"epoch: 2300, acc: 0.520, loss: 1.000, lr: 0.3031221582297666\n",
"epoch: 2400, acc: 0.537, loss: 0.992, lr: 0.29420417769932333\n",
"epoch: 2500, acc: 0.547, loss: 0.984, lr: 0.2857959416976279\n",
"epoch: 2600, acc: 0.547, loss: 0.977, lr: 0.2778549597110308\n",
"epoch: 2700, acc: 0.543, loss: 0.969, lr: 0.2703433360367667\n",
"epoch: 2800, acc: 0.550, loss: 0.963, lr: 0.26322716504343247\n",
"epoch: 2900, acc: 0.563, loss: 0.956, lr: 0.25647601949217746\n",
"epoch: 3000, acc: 0.567, loss: 0.951, lr: 0.25006251562890724\n",
"epoch: 3100, acc: 0.570, loss: 0.945, lr: 0.2439619419370578\n",
"epoch: 3200, acc: 0.577, loss: 0.940, lr: 0.23815194093831865\n",
"epoch: 3300, acc: 0.577, loss: 0.935, lr: 0.23261223540358225\n",
"epoch: 3400, acc: 0.580, loss: 0.930, lr: 0.22732439190725165\n",
"epoch: 3500, acc: 0.580, loss: 0.925, lr: 0.22227161591464767\n",
"epoch: 3600, acc: 0.580, loss: 0.920, lr: 0.21743857360295715\n",
"epoch: 3700, acc: 0.590, loss: 0.916, lr: 0.21281123643328367\n",
"epoch: 3800, acc: 0.593, loss: 0.911, lr: 0.20837674515524068\n",
"epoch: 3900, acc: 0.593, loss: 0.907, lr: 0.20412329046744235\n",
"epoch: 4000, acc: 0.590, loss: 0.902, lr: 0.2000400080016003\n",
"epoch: 4100, acc: 0.587, loss: 0.898, lr: 0.19611688566385566\n",
"epoch: 4200, acc: 0.587, loss: 0.893, lr: 0.19234468166955185\n",
"epoch: 4300, acc: 0.583, loss: 0.889, lr: 0.18871485185884126\n",
"epoch: 4400, acc: 0.587, loss: 0.884, lr: 0.18521948508983144\n",
"epoch: 4500, acc: 0.590, loss: 0.880, lr: 0.18185124568103292\n",
"epoch: 4600, acc: 0.590, loss: 0.875, lr: 0.1786033220217896\n",
"epoch: 4700, acc: 0.597, loss: 0.871, lr: 0.1754693805930865\n",
"epoch: 4800, acc: 0.613, loss: 0.866, lr: 0.17244352474564578\n",
"epoch: 4900, acc: 0.617, loss: 0.861, lr: 0.16952025767079165\n",
"epoch: 5000, acc: 0.620, loss: 0.856, lr: 0.16669444907484582\n",
"epoch: 5100, acc: 0.610, loss: 0.851, lr: 0.16396130513198884\n",
"epoch: 5200, acc: 0.617, loss: 0.847, lr: 0.16131634134537828\n",
"epoch: 5300, acc: 0.623, loss: 0.842, lr: 0.15875535799333226\n",
"epoch: 5400, acc: 0.627, loss: 0.837, lr: 0.1562744178777934\n",
"epoch: 5500, acc: 0.627, loss: 0.832, lr: 0.15386982612709646\n",
"epoch: 5600, acc: 0.633, loss: 0.828, lr: 0.15153811183512653\n",
"epoch: 5700, acc: 0.627, loss: 0.822, lr: 0.14927601134497687\n",
"epoch: 5800, acc: 0.633, loss: 0.817, lr: 0.14708045300779526\n",
"epoch: 5900, acc: 0.637, loss: 0.813, lr: 0.14494854326714016\n",
"epoch: 6000, acc: 0.640, loss: 0.808, lr: 0.1428775539362766\n",
"epoch: 6100, acc: 0.643, loss: 0.803, lr: 0.1408649105507818\n",
"epoch: 6200, acc: 0.653, loss: 0.798, lr: 0.13890818169190167\n",
"epoch: 6300, acc: 0.653, loss: 0.793, lr: 0.13700506918755992\n",
"epoch: 6400, acc: 0.657, loss: 0.788, lr: 0.13515339910798757\n",
"epoch: 6500, acc: 0.653, loss: 0.783, lr: 0.13335111348179757\n",
"epoch: 6600, acc: 0.657, loss: 0.779, lr: 0.13159626266614027\n",
"epoch: 6700, acc: 0.660, loss: 0.774, lr: 0.12988699831146902\n",
"epoch: 6800, acc: 0.667, loss: 0.769, lr: 0.12822156686754713\n",
"epoch: 6900, acc: 0.667, loss: 0.764, lr: 0.126598303582732\n",
"epoch: 7000, acc: 0.670, loss: 0.760, lr: 0.12501562695336915\n",
"epoch: 7100, acc: 0.667, loss: 0.755, lr: 0.12347203358439313\n",
"epoch: 7200, acc: 0.667, loss: 0.751, lr: 0.12196609342602757\n",
"epoch: 7300, acc: 0.670, loss: 0.747, lr: 0.12049644535486204\n",
"epoch: 7400, acc: 0.677, loss: 0.742, lr: 0.11906179307060363\n",
"epoch: 7500, acc: 0.683, loss: 0.738, lr: 0.11766090128250381\n",
"epoch: 7600, acc: 0.677, loss: 0.734, lr: 0.11629259216187929\n",
"epoch: 7700, acc: 0.680, loss: 0.730, lr: 0.11495574203931487\n",
"epoch: 7800, acc: 0.683, loss: 0.726, lr: 0.11364927832708263\n",
"epoch: 7900, acc: 0.680, loss: 0.722, lr: 0.11237217664906168\n",
"epoch: 8000, acc: 0.677, loss: 0.718, lr: 0.11112345816201799\n",
"epoch: 8100, acc: 0.680, loss: 0.715, lr: 0.10990218705352237\n",
"epoch: 8200, acc: 0.680, loss: 0.711, lr: 0.10870746820306555\n",
"epoch: 8300, acc: 0.680, loss: 0.708, lr: 0.1075384449940854\n",
"epoch: 8400, acc: 0.680, loss: 0.704, lr: 0.10639429726566654\n",
"epoch: 8500, acc: 0.687, loss: 0.701, lr: 0.10527423939362038\n",
"epoch: 8600, acc: 0.687, loss: 0.698, lr: 0.10417751849150952\n",
"epoch: 8700, acc: 0.687, loss: 0.694, lr: 0.10310341272296113\n",
"epoch: 8800, acc: 0.693, loss: 0.691, lr: 0.1020512297173181\n",
"epoch: 8900, acc: 0.690, loss: 0.688, lr: 0.10102030508132134\n",
"epoch: 9000, acc: 0.697, loss: 0.685, lr: 0.1000100010001\n",
"epoch: 9100, acc: 0.700, loss: 0.682, lr: 0.09901970492127933\n",
"epoch: 9200, acc: 0.700, loss: 0.679, lr: 0.09804882831650162\n",
"epoch: 9300, acc: 0.703, loss: 0.676, lr: 0.09709680551509856\n",
"epoch: 9400, acc: 0.703, loss: 0.673, lr: 0.09616309260505818\n",
"epoch: 9500, acc: 0.707, loss: 0.670, lr: 0.09524716639679968\n",
"epoch: 9600, acc: 0.697, loss: 0.666, lr: 0.09434852344560807\n",
"epoch: 9700, acc: 0.717, loss: 0.662, lr: 0.09346667912889055\n",
"epoch: 9800, acc: 0.713, loss: 0.658, lr: 0.09260116677470137\n",
"epoch: 9900, acc: 0.713, loss: 0.656, lr: 0.09175153683824203\n",
"epoch: 10000, acc: 0.717, loss: 0.653, lr: 0.09091735612328393\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"# Assuming the necessary classes (Layer_Dense, Activation_ReLU,\n",
"# Activation_Softmax_Loss_CategoricalCrossentropy, and spiral_data) are defined elsewhere\n",
"\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"\n",
"# Create Dense layer with 2 input features and 64 output values\n",
"dense1 = Layer_Dense(2, 64)\n",
"\n",
"# Create ReLU activation (to be used with Dense layer)\n",
"activation1 = Activation_ReLU()\n",
"\n",
"# Create second Dense layer with 64 input features (as we take output\n",
"# of previous layer here) and 3 output values (output values)\n",
"dense2 = Layer_Dense(64, 3)\n",
"\n",
"# Create Softmax classifier's combined loss and activation\n",
"loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"\n",
"# Create optimizer\n",
"optimizer = Optimizer_SGD(decay=1e-3)\n",
"\n",
"# Train in loop\n",
"for epoch in range(10001):\n",
" # Perform a forward pass of our training data through this layer\n",
" dense1.forward(X)\n",
"\n",
" # Perform a forward pass through activation function\n",
" # takes the output of first dense layer here\n",
" activation1.forward(dense1.output)\n",
"\n",
" # Perform a forward pass through second Dense layer\n",
" # takes outputs of activation function of first layer as inputs\n",
" dense2.forward(activation1.output)\n",
"\n",
" # Perform a forward pass through the activation/loss function\n",
" # takes the output of second dense layer here and returns loss\n",
" loss = loss_activation.forward(dense2.output, y)\n",
"\n",
" # Calculate accuracy from output of activation2 and targets\n",
" # calculate values along first axis\n",
" predictions = np.argmax(loss_activation.output, axis=1)\n",
" if len(y.shape) == 2:\n",
" y = np.argmax(y, axis=1)\n",
" accuracy = np.mean(predictions == y)\n",
"\n",
" if not epoch % 100:\n",
" print(f'epoch: {epoch}, ' +\n",
" f'acc: {accuracy:.3f}, ' +\n",
" f'loss: {loss:.3f}, ' +\n",
" f'lr: {optimizer.current_learning_rate}')\n",
"\n",
" # Backward pass\n",
" loss_activation.backward(loss_activation.output, y)\n",
" dense2.backward(loss_activation.dinputs)\n",
" activation1.backward(dense2.dinputs)\n",
" dense1.backward(activation1.dinputs)\n",
"\n",
" # Update weights and biases\n",
" optimizer.pre_update_params()\n",
" optimizer.update_params(dense1)\n",
" optimizer.update_params(dense2)\n",
" optimizer.post_update_params()\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7nloxILOFjmx"
},
"source": [
"\n",
"OPTIMIZERS: MOMENTUM \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "nMYZmJkfFjmx"
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"class Optimizer_SGD:\n",
" # Initialize optimizer - set settings,\n",
" # learning rate of 1. is default for this optimizer\n",
" def __init__(self, learning_rate=1., decay=0., momentum=0.):\n",
" self.learning_rate = learning_rate\n",
" self.current_learning_rate = learning_rate\n",
" self.decay = decay\n",
" self.iterations = 0\n",
" self.momentum = momentum\n",
"\n",
" # Call once before any parameter updates\n",
" def pre_update_params(self):\n",
" if self.decay:\n",
" self.current_learning_rate = self.learning_rate * \\\n",
" (1. / (1. + self.decay * self.iterations))\n",
"\n",
" # Update parameters\n",
" def update_params(self, layer):\n",
" # If we use momentum\n",
" if self.momentum:\n",
" # If layer does not contain momentum arrays, create them\n",
" # filled with zeros\n",
" if not hasattr(layer, 'weight_momentums'):\n",
" layer.weight_momentums = np.zeros_like(layer.weights)\n",
" layer.bias_momentums = np.zeros_like(layer.biases)\n",
"\n",
" # Build weight updates with momentum - take previous\n",
" # updates multiplied by retain factor and update with\n",
" # current gradients\n",
" weight_updates = self.momentum * layer.weight_momentums - \\\n",
" self.current_learning_rate * layer.dweights\n",
" layer.weight_momentums = weight_updates\n",
"\n",
" # Build bias updates\n",
" bias_updates = self.momentum * layer.bias_momentums - \\\n",
" self.current_learning_rate * layer.dbiases\n",
" layer.bias_momentums = bias_updates\n",
"\n",
" # Vanilla SGD updates (as before momentum update)\n",
" else:\n",
" weight_updates = -self.current_learning_rate * layer.dweights\n",
" bias_updates = -self.current_learning_rate * layer.dbiases\n",
"\n",
" # Update weights and biases using either\n",
" # vanilla or momentum updates\n",
" layer.weights += weight_updates\n",
" layer.biases += bias_updates\n",
"\n",
" # Call once after any parameter updates\n",
" def post_update_params(self):\n",
" self.iterations += 1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "iGJ6XL-cFjmy",
"outputId": "ab7f1d7d-954d-4838-e2cc-46f62c18d28c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch: 0, acc: 0.320, loss: 1.099, lr: 1.0\n",
"epoch: 100, acc: 0.423, loss: 1.030, lr: 0.9099181073703367\n",
"epoch: 200, acc: 0.430, loss: 0.966, lr: 0.8340283569641367\n",
"epoch: 300, acc: 0.717, loss: 0.703, lr: 0.7698229407236336\n",
"epoch: 400, acc: 0.763, loss: 0.494, lr: 0.7147962830593281\n",
"epoch: 500, acc: 0.800, loss: 0.451, lr: 0.66711140760507\n",
"epoch: 600, acc: 0.870, loss: 0.347, lr: 0.6253908692933083\n",
"epoch: 700, acc: 0.580, loss: 1.827, lr: 0.5885815185403178\n",
"epoch: 800, acc: 0.837, loss: 0.343, lr: 0.5558643690939411\n",
"epoch: 900, acc: 0.870, loss: 0.290, lr: 0.526592943654555\n",
"epoch: 1000, acc: 0.897, loss: 0.263, lr: 0.5002501250625312\n",
"epoch: 1100, acc: 0.900, loss: 0.245, lr: 0.4764173415912339\n",
"epoch: 1200, acc: 0.910, loss: 0.233, lr: 0.45475216007276037\n",
"epoch: 1300, acc: 0.917, loss: 0.225, lr: 0.43497172683775553\n",
"epoch: 1400, acc: 0.923, loss: 0.218, lr: 0.4168403501458941\n",
"epoch: 1500, acc: 0.917, loss: 0.212, lr: 0.4001600640256102\n",
"epoch: 1600, acc: 0.923, loss: 0.206, lr: 0.3847633705271258\n",
"epoch: 1700, acc: 0.913, loss: 0.198, lr: 0.3705075954057058\n",
"epoch: 1800, acc: 0.920, loss: 0.192, lr: 0.35727045373347627\n",
"epoch: 1900, acc: 0.910, loss: 0.186, lr: 0.3449465332873405\n",
"epoch: 2000, acc: 0.913, loss: 0.181, lr: 0.33344448149383127\n",
"epoch: 2100, acc: 0.923, loss: 0.176, lr: 0.32268473701193934\n",
"epoch: 2200, acc: 0.917, loss: 0.173, lr: 0.31259768677711786\n",
"epoch: 2300, acc: 0.917, loss: 0.170, lr: 0.3031221582297666\n",
"epoch: 2400, acc: 0.923, loss: 0.167, lr: 0.29420417769932333\n",
"epoch: 2500, acc: 0.927, loss: 0.164, lr: 0.2857959416976279\n",
"epoch: 2600, acc: 0.923, loss: 0.162, lr: 0.2778549597110308\n",
"epoch: 2700, acc: 0.923, loss: 0.160, lr: 0.2703433360367667\n",
"epoch: 2800, acc: 0.927, loss: 0.159, lr: 0.26322716504343247\n",
"epoch: 2900, acc: 0.927, loss: 0.157, lr: 0.25647601949217746\n",
"epoch: 3000, acc: 0.930, loss: 0.156, lr: 0.25006251562890724\n",
"epoch: 3100, acc: 0.930, loss: 0.154, lr: 0.2439619419370578\n",
"epoch: 3200, acc: 0.930, loss: 0.153, lr: 0.23815194093831865\n",
"epoch: 3300, acc: 0.930, loss: 0.152, lr: 0.23261223540358225\n",
"epoch: 3400, acc: 0.930, loss: 0.151, lr: 0.22732439190725165\n",
"epoch: 3500, acc: 0.930, loss: 0.149, lr: 0.22227161591464767\n",
"epoch: 3600, acc: 0.933, loss: 0.148, lr: 0.21743857360295715\n",
"epoch: 3700, acc: 0.937, loss: 0.147, lr: 0.21281123643328367\n",
"epoch: 3800, acc: 0.937, loss: 0.146, lr: 0.20837674515524068\n",
"epoch: 3900, acc: 0.940, loss: 0.146, lr: 0.20412329046744235\n",
"epoch: 4000, acc: 0.940, loss: 0.145, lr: 0.2000400080016003\n",
"epoch: 4100, acc: 0.940, loss: 0.144, lr: 0.19611688566385566\n",
"epoch: 4200, acc: 0.937, loss: 0.143, lr: 0.19234468166955185\n",
"epoch: 4300, acc: 0.940, loss: 0.143, lr: 0.18871485185884126\n",
"epoch: 4400, acc: 0.937, loss: 0.142, lr: 0.18521948508983144\n",
"epoch: 4500, acc: 0.937, loss: 0.142, lr: 0.18185124568103292\n",
"epoch: 4600, acc: 0.940, loss: 0.141, lr: 0.1786033220217896\n",
"epoch: 4700, acc: 0.940, loss: 0.141, lr: 0.1754693805930865\n",
"epoch: 4800, acc: 0.940, loss: 0.140, lr: 0.17244352474564578\n",
"epoch: 4900, acc: 0.940, loss: 0.139, lr: 0.16952025767079165\n",
"epoch: 5000, acc: 0.940, loss: 0.139, lr: 0.16669444907484582\n",
"epoch: 5100, acc: 0.940, loss: 0.138, lr: 0.16396130513198884\n",
"epoch: 5200, acc: 0.943, loss: 0.138, lr: 0.16131634134537828\n",
"epoch: 5300, acc: 0.943, loss: 0.138, lr: 0.15875535799333226\n",
"epoch: 5400, acc: 0.943, loss: 0.137, lr: 0.1562744178777934\n",
"epoch: 5500, acc: 0.943, loss: 0.137, lr: 0.15386982612709646\n",
"epoch: 5600, acc: 0.943, loss: 0.136, lr: 0.15153811183512653\n",
"epoch: 5700, acc: 0.943, loss: 0.136, lr: 0.14927601134497687\n",
"epoch: 5800, acc: 0.943, loss: 0.136, lr: 0.14708045300779526\n",
"epoch: 5900, acc: 0.943, loss: 0.135, lr: 0.14494854326714016\n",
"epoch: 6000, acc: 0.943, loss: 0.135, lr: 0.1428775539362766\n",
"epoch: 6100, acc: 0.943, loss: 0.135, lr: 0.1408649105507818\n",
"epoch: 6200, acc: 0.943, loss: 0.134, lr: 0.13890818169190167\n",
"epoch: 6300, acc: 0.943, loss: 0.134, lr: 0.13700506918755992\n",
"epoch: 6400, acc: 0.947, loss: 0.134, lr: 0.13515339910798757\n",
"epoch: 6500, acc: 0.947, loss: 0.133, lr: 0.13335111348179757\n",
"epoch: 6600, acc: 0.950, loss: 0.133, lr: 0.13159626266614027\n",
"epoch: 6700, acc: 0.947, loss: 0.133, lr: 0.12988699831146902\n",
"epoch: 6800, acc: 0.950, loss: 0.133, lr: 0.12822156686754713\n",
"epoch: 6900, acc: 0.950, loss: 0.132, lr: 0.126598303582732\n",
"epoch: 7000, acc: 0.950, loss: 0.132, lr: 0.12501562695336915\n",
"epoch: 7100, acc: 0.950, loss: 0.132, lr: 0.12347203358439313\n",
"epoch: 7200, acc: 0.950, loss: 0.132, lr: 0.12196609342602757\n",
"epoch: 7300, acc: 0.950, loss: 0.131, lr: 0.12049644535486204\n",
"epoch: 7400, acc: 0.950, loss: 0.131, lr: 0.11906179307060363\n",
"epoch: 7500, acc: 0.950, loss: 0.131, lr: 0.11766090128250381\n",
"epoch: 7600, acc: 0.950, loss: 0.131, lr: 0.11629259216187929\n",
"epoch: 7700, acc: 0.950, loss: 0.131, lr: 0.11495574203931487\n",
"epoch: 7800, acc: 0.950, loss: 0.130, lr: 0.11364927832708263\n",
"epoch: 7900, acc: 0.950, loss: 0.130, lr: 0.11237217664906168\n",
"epoch: 8000, acc: 0.950, loss: 0.130, lr: 0.11112345816201799\n",
"epoch: 8100, acc: 0.950, loss: 0.130, lr: 0.10990218705352237\n",
"epoch: 8200, acc: 0.953, loss: 0.130, lr: 0.10870746820306555\n",
"epoch: 8300, acc: 0.950, loss: 0.130, lr: 0.1075384449940854\n",
"epoch: 8400, acc: 0.953, loss: 0.130, lr: 0.10639429726566654\n",
"epoch: 8500, acc: 0.953, loss: 0.129, lr: 0.10527423939362038\n",
"epoch: 8600, acc: 0.953, loss: 0.129, lr: 0.10417751849150952\n",
"epoch: 8700, acc: 0.953, loss: 0.129, lr: 0.10310341272296113\n",
"epoch: 8800, acc: 0.953, loss: 0.129, lr: 0.1020512297173181\n",
"epoch: 8900, acc: 0.950, loss: 0.129, lr: 0.10102030508132134\n",
"epoch: 9000, acc: 0.953, loss: 0.129, lr: 0.1000100010001\n",
"epoch: 9100, acc: 0.953, loss: 0.129, lr: 0.09901970492127933\n",
"epoch: 9200, acc: 0.953, loss: 0.128, lr: 0.09804882831650162\n",
"epoch: 9300, acc: 0.953, loss: 0.128, lr: 0.09709680551509856\n",
"epoch: 9400, acc: 0.953, loss: 0.128, lr: 0.09616309260505818\n",
"epoch: 9500, acc: 0.953, loss: 0.128, lr: 0.09524716639679968\n",
"epoch: 9600, acc: 0.953, loss: 0.128, lr: 0.09434852344560807\n",
"epoch: 9700, acc: 0.953, loss: 0.128, lr: 0.09346667912889055\n",
"epoch: 9800, acc: 0.953, loss: 0.128, lr: 0.09260116677470137\n",
"epoch: 9900, acc: 0.953, loss: 0.128, lr: 0.09175153683824203\n",
"epoch: 10000, acc: 0.953, loss: 0.128, lr: 0.09091735612328393\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"# Assuming the necessary classes (Layer_Dense, Activation_ReLU,\n",
"# Activation_Softmax_Loss_CategoricalCrossentropy, Optimizer_SGD, and spiral_data) are defined elsewhere\n",
"\n",
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"\n",
"# Create Dense layer with 2 input features and 64 output values\n",
"dense1 = Layer_Dense(2, 64)\n",
"\n",
"# Create ReLU activation (to be used with Dense layer)\n",
"activation1 = Activation_ReLU()\n",
"\n",
"# Create second Dense layer with 64 input features (as we take output\n",
"# of previous layer here) and 3 output values (output values)\n",
"dense2 = Layer_Dense(64, 3)\n",
"\n",
"# Create Softmax classifier's combined loss and activation\n",
"loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"\n",
"# Create optimizer\n",
"optimizer = Optimizer_SGD(decay=1e-3, momentum=0.9)\n",
"\n",
"# Train in loop\n",
"for epoch in range(10001):\n",
" # Perform a forward pass of our training data through this layer\n",
" dense1.forward(X)\n",
"\n",
" # Perform a forward pass through activation function\n",
" # takes the output of first dense layer here\n",
" activation1.forward(dense1.output)\n",
"\n",
" # Perform a forward pass through second Dense layer\n",
" # takes outputs of activation function of first layer as inputs\n",
" dense2.forward(activation1.output)\n",
"\n",
" # Perform a forward pass through the activation/loss function\n",
" # takes the output of second dense layer here and returns loss\n",
" loss = loss_activation.forward(dense2.output, y)\n",
"\n",
" # Calculate accuracy from output of activation2 and targets\n",
" # calculate values along first axis\n",
" predictions = np.argmax(loss_activation.output, axis=1)\n",
" if len(y.shape) == 2:\n",
" y = np.argmax(y, axis=1)\n",
" accuracy = np.mean(predictions == y)\n",
"\n",
" if not epoch % 100:\n",
" print(f'epoch: {epoch}, ' +\n",
" f'acc: {accuracy:.3f}, ' +\n",
" f'loss: {loss:.3f}, ' +\n",
" f'lr: {optimizer.current_learning_rate}')\n",
"\n",
" # Backward pass\n",
" loss_activation.backward(loss_activation.output, y)\n",
" dense2.backward(loss_activation.dinputs)\n",
" activation1.backward(dense2.dinputs)\n",
" dense1.backward(activation1.dinputs)\n",
"\n",
" # Update weights and biases\n",
" optimizer.pre_update_params()\n",
" optimizer.update_params(dense1)\n",
" optimizer.update_params(dense2)\n",
" optimizer.post_update_params()\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "3kAX2D2BFjmy"
},
"source": [
"\n",
"OPTIMIZERS: ADAGRAD \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cZpH377mFjmy"
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Adagrad optimizer\n",
"class Optimizer_Adagrad:\n",
" # Initialize optimizer - set settings\n",
" def __init__(self, learning_rate=1., decay=0., epsilon=1e-7):\n",
" self.learning_rate = learning_rate\n",
" self.current_learning_rate = learning_rate\n",
" self.decay = decay\n",
" self.iterations = 0\n",
" self.epsilon = epsilon\n",
"\n",
" # Call once before any parameter updates\n",
" def pre_update_params(self):\n",
" if self.decay:\n",
" self.current_learning_rate = self.learning_rate * \\\n",
" (1. / (1. + self.decay * self.iterations))\n",
"\n",
" # Update parameters\n",
" def update_params(self, layer):\n",
" # If layer does not contain cache arrays, create them filled with zeros\n",
" if not hasattr(layer, 'weight_cache'):\n",
" layer.weight_cache = np.zeros_like(layer.weights)\n",
" layer.bias_cache = np.zeros_like(layer.biases)\n",
"\n",
" # Update cache with squared current gradients\n",
" layer.weight_cache += layer.dweights**2\n",
" layer.bias_cache += layer.dbiases**2\n",
"\n",
" # Vanilla SGD parameter update + normalization with square rooted cache\n",
" layer.weights += -self.current_learning_rate * \\\n",
" layer.dweights / \\\n",
" (np.sqrt(layer.weight_cache) + self.epsilon)\n",
" layer.biases += -self.current_learning_rate * \\\n",
" layer.dbiases / \\\n",
" (np.sqrt(layer.bias_cache) + self.epsilon)\n",
"\n",
" # Call once after any parameter updates\n",
" def post_update_params(self):\n",
" self.iterations += 1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PmBRgb8OFjmy",
"outputId": "cebb6205-93bd-4c0e-ee9c-b6c1ae86c6de"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch: 0, acc: 0.303, loss: 1.099, lr: 1.0\n",
"epoch: 100, acc: 0.490, loss: 1.006, lr: 0.9901970492127933\n",
"epoch: 200, acc: 0.477, loss: 0.972, lr: 0.9804882831650161\n",
"epoch: 300, acc: 0.497, loss: 0.946, lr: 0.9709680551509855\n",
"epoch: 400, acc: 0.510, loss: 0.927, lr: 0.9616309260505818\n",
"epoch: 500, acc: 0.520, loss: 0.902, lr: 0.9524716639679969\n",
"epoch: 600, acc: 0.550, loss: 0.871, lr: 0.9434852344560807\n",
"epoch: 700, acc: 0.610, loss: 0.825, lr: 0.9346667912889054\n",
"epoch: 800, acc: 0.650, loss: 0.788, lr: 0.9260116677470135\n",
"epoch: 900, acc: 0.667, loss: 0.757, lr: 0.9175153683824203\n",
"epoch: 1000, acc: 0.640, loss: 0.733, lr: 0.9091735612328392\n",
"epoch: 1100, acc: 0.630, loss: 0.706, lr: 0.9009820704567978\n",
"epoch: 1200, acc: 0.700, loss: 0.692, lr: 0.892936869363336\n",
"epoch: 1300, acc: 0.723, loss: 0.660, lr: 0.8850340738118416\n",
"epoch: 1400, acc: 0.640, loss: 0.647, lr: 0.8772699359592947\n",
"epoch: 1500, acc: 0.670, loss: 0.638, lr: 0.8696408383337683\n",
"epoch: 1600, acc: 0.713, loss: 0.611, lr: 0.8621432882145013\n",
"epoch: 1700, acc: 0.733, loss: 0.591, lr: 0.8547739123001966\n",
"epoch: 1800, acc: 0.737, loss: 0.575, lr: 0.8475294516484448\n",
"epoch: 1900, acc: 0.733, loss: 0.562, lr: 0.8404067568703253\n",
"epoch: 2000, acc: 0.737, loss: 0.551, lr: 0.8334027835652972\n",
"epoch: 2100, acc: 0.737, loss: 0.537, lr: 0.8265145879824779\n",
"epoch: 2200, acc: 0.740, loss: 0.528, lr: 0.8197393228953193\n",
"epoch: 2300, acc: 0.753, loss: 0.520, lr: 0.8130742336775347\n",
"epoch: 2400, acc: 0.750, loss: 0.507, lr: 0.8065166545689169\n",
"epoch: 2500, acc: 0.747, loss: 0.501, lr: 0.8000640051204096\n",
"epoch: 2600, acc: 0.757, loss: 0.492, lr: 0.7937137868084768\n",
"epoch: 2700, acc: 0.777, loss: 0.486, lr: 0.7874635798094338\n",
"epoch: 2800, acc: 0.793, loss: 0.477, lr: 0.7813110399249941\n",
"epoch: 2900, acc: 0.813, loss: 0.466, lr: 0.7752538956508256\n",
"epoch: 3000, acc: 0.803, loss: 0.446, lr: 0.7692899453804138\n",
"epoch: 3100, acc: 0.807, loss: 0.437, lr: 0.7634170547370028\n",
"epoch: 3200, acc: 0.810, loss: 0.425, lr: 0.7576331540268202\n",
"epoch: 3300, acc: 0.830, loss: 0.417, lr: 0.7519362358072035\n",
"epoch: 3400, acc: 0.830, loss: 0.412, lr: 0.7463243525636241\n",
"epoch: 3500, acc: 0.843, loss: 0.406, lr: 0.7407956144899621\n",
"epoch: 3600, acc: 0.860, loss: 0.396, lr: 0.735348187366718\n",
"epoch: 3700, acc: 0.850, loss: 0.392, lr: 0.7299802905321557\n",
"epoch: 3800, acc: 0.857, loss: 0.386, lr: 0.7246901949416624\n",
"epoch: 3900, acc: 0.863, loss: 0.382, lr: 0.7194762213108857\n",
"epoch: 4000, acc: 0.863, loss: 0.377, lr: 0.7143367383384527\n",
"epoch: 4100, acc: 0.867, loss: 0.373, lr: 0.7092701610043266\n",
"epoch: 4200, acc: 0.877, loss: 0.368, lr: 0.7042749489400663\n",
"epoch: 4300, acc: 0.870, loss: 0.365, lr: 0.6993496048674733\n",
"epoch: 4400, acc: 0.870, loss: 0.361, lr: 0.6944926731022988\n",
"epoch: 4500, acc: 0.873, loss: 0.357, lr: 0.6897027381198704\n",
"epoch: 4600, acc: 0.870, loss: 0.354, lr: 0.6849784231796698\n",
"epoch: 4700, acc: 0.873, loss: 0.350, lr: 0.6803183890060548\n",
"epoch: 4800, acc: 0.870, loss: 0.347, lr: 0.6757213325224677\n",
"epoch: 4900, acc: 0.863, loss: 0.345, lr: 0.6711859856366199\n",
"epoch: 5000, acc: 0.863, loss: 0.342, lr: 0.6667111140742716\n",
"epoch: 5100, acc: 0.863, loss: 0.339, lr: 0.6622955162593549\n",
"epoch: 5200, acc: 0.860, loss: 0.337, lr: 0.6579380222383051\n",
"epoch: 5300, acc: 0.857, loss: 0.335, lr: 0.6536374926465782\n",
"epoch: 5400, acc: 0.860, loss: 0.332, lr: 0.649392817715436\n",
"epoch: 5500, acc: 0.863, loss: 0.329, lr: 0.6452029163171817\n",
"epoch: 5600, acc: 0.863, loss: 0.328, lr: 0.6410667350471184\n",
"epoch: 5700, acc: 0.860, loss: 0.326, lr: 0.6369832473405949\n",
"epoch: 5800, acc: 0.867, loss: 0.324, lr: 0.6329514526235838\n",
"epoch: 5900, acc: 0.867, loss: 0.322, lr: 0.6289703754953141\n",
"epoch: 6000, acc: 0.873, loss: 0.318, lr: 0.6250390649415589\n",
"epoch: 6100, acc: 0.880, loss: 0.314, lr: 0.6211565935772407\n",
"epoch: 6200, acc: 0.883, loss: 0.312, lr: 0.6173220569170937\n",
"epoch: 6300, acc: 0.883, loss: 0.310, lr: 0.6135345726731701\n",
"epoch: 6400, acc: 0.883, loss: 0.308, lr: 0.6097932800780536\n",
"epoch: 6500, acc: 0.883, loss: 0.307, lr: 0.6060973392326807\n",
"epoch: 6600, acc: 0.880, loss: 0.305, lr: 0.6024459304777396\n",
"epoch: 6700, acc: 0.883, loss: 0.304, lr: 0.5988382537876519\n",
"epoch: 6800, acc: 0.883, loss: 0.303, lr: 0.5952735281862016\n",
"epoch: 6900, acc: 0.883, loss: 0.302, lr: 0.5917509911829102\n",
"epoch: 7000, acc: 0.887, loss: 0.300, lr: 0.5882698982293076\n",
"epoch: 7100, acc: 0.887, loss: 0.299, lr: 0.5848295221942803\n",
"epoch: 7200, acc: 0.887, loss: 0.298, lr: 0.5814291528577243\n",
"epoch: 7300, acc: 0.887, loss: 0.297, lr: 0.5780680964217585\n",
"epoch: 7400, acc: 0.887, loss: 0.296, lr: 0.5747456750387954\n",
"epoch: 7500, acc: 0.887, loss: 0.295, lr: 0.5714612263557918\n",
"epoch: 7600, acc: 0.887, loss: 0.294, lr: 0.5682141030740383\n",
"epoch: 7700, acc: 0.887, loss: 0.294, lr: 0.5650036725238714\n",
"epoch: 7800, acc: 0.887, loss: 0.293, lr: 0.5618293162537221\n",
"epoch: 7900, acc: 0.887, loss: 0.292, lr: 0.5586904296329404\n",
"epoch: 8000, acc: 0.887, loss: 0.291, lr: 0.5555864214678593\n",
"epoch: 8100, acc: 0.887, loss: 0.290, lr: 0.5525167136305873\n",
"epoch: 8200, acc: 0.887, loss: 0.289, lr: 0.5494807407000385\n",
"epoch: 8300, acc: 0.887, loss: 0.289, lr: 0.5464779496147331\n",
"epoch: 8400, acc: 0.887, loss: 0.288, lr: 0.5435077993369205\n",
"epoch: 8500, acc: 0.890, loss: 0.286, lr: 0.5405697605275961\n",
"epoch: 8600, acc: 0.890, loss: 0.286, lr: 0.5376633152320017\n",
"epoch: 8700, acc: 0.890, loss: 0.285, lr: 0.5347879565752179\n",
"epoch: 8800, acc: 0.890, loss: 0.284, lr: 0.5319431884674717\n",
"epoch: 8900, acc: 0.890, loss: 0.283, lr: 0.5291285253188\n",
"epoch: 9000, acc: 0.890, loss: 0.283, lr: 0.5263434917627243\n",
"epoch: 9100, acc: 0.887, loss: 0.279, lr: 0.5235876223886068\n",
"epoch: 9200, acc: 0.893, loss: 0.277, lr: 0.5208604614823689\n",
"epoch: 9300, acc: 0.893, loss: 0.276, lr: 0.5181615627752734\n",
"epoch: 9400, acc: 0.893, loss: 0.276, lr: 0.5154904892004742\n",
"epoch: 9500, acc: 0.893, loss: 0.275, lr: 0.5128468126570593\n",
"epoch: 9600, acc: 0.893, loss: 0.274, lr: 0.5102301137813153\n",
"epoch: 9700, acc: 0.893, loss: 0.274, lr: 0.5076399817249606\n",
"epoch: 9800, acc: 0.893, loss: 0.273, lr: 0.5050760139400979\n",
"epoch: 9900, acc: 0.893, loss: 0.273, lr: 0.5025378159706518\n",
"epoch: 10000, acc: 0.893, loss: 0.272, lr: 0.5000250012500626\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"\n",
"# Create Dense layer with 2 input features and 64 output values\n",
"dense1 = Layer_Dense(2, 64)\n",
"\n",
"# Create ReLU activation (to be used with Dense layer)\n",
"activation1 = Activation_ReLU()\n",
"\n",
"# Create second Dense layer with 64 input features (as we take output of previous layer here) and 3 output values (output values)\n",
"dense2 = Layer_Dense(64, 3)\n",
"\n",
"# Create Softmax classifier's combined loss and activation\n",
"loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"\n",
"# Create optimizer\n",
"# optimizer = Optimizer_SGD(decay=8e-8, momentum=0.9)\n",
"optimizer = Optimizer_Adagrad(decay=1e-4)\n",
"\n",
"# Train in loop\n",
"for epoch in range(10001):\n",
" # Perform a forward pass of our training data through this layer\n",
" dense1.forward(X)\n",
"\n",
" # Perform a forward pass through activation function\n",
" # takes the output of first dense layer here\n",
" activation1.forward(dense1.output)\n",
"\n",
" # Perform a forward pass through second Dense layer\n",
" # takes outputs of activation function of first layer as inputs\n",
" dense2.forward(activation1.output)\n",
"\n",
" # Perform a forward pass through the activation/loss function\n",
" # takes the output of second dense layer here and returns loss\n",
" loss = loss_activation.forward(dense2.output, y)\n",
"\n",
" # Calculate accuracy from output of activation2 and targets\n",
" # calculate values along first axis\n",
" predictions = np.argmax(loss_activation.output, axis=1)\n",
" if len(y.shape) == 2:\n",
" y = np.argmax(y, axis=1)\n",
" accuracy = np.mean(predictions == y)\n",
"\n",
" if not epoch % 100:\n",
" print(f'epoch: {epoch}, ' +\n",
" f'acc: {accuracy:.3f}, ' +\n",
" f'loss: {loss:.3f}, ' +\n",
" f'lr: {optimizer.current_learning_rate}')\n",
"\n",
" # Backward pass\n",
" loss_activation.backward(loss_activation.output, y)\n",
" dense2.backward(loss_activation.dinputs)\n",
" activation1.backward(dense2.dinputs)\n",
" dense1.backward(activation1.dinputs)\n",
"\n",
" # Update weights and biases\n",
" optimizer.pre_update_params()\n",
" optimizer.update_params(dense1)\n",
" optimizer.update_params(dense2)\n",
" optimizer.post_update_params()\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_8hEKQBrFjmy"
},
"source": [
"\n",
"OPTIMIZERS: RMSPROP \n",
"
"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4VkLC_ukFjmy"
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"class Optimizer_RMSprop:\n",
" # Initialize optimizer - set settings\n",
" def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7, rho=0.9):\n",
" self.learning_rate = learning_rate\n",
" self.current_learning_rate = learning_rate\n",
" self.decay = decay\n",
" self.iterations = 0\n",
" self.epsilon = epsilon\n",
" self.rho = rho\n",
"\n",
" # Call once before any parameter updates\n",
" def pre_update_params(self):\n",
" if self.decay:\n",
" self.current_learning_rate = self.learning_rate * \\\n",
" (1. / (1. + self.decay * self.iterations))\n",
"\n",
" # Update parameters\n",
" def update_params(self, layer):\n",
" # If layer does not contain cache arrays,\n",
" # create them filled with zeros\n",
" if not hasattr(layer, 'weight_cache'):\n",
" layer.weight_cache = np.zeros_like(layer.weights)\n",
" layer.bias_cache = np.zeros_like(layer.biases)\n",
"\n",
" # Update cache with squared current gradients\n",
" layer.weight_cache = self.rho * layer.weight_cache + \\\n",
" (1 - self.rho) * layer.dweights**2\n",
" layer.bias_cache = self.rho * layer.bias_cache + \\\n",
" (1 - self.rho) * layer.dbiases**2\n",
"\n",
" # Vanilla SGD parameter update + normalization\n",
" # with square rooted cache\n",
" layer.weights += -self.current_learning_rate * \\\n",
" layer.dweights / \\\n",
" (np.sqrt(layer.weight_cache) + self.epsilon)\n",
" layer.biases += -self.current_learning_rate * \\\n",
" layer.dbiases / \\\n",
" (np.sqrt(layer.bias_cache) + self.epsilon)\n",
"\n",
" # Call once after any parameter updates\n",
" def post_update_params(self):\n",
" self.iterations += 1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-TwxlA-yFjmy",
"outputId": "8f5695a6-e8f9-4b04-dc8d-bf028f8d2d4a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch: 0, acc: 0.263, loss: 1.099, lr: 0.02\n",
"epoch: 100, acc: 0.490, loss: 1.017, lr: 0.01998021958261321\n",
"epoch: 200, acc: 0.550, loss: 0.962, lr: 0.019960279044701046\n",
"epoch: 300, acc: 0.557, loss: 0.902, lr: 0.019940378268975763\n",
"epoch: 400, acc: 0.650, loss: 0.845, lr: 0.01992051713662487\n",
"epoch: 500, acc: 0.610, loss: 0.853, lr: 0.01990069552930875\n",
"epoch: 600, acc: 0.643, loss: 0.773, lr: 0.019880913329158343\n",
"epoch: 700, acc: 0.630, loss: 0.785, lr: 0.019861170418772778\n",
"epoch: 800, acc: 0.683, loss: 0.715, lr: 0.019841466681217078\n",
"epoch: 900, acc: 0.707, loss: 0.697, lr: 0.01982180200001982\n",
"epoch: 1000, acc: 0.703, loss: 0.694, lr: 0.019802176259170884\n",
"epoch: 1100, acc: 0.667, loss: 0.702, lr: 0.01978258934311912\n",
"epoch: 1200, acc: 0.717, loss: 0.672, lr: 0.01976304113677013\n",
"epoch: 1300, acc: 0.747, loss: 0.630, lr: 0.019743531525483964\n",
"epoch: 1400, acc: 0.730, loss: 0.618, lr: 0.01972406039507293\n",
"epoch: 1500, acc: 0.743, loss: 0.614, lr: 0.019704627631799327\n",
"epoch: 1600, acc: 0.743, loss: 0.603, lr: 0.019685233122373254\n",
"epoch: 1700, acc: 0.740, loss: 0.593, lr: 0.019665876753950384\n",
"epoch: 1800, acc: 0.767, loss: 0.595, lr: 0.01964655841412981\n",
"epoch: 1900, acc: 0.770, loss: 0.578, lr: 0.019627277990951823\n",
"epoch: 2000, acc: 0.750, loss: 0.576, lr: 0.019608035372895814\n",
"epoch: 2100, acc: 0.757, loss: 0.559, lr: 0.01958883044887805\n",
"epoch: 2200, acc: 0.780, loss: 0.565, lr: 0.019569663108249594\n",
"epoch: 2300, acc: 0.777, loss: 0.552, lr: 0.01955053324079414\n",
"epoch: 2400, acc: 0.770, loss: 0.560, lr: 0.019531440736725945\n",
"epoch: 2500, acc: 0.770, loss: 0.539, lr: 0.019512385486687673\n",
"epoch: 2600, acc: 0.783, loss: 0.546, lr: 0.019493367381748363\n",
"epoch: 2700, acc: 0.783, loss: 0.533, lr: 0.019474386313401298\n",
"epoch: 2800, acc: 0.737, loss: 0.609, lr: 0.019455442173562\n",
"epoch: 2900, acc: 0.783, loss: 0.522, lr: 0.019436534854566128\n",
"epoch: 3000, acc: 0.763, loss: 0.547, lr: 0.01941766424916747\n",
"epoch: 3100, acc: 0.803, loss: 0.513, lr: 0.019398830250535893\n",
"epoch: 3200, acc: 0.803, loss: 0.509, lr: 0.019380032752255354\n",
"epoch: 3300, acc: 0.787, loss: 0.530, lr: 0.01936127164832186\n",
"epoch: 3400, acc: 0.810, loss: 0.495, lr: 0.01934254683314152\n",
"epoch: 3500, acc: 0.803, loss: 0.504, lr: 0.019323858201528515\n",
"epoch: 3600, acc: 0.817, loss: 0.491, lr: 0.019305205648703173\n",
"epoch: 3700, acc: 0.820, loss: 0.484, lr: 0.01928658907028997\n",
"epoch: 3800, acc: 0.800, loss: 0.476, lr: 0.01926800836231563\n",
"epoch: 3900, acc: 0.783, loss: 0.499, lr: 0.019249463421207133\n",
"epoch: 4000, acc: 0.797, loss: 0.482, lr: 0.019230954143789846\n",
"epoch: 4100, acc: 0.797, loss: 0.477, lr: 0.019212480427285565\n",
"epoch: 4200, acc: 0.793, loss: 0.472, lr: 0.019194042169310647\n",
"epoch: 4300, acc: 0.803, loss: 0.463, lr: 0.019175639267874092\n",
"epoch: 4400, acc: 0.780, loss: 0.489, lr: 0.019157271621375684\n",
"epoch: 4500, acc: 0.790, loss: 0.462, lr: 0.0191389391286041\n",
"epoch: 4600, acc: 0.800, loss: 0.459, lr: 0.019120641688735073\n",
"epoch: 4700, acc: 0.810, loss: 0.464, lr: 0.019102379201329525\n",
"epoch: 4800, acc: 0.823, loss: 0.438, lr: 0.01908415156633174\n",
"epoch: 4900, acc: 0.793, loss: 0.455, lr: 0.01906595868406753\n",
"epoch: 5000, acc: 0.803, loss: 0.446, lr: 0.01904780045524243\n",
"epoch: 5100, acc: 0.800, loss: 0.441, lr: 0.019029676780939874\n",
"epoch: 5200, acc: 0.753, loss: 0.541, lr: 0.019011587562619416\n",
"epoch: 5300, acc: 0.823, loss: 0.428, lr: 0.01899353270211493\n",
"epoch: 5400, acc: 0.807, loss: 0.435, lr: 0.018975512101632844\n",
"epoch: 5500, acc: 0.807, loss: 0.429, lr: 0.018957525663750367\n",
"epoch: 5600, acc: 0.810, loss: 0.426, lr: 0.018939573291413745\n",
"epoch: 5700, acc: 0.813, loss: 0.416, lr: 0.018921654887936498\n",
"epoch: 5800, acc: 0.823, loss: 0.414, lr: 0.018903770356997706\n",
"epoch: 5900, acc: 0.830, loss: 0.396, lr: 0.018885919602640248\n",
"epoch: 6000, acc: 0.810, loss: 0.411, lr: 0.018868102529269144\n",
"epoch: 6100, acc: 0.813, loss: 0.402, lr: 0.018850319041649778\n",
"epoch: 6200, acc: 0.813, loss: 0.399, lr: 0.018832569044906263\n",
"epoch: 6300, acc: 0.813, loss: 0.397, lr: 0.018814852444519702\n",
"epoch: 6400, acc: 0.850, loss: 0.381, lr: 0.018797169146326564\n",
"epoch: 6500, acc: 0.857, loss: 0.393, lr: 0.01877951905651696\n",
"epoch: 6600, acc: 0.857, loss: 0.392, lr: 0.018761902081633034\n",
"epoch: 6700, acc: 0.857, loss: 0.389, lr: 0.018744318128567278\n",
"epoch: 6800, acc: 0.860, loss: 0.368, lr: 0.018726767104560903\n",
"epoch: 6900, acc: 0.823, loss: 0.391, lr: 0.018709248917202218\n",
"epoch: 7000, acc: 0.823, loss: 0.388, lr: 0.018691763474424996\n",
"epoch: 7100, acc: 0.820, loss: 0.385, lr: 0.018674310684506857\n",
"epoch: 7200, acc: 0.817, loss: 0.422, lr: 0.01865689045606769\n",
"epoch: 7300, acc: 0.843, loss: 0.394, lr: 0.01863950269806802\n",
"epoch: 7400, acc: 0.853, loss: 0.383, lr: 0.018622147319807447\n",
"epoch: 7500, acc: 0.850, loss: 0.383, lr: 0.018604824230923075\n",
"epoch: 7600, acc: 0.747, loss: 0.540, lr: 0.01858753334138793\n",
"epoch: 7700, acc: 0.840, loss: 0.424, lr: 0.018570274561509396\n",
"epoch: 7800, acc: 0.860, loss: 0.378, lr: 0.018553047801927663\n",
"epoch: 7900, acc: 0.863, loss: 0.378, lr: 0.018535852973614212\n",
"epoch: 8000, acc: 0.787, loss: 0.511, lr: 0.01851868998787026\n",
"epoch: 8100, acc: 0.823, loss: 0.379, lr: 0.018501558756325222\n",
"epoch: 8200, acc: 0.830, loss: 0.375, lr: 0.01848445919093522\n",
"epoch: 8300, acc: 0.823, loss: 0.371, lr: 0.018467391203981567\n",
"epoch: 8400, acc: 0.833, loss: 0.381, lr: 0.018450354708069265\n",
"epoch: 8500, acc: 0.853, loss: 0.375, lr: 0.018433349616125496\n",
"epoch: 8600, acc: 0.857, loss: 0.372, lr: 0.018416375841398172\n",
"epoch: 8700, acc: 0.867, loss: 0.370, lr: 0.01839943329745444\n",
"epoch: 8800, acc: 0.873, loss: 0.348, lr: 0.01838252189817921\n",
"epoch: 8900, acc: 0.830, loss: 0.369, lr: 0.018365641557773718\n",
"epoch: 9000, acc: 0.830, loss: 0.369, lr: 0.018348792190754044\n",
"epoch: 9100, acc: 0.843, loss: 0.358, lr: 0.0183319737119497\n",
"epoch: 9200, acc: 0.880, loss: 0.346, lr: 0.018315186036502167\n",
"epoch: 9300, acc: 0.867, loss: 0.365, lr: 0.018298429079863496\n",
"epoch: 9400, acc: 0.860, loss: 0.367, lr: 0.018281702757794862\n",
"epoch: 9500, acc: 0.817, loss: 0.443, lr: 0.018265006986365174\n",
"epoch: 9600, acc: 0.820, loss: 0.369, lr: 0.018248341681949654\n",
"epoch: 9700, acc: 0.827, loss: 0.363, lr: 0.018231706761228456\n",
"epoch: 9800, acc: 0.827, loss: 0.362, lr: 0.018215102141185255\n",
"epoch: 9900, acc: 0.873, loss: 0.349, lr: 0.018198527739105907\n",
"epoch: 10000, acc: 0.870, loss: 0.360, lr: 0.018181983472577025\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"# Build the spiral dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"\n",
"# Network: Dense(2 -> 64) -> ReLU -> Dense(64 -> 3) -> combined softmax activation / cross-entropy loss\n",
"dense1 = Layer_Dense(2, 64)\n",
"activation1 = Activation_ReLU()\n",
"dense2 = Layer_Dense(64, 3)\n",
"loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"\n",
"# Optimizer used for this run; alternative configurations kept for reference:\n",
"#   Optimizer_SGD(decay=8e-8, momentum=0.9)\n",
"#   Optimizer_Adagrad(decay=1e-4)\n",
"#   Optimizer_RMSprop(decay=1e-4)\n",
"optimizer = Optimizer_RMSprop(learning_rate=0.02, decay=1e-5, rho=0.999)\n",
"\n",
"# Training loop: every epoch runs the whole dataset X through the network\n",
"for epoch in range(10001):\n",
"    # Forward pass: dense1 -> ReLU -> dense2 -> activation/loss (which returns the loss)\n",
"    dense1.forward(X)\n",
"    activation1.forward(dense1.output)\n",
"    dense2.forward(activation1.output)\n",
"    loss = loss_activation.forward(dense2.output, y)\n",
"\n",
"    # Accuracy: the predicted class is the arg-max of each output row;\n",
"    # two-dimensional (one-hot) targets are collapsed to class indices first\n",
"    predictions = loss_activation.output.argmax(axis=1)\n",
"    if y.ndim == 2:\n",
"        y = y.argmax(axis=1)\n",
"    accuracy = (predictions == y).mean()\n",
"\n",
"    # Report progress on every 100th epoch\n",
"    if epoch % 100 == 0:\n",
"        print(f'epoch: {epoch}, '\n",
"              f'acc: {accuracy:.3f}, '\n",
"              f'loss: {loss:.3f}, '\n",
"              f'lr: {optimizer.current_learning_rate}')\n",
"\n",
"    # Backward pass: propagate gradients from the loss back to the first layer\n",
"    loss_activation.backward(loss_activation.output, y)\n",
"    dense2.backward(loss_activation.dinputs)\n",
"    activation1.backward(dense2.dinputs)\n",
"    dense1.backward(activation1.dinputs)\n",
"\n",
"    # Parameter update: the pre/post hooks are called once per step, around the per-layer updates\n",
"    optimizer.pre_update_params()\n",
"    optimizer.update_params(dense1)\n",
"    optimizer.update_params(dense2)\n",
"    optimizer.post_update_params()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hunJrmJNFjmy"
},
"source": [
"\n",
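"## OPTIMIZERS: ADAM\n",
"\n",
"The optimizer defined in the next cell keeps, for every parameter, a running average of the gradients (momentum) and of the squared gradients (cache), and bias-corrects both before applying the update."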
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4vZtNOVeFjmy"
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Adam optimizer\n",
"class Optimizer_Adam:\n",
"    \"\"\"Adam optimizer: momentum combined with per-parameter adaptive steps.\n",
"\n",
"    For every layer it updates, the optimizer stores on the layer object a\n",
"    decaying average of the gradients (``*_momentums``) and of the squared\n",
"    gradients (``*_cache``). Both averages are bias-corrected for their zero\n",
"    initialization before they are used to scale the parameter step.\n",
"    \"\"\"\n",
"\n",
"    # Initialize optimizer - set settings\n",
"    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7, beta_1=0.9, beta_2=0.999):\n",
"        \"\"\"Store the hyperparameters.\n",
"\n",
"        Args:\n",
"            learning_rate: Initial (undecayed) step size.\n",
"            decay: Learning-rate decay applied per iteration; 0 disables it.\n",
"            epsilon: Small constant added to the denominator of the update\n",
"                to avoid division by zero.\n",
"            beta_1: Decay rate of the gradient (momentum) average.\n",
"            beta_2: Decay rate of the squared-gradient (cache) average.\n",
"        \"\"\"\n",
"        self.learning_rate = learning_rate\n",
"        self.current_learning_rate = learning_rate\n",
"        self.decay = decay\n",
"        self.iterations = 0\n",
"        self.epsilon = epsilon\n",
"        self.beta_1 = beta_1\n",
"        self.beta_2 = beta_2\n",
"\n",
"    # Call once before any parameter updates\n",
"    def pre_update_params(self):\n",
"        \"\"\"Refresh ``current_learning_rate`` from the decay schedule.\n",
"\n",
"        With a non-zero ``decay`` the rate becomes\n",
"        ``learning_rate / (1 + decay * iterations)``; otherwise it is left\n",
"        unchanged.\n",
"        \"\"\"\n",
"        if self.decay:\n",
"            self.current_learning_rate = self.learning_rate * (1. / (1. + self.decay * self.iterations))\n",
"\n",
"    # Update parameters\n",
"    def update_params(self, layer):\n",
"        \"\"\"Apply one Adam step to ``layer.weights`` and ``layer.biases`` in place.\n",
"\n",
"        Args:\n",
"            layer: Object exposing ``weights``, ``biases``, ``dweights`` and\n",
"                ``dbiases`` arrays. The optimizer state (``weight_momentums``,\n",
"                ``weight_cache``, ``bias_momentums``, ``bias_cache``) is\n",
"                created on it when missing and updated on every call.\n",
"        \"\"\"\n",
"        # Create each missing state array on its own. Checking a single\n",
"        # attribute is not enough: a layer that already carries cache arrays\n",
"        # but no momentums would skip initialization and then fail with an\n",
"        # AttributeError on the momentum update below.\n",
"        for state_name, reference in (\n",
"            ('weight_momentums', layer.weights),\n",
"            ('weight_cache', layer.weights),\n",
"            ('bias_momentums', layer.biases),\n",
"            ('bias_cache', layer.biases),\n",
"        ):\n",
"            if not hasattr(layer, state_name):\n",
"                setattr(layer, state_name, np.zeros_like(reference))\n",
"\n",
"        # Bias-correction denominators, shared by weights and biases.\n",
"        # self.iterations is 0 on the first pass and the exponent must start at 1.\n",
"        step = self.iterations + 1\n",
"        momentum_correction = 1 - self.beta_1 ** step\n",
"        cache_correction = 1 - self.beta_2 ** step\n",
"\n",
"        # Update momentum with current gradients\n",
"        layer.weight_momentums = self.beta_1 * layer.weight_momentums + (1 - self.beta_1) * layer.dweights\n",
"        layer.bias_momentums = self.beta_1 * layer.bias_momentums + (1 - self.beta_1) * layer.dbiases\n",
"\n",
"        # Get corrected momentum\n",
"        weight_momentums_corrected = layer.weight_momentums / momentum_correction\n",
"        bias_momentums_corrected = layer.bias_momentums / momentum_correction\n",
"\n",
"        # Update cache with squared current gradients\n",
"        layer.weight_cache = self.beta_2 * layer.weight_cache + (1 - self.beta_2) * layer.dweights**2\n",
"        layer.bias_cache = self.beta_2 * layer.bias_cache + (1 - self.beta_2) * layer.dbiases**2\n",
"\n",
"        # Get corrected cache\n",
"        weight_cache_corrected = layer.weight_cache / cache_correction\n",
"        bias_cache_corrected = layer.bias_cache / cache_correction\n",
"\n",
"        # Vanilla SGD parameter update + normalization with square rooted cache\n",
"        layer.weights += -self.current_learning_rate * weight_momentums_corrected / (np.sqrt(weight_cache_corrected) + self.epsilon)\n",
"        layer.biases += -self.current_learning_rate * bias_momentums_corrected / (np.sqrt(bias_cache_corrected) + self.epsilon)\n",
"\n",
"    # Call once after any parameter updates\n",
"    def post_update_params(self):\n",
"        \"\"\"Advance the step counter used by the decay and bias-correction terms.\"\"\"\n",
"        self.iterations += 1\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "0jPa07gQFjmy",
"outputId": "a0e17fa7-3600-4184-a911-97ef141ef8bf"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch: 0, acc: 0.310, loss: 1.099, lr: 1.0\n",
"epoch: 100, acc: 0.537, loss: 1.003, lr: 0.9999920800627259\n",
"epoch: 200, acc: 0.487, loss: 0.899, lr: 0.9999840802534423\n",
"epoch: 300, acc: 0.687, loss: 0.662, lr: 0.9999760805721527\n",
"epoch: 400, acc: 0.723, loss: 0.566, lr: 0.9999680810188538\n",
"epoch: 500, acc: 0.760, loss: 0.518, lr: 0.9999600815935427\n",
"epoch: 600, acc: 0.767, loss: 0.496, lr: 0.9999520822962162\n",
"epoch: 700, acc: 0.770, loss: 0.466, lr: 0.9999440831268714\n",
"epoch: 800, acc: 0.787, loss: 0.419, lr: 0.9999360840855052\n",
"epoch: 900, acc: 0.620, loss: 1.111, lr: 0.9999280851721145\n",
"epoch: 1000, acc: 0.883, loss: 0.281, lr: 0.999920086386696\n",
"epoch: 1100, acc: 0.740, loss: 0.561, lr: 0.9999120877292469\n",
"epoch: 1200, acc: 0.877, loss: 0.256, lr: 0.999904089199764\n",
"epoch: 1300, acc: 0.873, loss: 0.236, lr: 0.9998960907982444\n",
"epoch: 1400, acc: 0.567, loss: 2.495, lr: 0.9998880925246847\n",
"epoch: 1500, acc: 0.790, loss: 0.445, lr: 0.9998800943790821\n",
"epoch: 1600, acc: 0.803, loss: 0.435, lr: 0.9998720963614335\n",
"epoch: 1700, acc: 0.800, loss: 0.431, lr: 0.9998640984717357\n",
"epoch: 1800, acc: 0.803, loss: 0.429, lr: 0.9998561007099859\n",
"epoch: 1900, acc: 0.803, loss: 0.426, lr: 0.9998481030761807\n",
"epoch: 2000, acc: 0.807, loss: 0.424, lr: 0.9998401055703172\n",
"epoch: 2100, acc: 0.810, loss: 0.422, lr: 0.9998321081923923\n",
"epoch: 2200, acc: 0.810, loss: 0.420, lr: 0.999824110942403\n",
"epoch: 2300, acc: 0.793, loss: 0.401, lr: 0.9998161138203462\n",
"epoch: 2400, acc: 0.803, loss: 0.399, lr: 0.9998081168262187\n",
"epoch: 2500, acc: 0.807, loss: 0.397, lr: 0.9998001199600176\n",
"epoch: 2600, acc: 0.810, loss: 0.395, lr: 0.9997921232217397\n",
"epoch: 2700, acc: 0.823, loss: 0.385, lr: 0.999784126611382\n",
"epoch: 2800, acc: 0.820, loss: 0.383, lr: 0.9997761301289415\n",
"epoch: 2900, acc: 0.820, loss: 0.380, lr: 0.999768133774415\n",
"epoch: 3000, acc: 0.813, loss: 0.372, lr: 0.9997601375477995\n",
"epoch: 3100, acc: 0.813, loss: 0.370, lr: 0.9997521414490919\n",
"epoch: 3200, acc: 0.813, loss: 0.369, lr: 0.9997441454782892\n",
"epoch: 3300, acc: 0.813, loss: 0.368, lr: 0.9997361496353881\n",
"epoch: 3400, acc: 0.813, loss: 0.366, lr: 0.9997281539203858\n",
"epoch: 3500, acc: 0.813, loss: 0.365, lr: 0.9997201583332792\n",
"epoch: 3600, acc: 0.813, loss: 0.365, lr: 0.9997121628740652\n",
"epoch: 3700, acc: 0.813, loss: 0.364, lr: 0.9997041675427408\n",
"epoch: 3800, acc: 0.813, loss: 0.363, lr: 0.9996961723393027\n",
"epoch: 3900, acc: 0.810, loss: 0.363, lr: 0.999688177263748\n",
"epoch: 4000, acc: 0.813, loss: 0.362, lr: 0.9996801823160735\n",
"epoch: 4100, acc: 0.813, loss: 0.361, lr: 0.9996721874962763\n",
"epoch: 4200, acc: 0.810, loss: 0.360, lr: 0.9996641928043533\n",
"epoch: 4300, acc: 0.813, loss: 0.360, lr: 0.9996561982403013\n",
"epoch: 4400, acc: 0.810, loss: 0.359, lr: 0.9996482038041173\n",
"epoch: 4500, acc: 0.810, loss: 0.359, lr: 0.9996402094957983\n",
"epoch: 4600, acc: 0.810, loss: 0.359, lr: 0.9996322153153412\n",
"epoch: 4700, acc: 0.813, loss: 0.358, lr: 0.999624221262743\n",
"epoch: 4800, acc: 0.807, loss: 0.358, lr: 0.9996162273380004\n",
"epoch: 4900, acc: 0.810, loss: 0.357, lr: 0.9996082335411106\n",
"epoch: 5000, acc: 0.807, loss: 0.357, lr: 0.9996002398720704\n",
"epoch: 5100, acc: 0.813, loss: 0.356, lr: 0.9995922463308767\n",
"epoch: 5200, acc: 0.807, loss: 0.357, lr: 0.9995842529175265\n",
"epoch: 5300, acc: 0.807, loss: 0.370, lr: 0.9995762596320168\n",
"epoch: 5400, acc: 0.797, loss: 0.397, lr: 0.9995682664743444\n",
"epoch: 5500, acc: 0.803, loss: 0.351, lr: 0.9995602734445063\n",
"epoch: 5600, acc: 0.807, loss: 0.348, lr: 0.9995522805424993\n",
"epoch: 5700, acc: 0.810, loss: 0.354, lr: 0.9995442877683206\n",
"epoch: 5800, acc: 0.803, loss: 0.347, lr: 0.999536295121967\n",
"epoch: 5900, acc: 0.800, loss: 0.369, lr: 0.9995283026034353\n",
"epoch: 6000, acc: 0.803, loss: 0.348, lr: 0.9995203102127226\n",
"epoch: 6100, acc: 0.807, loss: 0.345, lr: 0.9995123179498259\n",
"epoch: 6200, acc: 0.810, loss: 0.346, lr: 0.9995043258147419\n",
"epoch: 6300, acc: 0.803, loss: 0.346, lr: 0.9994963338074676\n",
"epoch: 6400, acc: 0.807, loss: 0.346, lr: 0.9994883419280001\n",
"epoch: 6500, acc: 0.803, loss: 0.347, lr: 0.9994803501763364\n",
"epoch: 6600, acc: 0.800, loss: 0.359, lr: 0.9994723585524731\n",
"epoch: 6700, acc: 0.813, loss: 0.343, lr: 0.9994643670564072\n",
"epoch: 6800, acc: 0.810, loss: 0.342, lr: 0.9994563756881358\n",
"epoch: 6900, acc: 0.810, loss: 0.341, lr: 0.9994483844476557\n",
"epoch: 7000, acc: 0.813, loss: 0.342, lr: 0.9994403933349639\n",
"epoch: 7100, acc: 0.807, loss: 0.342, lr: 0.9994324023500574\n",
"epoch: 7200, acc: 0.807, loss: 0.348, lr: 0.999424411492933\n",
"epoch: 7300, acc: 0.803, loss: 0.342, lr: 0.9994164207635877\n",
"epoch: 7400, acc: 0.810, loss: 0.343, lr: 0.9994084301620185\n",
"epoch: 7500, acc: 0.810, loss: 0.340, lr: 0.9994004396882222\n",
"epoch: 7600, acc: 0.810, loss: 0.340, lr: 0.999392449342196\n",
"epoch: 7700, acc: 0.803, loss: 0.343, lr: 0.9993844591239364\n",
"epoch: 7800, acc: 0.800, loss: 0.345, lr: 0.9993764690334407\n",
"epoch: 7900, acc: 0.803, loss: 0.342, lr: 0.9993684790707056\n",
"epoch: 8000, acc: 0.807, loss: 0.364, lr: 0.9993604892357283\n",
"epoch: 8100, acc: 0.810, loss: 0.347, lr: 0.9993524995285055\n",
"epoch: 8200, acc: 0.777, loss: 0.452, lr: 0.9993445099490342\n",
"epoch: 8300, acc: 0.807, loss: 0.346, lr: 0.9993365204973114\n",
"epoch: 8400, acc: 0.810, loss: 0.342, lr: 0.999328531173334\n",
"epoch: 8500, acc: 0.807, loss: 0.359, lr: 0.9993205419770989\n",
"epoch: 8600, acc: 0.847, loss: 0.284, lr: 0.9993125529086031\n",
"epoch: 8700, acc: 0.863, loss: 0.272, lr: 0.9993045639678434\n",
"epoch: 8800, acc: 0.867, loss: 0.283, lr: 0.999296575154817\n",
"epoch: 8900, acc: 0.863, loss: 0.285, lr: 0.9992885864695206\n",
"epoch: 9000, acc: 0.860, loss: 0.265, lr: 0.9992805979119511\n",
"epoch: 9100, acc: 0.863, loss: 0.266, lr: 0.9992726094821057\n",
"epoch: 9200, acc: 0.867, loss: 0.263, lr: 0.9992646211799814\n",
"epoch: 9300, acc: 0.867, loss: 0.259, lr: 0.9992566330055745\n",
"epoch: 9400, acc: 0.867, loss: 0.269, lr: 0.9992486449588827\n",
"epoch: 9500, acc: 0.873, loss: 0.249, lr: 0.9992406570399023\n",
"epoch: 9600, acc: 0.877, loss: 0.246, lr: 0.9992326692486306\n",
"epoch: 9700, acc: 0.877, loss: 0.245, lr: 0.9992246815850646\n",
"epoch: 9800, acc: 0.877, loss: 0.244, lr: 0.999216694049201\n",
"epoch: 9900, acc: 0.877, loss: 0.243, lr: 0.9992087066410369\n",
"epoch: 10000, acc: 0.873, loss: 0.244, lr: 0.9992007193605691\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"# Build the spiral dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"\n",
"# Network: Dense(2 -> 64) -> ReLU -> Dense(64 -> 3) -> combined softmax activation / cross-entropy loss\n",
"dense1 = Layer_Dense(2, 64)\n",
"activation1 = Activation_ReLU()\n",
"dense2 = Layer_Dense(64, 3)\n",
"loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"\n",
"# Optimizer used for this run; alternative configurations kept for reference:\n",
"#   Optimizer_SGD(decay=8e-8, momentum=0.9)\n",
"#   Optimizer_Adagrad(decay=1e-4)\n",
"#   Optimizer_RMSprop(decay=1e-4)\n",
"#   Optimizer_RMSprop(learning_rate=0.02, decay=1e-5,rho=0.999)\n",
"optimizer = Optimizer_Adam(learning_rate=0.02, decay=1e-5)\n",
"\n",
"# Training loop: every epoch runs the whole dataset X through the network\n",
"for epoch in range(10001):\n",
"    # Forward pass: dense1 -> ReLU -> dense2 -> activation/loss (which returns the loss)\n",
"    dense1.forward(X)\n",
"    activation1.forward(dense1.output)\n",
"    dense2.forward(activation1.output)\n",
"    loss = loss_activation.forward(dense2.output, y)\n",
"\n",
"    # Accuracy: the predicted class is the arg-max of each output row;\n",
"    # two-dimensional (one-hot) targets are collapsed to class indices first\n",
"    predictions = loss_activation.output.argmax(axis=1)\n",
"    if y.ndim == 2:\n",
"        y = y.argmax(axis=1)\n",
"    accuracy = (predictions == y).mean()\n",
"\n",
"    # Report progress on every 100th epoch\n",
"    if epoch % 100 == 0:\n",
"        print(f'epoch: {epoch}, '\n",
"              f'acc: {accuracy:.3f}, '\n",
"              f'loss: {loss:.3f}, '\n",
"              f'lr: {optimizer.current_learning_rate}')\n",
"\n",
"    # Backward pass: propagate gradients from the loss back to the first layer\n",
"    loss_activation.backward(loss_activation.output, y)\n",
"    dense2.backward(loss_activation.dinputs)\n",
"    activation1.backward(dense2.dinputs)\n",
"    dense1.backward(activation1.dinputs)\n",
"\n",
"    # Parameter update: pre_update_params refreshes the decayed learning rate,\n",
"    # post_update_params advances the optimizer's iteration counter\n",
"    optimizer.pre_update_params()\n",
"    optimizer.update_params(dense1)\n",
"    optimizer.update_params(dense2)\n",
"    optimizer.post_update_params()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "idw4i9VtFjmz"
},
"outputs": [],
"source": [
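"# Accuracy noted for each optimizer (presumably the final training accuracy of a run; re-run the cells to confirm)\n",
"# Adam: 0.957\n",
"# RMSprop: 0.717\n",
"# SGD with momentum: 0.873"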
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "9e9f8aRZFjmz"
},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}