{"cells":[{"cell_type":"markdown","metadata":{"id":"Ytg89BNEludG"},"source":["## BUILDING NEURAL NETWORKS FROM SCRATCH PART 1: CODING NEURONS AND LAYERS"]},{"cell_type":"markdown","metadata":{"id":"19SdgoojludI"},"source":["
\n","CODING OUR FIRST NEURON: 3 INPUTS\n","
"]},{"cell_type":"markdown","metadata":{"id":"OKROcwPSludI"},"source":["![Screenshot 2024-05-16 at 12.14.13 PM.png](attachment:889de6b2-5333-4c4d-a82a-aa820b0eed8a.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"a3w3A29VludJ","outputId":"da8b22aa-a485-4ece-cdc5-c1b322f1e4d3"},"outputs":[{"name":"stdout","output_type":"stream","text":["2.3\n"]}],"source":["inputs = [1, 2, 3]\n","weights = [0.2, 0.8, -0.5]\n","bias = 2\n","\n","outputs = (inputs[0]*weights[0] + inputs[1]*weights[1] + inputs[2]*weights[2] + bias)\n","\n","print(outputs)"]},{"cell_type":"markdown","metadata":{"id":"ZjzzTPJEludK"},"source":["
\n","CODING OUR SECOND NEURON: 4 INPUTS\n","
"]},{"cell_type":"markdown","metadata":{"id":"f6RIPhWuludK"},"source":["![Screenshot 2024-05-16 at 12.14.35 PM.png](attachment:96db3f0b-9cc8-4d44-ae28-29624144f3f5.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JkpHdc3XludK","outputId":"52bb2cd4-def1-4e5b-e008-2e9d76b14f57"},"outputs":[{"name":"stdout","output_type":"stream","text":["4.8\n"]}],"source":["inputs = [1.0, 2.0, 3.0, 2.5]\n","weights = [0.2, 0.8, -0.5, 1.0]\n","bias = 2.0\n","output = (inputs[0]*weights[0] +\n"," inputs[1]*weights[1] +\n"," inputs[2]*weights[2] +\n"," inputs[3]*weights[3] + bias)\n","\n","print(output)"]},{"cell_type":"markdown","metadata":{"id":"11gsmFGlludL"},"source":["
\n","CODING OUR FIRST LAYER\n","
"]},{"cell_type":"markdown","metadata":{"id":"hc1HsAqvludL"},"source":["![Screenshot 2024-05-16 at 12.15.01 PM.png](attachment:0a573927-c2fe-415b-9534-85f6510f635b.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Fxbt42LKludM","outputId":"9e9cb600-1ab7-4028-d2c0-91951c8271ac"},"outputs":[{"name":"stdout","output_type":"stream","text":["[4.8, 1.21, 2.385]\n"]}],"source":["inputs = [1, 2, 3, 2.5]\n","\n","weights = [[0.2, 0.8, -0.5, 1],\n"," [0.5, -0.91, 0.26, -0.5],\n"," [-0.26, -0.27, 0.17, 0.87]]\n","\n","weights1 = weights[0] #LIST OF WEIGHTS ASSOCIATED WITH 1ST NEURON : W11, W12, W13, W14\n","weights2 = weights[1] #LIST OF WEIGHTS ASSOCIATED WITH 2ND NEURON : W21, W22, W23, W24\n","weights3 = weights[2] #LIST OF WEIGHTS ASSOCIATED WITH 3RD NEURON : W31, W32, W33, W34\n","\n","biases = [2, 3, 0.5]\n","\n","bias1 = 2\n","bias2 = 3\n","bias3 = 0.5\n","\n","outputs = [\n"," # Neuron 1:\n"," inputs[0]*weights1[0] +\n"," inputs[1]*weights1[1] +\n"," inputs[2]*weights1[2] +\n"," inputs[3]*weights1[3] + bias1,\n"," # Neuron 2:\n"," inputs[0]*weights2[0] +\n"," inputs[1]*weights2[1] +\n"," inputs[2]*weights2[2] +\n"," inputs[3]*weights2[3] + bias2,\n"," # Neuron 3:\n"," inputs[0]*weights3[0] +\n"," inputs[1]*weights3[1] +\n"," inputs[2]*weights3[2] +\n"," inputs[3]*weights3[3] + bias3]\n","\n","print(outputs)"]},{"cell_type":"markdown","metadata":{"id":"9mjDX4XsludM"},"source":["
\n","\n","USING LOOPS FOR BETTER AND EASIER CODING
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8InCpdk8ludM","outputId":"8b37ea71-ea02-490e-a802-4643e81615f4"},"outputs":[{"name":"stdout","output_type":"stream","text":["[4.8, 1.21, 2.385]\n"]}],"source":["inputs = [1, 2, 3, 2.5]\n","\n","##LIST OF WEIGHTS\n","weights = [[0.2, 0.8, -0.5, 1],\n"," [0.5, -0.91, 0.26, -0.5],\n"," [-0.26, -0.27, 0.17, 0.87]]\n","\n","##LIST OF BIASES\n","biases = [2, 3, 0.5]\n","\n","# Output of current layer\n","layer_outputs = []\n","\n","# For each neuron\n","for neuron_weights, neuron_bias in zip(weights, biases):\n"," # Zeroed output of given neuron\n"," neuron_output = 0\n"," # For each input and weight to the neuron\n"," for n_input, weight in zip(inputs, neuron_weights):\n"," # Multiply this input by associated weight\n"," # and add to the neuron's output variable\n"," neuron_output += n_input*weight ## W31*X1 + W32*X2 + W33*X3 + W34*X4\n"," # Add bias\n"," neuron_output += neuron_bias ## ## W31*X1 + W32*X2 + W33*X3 + W34*X4 + B3\n"," # Put neuron's result to the layer's output list\n"," layer_outputs.append(neuron_output)\n","print(layer_outputs)"]},{"cell_type":"markdown","metadata":{"id":"huHJVOjnludM"},"source":["
\n","USING NUMPY\n","
"]},{"cell_type":"markdown","metadata":{"id":"eWR7z9J1ludM"},"source":["
\n","SINGLE NEURON USING NUMPY\n","
"]},{"cell_type":"markdown","metadata":{"id":"4gZjm_eqludN"},"source":["![Screenshot 2024-05-16 at 12.21.35 PM.png](attachment:15dc20d7-a498-4291-a8d1-546adb19f1df.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"KKwWT6aPludN","outputId":"b2de59ce-8e3a-4982-e2ab-945e69786806"},"outputs":[{"name":"stdout","output_type":"stream","text":["4.8\n"]}],"source":["import numpy as np\n","\n","inputs = [1.0, 2.0, 3.0, 2.5]\n","weights = [0.2, 0.8, -0.5, 1.0]\n","bias = 2.0\n","\n","# Convert lists to numpy arrays\n","inputs_array = np.array(inputs)\n","weights_array = np.array(weights)\n","\n","# Calculate the dot product and add the bias\n","outputs = np.dot(weights_array, inputs_array) + bias\n","\n","print(outputs)"]},{"cell_type":"markdown","metadata":{"id":"DxvV6bnqludN"},"source":["
\n","LAYER OF NEURONS USING NUMPY\n","
"]},{"cell_type":"markdown","metadata":{"id":"SXdqJAhkludN"},"source":["![Screenshot 2024-05-16 at 12.28.51 PM.png](attachment:1baffeb8-236f-4476-a62a-2faf2ef006af.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zX8Z0xhPludN"},"outputs":[],"source":["## In plain Python, we wrote this as a list of lists. With NumPy, this will be a 2-dimensional array, which we’ll call a matrix."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"aQal5RvvludN","outputId":"8de4003a-9a82-4676-c919-c813da0fb0fa"},"outputs":[{"name":"stdout","output_type":"stream","text":["[4.8 1.21 2.385]\n"]}],"source":["import numpy as np\n","\n","inputs = [1.0, 2.0, 3.0, 2.5]\n","weights = [[0.2, 0.8, -0.5, 1],\n"," [0.5, -0.91, 0.26, -0.5],\n"," [-0.26, -0.27, 0.17, 0.87]]\n","biases = [2.0, 3.0, 0.5]\n","\n","# Convert lists to numpy arrays\n","inputs_array = np.array(inputs)\n","weights_array = np.array(weights)\n","biases_array = np.array(biases)\n","\n","# Calculate the dot product and add the biases\n","layer_outputs = np.dot(weights_array, inputs_array) + biases_array\n","print(layer_outputs)"]},{"cell_type":"markdown","metadata":{"id":"pP1RSWwhludN"},"source":["
\n","LAYER OF NEURONS AND BATCH OF DATA USING NUMPY\n","
"]},{"cell_type":"markdown","metadata":{"id":"NalrSqm3ludN"},"source":["![Screenshot 2024-05-16 at 12.40.55 PM.png](attachment:a823d9f6-e42c-451f-adee-be153cc12000.png)"]},{"cell_type":"markdown","metadata":{"id":"uiYRBs22ludN"},"source":["
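\n","\n","NEED TO TAKE TRANSPOSE OF WEIGHT MATRIX: THE INPUT BATCH IS (3 SAMPLES x 4 FEATURES) AND THE WEIGHT MATRIX IS (3 NEURONS x 4 FEATURES), SO THE INPUTS ARE MULTIPLIED BY THE TRANSPOSED WEIGHTS (4 x 3) TO GET A (3 SAMPLES x 3 NEURONS) OUTPUT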
"]},{"cell_type":"markdown","metadata":{"id":"sY2UHGabludN"},"source":["![Screenshot 2024-05-16 at 12.41.11 PM.png](attachment:45a2d43b-5bdc-4752-8e55-9a60c2e459ca.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cfVhmzzsludN","outputId":"64bac98d-d82f-47b6-d182-3f223bfce827"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 4.8 1.21 2.385]\n"," [ 8.9 -1.81 0.2 ]\n"," [ 1.41 1.051 0.026]]\n"]}],"source":["import numpy as np\n","\n","inputs = [[1.0, 2.0, 3.0, 2.5],\n"," [2.0, 5.0, -1.0, 2.0],\n"," [-1.5, 2.7, 3.3, -0.8]]\n","weights = [[0.2, 0.8, -0.5, 1],\n"," [0.5, -0.91, 0.26, -0.5],\n"," [-0.26, -0.27, 0.17, 0.87]]\n","biases = [2.0, 3.0, 0.5]\n","\n","# Convert lists to numpy arrays\n","inputs_array = np.array(inputs)\n","weights_array = np.array(weights)\n","biases_array = np.array(biases)\n","\n","# Calculate the dot product and add the biases\n","outputs = np.dot(inputs_array, weights_array.T) + biases_array\n","print(outputs)\n"]},{"cell_type":"markdown","metadata":{"id":"sV9DgSOzludO"},"source":["
\n","2 LAYERS AND BATCH OF DATA USING NUMPY\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5TujKPLuludO","outputId":"b5798ff9-b7b6-4622-d40f-f325655925c9"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 0.5031 -1.04185 -2.03875]\n"," [ 0.2434 -2.7332 -5.7633 ]\n"," [-0.99314 1.41254 -0.35655]]\n"]}],"source":["import numpy as np\n","\n","inputs = [[1, 2, 3, 2.5],\n"," [2., 5., -1., 2],\n"," [-1.5, 2.7, 3.3, -0.8]]\n","\n","weights = [[0.2, 0.8, -0.5, 1],\n"," [0.5, -0.91, 0.26, -0.5],\n"," [-0.26, -0.27, 0.17, 0.87]]\n","\n","biases = [2, 3, 0.5]\n","\n","weights2 = [[0.1, -0.14, 0.5],\n"," [-0.5, 0.12, -0.33],\n"," [-0.44, 0.73, -0.13]]\n","\n","biases2 = [-1, 2, -0.5]\n","\n","# Convert lists to numpy arrays\n","inputs_array = np.array(inputs)\n","weights_array = np.array(weights)\n","biases_array = np.array(biases)\n","weights2_array = np.array(weights2)\n","biases2_array = np.array(biases2)\n","\n","# Calculate the output of the first layer\n","layer1_outputs = np.dot(inputs_array, weights_array.T) + biases_array\n","\n","# Calculate the output of the second layer\n","layer2_outputs = np.dot(layer1_outputs, weights2_array.T) + biases2_array\n","\n","print(layer2_outputs)\n"]},{"cell_type":"markdown","metadata":{"id":"yXfGkMp0ludO"},"source":["
\n","GENERATING NON LINEAR TRAINING DATA\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-qnZH2tGludO","outputId":"5bba95c2-dcc9-4fa6-bb06-060dc47a38a0"},"outputs":[{"data":{"image/png":"","text/plain":["
"]},"metadata":{},"output_type":"display_data"}],"source":["from nnfs.datasets import spiral_data\n","import numpy as np\n","import nnfs\n","nnfs.init()\n","import matplotlib.pyplot as plt\n","X, y = spiral_data(samples=100, classes=3)\n","plt.scatter(X[:, 0], X[:, 1])\n","plt.show()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"XnQolruAludO","outputId":"eb2da66c-ec4b-4b21-f105-ded2665ccab4"},"outputs":[{"data":{"image/png":"","text/plain":["
"]},"metadata":{},"output_type":"display_data"}],"source":["plt.scatter(X[:, 0], X[:, 1], c=y, cmap='brg')\n","plt.show()"]},{"cell_type":"markdown","metadata":{"id":"vlpw27zXludO"},"source":["
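\n","\n","The colors are only a visual aid for the reader: the input data X holds just the two coordinates of each point, with no class encoding, so the neural network will not be aware of the color differences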
"]},{"cell_type":"markdown","metadata":{"id":"0CkhNCPyludO"},"source":["
\n","DENSE LAYER CLASS\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"8T7WK7LGludO","outputId":"e6c503ab-1b5b-4a35-8f7b-f90d23a344de"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 0.0000000e+00 0.0000000e+00 0.0000000e+00]\n"," [-1.0475188e-04 1.1395361e-04 -4.7983500e-05]\n"," [-2.7414842e-04 3.1729150e-04 -8.6921798e-05]\n"," [-4.2188365e-04 5.2666257e-04 -5.5912682e-05]\n"," [-5.7707680e-04 7.1401405e-04 -8.9430439e-05]]\n"]}],"source":["import numpy as np\n","import nnfs\n","from nnfs.datasets import spiral_data\n","nnfs.init()\n","# Dense layer\n","class Layer_Dense:\n"," # Layer initialization\n"," def __init__(self, n_inputs, n_neurons):\n"," # Initialize weights and biases\n"," self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)\n"," self.biases = np.zeros((1, n_neurons))\n","\n"," # Forward pass\n"," def forward(self, inputs):\n"," # Calculate output values from inputs, weights and biases\n"," self.output = np.dot(inputs, self.weights) + self.biases\n","\n","# Create dataset\n","X, y = spiral_data(samples=100, classes=3)\n","# Create Dense layer with 2 input features and 3 output values\n","dense1 = Layer_Dense(2, 3)\n","# Perform a forward pass of our training data through this layer\n","dense1.forward(X)\n","\n","\n","# Let's see output of the first few samples:\n","print(dense1.output[:5])\n",""]},{"cell_type":"markdown","metadata":{"id":"W3clSyLSludO"},"source":["
\n","ACTIVATION FUNCTION: RELU\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cuwXD_ypludP","outputId":"49568dbc-31d9-46ed-aecd-e8e6fb337a3c"},"outputs":[{"name":"stdout","output_type":"stream","text":["[0. 2. 0. 3.3 0. 1.1 2.2 0. ]\n"]}],"source":["import numpy as np\n","inputs = [0, 2, -1, 3.3, -2.7, 1.1, 2.2, -100]\n","output = np.maximum(0, inputs)\n","print(output)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Lzc6OOkXludP"},"outputs":[],"source":["# ReLU activation\n","class Activation_ReLU:\n"," # Forward pass\n"," def forward(self, inputs):\n"," # Calculate output values from input\n"," self.output = np.maximum(0, inputs)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"D-REY-BIludP","outputId":"3bf738bd-9e7f-400d-85e1-2d893c7d76e3"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[0. 0. 0.]\n"," [0. 0. 0.]\n"," [0. 0. 0.]\n"," [0. 0. 0.]\n"," [0. 0. 0.]]\n"]}],"source":["# Create dataset\n","X, y = spiral_data(samples=100, classes=3)\n","# Create Dense layer with 2 input features and 3 output values\n","dense1 = Layer_Dense(2, 3)\n","# Create ReLU activation (to be used with Dense layer):\n","activation1 = Activation_ReLU()\n","# Make a forward pass of our training data through this layer\n","dense1.forward(X)\n","# Forward pass through activation func.\n","# Takes in output from previous layer\n","activation1.forward(dense1.output)\n","# Let's see output of the first few samples:\n","print(activation1.output[:5])"]},{"cell_type":"markdown","metadata":{"id":"mhK7iYZZludP"},"source":["
\n","ACTIVATION FUNCTION: SOFTMAX\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jzdok_-qludP","outputId":"3ccab8c9-10b9-4a52-c447-8453d2132f89"},"outputs":[{"name":"stdout","output_type":"stream","text":["45\n","[12 15 18]\n","(3,)\n","[ 6 15 24]\n","(3,)\n","[[12 15 18]]\n","(1, 3)\n","[[ 6]\n"," [15]\n"," [24]]\n","(3, 1)\n","[7 8 9]\n","[3 6 9]\n"]}],"source":["### TRY THESE EXERCISES FOR YOURSELF!\n","\n","A = [[1, 2, 3], [4, 5, 6], [7, 8,9]]\n","print(np.sum(A))\n","\n","print(np.sum(A, axis = 0))\n","print(np.sum(A, axis = 0).shape)\n","\n","print(np.sum(A, axis = 1))\n","print(np.sum(A, axis = 1).shape)\n","\n","print(np.sum(A, axis = 0,keepdims = True))\n","print(np.sum(A, axis = 0,keepdims = True).shape)\n","\n","print(np.sum(A, axis = 1,keepdims = True))\n","print(np.sum(A, axis = 1,keepdims = True).shape)\n","\n","print(np.max(A, axis = 0))\n","print(np.max(A, axis = 1))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"VM6nce0CludT","outputId":"1acd78dc-2a60-4a14-f9a9-b357fc093856"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[0.06414769 0.17437149 0.47399085 0.28748998]\n"," [0.04517666 0.90739747 0.00224921 0.04517666]\n"," [0.00522984 0.34875873 0.63547983 0.0105316 ]]\n"]},{"data":{"text/plain":["array([1., 1., 1.])"]},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":["inputs = [[1, 2, 3, 2.5],\n"," [2., 5., -1., 2],\n"," [-1.5, 2.7, 3.3, -0.8]]\n","\n","# Get unnormalized probabilities\n","exp_values = np.exp(inputs - np.max(inputs, axis=1,keepdims=True))\n"," # Normalize them for each sample\n","probabilities = exp_values / np.sum(exp_values, axis=1,keepdims=True)\n","print(probabilities)\n","np.sum(probabilities, axis = 1)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"o_ktaZPmludT"},"outputs":[],"source":["# Softmax activation\n","class Activation_Softmax:\n"," # Forward pass\n"," def forward(self, inputs):\n"," # Get unnormalized probabilities\n"," exp_values = np.exp(inputs - 
np.max(inputs, axis=1, keepdims=True))\n"," # Normalize them for each sample\n"," probabilities = exp_values / np.sum(exp_values, axis=1,keepdims=True)\n"," self.output = probabilities"]},{"cell_type":"markdown","metadata":{"id":"CEa72KdFludT"},"source":["
\n","ONE FORWARD PASS (WITHOUT LOSS)\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Ah-gZ5CvludT","outputId":"7bf1d50b-7198-4ad2-81a0-9d5171d97b36"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[0.33333334 0.33333334 0.33333334]\n"," [0.33333364 0.3333334 0.3333329 ]\n"," [0.33333385 0.3333335 0.33333266]\n"," [0.33333433 0.3333336 0.33333206]\n"," [0.33333462 0.33333373 0.33333164]]\n"]}],"source":["# Create dataset\n","X, y = spiral_data(samples=100, classes=3)\n","# Create Dense layer with 2 input features and 3 output values\n","dense1 = Layer_Dense(2, 3)\n","# Create ReLU activation (to be used with Dense layer):\n","activation1 = Activation_ReLU()\n","# Create second Dense layer with 3 input features (as we take output\n","# of previous layer here) and 3 output values\n","dense2 = Layer_Dense(3, 3)\n","# Create Softmax activation (to be used with Dense layer):\n","activation2 = Activation_Softmax()\n","\n","# Make a forward pass of our training data through this layer\n","dense1.forward(X)\n","\n","# Make a forward pass through activation function\n","# it takes the output of first dense layer here\n","activation1.forward(dense1.output)\n","# Make a forward pass through second Dense layer\n","# it takes outputs of activation function of first layer as inputs\n","dense2.forward(activation1.output)\n","# Make a forward pass through activation function\n","# it takes the output of second dense layer here\n","activation2.forward(dense2.output)\n","# Let's see output of the first few samples:\n","print(activation2.output[:5])"]},{"cell_type":"markdown","metadata":{"id":"wcgFltXVludT"},"source":["
\n","CALCULATING NETWORK ERROR WITH LOSS \n","
"]},{"cell_type":"markdown","metadata":{"id":"h2yMJAbuludT"},"source":["
\n","CROSS ENTROPY LOSS BUILDING BLOCKS IN PYTHON\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zSZu6R0fludT","outputId":"9f31e517-45db-402e-f35e-edd6be8cb196"},"outputs":[{"name":"stdout","output_type":"stream","text":["[0.7 0.5 0.9]\n"]}],"source":["softmax_outputs = np.array([[0.7, 0.1, 0.2],\n"," [0.1, 0.5, 0.4],\n"," [0.02, 0.9, 0.08]])\n","class_targets = [0, 1, 1]\n","print(softmax_outputs[range(len(softmax_outputs)), class_targets])"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"aR9RTJtMludT","outputId":"1f148eb4-ac2a-4db3-ba33-1dfe7f05ad0f"},"outputs":[{"data":{"text/plain":["range(0, 3)"]},"execution_count":20,"metadata":{},"output_type":"execute_result"}],"source":["range(len(softmax_outputs))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"okqQYTxWludU","outputId":"9e605e43-9a31-4fa2-91e9-8c23776b8062"},"outputs":[{"name":"stdout","output_type":"stream","text":["[0.35667494 0.69314718 0.10536052]\n","0.38506088005216804\n"]}],"source":["print(-np.log(softmax_outputs[\n"," range(len(softmax_outputs)), class_targets\n","]))\n","neg_log = -np.log(softmax_outputs[\n"," range(len(softmax_outputs)), class_targets\n"," ])\n","average_loss = np.mean(neg_log)\n","print(average_loss)"]},{"cell_type":"markdown","metadata":{"id":"9ptqANA8ludU"},"source":["
\n","IF DATA IS ONE HOT ENCODED, HOW TO EXTRACT THE RELEVANT PREDICTIONS\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"bGcXCIuuludU","outputId":"808a8008-bfa8-4504-8c5f-1c6bae82049b"},"outputs":[{"name":"stdout","output_type":"stream","text":["[0.35667494 0.69314718 0.10536052]\n","0.38506088005216804\n"]}],"source":["y_true_check = np.array([\n"," [1, 0, 0],\n"," [0, 1, 0],\n"," [0, 1, 0]\n","])\n","\n","y_pred_clipped_check = np.array([\n"," [0.7, 0.2, 0.1],\n"," [0.1, 0.5, 0.4],\n"," [0.02, 0.9, 0.08]\n","])\n","\n","A = y_true_check*y_pred_clipped_check\n","B = np.sum(A, axis = 1)\n","C = - np.log(B)\n","\n","print(C)\n","print(np.mean(C))\n","\n"]},{"cell_type":"markdown","metadata":{"id":"13DeLj4AludU"},"source":["
\n","IMPLEMENTING THE LOSS CLASS\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"EmdbYMj3ludU"},"outputs":[],"source":["# Common loss class\n","class Loss:\n"," # Calculates the data and regularization losses\n"," # given model output and ground truth values\n"," def calculate(self, output, y):\n"," # Calculate sample losses\n"," sample_losses = self.forward(output, y)\n"," # Calculate mean loss\n"," data_loss = np.mean(sample_losses)\n"," # Return loss\n"," return data_loss"]},{"cell_type":"markdown","metadata":{"id":"s6-iBSpDludU"},"source":["
\n","IMPLEMENTING THE CATEGORICAL CROSS ENTROPY CLASS\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"3Tt0k-P0ludU"},"outputs":[],"source":["# Cross-entropy loss\n","class Loss_CategoricalCrossentropy(Loss):\n"," # Forward pass\n"," def forward(self, y_pred, y_true):\n"," # Number of samples in a batch\n"," samples = len(y_pred)\n"," # Clip data to prevent division by 0\n"," # Clip both sides to not drag mean towards any value\n"," y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)\n"," # Probabilities for target values -\n"," # only if categorical labels\n"," if len(y_true.shape) == 1:\n"," correct_confidences = y_pred_clipped[\n"," range(samples),\n"," y_true\n"," ]\n"," # Mask values - only for one-hot encoded labels\n"," elif len(y_true.shape) == 2:\n"," correct_confidences = np.sum(\n"," y_pred_clipped*y_true,\n"," axis=1\n"," )\n"," # Losses\n"," negative_log_likelihoods = -np.log(correct_confidences)\n"," return negative_log_likelihoods"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"NPc9GhTJludU","outputId":"79f269f3-08cc-4b3f-ed4c-0fd85b8242ab"},"outputs":[{"name":"stdout","output_type":"stream","text":["0.38506088005216804\n"]}],"source":["softmax_outputs = np.array([[0.7, 0.1, 0.2],\n"," [0.1, 0.5, 0.4],\n"," [0.02, 0.9, 0.08]])\n","class_targets = np.array([[1, 0, 0],\n"," [0, 1, 0],\n"," [0, 1, 0]])\n","loss_function = Loss_CategoricalCrossentropy()\n","loss = loss_function.calculate(softmax_outputs, class_targets)\n","print(loss)"]},{"cell_type":"markdown","metadata":{"id":"cMKAJfBGludU"},"source":["
\n","FULL CODE UP TO THIS POINT\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ArW5N5NnludU","outputId":"42d51b1e-e3e3-4d0b-9b90-f183cc4f091f"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[0.33333334 0.33333334 0.33333334]\n"," [0.3333341 0.33333302 0.3333329 ]\n"," [0.3333341 0.33333302 0.33333296]\n"," [0.3333341 0.333333 0.33333293]\n"," [0.3333364 0.33333203 0.33333158]]\n","loss: 1.0986193\n","acc: 0.28\n"]}],"source":["# Create dataset\n","X, y = spiral_data(samples=100, classes=3)\n","# Create Dense layer with 2 input features and 3 output values\n","dense1 = Layer_Dense(2, 3)\n","# Create ReLU activation (to be used with Dense layer):\n","activation1 = Activation_ReLU()\n","# Create second Dense layer with 3 input features (as we take output\n","# of previous layer here) and 3 output values\n","dense2 = Layer_Dense(3, 3)\n","# Create Softmax activation (to be used with Dense layer):\n","activation2 = Activation_Softmax()\n","# Create loss function\n","loss_function = Loss_CategoricalCrossentropy()\n","\n","\n","# Perform a forward pass of our training data through this layer\n","dense1.forward(X)\n","# Perform a forward pass through activation function\n","# it takes the output of first dense layer here\n","activation1.forward(dense1.output)\n","\n","# Perform a forward pass through second Dense layer\n","# it takes outputs of activation function of first layer as inputs\n","dense2.forward(activation1.output)\n","# Perform a forward pass through activation function\n","# it takes the output of second dense layer here\n","activation2.forward(dense2.output)\n","# Let's see output of the first few samples:\n","print(activation2.output[:5])\n","# Perform a forward pass through activation function\n","# it takes the output of second dense layer here and returns loss\n","loss = loss_function.calculate(activation2.output, y)\n","# Print loss value\n","print('loss:', loss)\n","\n","# Calculate accuracy from output of activation2 and targets\n","# calculate 
values along first axis\n","predictions = np.argmax(activation2.output, axis=1)\n","if len(y.shape) == 2:\n"," y = np.argmax(y, axis=1)\n","accuracy = np.mean(predictions == y)\n","# Print accuracy\n","print('acc:', accuracy)"]},{"cell_type":"markdown","metadata":{"id":"jUTzqWfUludU"},"source":["
\n","INTRODUCING ACCURACY \n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-9kgV38tludU","outputId":"1920a77a-366f-4f12-9b31-b85e50df6d64"},"outputs":[{"name":"stdout","output_type":"stream","text":["acc: 1.0\n"]}],"source":["import numpy as np\n","# Probabilities of 3 samples\n","softmax_outputs = np.array([[0.7, 0.2, 0.1],\n"," [0.1, 0.5, 0.4],\n"," [0.02, 0.9, 0.08]])\n","# Target (ground-truth) labels for 3 samples\n","class_targets = np.array([0, 1, 1])\n","# Calculate values along second axis (axis of index 1)\n","predictions = np.argmax(softmax_outputs, axis=1)\n","# If targets are one-hot encoded - convert them\n","if len(class_targets.shape) == 2:\n"," class_targets = np.argmax(class_targets, axis=1)\n","# True evaluates to 1; False to 0\n","accuracy = np.mean(predictions == class_targets)\n","print('acc:', accuracy)"]},{"cell_type":"markdown","metadata":{"id":"ffbmCts_ludU"},"source":["
\n","THE NEED FOR OPTIMIZATION \n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"DJ5M282eludV","outputId":"5f47b561-d7c7-4dec-c877-ef93e7d306b4"},"outputs":[{"data":{"image/png":"","text/plain":["
"]},"metadata":{},"output_type":"display_data"}],"source":["#SIMPLER DATASET\n","import matplotlib.pyplot as plt\n","import nnfs\n","from nnfs.datasets import vertical_data\n","nnfs.init()\n","X, y = vertical_data(samples=100, classes=3)\n","plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap='brg')\n","plt.show()"]},{"cell_type":"markdown","metadata":{"id":"jxbgeGWnludV"},"source":["
\n","STRATEGY 1: RANDOMLY SELECT WEIGHTS AND BIASES - DOES NOT WORK!\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Dgmi2YlKludV","outputId":"7dfc3a29-c1e8-40ed-9ca3-cf9541026cd9"},"outputs":[{"name":"stdout","output_type":"stream","text":["New set of weights found, iteration: 0 loss: 1.1016203 acc: 0.3333333333333333\n","New set of weights found, iteration: 1 loss: 1.1002508 acc: 0.3333333333333333\n","New set of weights found, iteration: 2 loss: 1.0992025 acc: 0.3333333333333333\n","New set of weights found, iteration: 3 loss: 1.0986239 acc: 0.3333333333333333\n","New set of weights found, iteration: 10 loss: 1.0984299 acc: 0.3333333333333333\n","New set of weights found, iteration: 22 loss: 1.0976521 acc: 0.36333333333333334\n","New set of weights found, iteration: 150 loss: 1.0974255 acc: 0.3333333333333333\n","New set of weights found, iteration: 874 loss: 1.0972673 acc: 0.3333333333333333\n","New set of weights found, iteration: 894 loss: 1.096895 acc: 0.3333333333333333\n","New set of weights found, iteration: 1036 loss: 1.095428 acc: 0.3333333333333333\n","New set of weights found, iteration: 88633 loss: 1.0952065 acc: 0.3333333333333333\n"]}],"source":["# Create dataset\n","X, y = vertical_data(samples=100, classes=3)\n","# Create model\n","dense1 = Layer_Dense(2, 3) # first dense layer, 2 inputs\n","activation1 = Activation_ReLU()\n","dense2 = Layer_Dense(3, 3) # second dense layer, 3 inputs, 3 outputs\n","activation2 = Activation_Softmax()\n","# Create loss function\n","loss_function = Loss_CategoricalCrossentropy()\n","\n","# Helper variables\n","lowest_loss = 9999999 # some initial value\n","best_dense1_weights = dense1.weights.copy()\n","best_dense1_biases = dense1.biases.copy()\n","best_dense2_weights = dense2.weights.copy()\n","best_dense2_biases = dense2.biases.copy()\n","\n","for iteration in range(100000):\n"," # Generate a new set of weights for iteration\n"," dense1.weights = 0.05 * np.random.randn(2, 3)\n"," dense1.biases = 0.05 * np.random.randn(1, 3)\n"," dense2.weights = 0.05 * 
np.random.randn(3, 3)\n"," dense2.biases = 0.05 * np.random.randn(1, 3)\n"," # Perform a forward pass of the training data through this layer\n"," dense1.forward(X)\n"," activation1.forward(dense1.output)\n"," dense2.forward(activation1.output)\n"," activation2.forward(dense2.output)\n"," # Perform a forward pass through activation function\n"," # it takes the output of second dense layer here and returns loss\n"," loss = loss_function.calculate(activation2.output, y)\n"," # Calculate accuracy from output of activation2 and targets\n"," # calculate values along first axis\n"," predictions = np.argmax(activation2.output, axis=1)\n"," accuracy = np.mean(predictions == y)\n"," # If loss is smaller - print and save weights and biases aside\n"," if loss < lowest_loss:\n"," print('New set of weights found, iteration:', iteration,'loss:', loss, 'acc:', accuracy)\n"," best_dense1_weights = dense1.weights.copy()\n"," best_dense1_biases = dense1.biases.copy()\n"," best_dense2_weights = dense2.weights.copy()\n"," best_dense2_biases = dense2.biases.copy()\n"," lowest_loss = loss"]},{"cell_type":"markdown","metadata":{"id":"YgWpa0BKludV"},"source":["
\n","STRATEGY 2: RANDOMLY ADJUST WEIGHTS AND BIASES - WORKS!\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1zGlYl3QludV","outputId":"8fca285e-1ba2-40a1-fd09-52ef09f0f9bb"},"outputs":[{"name":"stdout","output_type":"stream","text":["New set of weights found, iteration: 0 loss: 1.1008747 acc: 0.3333333333333333\n","New set of weights found, iteration: 3 loss: 1.1005714 acc: 0.3333333333333333\n","New set of weights found, iteration: 4 loss: 1.099462 acc: 0.3333333333333333\n","New set of weights found, iteration: 9 loss: 1.0994359 acc: 0.3333333333333333\n","New set of weights found, iteration: 10 loss: 1.09855 acc: 0.3333333333333333\n","New set of weights found, iteration: 13 loss: 1.098517 acc: 0.3333333333333333\n","New set of weights found, iteration: 14 loss: 1.0938607 acc: 0.3333333333333333\n","New set of weights found, iteration: 15 loss: 1.0920315 acc: 0.3333333333333333\n","New set of weights found, iteration: 17 loss: 1.091391 acc: 0.3333333333333333\n","New set of weights found, iteration: 19 loss: 1.0910357 acc: 0.3333333333333333\n","New set of weights found, iteration: 20 loss: 1.0898421 acc: 0.3333333333333333\n","New set of weights found, iteration: 21 loss: 1.0843327 acc: 0.3333333333333333\n","New set of weights found, iteration: 26 loss: 1.0835577 acc: 0.3333333333333333\n","New set of weights found, iteration: 27 loss: 1.0823517 acc: 0.3333333333333333\n","New set of weights found, iteration: 28 loss: 1.0778279 acc: 0.3333333333333333\n","New set of weights found, iteration: 31 loss: 1.076321 acc: 0.3333333333333333\n","New set of weights found, iteration: 35 loss: 1.0729524 acc: 0.3\n","New set of weights found, iteration: 36 loss: 1.0699975 acc: 0.3333333333333333\n","New set of weights found, iteration: 47 loss: 1.0631136 acc: 0.3333333333333333\n","New set of weights found, iteration: 52 loss: 1.062574 acc: 0.3333333333333333\n","New set of weights found, iteration: 53 loss: 1.0624933 acc: 0.3333333333333333\n","New set of weights found, iteration: 54 loss: 1.0592291 acc: 
0.3333333333333333\n","New set of weights found, iteration: 57 loss: 1.0574998 acc: 0.3333333333333333\n","New set of weights found, iteration: 58 loss: 1.0493913 acc: 0.38\n","New set of weights found, iteration: 64 loss: 1.0450443 acc: 0.65\n","New set of weights found, iteration: 68 loss: 1.0377563 acc: 0.65\n","New set of weights found, iteration: 70 loss: 1.0333902 acc: 0.66\n","New set of weights found, iteration: 71 loss: 1.0315365 acc: 0.63\n","New set of weights found, iteration: 74 loss: 1.0283976 acc: 0.6033333333333334\n","New set of weights found, iteration: 75 loss: 1.0214951 acc: 0.6133333333333333\n","New set of weights found, iteration: 82 loss: 1.0101981 acc: 0.6466666666666666\n","New set of weights found, iteration: 84 loss: 1.0100108 acc: 0.5733333333333334\n","New set of weights found, iteration: 90 loss: 1.0035288 acc: 0.7133333333333334\n","New set of weights found, iteration: 92 loss: 1.0008268 acc: 0.65\n","New set of weights found, iteration: 95 loss: 0.99175525 acc: 0.6\n","New set of weights found, iteration: 98 loss: 0.9812336 acc: 0.66\n","New set of weights found, iteration: 101 loss: 0.97777444 acc: 0.58\n","New set of weights found, iteration: 106 loss: 0.977026 acc: 0.67\n","New set of weights found, iteration: 110 loss: 0.97546613 acc: 0.6766666666666666\n","New set of weights found, iteration: 111 loss: 0.9656759 acc: 0.9066666666666666\n","New set of weights found, iteration: 115 loss: 0.96165353 acc: 0.8733333333333333\n","New set of weights found, iteration: 116 loss: 0.94877195 acc: 0.9\n","New set of weights found, iteration: 122 loss: 0.93169373 acc: 0.8766666666666667\n","New set of weights found, iteration: 127 loss: 0.9187146 acc: 0.81\n","New set of weights found, iteration: 131 loss: 0.91708404 acc: 0.6666666666666666\n","New set of weights found, iteration: 136 loss: 0.9127953 acc: 0.6933333333333334\n","New set of weights found, iteration: 139 loss: 0.90787965 acc: 0.6666666666666666\n","New set of weights found, 
iteration: 141 loss: 0.9043988 acc: 0.66\n","New set of weights found, iteration: 146 loss: 0.90065634 acc: 0.6666666666666666\n","New set of weights found, iteration: 147 loss: 0.89855623 acc: 0.6533333333333333\n","New set of weights found, iteration: 149 loss: 0.8932796 acc: 0.69\n","New set of weights found, iteration: 152 loss: 0.87403554 acc: 0.84\n","New set of weights found, iteration: 153 loss: 0.8707889 acc: 0.7666666666666667\n","New set of weights found, iteration: 156 loss: 0.8667261 acc: 0.88\n","New set of weights found, iteration: 158 loss: 0.8490861 acc: 0.87\n","New set of weights found, iteration: 162 loss: 0.84764665 acc: 0.8333333333333334\n","New set of weights found, iteration: 165 loss: 0.8428589 acc: 0.8033333333333333\n","New set of weights found, iteration: 166 loss: 0.8239612 acc: 0.66\n","New set of weights found, iteration: 167 loss: 0.7897708 acc: 0.7\n","New set of weights found, iteration: 171 loss: 0.7849403 acc: 0.6733333333333333\n","New set of weights found, iteration: 172 loss: 0.782472 acc: 0.6633333333333333\n","New set of weights found, iteration: 173 loss: 0.7803537 acc: 0.6633333333333333\n","New set of weights found, iteration: 175 loss: 0.7747049 acc: 0.67\n","New set of weights found, iteration: 178 loss: 0.77419984 acc: 0.6666666666666666\n","New set of weights found, iteration: 179 loss: 0.75924987 acc: 0.7233333333333334\n","New set of weights found, iteration: 183 loss: 0.7520049 acc: 0.67\n","New set of weights found, iteration: 187 loss: 0.7399888 acc: 0.7366666666666667\n","New set of weights found, iteration: 188 loss: 0.73904145 acc: 0.8333333333333334\n","New set of weights found, iteration: 190 loss: 0.7320137 acc: 0.8266666666666667\n","New set of weights found, iteration: 195 loss: 0.712903 acc: 0.69\n","New set of weights found, iteration: 198 loss: 0.70528257 acc: 0.7266666666666667\n","New set of weights found, iteration: 199 loss: 0.70251924 acc: 0.6966666666666667\n","New set of weights found, 
iteration: 200 loss: 0.69334394 acc: 0.72\n","New set of weights found, iteration: 201 loss: 0.6803275 acc: 0.79\n","New set of weights found, iteration: 205 loss: 0.6770639 acc: 0.7666666666666667\n","New set of weights found, iteration: 206 loss: 0.66997415 acc: 0.8733333333333333\n","New set of weights found, iteration: 207 loss: 0.6599941 acc: 0.8066666666666666\n","New set of weights found, iteration: 208 loss: 0.65543926 acc: 0.7766666666666666\n","New set of weights found, iteration: 209 loss: 0.62252766 acc: 0.8033333333333333\n","New set of weights found, iteration: 210 loss: 0.61185175 acc: 0.7766666666666666\n","New set of weights found, iteration: 211 loss: 0.599142 acc: 0.8766666666666667\n","New set of weights found, iteration: 213 loss: 0.5920534 acc: 0.8766666666666667\n","New set of weights found, iteration: 222 loss: 0.5918576 acc: 0.8533333333333334\n","New set of weights found, iteration: 229 loss: 0.57633215 acc: 0.8533333333333334\n","New set of weights found, iteration: 234 loss: 0.55926883 acc: 0.8933333333333333\n","New set of weights found, iteration: 239 loss: 0.5579059 acc: 0.83\n","New set of weights found, iteration: 243 loss: 0.55486155 acc: 0.8466666666666667\n","New set of weights found, iteration: 246 loss: 0.5507333 acc: 0.8866666666666667\n","New set of weights found, iteration: 247 loss: 0.545519 acc: 0.8833333333333333\n","New set of weights found, iteration: 248 loss: 0.5346363 acc: 0.86\n","New set of weights found, iteration: 264 loss: 0.5342746 acc: 0.83\n","New set of weights found, iteration: 265 loss: 0.5253426 acc: 0.8866666666666667\n","New set of weights found, iteration: 267 loss: 0.51280546 acc: 0.9066666666666666\n","New set of weights found, iteration: 269 loss: 0.50337905 acc: 0.9233333333333333\n","New set of weights found, iteration: 276 loss: 0.5024293 acc: 0.89\n","New set of weights found, iteration: 278 loss: 0.491296 acc: 0.9133333333333333\n","New set of weights found, iteration: 283 loss: 0.48956776 acc: 
0.9033333333333333\n","New set of weights found, iteration: 284 loss: 0.48599878 acc: 0.93\n","New set of weights found, iteration: 289 loss: 0.48009065 acc: 0.9266666666666666\n","New set of weights found, iteration: 290 loss: 0.47792414 acc: 0.9233333333333333\n","New set of weights found, iteration: 291 loss: 0.46914592 acc: 0.9133333333333333\n","New set of weights found, iteration: 298 loss: 0.46764258 acc: 0.9266666666666666\n","New set of weights found, iteration: 300 loss: 0.45708776 acc: 0.8933333333333333\n","New set of weights found, iteration: 301 loss: 0.45304757 acc: 0.8966666666666666\n","New set of weights found, iteration: 304 loss: 0.45023417 acc: 0.9266666666666666\n","New set of weights found, iteration: 308 loss: 0.44732147 acc: 0.8833333333333333\n","New set of weights found, iteration: 309 loss: 0.44072458 acc: 0.88\n","New set of weights found, iteration: 313 loss: 0.4319237 acc: 0.8933333333333333\n","New set of weights found, iteration: 318 loss: 0.42440805 acc: 0.92\n","New set of weights found, iteration: 320 loss: 0.41862 acc: 0.92\n","New set of weights found, iteration: 324 loss: 0.41297048 acc: 0.9166666666666666\n","New set of weights found, iteration: 326 loss: 0.41245985 acc: 0.9\n","New set of weights found, iteration: 328 loss: 0.40676734 acc: 0.9233333333333333\n","New set of weights found, iteration: 329 loss: 0.40640786 acc: 0.93\n","New set of weights found, iteration: 335 loss: 0.39842996 acc: 0.93\n","New set of weights found, iteration: 338 loss: 0.39804557 acc: 0.94\n","New set of weights found, iteration: 344 loss: 0.39646825 acc: 0.9233333333333333\n","New set of weights found, iteration: 347 loss: 0.38945505 acc: 0.93\n","New set of weights found, iteration: 348 loss: 0.3871968 acc: 0.9166666666666666\n","New set of weights found, iteration: 351 loss: 0.3779128 acc: 0.9133333333333333\n","New set of weights found, iteration: 354 loss: 0.37311223 acc: 0.9233333333333333\n","New set of weights found, iteration: 355 
loss: 0.37276617 acc: 0.9166666666666666\n","New set of weights found, iteration: 356 loss: 0.36852098 acc: 0.9166666666666666\n","New set of weights found, iteration: 360 loss: 0.35868368 acc: 0.9333333333333333\n","New set of weights found, iteration: 362 loss: 0.35691482 acc: 0.93\n","New set of weights found, iteration: 367 loss: 0.3429358 acc: 0.9366666666666666\n","New set of weights found, iteration: 368 loss: 0.33983532 acc: 0.93\n","New set of weights found, iteration: 375 loss: 0.33676398 acc: 0.9333333333333333\n","New set of weights found, iteration: 378 loss: 0.33293056 acc: 0.9366666666666666\n","New set of weights found, iteration: 379 loss: 0.32861033 acc: 0.93\n","New set of weights found, iteration: 380 loss: 0.32313567 acc: 0.93\n","New set of weights found, iteration: 383 loss: 0.32154855 acc: 0.9366666666666666\n","New set of weights found, iteration: 386 loss: 0.32056552 acc: 0.9333333333333333\n","New set of weights found, iteration: 392 loss: 0.32019016 acc: 0.93\n","New set of weights found, iteration: 394 loss: 0.31832498 acc: 0.9366666666666666\n","New set of weights found, iteration: 400 loss: 0.315471 acc: 0.9333333333333333\n","New set of weights found, iteration: 401 loss: 0.3055538 acc: 0.93\n","New set of weights found, iteration: 407 loss: 0.3045038 acc: 0.9333333333333333\n","New set of weights found, iteration: 410 loss: 0.30153093 acc: 0.9366666666666666\n","New set of weights found, iteration: 417 loss: 0.29838136 acc: 0.92\n","New set of weights found, iteration: 420 loss: 0.29762506 acc: 0.93\n","New set of weights found, iteration: 428 loss: 0.29648978 acc: 0.9333333333333333\n","New set of weights found, iteration: 434 loss: 0.29306 acc: 0.9333333333333333\n","New set of weights found, iteration: 435 loss: 0.29192674 acc: 0.93\n","New set of weights found, iteration: 440 loss: 0.28488693 acc: 0.9333333333333333\n","New set of weights found, iteration: 444 loss: 0.28334972 acc: 0.93\n","New set of weights found, iteration: 
448 loss: 0.28309777 acc: 0.93\n","New set of weights found, iteration: 449 loss: 0.28108674 acc: 0.93\n","New set of weights found, iteration: 453 loss: 0.27773702 acc: 0.94\n","New set of weights found, iteration: 461 loss: 0.27312914 acc: 0.9266666666666666\n","New set of weights found, iteration: 467 loss: 0.2729932 acc: 0.93\n","New set of weights found, iteration: 472 loss: 0.27115387 acc: 0.93\n","New set of weights found, iteration: 473 loss: 0.26995963 acc: 0.9333333333333333\n","New set of weights found, iteration: 475 loss: 0.26944205 acc: 0.9333333333333333\n","New set of weights found, iteration: 477 loss: 0.268688 acc: 0.9366666666666666\n","New set of weights found, iteration: 479 loss: 0.26242334 acc: 0.9266666666666666\n","New set of weights found, iteration: 493 loss: 0.26207444 acc: 0.9333333333333333\n","New set of weights found, iteration: 495 loss: 0.25908068 acc: 0.93\n","New set of weights found, iteration: 496 loss: 0.2590734 acc: 0.93\n","New set of weights found, iteration: 497 loss: 0.2582146 acc: 0.94\n","New set of weights found, iteration: 503 loss: 0.25407296 acc: 0.93\n","New set of weights found, iteration: 505 loss: 0.25202662 acc: 0.9333333333333333\n","New set of weights found, iteration: 506 loss: 0.24973544 acc: 0.9366666666666666\n","New set of weights found, iteration: 512 loss: 0.24969201 acc: 0.93\n","New set of weights found, iteration: 517 loss: 0.24860601 acc: 0.9266666666666666\n","New set of weights found, iteration: 519 loss: 0.24686712 acc: 0.9266666666666666\n","New set of weights found, iteration: 529 loss: 0.24602742 acc: 0.9333333333333333\n","New set of weights found, iteration: 535 loss: 0.2445193 acc: 0.93\n","New set of weights found, iteration: 536 loss: 0.24081425 acc: 0.9333333333333333\n","New set of weights found, iteration: 538 loss: 0.24043368 acc: 0.93\n","New set of weights found, iteration: 546 loss: 0.23582341 acc: 0.9333333333333333\n","New set of weights found, iteration: 550 loss: 0.23467208 
acc: 0.93\n","New set of weights found, iteration: 552 loss: 0.23393926 acc: 0.93\n","New set of weights found, iteration: 557 loss: 0.22943695 acc: 0.93\n","New set of weights found, iteration: 564 loss: 0.22614151 acc: 0.9266666666666666\n","New set of weights found, iteration: 570 loss: 0.22351934 acc: 0.93\n","New set of weights found, iteration: 582 loss: 0.2227324 acc: 0.93\n","New set of weights found, iteration: 585 loss: 0.21830775 acc: 0.93\n","New set of weights found, iteration: 597 loss: 0.2167703 acc: 0.93\n","New set of weights found, iteration: 610 loss: 0.21555844 acc: 0.9333333333333333\n","New set of weights found, iteration: 625 loss: 0.21515213 acc: 0.9366666666666666\n","New set of weights found, iteration: 629 loss: 0.21122937 acc: 0.9333333333333333\n","New set of weights found, iteration: 631 loss: 0.21094893 acc: 0.9266666666666666\n","New set of weights found, iteration: 637 loss: 0.21082413 acc: 0.9266666666666666\n","New set of weights found, iteration: 659 loss: 0.21066985 acc: 0.93\n","New set of weights found, iteration: 661 loss: 0.20929192 acc: 0.93\n","New set of weights found, iteration: 684 loss: 0.20894809 acc: 0.93\n","New set of weights found, iteration: 686 loss: 0.20748574 acc: 0.9333333333333333\n","New set of weights found, iteration: 689 loss: 0.205267 acc: 0.93\n","New set of weights found, iteration: 708 loss: 0.20465927 acc: 0.93\n","New set of weights found, iteration: 712 loss: 0.20393105 acc: 0.9266666666666666\n","New set of weights found, iteration: 729 loss: 0.20358147 acc: 0.9266666666666666\n","New set of weights found, iteration: 733 loss: 0.20317748 acc: 0.9266666666666666\n","New set of weights found, iteration: 739 loss: 0.20279907 acc: 0.94\n","New set of weights found, iteration: 740 loss: 0.20236613 acc: 0.9333333333333333\n","New set of weights found, iteration: 746 loss: 0.2021528 acc: 0.93\n","New set of weights found, iteration: 748 loss: 0.20100321 acc: 0.93\n","New set of weights found, iteration: 
754 loss: 0.1993275 acc: 0.93\n","New set of weights found, iteration: 757 loss: 0.19792211 acc: 0.93\n","New set of weights found, iteration: 773 loss: 0.19783711 acc: 0.93\n","New set of weights found, iteration: 775 loss: 0.19740187 acc: 0.9333333333333333\n","New set of weights found, iteration: 776 loss: 0.19721994 acc: 0.9333333333333333\n","New set of weights found, iteration: 777 loss: 0.1961473 acc: 0.9333333333333333\n","New set of weights found, iteration: 779 loss: 0.19598241 acc: 0.9366666666666666\n","New set of weights found, iteration: 784 loss: 0.19526182 acc: 0.9333333333333333\n","New set of weights found, iteration: 785 loss: 0.19456321 acc: 0.9333333333333333\n","New set of weights found, iteration: 791 loss: 0.1943641 acc: 0.93\n","New set of weights found, iteration: 799 loss: 0.19334234 acc: 0.9366666666666666\n","New set of weights found, iteration: 806 loss: 0.19311096 acc: 0.9333333333333333\n","New set of weights found, iteration: 809 loss: 0.19277118 acc: 0.93\n","New set of weights found, iteration: 811 loss: 0.19264112 acc: 0.93\n","New set of weights found, iteration: 815 loss: 0.190967 acc: 0.93\n","New set of weights found, iteration: 890 loss: 0.18890005 acc: 0.9333333333333333\n","New set of weights found, iteration: 892 loss: 0.18846218 acc: 0.9333333333333333\n","New set of weights found, iteration: 893 loss: 0.18713883 acc: 0.9333333333333333\n","New set of weights found, iteration: 903 loss: 0.1869782 acc: 0.9333333333333333\n","New set of weights found, iteration: 909 loss: 0.18646298 acc: 0.9366666666666666\n","New set of weights found, iteration: 917 loss: 0.18598829 acc: 0.93\n","New set of weights found, iteration: 927 loss: 0.18594755 acc: 0.93\n","New set of weights found, iteration: 932 loss: 0.18573302 acc: 0.93\n","New set of weights found, iteration: 942 loss: 0.18538505 acc: 0.9333333333333333\n","New set of weights found, iteration: 946 loss: 0.18424016 acc: 0.9333333333333333\n","New set of weights found, 
iteration: 959 loss: 0.1835666 acc: 0.9333333333333333\n","New set of weights found, iteration: 969 loss: 0.1830834 acc: 0.9333333333333333\n","New set of weights found, iteration: 974 loss: 0.18286093 acc: 0.93\n","New set of weights found, iteration: 983 loss: 0.1820501 acc: 0.93\n","New set of weights found, iteration: 985 loss: 0.18183175 acc: 0.93\n","New set of weights found, iteration: 993 loss: 0.18173474 acc: 0.93\n","New set of weights found, iteration: 995 loss: 0.18133913 acc: 0.9266666666666666\n","New set of weights found, iteration: 1001 loss: 0.18080577 acc: 0.9266666666666666\n","New set of weights found, iteration: 1006 loss: 0.1802216 acc: 0.9333333333333333\n","New set of weights found, iteration: 1025 loss: 0.17995203 acc: 0.9333333333333333\n","New set of weights found, iteration: 1042 loss: 0.17973644 acc: 0.93\n","New set of weights found, iteration: 1061 loss: 0.17944387 acc: 0.9266666666666666\n","New set of weights found, iteration: 1082 loss: 0.17943431 acc: 0.93\n","New set of weights found, iteration: 1083 loss: 0.17871827 acc: 0.9266666666666666\n","New set of weights found, iteration: 1093 loss: 0.17833588 acc: 0.9333333333333333\n","New set of weights found, iteration: 1123 loss: 0.1782659 acc: 0.9333333333333333\n","New set of weights found, iteration: 1144 loss: 0.17753273 acc: 0.93\n","New set of weights found, iteration: 1154 loss: 0.17727007 acc: 0.93\n","New set of weights found, iteration: 1165 loss: 0.17724434 acc: 0.9333333333333333\n","New set of weights found, iteration: 1166 loss: 0.17719936 acc: 0.9333333333333333\n","New set of weights found, iteration: 1171 loss: 0.17672187 acc: 0.9366666666666666\n","New set of weights found, iteration: 1176 loss: 0.17671774 acc: 0.9333333333333333\n","New set of weights found, iteration: 1212 loss: 0.17666677 acc: 0.9333333333333333\n","New set of weights found, iteration: 1223 loss: 0.17643958 acc: 0.9333333333333333\n","New set of weights found, iteration: 1275 loss: 0.1762153 
acc: 0.9333333333333333\n","New set of weights found, iteration: 1303 loss: 0.17542142 acc: 0.9333333333333333\n","New set of weights found, iteration: 1378 loss: 0.17535225 acc: 0.9333333333333333\n","New set of weights found, iteration: 1382 loss: 0.17529377 acc: 0.9333333333333333\n","New set of weights found, iteration: 1404 loss: 0.17457567 acc: 0.9333333333333333\n","New set of weights found, iteration: 1457 loss: 0.17447841 acc: 0.9333333333333333\n","New set of weights found, iteration: 1461 loss: 0.17424846 acc: 0.93\n","New set of weights found, iteration: 1477 loss: 0.1737377 acc: 0.9266666666666666\n","New set of weights found, iteration: 1548 loss: 0.17335981 acc: 0.9266666666666666\n","New set of weights found, iteration: 1580 loss: 0.1733058 acc: 0.9266666666666666\n","New set of weights found, iteration: 1587 loss: 0.17321306 acc: 0.93\n","New set of weights found, iteration: 1596 loss: 0.17308939 acc: 0.93\n","New set of weights found, iteration: 1614 loss: 0.17301595 acc: 0.9266666666666666\n","New set of weights found, iteration: 1644 loss: 0.17284796 acc: 0.9266666666666666\n","New set of weights found, iteration: 1647 loss: 0.17258313 acc: 0.9266666666666666\n","New set of weights found, iteration: 1662 loss: 0.1723816 acc: 0.93\n","New set of weights found, iteration: 1706 loss: 0.17223743 acc: 0.9266666666666666\n","New set of weights found, iteration: 1719 loss: 0.1719875 acc: 0.93\n","New set of weights found, iteration: 1978 loss: 0.17198084 acc: 0.9266666666666666\n","New set of weights found, iteration: 2001 loss: 0.17195481 acc: 0.93\n","New set of weights found, iteration: 2026 loss: 0.1717552 acc: 0.9266666666666666\n","New set of weights found, iteration: 2031 loss: 0.17165588 acc: 0.93\n","New set of weights found, iteration: 2066 loss: 0.17161626 acc: 0.9266666666666666\n","New set of weights found, iteration: 2074 loss: 0.17140518 acc: 0.93\n","New set of weights found, iteration: 2441 loss: 0.17138883 acc: 0.93\n","New set of 
weights found, iteration: 2636 loss: 0.17134853 acc: 0.9266666666666666\n","New set of weights found, iteration: 2718 loss: 0.17130204 acc: 0.9266666666666666\n","New set of weights found, iteration: 2775 loss: 0.17122428 acc: 0.93\n","New set of weights found, iteration: 2829 loss: 0.1711681 acc: 0.93\n","New set of weights found, iteration: 2874 loss: 0.17114341 acc: 0.93\n","New set of weights found, iteration: 2978 loss: 0.17100853 acc: 0.93\n","New set of weights found, iteration: 3138 loss: 0.17093514 acc: 0.9266666666666666\n","New set of weights found, iteration: 3293 loss: 0.17080545 acc: 0.9266666666666666\n","New set of weights found, iteration: 3486 loss: 0.1707664 acc: 0.93\n","New set of weights found, iteration: 3681 loss: 0.17070974 acc: 0.93\n","New set of weights found, iteration: 4038 loss: 0.17066137 acc: 0.93\n","New set of weights found, iteration: 4090 loss: 0.17065905 acc: 0.93\n","New set of weights found, iteration: 4116 loss: 0.17063397 acc: 0.93\n","New set of weights found, iteration: 4258 loss: 0.17062972 acc: 0.93\n","New set of weights found, iteration: 5960 loss: 0.17062148 acc: 0.93\n","New set of weights found, iteration: 6352 loss: 0.17058212 acc: 0.93\n","New set of weights found, iteration: 8192 loss: 0.17057395 acc: 0.93\n","New set of weights found, iteration: 8254 loss: 0.17056267 acc: 0.93\n","New set of weights found, iteration: 9463 loss: 0.17056097 acc: 0.93\n"]}],"source":[
"# Create dataset\n",
"X, y = vertical_data(samples=100, classes=3)\n",
"# Create model\n",
"dense1 = Layer_Dense(2, 3)  # first dense layer: 2 inputs, 3 outputs\n",
"activation1 = Activation_ReLU()\n",
"dense2 = Layer_Dense(3, 3)  # second dense layer: 3 inputs, 3 outputs\n",
"activation2 = Activation_Softmax()\n",
"# Create loss function\n",
"loss_function = Loss_CategoricalCrossentropy()\n",
"# Random-search settings\n",
"ITERATIONS = 10000  # number of random adjustments to try\n",
"STEP_SIZE = 0.05  # scale of each random adjustment\n",
"# Helper variables: lowest loss so far and the parameters that produced it\n",
"lowest_loss = np.inf  # any real loss is lower than this\n",
"best_dense1_weights = dense1.weights.copy()\n",
"best_dense1_biases = dense1.biases.copy()\n",
"best_dense2_weights = dense2.weights.copy()\n",
"best_dense2_biases = dense2.biases.copy()\n",
"for iteration in range(ITERATIONS):\n",
"    # Nudge the current parameters by small random values; the shapes are\n",
"    # read from the layers so they always match the model defined above\n",
"    dense1.weights += STEP_SIZE * np.random.randn(*dense1.weights.shape)\n",
"    dense1.biases += STEP_SIZE * np.random.randn(*dense1.biases.shape)\n",
"    dense2.weights += STEP_SIZE * np.random.randn(*dense2.weights.shape)\n",
"    dense2.biases += STEP_SIZE * np.random.randn(*dense2.biases.shape)\n",
"    # Forward pass of the training data through the whole network\n",
"    dense1.forward(X)\n",
"    activation1.forward(dense1.output)\n",
"    dense2.forward(activation1.output)\n",
"    activation2.forward(dense2.output)\n",
"    # Loss of the softmax output against the targets\n",
"    loss = loss_function.calculate(activation2.output, y)\n",
"    # Accuracy: the predicted class is the argmax over axis 1 of the output\n",
"    predictions = np.argmax(activation2.output, axis=1)\n",
"    accuracy = np.mean(predictions == y)\n",
"    if loss < lowest_loss:\n",
"        # Loss is smaller - print and save weights and biases aside\n",
"        print('New set of weights found, iteration:', iteration,'loss:', loss, 'acc:', accuracy)\n",
"        best_dense1_weights = dense1.weights.copy()\n",
"        best_dense1_biases = dense1.biases.copy()\n",
"        best_dense2_weights = dense2.weights.copy()\n",
"        best_dense2_biases = dense2.biases.copy()\n",
"        lowest_loss = loss\n",
"    else:\n",
"        # Revert weights and biases; copies are needed because the += above\n",
"        # modifies the arrays in place\n",
"        dense1.weights = best_dense1_weights.copy()\n",
"        dense1.biases = best_dense1_biases.copy()\n",
"        dense2.weights = best_dense2_weights.copy()\n",
"        dense2.biases = best_dense2_biases.copy()"
]},{"cell_type":"markdown","metadata":{"id":"uCFESJdqludV"},"source":["
\n","STRATEGY 2: FOR SPIRAL DATASET - DOES NOT WORK!\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"KI7k0Q8eludV","outputId":"48ff052b-e5af-4503-86d3-948c231d2a44"},"outputs":[{"name":"stdout","output_type":"stream","text":["New set of weights found, iteration: 0 loss: 1.0991902 acc: 0.3333333333333333\n","New set of weights found, iteration: 1 loss: 1.0988214 acc: 0.3333333333333333\n","New set of weights found, iteration: 6 loss: 1.0982754 acc: 0.3333333333333333\n","New set of weights found, iteration: 14 loss: 1.0978643 acc: 0.3333333333333333\n","New set of weights found, iteration: 23 loss: 1.0972557 acc: 0.35\n","New set of weights found, iteration: 37 loss: 1.0970367 acc: 0.35\n","New set of weights found, iteration: 40 loss: 1.0958394 acc: 0.37\n","New set of weights found, iteration: 41 loss: 1.0950066 acc: 0.4066666666666667\n","New set of weights found, iteration: 44 loss: 1.0948946 acc: 0.37333333333333335\n","New set of weights found, iteration: 53 loss: 1.0943341 acc: 0.3933333333333333\n","New set of weights found, iteration: 58 loss: 1.0942849 acc: 0.36666666666666664\n","New set of weights found, iteration: 59 loss: 1.0941792 acc: 0.35\n","New set of weights found, iteration: 60 loss: 1.0926698 acc: 0.38666666666666666\n","New set of weights found, iteration: 61 loss: 1.0913428 acc: 0.3933333333333333\n","New set of weights found, iteration: 63 loss: 1.0910325 acc: 0.4066666666666667\n","New set of weights found, iteration: 65 loss: 1.0902771 acc: 0.3933333333333333\n","New set of weights found, iteration: 75 loss: 1.0892566 acc: 0.39\n","New set of weights found, iteration: 88 loss: 1.0892477 acc: 0.39666666666666667\n","New set of weights found, iteration: 90 loss: 1.0862132 acc: 0.39\n","New set of weights found, iteration: 95 loss: 1.0858525 acc: 0.38333333333333336\n","New set of weights found, iteration: 99 loss: 1.0850619 acc: 0.44\n","New set of weights found, iteration: 103 loss: 1.0833515 acc: 0.4166666666666667\n","New set of weights found, iteration: 108 loss: 1.0818787 
acc: 0.41333333333333333\n","New set of weights found, iteration: 111 loss: 1.0813941 acc: 0.39\n","New set of weights found, iteration: 113 loss: 1.0787892 acc: 0.38666666666666666\n","New set of weights found, iteration: 114 loss: 1.0777413 acc: 0.36333333333333334\n","New set of weights found, iteration: 116 loss: 1.0772293 acc: 0.38\n","New set of weights found, iteration: 121 loss: 1.0759072 acc: 0.3933333333333333\n","New set of weights found, iteration: 132 loss: 1.075869 acc: 0.43\n","New set of weights found, iteration: 134 loss: 1.075571 acc: 0.39666666666666667\n","New set of weights found, iteration: 143 loss: 1.0749155 acc: 0.4066666666666667\n","New set of weights found, iteration: 148 loss: 1.0747138 acc: 0.4066666666666667\n","New set of weights found, iteration: 156 loss: 1.0740647 acc: 0.42\n","New set of weights found, iteration: 183 loss: 1.0736305 acc: 0.42333333333333334\n","New set of weights found, iteration: 184 loss: 1.0735968 acc: 0.4266666666666667\n","New set of weights found, iteration: 196 loss: 1.0734725 acc: 0.4266666666666667\n","New set of weights found, iteration: 197 loss: 1.0726037 acc: 0.44\n","New set of weights found, iteration: 198 loss: 1.0724471 acc: 0.4533333333333333\n","New set of weights found, iteration: 213 loss: 1.0718489 acc: 0.38666666666666666\n","New set of weights found, iteration: 225 loss: 1.07182 acc: 0.4166666666666667\n","New set of weights found, iteration: 228 loss: 1.0711939 acc: 0.41\n","New set of weights found, iteration: 276 loss: 1.0709915 acc: 0.42\n","New set of weights found, iteration: 395 loss: 1.0707066 acc: 0.42333333333333334\n","New set of weights found, iteration: 471 loss: 1.070367 acc: 0.42\n","New set of weights found, iteration: 1317 loss: 1.0702449 acc: 0.43333333333333335\n","New set of weights found, iteration: 1857 loss: 1.0702258 acc: 0.43333333333333335\n","New set of weights found, iteration: 2198 loss: 1.070218 acc: 0.4266666666666667\n","New set of weights found, iteration: 
2315 loss: 1.0701423 acc: 0.44333333333333336\n","New set of weights found, iteration: 5115 loss: 1.0700669 acc: 0.43333333333333335\n","New set of weights found, iteration: 5942 loss: 1.069947 acc: 0.44333333333333336\n"]}],"source":[
"# Create dataset\n",
"X, y = spiral_data(samples=100, classes=3)\n",
"# Create model\n",
"dense1 = Layer_Dense(2, 3)  # first dense layer: 2 inputs, 3 outputs\n",
"activation1 = Activation_ReLU()\n",
"dense2 = Layer_Dense(3, 3)  # second dense layer: 3 inputs, 3 outputs\n",
"activation2 = Activation_Softmax()\n",
"# Create loss function\n",
"loss_function = Loss_CategoricalCrossentropy()\n",
"# Random-search settings\n",
"ITERATIONS = 10000  # number of random adjustments to try\n",
"STEP_SIZE = 0.05  # scale of each random adjustment\n",
"# Helper variables: lowest loss so far and the parameters that produced it\n",
"lowest_loss = np.inf  # any real loss is lower than this\n",
"best_dense1_weights = dense1.weights.copy()\n",
"best_dense1_biases = dense1.biases.copy()\n",
"best_dense2_weights = dense2.weights.copy()\n",
"best_dense2_biases = dense2.biases.copy()\n",
"for iteration in range(ITERATIONS):\n",
"    # Nudge the current parameters by small random values; the shapes are\n",
"    # read from the layers so they always match the model defined above\n",
"    dense1.weights += STEP_SIZE * np.random.randn(*dense1.weights.shape)\n",
"    dense1.biases += STEP_SIZE * np.random.randn(*dense1.biases.shape)\n",
"    dense2.weights += STEP_SIZE * np.random.randn(*dense2.weights.shape)\n",
"    dense2.biases += STEP_SIZE * np.random.randn(*dense2.biases.shape)\n",
"    # Forward pass of the training data through the whole network\n",
"    dense1.forward(X)\n",
"    activation1.forward(dense1.output)\n",
"    dense2.forward(activation1.output)\n",
"    activation2.forward(dense2.output)\n",
"    # Loss of the softmax output against the targets\n",
"    loss = loss_function.calculate(activation2.output, y)\n",
"    # Accuracy: the predicted class is the argmax over axis 1 of the output\n",
"    predictions = np.argmax(activation2.output, axis=1)\n",
"    accuracy = np.mean(predictions == y)\n",
"    if loss < lowest_loss:\n",
"        # Loss is smaller - print and save weights and biases aside\n",
"        print('New set of weights found, iteration:', iteration,'loss:', loss, 'acc:', accuracy)\n",
"        best_dense1_weights = dense1.weights.copy()\n",
"        best_dense1_biases = dense1.biases.copy()\n",
"        best_dense2_weights = dense2.weights.copy()\n",
"        best_dense2_biases = dense2.biases.copy()\n",
"        lowest_loss = loss\n",
"    else:\n",
"        # Revert weights and biases; copies are needed because the += above\n",
"        # modifies the arrays in place\n",
"        dense1.weights = best_dense1_weights.copy()\n",
"        dense1.biases = best_dense1_biases.copy()\n",
"        dense2.weights = best_dense2_weights.copy()\n",
"        dense2.biases = best_dense2_biases.copy()"
]},{"cell_type":"markdown","metadata":{"id":"g4BJeD9_ludV"},"source":["
\n","BACKPROPAGATION \n","
"]},{"cell_type":"markdown","metadata":{"id":"17T6gnEdludV"},"source":["
\n","GRADIENTS OF THE LOSS WITH RESPECT TO WEIGHTS\n","
"]},{"cell_type":"markdown","metadata":{"id":"ffLt0i83ludV"},"source":["![Screenshot 2024-06-04 at 8.39.55 AM.png](attachment:2ce30e57-6373-4b3a-83c7-1195faf70521.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"mE4OMy6vludV","outputId":"678d3940-ad48-4b68-bf78-b78f57c11006"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 0.5 0.5 0.5]\n"," [20.1 20.1 20.1]\n"," [10.9 10.9 10.9]\n"," [ 4.1 4.1 4.1]]\n"]}],"source":["import numpy as np\n","# Passed-in gradient from the next layer\n","# for the purpose of this example we're going to use\n","# an array of incremental gradient values\n","dvalues = np.array([[1., 1., 1.],\n"," [2., 2., 2.],\n"," [3., 3., 3.]])\n","# We have 3 sets of inputs - samples\n","inputs = np.array([[1, 2, 3, 2.5],\n"," [2., 5., -1., 2],\n"," [-1.5, 2.7, 3.3, -0.8]])\n","# for each weight: sum over the samples of its input value\n","# multiplied by the passed-in gradient for its neuron\n","dweights = np.dot(inputs.T, dvalues)\n","print(dweights)"]},{"cell_type":"markdown","metadata":{"id":"bws-msQQludW"},"source":["
\n","GRADIENTS OF THE LOSS WITH RESPECT TO BIASES\n","
"]},{"cell_type":"markdown","metadata":{"id":"tmZYpVReludW"},"source":["![Screenshot 2024-06-04 at 8.40.32 AM.png](attachment:6bc97cfa-5ef2-48e8-a092-543ab5661b0e.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"eyqttZ-wludW","outputId":"d3952352-87cd-4ee1-a2a6-00c71705bf33"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[6. 6. 6.]]\n"]}],"source":["import numpy as np\n","# Passed-in gradient from the next layer\n","# for the purpose of this example we're going to use\n","# an array of incremental gradient values\n","dvalues = np.array([[1., 1., 1.],\n"," [2., 2., 2.],\n"," [3., 3., 3.]])\n","# One bias for each neuron\n","# biases are a row vector with shape (1, neurons)\n","biases = np.array([[2, 3, 0.5]])\n","# dbiases - sum the gradients over samples (axis 0); keepdims=True\n","# keeps the result two-dimensional, a row vector of shape (1, neurons),\n","# so it matches the shape of the biases\n","dbiases = np.sum(dvalues, axis=0, keepdims=True)\n","print(dbiases)"]},{"cell_type":"markdown","metadata":{"id":"a0t9YbEAludW"},"source":["
\n","GRADIENTS OF THE LOSS WITH RESPECT TO INPUTS\n","
"]},{"cell_type":"markdown","metadata":{"id":"iwj9qpG1ludW"},"source":["![Screenshot 2024-06-04 at 8.41.02 AM.png](attachment:80f310e2-857a-4147-87fa-96e60bbce229.png)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"u_ry_Y-VludW","outputId":"08a4236b-cc1f-494a-d77c-3ff715bc6a37"},"outputs":[{"name":"stdout","output_type":"stream","text":["[[ 0.44 -0.38 -0.07 1.37]\n"," [ 0.88 -0.76 -0.14 2.74]\n"," [ 1.32 -1.14 -0.21 4.11]]\n"]}],"source":["import numpy as np\n","# Passed-in gradient from the next layer\n","# for the purpose of this example we're going to use\n","# an array of incremental gradient values\n","dvalues = np.array([[1., 1., 1.],\n"," [2., 2., 2.],\n"," [3., 3., 3.]])\n","# We have 3 sets of weights - one set for each neuron\n","# we have 4 inputs, thus 4 weights\n","# recall that we keep weights transposed\n","weights = np.array([[0.2, 0.8, -0.5, 1],\n"," [0.5, -0.91, 0.26, -0.5],\n"," [-0.26, -0.27, 0.17, 0.87]]).T\n","# for each input: sum over the neurons of its weight\n","# multiplied by the passed-in gradient for that neuron\n","dinputs = np.dot(dvalues, weights.T)\n","print(dinputs)"]},{"cell_type":"markdown","metadata":{"id":"vVjxhrTLludX"},"source":["
\n","ADDING THE \"BACKWARD\" METHOD IN THE LAYER-DENSE CLASS\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"CRJJ_BraludX"},"outputs":[],"source":["# class Layer_Dense:\n","# ...\n","# # Backward pass\n","# def backward(self, dvalues):\n","# # Gradients on parameters\n","# self.dweights = np.dot(self.inputs.T, dvalues)\n","# self.dbiases = np.sum(dvalues, axis=0, keepdims=True)\n","# # Gradient on values\n","# self.dinputs = np.dot(dvalues, self.weights.T)"]},{"cell_type":"markdown","metadata":{"id":"Raoe3SdlludX"},"source":["
\n","ADDING THE \"BACKWARD\" METHOD IN THE RELU ACTIVATION CLASS\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xO4OWMfUludX"},"outputs":[],"source":["# # ReLU activation\n","# class Activation_ReLU:\n","# # Forward pass\n","# def forward(self, inputs):\n","# # Remember input values\n","# self.inputs = inputs\n","# self.output = np.maximum(0, inputs)\n","# # Backward pass\n","# def backward(self, dvalues):\n","# # Since we must not modify the passed-in gradient array,\n","# # let's make a copy of the values first\n","# self.dinputs = dvalues.copy()\n","# # Zero gradient where input values were zero or negative\n","# self.dinputs[self.inputs <= 0] = 0"]},{"cell_type":"markdown","metadata":{"id":"VckWYcrrludX"},"source":["
\n","LOSS FUNCTION BACKPROPAGATION\n","
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"H8NF4KBbludX"},"outputs":[],"source":[
"# Cross-entropy loss\n",
"# Extend the Loss_CategoricalCrossentropy class defined earlier in the notebook\n",
"# rather than re-declaring it from Loss: a fresh declaration containing only\n",
"# backward() would replace the earlier class and discard its other methods.\n",
"class Loss_CategoricalCrossentropy(Loss_CategoricalCrossentropy):\n",
"    # Backward pass\n",
"    def backward(self, dvalues, y_true):\n",
"        \"\"\"Store the gradient of the loss with respect to dvalues in self.dinputs.\n",
"\n",
"        dvalues: 2-D array, one row of values per sample\n",
"        y_true: targets, either sparse class indices (1-D) or one-hot rows (2-D)\n",
"        \"\"\"\n",
"        # Number of samples\n",
"        samples = len(dvalues)\n",
"        # Number of labels in every sample\n",
"        # We'll use the first sample to count them\n",
"        labels = len(dvalues[0])\n",
"        # If labels are sparse, turn them into one-hot vectors\n",
"        if len(y_true.shape) == 1:\n",
"            y_true = np.eye(labels)[y_true]\n",
"        # Calculate gradient\n",
"        self.dinputs = -y_true / dvalues\n",
"        # Normalize gradient by the number of samples\n",
"        self.dinputs = self.dinputs / samples"
]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Sfh0r4WuludX"},"outputs":[],"source":[]}],"metadata":{"kernelspec":{"display_name":"Python 3 (ipykernel)","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.12.2"},"colab":{"provenance":[]}},"nbformat":4,"nbformat_minor":0}