Lectures 18-20

commit 748500dc53
parent 0d4ea9b80f
@@ -14,9 +14,7 @@ Lecture 12 uses same handout.
 
 Lectures 13-17 use same handout.
 
-Lecture 18 uses same handout.
+Lectures 18-21 use same handout.
 
-Lectures 19-21 use same handout.
-
 Lecture 22 uses same handout.
 
File diff suppressed because one or more lines are too long
@@ -9,7 +9,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -23,7 +23,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 2,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -40,9 +40,6 @@
 "class Activation_ReLU:\n",
 "    def forward(self, inputs):\n",
 "        self.output = np.maximum(0, inputs)\n",
-"    \n",
-"    def derivative(self, inputs):\n",
-"        return np.where(inputs > 0, 1, 0)\n",
 "    \n",
 "class Activation_Softmax:\n",
 "    def forward(self, inputs):\n",
@@ -154,6 +151,162 @@
 "## Note With Layer_Dense class\n",
 "The Layer_Dense class stores its weights already transposed for forward propagation. Therefore, the weight matrix must be transposed for backpropagation."
 ]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Adding the Backward Propagation Method to Layer_Dense and ReLU Activation Classes"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 3,
+"metadata": {},
+"outputs": [],
+"source": [
+"class Layer_Dense:\n",
+"    def __init__(self, n_inputs, n_neurons):\n",
+"        # Initialize the weights and biases\n",
+"        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons) # Normal distribution of weights\n",
+"        self.biases = np.zeros((1, n_neurons))\n",
+"\n",
+"    def forward(self, inputs):\n",
+"        self.inputs = inputs # Remember the inputs for backward()\n",
+"        # Calculate the output values from inputs, weights, and biases\n",
+"        self.output = np.dot(inputs, self.weights) + self.biases # Weights are already transposed\n",
+"    \n",
+"    def backward(self, dvalues):\n",
+"        '''Calculate the gradient of the loss with respect to the weights and biases of this layer.\n",
+"        dvalues is equivalent to a transposed dl_dZ. It is the gradient \n",
+"        of the loss with respect to the outputs of this layer.'''\n",
+"        self.dweights = np.dot(self.inputs.T, dvalues)\n",
+"        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)\n",
+"        self.dinputs = np.dot(dvalues, self.weights.T)\n",
+"\n",
+"class Activation_ReLU:\n",
+"    def forward(self, inputs):\n",
+"        self.inputs = inputs # Remember the inputs for backward()\n",
+"        self.output = np.maximum(0, inputs)\n",
+"    \n",
+"    def backward(self, dvalues):\n",
+"        '''Calculate the gradient of the loss with respect to this layer's activation function.\n",
+"        dvalues is equivalent to a transposed dl_dZ. It is the gradient \n",
+"        of the loss with respect to the outputs of this layer.'''\n",
+"        self.dinputs = dvalues.copy()\n",
+"        self.dinputs[self.inputs <= 0] = 0"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Adding the Backward Propagation Method to the Loss_CategoricalCrossEntropy Class"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 4,
+"metadata": {},
+"outputs": [],
+"source": [
+"class Loss_CategoricalCrossEntropy(Loss):\n",
+"    def forward(self, y_pred, y_true):\n",
+"        '''y_pred is the neural network output\n",
+"        y_true is the ideal output of the neural network'''\n",
+"        samples = len(y_pred)\n",
+"        # Bound the predicted values \n",
+"        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)\n",
+"    \n",
+"        if len(y_true.shape) == 1: # Categorically labeled\n",
+"            correct_confidences = y_pred_clipped[range(samples), y_true]\n",
+"        elif len(y_true.shape) == 2: # One hot encoded\n",
+"            correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)\n",
+"\n",
+"        # Calculate the losses\n",
+"        negative_log_likelihoods = -np.log(correct_confidences)\n",
+"        return negative_log_likelihoods\n",
+"    \n",
+"    def backward(self, dvalues, y_true):\n",
+"        samples = len(dvalues)\n",
+"\n",
+"        # Number of labels in each sample\n",
+"        labels = len(dvalues[0])\n",
+"\n",
+"        # if the labels are sparse, turn them into a one-hot vector\n",
+"        if len(y_true.shape) == 1:\n",
+"            y_true = np.eye(labels)[y_true]\n",
+"\n",
+"        # Calculate the gradient then normalize\n",
+"        self.dinputs = -y_true / dvalues\n",
+"        self.dinputs = self.dinputs / samples"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Combined Softmax Activation and Cross Entropy Loss"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 7,
+"metadata": {},
+"outputs": [],
+"source": [
+"class Activation_Softmax_Loss_CategoricalCrossentropy():\n",
+"    def __init__(self):\n",
+"        self.activation = Activation_Softmax()\n",
+"        self.loss = Loss_CategoricalCrossEntropy()\n",
+"\n",
+"    def forward(self, inputs, y_true):\n",
+"        self.activation.forward(inputs)\n",
+"        self.output = self.activation.output\n",
+"        return self.loss.calculate(self.output, y_true)\n",
+"    \n",
+"    def backward(self, dvalues, y_true):\n",
+"        samples = len(dvalues)\n",
+"\n",
+"        # if the samples are one-hot encoded, turn them into discrete values\n",
+"        if len(y_true.shape) == 2:\n",
+"            y_true = np.argmax(y_true, axis=1)\n",
+"        \n",
+"        # Copy so we can safely modify\n",
+"        self.dinputs = dvalues.copy()\n",
+"        \n",
+"        # Calculate and normalize gradient \n",
+"        self.dinputs[range(samples), y_true] -= 1\n",
+"        self.dinputs = self.dinputs / samples"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 8,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Gradients: combined loss and activation:\n",
+"[[-0.1         0.03333333  0.06666667]\n",
+" [ 0.03333333 -0.16666667  0.13333333]\n",
+" [ 0.00666667 -0.03333333  0.02666667]]\n"
+]
+}
+],
+"source": [
+"softmax_outputs = np.array([[0.7, 0.1, 0.2],\n",
+"                            [0.1, 0.5, 0.4],\n",
+"                            [0.02, 0.9, 0.08]])\n",
+"class_targets = np.array([0, 1, 1])\n",
+"softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
+"softmax_loss.backward(softmax_outputs, class_targets)\n",
+"dvalues1 = softmax_loss.dinputs\n",
+"print('Gradients: combined loss and activation:')\n",
+"print(dvalues1)"
 }
 ],
 "metadata": {
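
The notes below are illustrative sketches, not part of the commit. First, a minimal numpy check of the shape bookkeeping behind Layer_Dense.backward(): because the weights are stored with shape (n_inputs, n_neurons), the forward pass needs no transpose, while the backward pass transposes the inputs (for dweights) and the weights (for dinputs). The sample/input/neuron counts below are made up for illustration; numpy is assumed to be imported as np, as in the notebook.

import numpy as np

n_samples, n_inputs, n_neurons = 4, 3, 2
X = np.random.randn(n_samples, n_inputs)            # layer inputs
W = 0.01 * np.random.randn(n_inputs, n_neurons)     # stored "already transposed"
b = np.zeros((1, n_neurons))

Z = np.dot(X, W) + b                      # forward: (4, 3) @ (3, 2) -> (4, 2)
dZ = np.ones_like(Z)                      # stand-in for dvalues (dL/dZ)

dW = np.dot(X.T, dZ)                      # (3, 4) @ (4, 2) -> (3, 2), same shape as W
db = np.sum(dZ, axis=0, keepdims=True)    # (1, 2), same shape as b
dX = np.dot(dZ, W.T)                      # (4, 2) @ (2, 3) -> (4, 3), same shape as X

print(dW.shape, db.shape, dX.shape)       # (3, 2) (1, 2) (4, 3)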
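
A similar sketch for Activation_ReLU.backward(): masking a copy of dvalues wherever the stored inputs were <= 0 is equivalent to multiplying by the 0/1 derivative that the removed derivative() method used to return. The small input and gradient arrays are made up for illustration.

import numpy as np

inputs = np.array([[-1.0,  0.0, 2.0],
                   [ 3.0, -2.0, 0.5]])
dvalues = np.array([[1.0, 2.0, 3.0],
                    [4.0, 5.0, 6.0]])

dinputs = dvalues.copy()
dinputs[inputs <= 0] = 0                   # as in Activation_ReLU.backward()

# same result as scaling by the 0/1 derivative
assert np.array_equal(dinputs, dvalues * np.where(inputs > 0, 1, 0))
print(dinputs)                             # [[0. 0. 3.]
                                           #  [4. 0. 6.]]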
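
A finite-difference sanity check of Loss_CategoricalCrossEntropy.backward(), which computes -y_true / dvalues and then divides by the number of samples. This assumes the Loss base class (defined in an earlier cell, not shown in this diff) averages the per-sample losses, which is why the gradient is normalized by samples. The two-sample probabilities and one-hot targets below are made up.

import numpy as np

p = np.array([[0.7, 0.1, 0.2],
              [0.1, 0.5, 0.4]])
y_true = np.array([[1, 0, 0],
                   [0, 1, 0]])             # one-hot targets
samples = len(p)

def mean_loss(p):
    # mean categorical cross-entropy over the batch
    return np.mean(-np.log(np.sum(p * y_true, axis=1)))

analytic = (-y_true / p) / samples         # same expression as backward()

eps = 1e-6
numeric = np.zeros_like(p)
for i in range(p.shape[0]):
    for j in range(p.shape[1]):
        p_plus = p.copy()
        p_plus[i, j] += eps
        p_minus = p.copy()
        p_minus[i, j] -= eps
        numeric[i, j] = (mean_loss(p_plus) - mean_loss(p_minus)) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-5))   # True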
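
Finally, a quick check of the shortcut used in Activation_Softmax_Loss_CategoricalCrossentropy.backward(): the combined softmax plus cross-entropy gradient is simply (softmax output - one-hot targets) / samples. It reuses the softmax_outputs and class_targets from the last cell of the diff, so the result should match the printed gradients above.

import numpy as np

softmax_outputs = np.array([[0.7, 0.1, 0.2],
                            [0.1, 0.5, 0.4],
                            [0.02, 0.9, 0.08]])
class_targets = np.array([0, 1, 1])
samples = len(softmax_outputs)

one_hot = np.eye(softmax_outputs.shape[1])[class_targets]
dinputs = (softmax_outputs - one_hot) / samples

print(dinputs)
# [[-0.1         0.03333333  0.06666667]
#  [ 0.03333333 -0.16666667  0.13333333]
#  [ 0.00666667 -0.03333333  0.02666667]]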