Lectures 18-20

judsonupchurch 2025-01-19 18:55:33 +00:00
parent 0d4ea9b80f
commit 748500dc53
4 changed files with 157 additions and 9 deletions


@@ -14,9 +14,7 @@ Lecture 12 uses same handout.
Lectures 13-17 use same handout.
Lecture 18 uses same handout.
Lectures 19-21 use same handout.
Lectures 18-21 use same handout.
Lecture 22 uses same handout.

File diff suppressed because one or more lines are too long


@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -41,9 +41,6 @@
" def forward(self, inputs):\n",
" self.output = np.maximum(0, inputs)\n",
" \n",
" def derivative(self, inputs):\n",
" return np.where(inputs > 0, 1, 0)\n",
" \n",
"class Activation_Softmax:\n",
" def forward(self, inputs):\n",
" # Get the unnormalized probabilities\n",
@@ -154,6 +151,160 @@
"## Note With Layer_Dense class\n",
"The Layer_Dense class has the weights stored in the transposed fashion for forward propagation. Therefore, the weight matrix must be transposed for the backpropagation."
]
},
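{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick shape check of this convention (an illustrative sketch with made-up sizes, not part of the lecture handout): with the weights stored as (n_inputs, n_neurons), the forward pass needs no transpose, while the gradient with respect to the inputs does."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative shape check with arbitrary sizes: 5 samples, 3 inputs, 4 neurons\n",
"X = np.random.randn(5, 3)         # batch of inputs\n",
"W = 0.01 * np.random.randn(3, 4)  # stored as (n_inputs, n_neurons), i.e. already transposed\n",
"dZ = np.random.randn(5, 4)        # stand-in gradient of the loss w.r.t. the layer output\n",
"\n",
"Z = np.dot(X, W)                  # forward pass: no transpose needed\n",
"dX = np.dot(dZ, W.T)              # backward pass: transpose needed here\n",
"print(Z.shape, dX.shape)          # (5, 4) (5, 3)"
]
},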
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding the Backward Propagation Method to Layer_Dense and ReLU Activation Classes"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"class Layer_Dense:\n",
" def __init__(self, n_inputs, n_neurons):\n",
" # Initialize the weights and biases\n",
" self.weights = 0.01 * np.random.randn(n_inputs, n_neurons) # Normal distribution of weights\n",
" self.biases = np.zeros((1, n_neurons))\n",
"\n",
" def forward(self, inputs):\n",
" # Calculate the output values from inputs, weights, and biases\n",
" self.output = np.dot(inputs, self.weights) + self.biases # Weights are already transposed\n",
" \n",
" def backward(self, dvalues):\n",
" '''Calculated the gradient of the loss with respect to the weights and biases of this layer.\n",
" dvalues is equiavelent to a transposed dl_dZ. It is the gradient \n",
" of the loss with respect to the outputs of this layer.'''\n",
" self.dweights = np.dot(self.inputs.T, dvalues)\n",
" self.dbiases = np.sum(dvalues, axis=0, keepdims=0)\n",
" self.dinputs = np.dot(dvalues, self.weights.T)\n",
"\n",
"class Activation_ReLU:\n",
" def forward(self, inputs):\n",
" self.output = np.maximum(0, inputs)\n",
" \n",
" def backward(self, dvalues):\n",
" '''Calculated the gradient of the loss with respect to this layer's activation function\n",
" dvalues is equiavelent to a transposed dl_dZ. It is the gradient \n",
" of the loss with respect to the outputs of this layer.'''\n",
" self.dinputs = dvalues.copy()\n",
" self.dinputs[self.inputs <= 0] = 0"
]
},
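{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sanity check (illustrative sketch, not part of the lecture handout): chain a forward and backward pass through one dense layer and a ReLU, using a made-up input batch and an all-ones stand-in for the upstream gradient, and confirm that each gradient has the same shape as the parameter or input it belongs to."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Made-up batch of 2 samples with 3 features each\n",
"X = np.array([[1.0, 2.0, 3.0],\n",
"              [-1.5, 2.7, 3.3]])\n",
"\n",
"dense = Layer_Dense(3, 4)\n",
"relu = Activation_ReLU()\n",
"\n",
"# Forward pass\n",
"dense.forward(X)\n",
"relu.forward(dense.output)\n",
"\n",
"# Backward pass with an all-ones upstream gradient standing in for a real loss gradient\n",
"relu.backward(np.ones_like(relu.output))\n",
"dense.backward(relu.dinputs)\n",
"\n",
"print(dense.dweights.shape)  # (3, 4), same shape as dense.weights\n",
"print(dense.dbiases.shape)   # (1, 4), same shape as dense.biases\n",
"print(dense.dinputs.shape)   # (2, 3), same shape as X"
]
},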
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding the Backward Propagation Method to the Loss_CategoricalCrossEntropy Class"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"class Loss_CategoricalCrossEntropy(Loss):\n",
" def forward(self, y_pred, y_true):\n",
" '''y_pred is the neural network output\n",
" y_true is the ideal output of the neural network'''\n",
" samples = len(y_pred)\n",
" # Bound the predicted values \n",
" y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)\n",
" \n",
" if len(y_true.shape) == 1: # Categorically labeled\n",
" correct_confidences = y_pred_clipped[range(samples), y_true]\n",
" elif len(y_true.shape) == 2: # One hot encoded\n",
" correct_confidences = np.sum(y_pred_clipped*y_true, axis=1)\n",
"\n",
" # Calculate the losses\n",
" negative_log_likelihoods = -np.log(correct_confidences)\n",
" return negative_log_likelihoods\n",
" \n",
" def backward(self, dvalues, y_true):\n",
" samples = len(dvalues)\n",
"\n",
" # Number of lables in each sample\n",
" labels = len(dvalues[0])\n",
"\n",
" # if the labels are sparse, turn them into a one-hot vector\n",
" if len(y_true.shape) == 1:\n",
" y_true = np.eye(labels)[y_true]\n",
"\n",
" # Calculate the gradient then normalize\n",
" self.dinputs = -y_true / dvalues\n",
" self.dinputs = self.dinputs / samples"
]
},
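{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small usage sketch (illustrative, not part of the lecture handout; it assumes the Loss base class defined earlier in the notebook): run only the loss backward pass on a made-up batch of softmax outputs with sparse class targets. Each row of dinputs is -y_true / y_pred divided by the number of samples, so only the correct-class entry of each row is nonzero."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Made-up softmax outputs for a batch of 3 samples over 3 classes\n",
"example_outputs = np.array([[0.7, 0.1, 0.2],\n",
"                            [0.1, 0.5, 0.4],\n",
"                            [0.02, 0.9, 0.08]])\n",
"example_targets = np.array([0, 1, 1])  # sparse (categorical) labels\n",
"\n",
"loss_function = Loss_CategoricalCrossEntropy()\n",
"loss_function.backward(example_outputs, example_targets)\n",
"\n",
"# Only the correct-class column of each row is nonzero\n",
"print(loss_function.dinputs)"
]
},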
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Combined Softmax Activation and Cross Entropy Loss"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"class Activation_Softmax_Loss_CategoricalCrossentropy():\n",
" def __init__(self):\n",
" self.activation = Activation_Softmax()\n",
" self.loss = Loss_CategoricalCrossEntropy()\n",
"\n",
" def forward(self, inputs, y_true):\n",
" self.activation.forward(inputs)\n",
" self.output = self.activation.output\n",
" return self.loss.calculate(self.output, y_true)\n",
" \n",
" def backward(self, dvalues, y_true):\n",
" samples = len(dvalues)\n",
"\n",
" # if the samples are one-hot encoded, turn them into discrete values\n",
" if len(y_true.shape) == 2:\n",
" y_true = np.argmax(y_true, axis=1)\n",
" \n",
" # Copy so we can safely modify\n",
" self.dinputs = dvalues.copy()\n",
" \n",
" # Calculate and normalize gradient \n",
" self.dinputs[range(samples), y_true] -= 1\n",
" self.dinputs = self.dinputs / samples"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Gradients: combined loss and activation:\n",
"[[-0.1 0.03333333 0.06666667]\n",
" [ 0.03333333 -0.16666667 0.13333333]\n",
" [ 0.00666667 -0.03333333 0.02666667]]\n"
]
}
],
"source": [
"softmax_outputs = np.array([[0.7, 0.1, 0.2],\n",
" [0.1, 0.5, 0.4],\n",
" [0.02, 0.9, 0.08]])\n",
"class_targets = np.array([0, 1, 1])\n",
"softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()\n",
"softmax_loss.backward(softmax_outputs, class_targets)\n",
"dvalues1 = softmax_loss.dinputs\n",
"print('Gradients: combined loss and activation:')\n",
"print(dvalues1)"
]
}
],
"metadata": {