diff --git a/lecture23_24/notes_23.ipynb b/lecture23_24/notes_23.ipynb index e0f9209..b8b6bdd 100644 --- a/lecture23_24/notes_23.ipynb +++ b/lecture23_24/notes_23.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -227,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -261,1020 +261,120 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "epoch: 0, acc: 0.353, loss: 1.099\n", - "epoch: 100, acc: 0.467, loss: 1.079\n", - "epoch: 200, acc: 0.450, loss: 1.067\n", - "epoch: 300, acc: 0.447, loss: 1.065\n" + "epoch: 0, acc: 0.360, loss: 1.099, lr: 1.0\n", + "epoch: 100, acc: 0.400, loss: 1.088, lr: 0.9099181073703367\n", + "epoch: 200, acc: 0.423, loss: 1.078, lr: 0.8340283569641367\n", + "epoch: 300, acc: 0.423, loss: 1.076, lr: 0.7698229407236336\n", + "epoch: 400, acc: 0.420, loss: 1.076, lr: 0.7147962830593281\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "epoch: 400, acc: 0.433, loss: 1.064\n", - "epoch: 500, acc: 0.430, loss: 1.062\n", - "epoch: 600, acc: 0.423, loss: 1.061\n", - "epoch: 700, acc: 0.433, loss: 1.059\n", - "epoch: 800, acc: 0.453, loss: 1.056\n", - "epoch: 900, acc: 0.467, loss: 1.054\n", - "epoch: 1000, acc: 0.467, loss: 1.051\n", - "epoch: 1100, acc: 0.463, loss: 1.048\n", - "epoch: 1200, acc: 0.463, loss: 1.045\n", - "epoch: 1300, acc: 0.467, loss: 1.041\n", - "epoch: 1400, acc: 0.470, loss: 1.037\n", - "epoch: 1500, acc: 0.480, loss: 1.032\n", - "epoch: 1600, acc: 0.480, loss: 1.026\n", - "epoch: 1700, acc: 0.490, loss: 1.020\n", - "epoch: 1800, acc: 0.493, loss: 1.014\n", - "epoch: 1900, acc: 0.493, loss: 
1.006\n", - "epoch: 2000, acc: 0.507, loss: 0.998\n", - "epoch: 2100, acc: 0.510, loss: 0.989\n", - "epoch: 2200, acc: 0.527, loss: 0.980\n", - "epoch: 2300, acc: 0.540, loss: 0.971\n", - "epoch: 2400, acc: 0.433, loss: 0.976\n", - "epoch: 2500, acc: 0.430, loss: 0.973\n", - "epoch: 2600, acc: 0.430, loss: 0.968\n", - "epoch: 2700, acc: 0.433, loss: 0.964\n", - "epoch: 2800, acc: 0.433, loss: 0.958\n", - "epoch: 2900, acc: 0.440, loss: 0.953\n", - "epoch: 3000, acc: 0.443, loss: 0.948\n", - "epoch: 3100, acc: 0.447, loss: 0.943\n", - "epoch: 3200, acc: 0.457, loss: 0.939\n", - "epoch: 3300, acc: 0.467, loss: 0.935\n", - "epoch: 3400, acc: 0.470, loss: 0.930\n", - "epoch: 3500, acc: 0.477, loss: 0.927\n", - "epoch: 3600, acc: 0.487, loss: 0.923\n", - "epoch: 3700, acc: 0.483, loss: 0.919\n", - "epoch: 3800, acc: 0.480, loss: 0.915\n", - "epoch: 3900, acc: 0.480, loss: 0.911\n", - "epoch: 4000, acc: 0.487, loss: 0.907\n", - "epoch: 4100, acc: 0.503, loss: 0.904\n", - "epoch: 4200, acc: 0.513, loss: 0.900\n", - "epoch: 4300, acc: 0.517, loss: 0.896\n", - "epoch: 4400, acc: 0.523, loss: 0.893\n", - "epoch: 4500, acc: 0.533, loss: 0.889\n", - "epoch: 4600, acc: 0.533, loss: 0.886\n", - "epoch: 4700, acc: 0.537, loss: 0.882\n", - "epoch: 4800, acc: 0.537, loss: 0.878\n", - "epoch: 4900, acc: 0.537, loss: 0.875\n", - "epoch: 5000, acc: 0.547, loss: 0.871\n", - "epoch: 5100, acc: 0.543, loss: 0.868\n", - "epoch: 5200, acc: 0.547, loss: 0.865\n", - "epoch: 5300, acc: 0.550, loss: 0.861\n", - "epoch: 5400, acc: 0.553, loss: 0.857\n", - "epoch: 5500, acc: 0.557, loss: 0.854\n", - "epoch: 5600, acc: 0.567, loss: 0.850\n", - "epoch: 5700, acc: 0.563, loss: 0.846\n", - "epoch: 5800, acc: 0.567, loss: 0.843\n", - "epoch: 5900, acc: 0.567, loss: 0.840\n", - "epoch: 6000, acc: 0.567, loss: 0.837\n", - "epoch: 6100, acc: 0.583, loss: 0.833\n", - "epoch: 6200, acc: 0.587, loss: 0.830\n", - "epoch: 6300, acc: 0.593, loss: 0.827\n", - "epoch: 6400, acc: 0.600, loss: 0.824\n", - "epoch: 
6500, acc: 0.600, loss: 0.821\n", - "epoch: 6600, acc: 0.597, loss: 0.818\n", - "epoch: 6700, acc: 0.597, loss: 0.815\n", - "epoch: 6800, acc: 0.597, loss: 0.812\n", - "epoch: 6900, acc: 0.597, loss: 0.809\n", - "epoch: 7000, acc: 0.603, loss: 0.805\n", - "epoch: 7100, acc: 0.603, loss: 0.803\n", - "epoch: 7200, acc: 0.610, loss: 0.800\n", - "epoch: 7300, acc: 0.607, loss: 0.797\n", - "epoch: 7400, acc: 0.613, loss: 0.794\n", - "epoch: 7500, acc: 0.617, loss: 0.792\n", - "epoch: 7600, acc: 0.620, loss: 0.788\n", - "epoch: 7700, acc: 0.620, loss: 0.785\n", - "epoch: 7800, acc: 0.620, loss: 0.783\n", - "epoch: 7900, acc: 0.627, loss: 0.780\n", - "epoch: 8000, acc: 0.627, loss: 0.779\n", - "epoch: 8100, acc: 0.627, loss: 0.775\n", - "epoch: 8200, acc: 0.627, loss: 0.772\n", - "epoch: 8300, acc: 0.630, loss: 0.769\n", - "epoch: 8400, acc: 0.630, loss: 0.765\n", - "epoch: 8500, acc: 0.633, loss: 0.762\n", - "epoch: 8600, acc: 0.630, loss: 0.759\n", - "epoch: 8700, acc: 0.643, loss: 0.755\n", - "epoch: 8800, acc: 0.647, loss: 0.751\n", - "epoch: 8900, acc: 0.647, loss: 0.748\n", - "epoch: 9000, acc: 0.647, loss: 0.743\n", - "epoch: 9100, acc: 0.647, loss: 0.740\n", - "epoch: 9200, acc: 0.647, loss: 0.736\n", - "epoch: 9300, acc: 0.650, loss: 0.732\n", - "epoch: 9400, acc: 0.650, loss: 0.729\n", - "epoch: 9500, acc: 0.647, loss: 0.725\n", - "epoch: 9600, acc: 0.650, loss: 0.722\n", - "epoch: 9700, acc: 0.653, loss: 0.718\n", - "epoch: 9800, acc: 0.657, loss: 0.714\n", - "epoch: 9900, acc: 0.660, loss: 0.711\n", - "epoch: 10000, acc: 0.660, loss: 0.708\n", - "epoch: 10100, acc: 0.663, loss: 0.705\n", - "epoch: 10200, acc: 0.670, loss: 0.701\n", - "epoch: 10300, acc: 0.677, loss: 0.698\n", - "epoch: 10400, acc: 0.680, loss: 0.695\n", - "epoch: 10500, acc: 0.683, loss: 0.692\n", - "epoch: 10600, acc: 0.683, loss: 0.689\n", - "epoch: 10700, acc: 0.680, loss: 0.686\n", - "epoch: 10800, acc: 0.690, loss: 0.683\n", - "epoch: 10900, acc: 0.690, loss: 0.680\n", - "epoch: 11000, 
acc: 0.690, loss: 0.677\n", - "epoch: 11100, acc: 0.697, loss: 0.674\n", - "epoch: 11200, acc: 0.707, loss: 0.671\n", - "epoch: 11300, acc: 0.707, loss: 0.668\n", - "epoch: 11400, acc: 0.713, loss: 0.665\n", - "epoch: 11500, acc: 0.713, loss: 0.662\n", - "epoch: 11600, acc: 0.717, loss: 0.658\n", - "epoch: 11700, acc: 0.720, loss: 0.655\n", - "epoch: 11800, acc: 0.720, loss: 0.654\n", - "epoch: 11900, acc: 0.727, loss: 0.650\n", - "epoch: 12000, acc: 0.733, loss: 0.647\n", - "epoch: 12100, acc: 0.733, loss: 0.644\n", - "epoch: 12200, acc: 0.733, loss: 0.641\n", - "epoch: 12300, acc: 0.733, loss: 0.639\n", - "epoch: 12400, acc: 0.733, loss: 0.637\n", - "epoch: 12500, acc: 0.727, loss: 0.634\n", - "epoch: 12600, acc: 0.723, loss: 0.630\n", - "epoch: 12700, acc: 0.733, loss: 0.627\n", - "epoch: 12800, acc: 0.733, loss: 0.626\n", - "epoch: 12900, acc: 0.737, loss: 0.622\n", - "epoch: 13000, acc: 0.737, loss: 0.619\n", - "epoch: 13100, acc: 0.740, loss: 0.617\n", - "epoch: 13200, acc: 0.743, loss: 0.614\n", - "epoch: 13300, acc: 0.743, loss: 0.611\n", - "epoch: 13400, acc: 0.740, loss: 0.608\n", - "epoch: 13500, acc: 0.740, loss: 0.606\n", - "epoch: 13600, acc: 0.737, loss: 0.604\n", - "epoch: 13700, acc: 0.743, loss: 0.602\n", - "epoch: 13800, acc: 0.747, loss: 0.600\n", - "epoch: 13900, acc: 0.747, loss: 0.598\n", - "epoch: 14000, acc: 0.757, loss: 0.596\n", - "epoch: 14100, acc: 0.753, loss: 0.594\n", - "epoch: 14200, acc: 0.760, loss: 0.592\n", - "epoch: 14300, acc: 0.760, loss: 0.590\n", - "epoch: 14400, acc: 0.757, loss: 0.589\n", - "epoch: 14500, acc: 0.763, loss: 0.587\n", - "epoch: 14600, acc: 0.763, loss: 0.585\n", - "epoch: 14700, acc: 0.773, loss: 0.584\n", - "epoch: 14800, acc: 0.777, loss: 0.582\n", - "epoch: 14900, acc: 0.777, loss: 0.580\n", - "epoch: 15000, acc: 0.783, loss: 0.579\n", - "epoch: 15100, acc: 0.783, loss: 0.577\n", - "epoch: 15200, acc: 0.783, loss: 0.576\n", - "epoch: 15300, acc: 0.790, loss: 0.575\n", - "epoch: 15400, acc: 0.787, loss: 
0.573\n", - "epoch: 15500, acc: 0.790, loss: 0.572\n", - "epoch: 15600, acc: 0.790, loss: 0.570\n", - "epoch: 15700, acc: 0.790, loss: 0.569\n", - "epoch: 15800, acc: 0.793, loss: 0.568\n", - "epoch: 15900, acc: 0.793, loss: 0.566\n", - "epoch: 16000, acc: 0.793, loss: 0.565\n", - "epoch: 16100, acc: 0.793, loss: 0.564\n", - "epoch: 16200, acc: 0.790, loss: 0.562\n", - "epoch: 16300, acc: 0.790, loss: 0.561\n", - "epoch: 16400, acc: 0.790, loss: 0.560\n", - "epoch: 16500, acc: 0.790, loss: 0.559\n", - "epoch: 16600, acc: 0.793, loss: 0.558\n", - "epoch: 16700, acc: 0.793, loss: 0.557\n", - "epoch: 16800, acc: 0.793, loss: 0.555\n", - "epoch: 16900, acc: 0.797, loss: 0.554\n", - "epoch: 17000, acc: 0.797, loss: 0.553\n", - "epoch: 17100, acc: 0.797, loss: 0.552\n", - "epoch: 17200, acc: 0.797, loss: 0.551\n", - "epoch: 17300, acc: 0.797, loss: 0.550\n", - "epoch: 17400, acc: 0.797, loss: 0.549\n", - "epoch: 17500, acc: 0.797, loss: 0.548\n", - "epoch: 17600, acc: 0.797, loss: 0.547\n", - "epoch: 17700, acc: 0.797, loss: 0.546\n", - "epoch: 17800, acc: 0.797, loss: 0.545\n", - "epoch: 17900, acc: 0.793, loss: 0.544\n", - "epoch: 18000, acc: 0.790, loss: 0.543\n", - "epoch: 18100, acc: 0.793, loss: 0.542\n", - "epoch: 18200, acc: 0.793, loss: 0.542\n", - "epoch: 18300, acc: 0.793, loss: 0.541\n", - "epoch: 18400, acc: 0.793, loss: 0.540\n", - "epoch: 18500, acc: 0.793, loss: 0.539\n", - "epoch: 18600, acc: 0.793, loss: 0.538\n", - "epoch: 18700, acc: 0.790, loss: 0.537\n", - "epoch: 18800, acc: 0.793, loss: 0.536\n", - "epoch: 18900, acc: 0.793, loss: 0.535\n", - "epoch: 19000, acc: 0.793, loss: 0.535\n", - "epoch: 19100, acc: 0.793, loss: 0.534\n", - "epoch: 19200, acc: 0.793, loss: 0.533\n", - "epoch: 19300, acc: 0.793, loss: 0.532\n", - "epoch: 19400, acc: 0.793, loss: 0.531\n", - "epoch: 19500, acc: 0.793, loss: 0.531\n", - "epoch: 19600, acc: 0.793, loss: 0.530\n", - "epoch: 19700, acc: 0.793, loss: 0.529\n", - "epoch: 19800, acc: 0.793, loss: 0.528\n", - "epoch: 
19900, acc: 0.793, loss: 0.528\n", - "epoch: 20000, acc: 0.793, loss: 0.527\n", - "epoch: 20100, acc: 0.797, loss: 0.526\n", - "epoch: 20200, acc: 0.797, loss: 0.525\n", - "epoch: 20300, acc: 0.797, loss: 0.525\n", - "epoch: 20400, acc: 0.797, loss: 0.524\n", - "epoch: 20500, acc: 0.797, loss: 0.523\n", - "epoch: 20600, acc: 0.797, loss: 0.523\n", - "epoch: 20700, acc: 0.797, loss: 0.522\n", - "epoch: 20800, acc: 0.800, loss: 0.521\n", - "epoch: 20900, acc: 0.800, loss: 0.521\n", - "epoch: 21000, acc: 0.800, loss: 0.520\n", - "epoch: 21100, acc: 0.800, loss: 0.519\n", - "epoch: 21200, acc: 0.800, loss: 0.519\n", - "epoch: 21300, acc: 0.800, loss: 0.518\n", - "epoch: 21400, acc: 0.800, loss: 0.518\n", - "epoch: 21500, acc: 0.800, loss: 0.517\n", - "epoch: 21600, acc: 0.800, loss: 0.516\n", - "epoch: 21700, acc: 0.800, loss: 0.516\n", - "epoch: 21800, acc: 0.800, loss: 0.515\n", - "epoch: 21900, acc: 0.800, loss: 0.515\n", - "epoch: 22000, acc: 0.800, loss: 0.514\n", - "epoch: 22100, acc: 0.797, loss: 0.513\n", - "epoch: 22200, acc: 0.800, loss: 0.513\n", - "epoch: 22300, acc: 0.797, loss: 0.512\n", - "epoch: 22400, acc: 0.797, loss: 0.512\n", - "epoch: 22500, acc: 0.797, loss: 0.511\n", - "epoch: 22600, acc: 0.797, loss: 0.510\n", - "epoch: 22700, acc: 0.797, loss: 0.510\n", - "epoch: 22800, acc: 0.797, loss: 0.509\n", - "epoch: 22900, acc: 0.797, loss: 0.509\n", - "epoch: 23000, acc: 0.797, loss: 0.508\n", - "epoch: 23100, acc: 0.800, loss: 0.508\n", - "epoch: 23200, acc: 0.797, loss: 0.507\n", - "epoch: 23300, acc: 0.797, loss: 0.507\n", - "epoch: 23400, acc: 0.797, loss: 0.506\n", - "epoch: 23500, acc: 0.797, loss: 0.506\n", - "epoch: 23600, acc: 0.800, loss: 0.505\n", - "epoch: 23700, acc: 0.803, loss: 0.505\n", - "epoch: 23800, acc: 0.803, loss: 0.504\n", - "epoch: 23900, acc: 0.800, loss: 0.504\n", - "epoch: 24000, acc: 0.803, loss: 0.503\n", - "epoch: 24100, acc: 0.807, loss: 0.503\n", - "epoch: 24200, acc: 0.807, loss: 0.502\n", - "epoch: 24300, acc: 0.807, 
loss: 0.502\n", - "epoch: 24400, acc: 0.807, loss: 0.501\n", - "epoch: 24500, acc: 0.807, loss: 0.501\n", - "epoch: 24600, acc: 0.807, loss: 0.500\n", - "epoch: 24700, acc: 0.807, loss: 0.500\n", - "epoch: 24800, acc: 0.807, loss: 0.499\n", - "epoch: 24900, acc: 0.807, loss: 0.499\n", - "epoch: 25000, acc: 0.807, loss: 0.498\n", - "epoch: 25100, acc: 0.807, loss: 0.498\n", - "epoch: 25200, acc: 0.807, loss: 0.497\n", - "epoch: 25300, acc: 0.807, loss: 0.497\n", - "epoch: 25400, acc: 0.807, loss: 0.497\n", - "epoch: 25500, acc: 0.803, loss: 0.496\n", - "epoch: 25600, acc: 0.803, loss: 0.496\n", - "epoch: 25700, acc: 0.803, loss: 0.495\n", - "epoch: 25800, acc: 0.803, loss: 0.495\n", - "epoch: 25900, acc: 0.803, loss: 0.494\n", - "epoch: 26000, acc: 0.800, loss: 0.494\n", - "epoch: 26100, acc: 0.803, loss: 0.493\n", - "epoch: 26200, acc: 0.800, loss: 0.493\n", - "epoch: 26300, acc: 0.800, loss: 0.492\n", - "epoch: 26400, acc: 0.800, loss: 0.492\n", - "epoch: 26500, acc: 0.800, loss: 0.492\n", - "epoch: 26600, acc: 0.800, loss: 0.491\n", - "epoch: 26700, acc: 0.803, loss: 0.491\n", - "epoch: 26800, acc: 0.800, loss: 0.490\n", - "epoch: 26900, acc: 0.800, loss: 0.490\n", - "epoch: 27000, acc: 0.800, loss: 0.490\n", - "epoch: 27100, acc: 0.803, loss: 0.489\n", - "epoch: 27200, acc: 0.803, loss: 0.489\n", - "epoch: 27300, acc: 0.803, loss: 0.488\n", - "epoch: 27400, acc: 0.803, loss: 0.488\n", - "epoch: 27500, acc: 0.803, loss: 0.488\n", - "epoch: 27600, acc: 0.803, loss: 0.487\n", - "epoch: 27700, acc: 0.803, loss: 0.487\n", - "epoch: 27800, acc: 0.803, loss: 0.487\n", - "epoch: 27900, acc: 0.803, loss: 0.486\n", - "epoch: 28000, acc: 0.803, loss: 0.486\n", - "epoch: 28100, acc: 0.803, loss: 0.485\n", - "epoch: 28200, acc: 0.803, loss: 0.485\n", - "epoch: 28300, acc: 0.803, loss: 0.485\n", - "epoch: 28400, acc: 0.803, loss: 0.484\n", - "epoch: 28500, acc: 0.803, loss: 0.484\n", - "epoch: 28600, acc: 0.803, loss: 0.484\n", - "epoch: 28700, acc: 0.800, loss: 0.483\n", - 
"epoch: 28800, acc: 0.803, loss: 0.483\n", - "epoch: 28900, acc: 0.800, loss: 0.483\n", - "epoch: 29000, acc: 0.800, loss: 0.482\n", - "epoch: 29100, acc: 0.800, loss: 0.482\n", - "epoch: 29200, acc: 0.800, loss: 0.482\n", - "epoch: 29300, acc: 0.800, loss: 0.481\n", - "epoch: 29400, acc: 0.800, loss: 0.481\n", - "epoch: 29500, acc: 0.800, loss: 0.481\n", - "epoch: 29600, acc: 0.800, loss: 0.480\n", - "epoch: 29700, acc: 0.800, loss: 0.480\n", - "epoch: 29800, acc: 0.800, loss: 0.480\n", - "epoch: 29900, acc: 0.800, loss: 0.479\n", - "epoch: 30000, acc: 0.800, loss: 0.479\n", - "epoch: 30100, acc: 0.800, loss: 0.479\n", - "epoch: 30200, acc: 0.800, loss: 0.478\n", - "epoch: 30300, acc: 0.800, loss: 0.478\n", - "epoch: 30400, acc: 0.800, loss: 0.478\n", - "epoch: 30500, acc: 0.800, loss: 0.477\n", - "epoch: 30600, acc: 0.800, loss: 0.477\n", - "epoch: 30700, acc: 0.800, loss: 0.477\n", - "epoch: 30800, acc: 0.800, loss: 0.476\n", - "epoch: 30900, acc: 0.800, loss: 0.476\n", - "epoch: 31000, acc: 0.800, loss: 0.476\n", - "epoch: 31100, acc: 0.800, loss: 0.476\n", - "epoch: 31200, acc: 0.800, loss: 0.475\n", - "epoch: 31300, acc: 0.800, loss: 0.475\n", - "epoch: 31400, acc: 0.800, loss: 0.475\n", - "epoch: 31500, acc: 0.800, loss: 0.474\n", - "epoch: 31600, acc: 0.800, loss: 0.474\n", - "epoch: 31700, acc: 0.800, loss: 0.474\n", - "epoch: 31800, acc: 0.800, loss: 0.473\n", - "epoch: 31900, acc: 0.800, loss: 0.473\n", - "epoch: 32000, acc: 0.800, loss: 0.473\n", - "epoch: 32100, acc: 0.800, loss: 0.473\n", - "epoch: 32200, acc: 0.800, loss: 0.472\n", - "epoch: 32300, acc: 0.800, loss: 0.472\n", - "epoch: 32400, acc: 0.800, loss: 0.472\n", - "epoch: 32500, acc: 0.800, loss: 0.471\n", - "epoch: 32600, acc: 0.800, loss: 0.471\n", - "epoch: 32700, acc: 0.800, loss: 0.471\n", - "epoch: 32800, acc: 0.800, loss: 0.471\n", - "epoch: 32900, acc: 0.800, loss: 0.470\n", - "epoch: 33000, acc: 0.800, loss: 0.470\n", - "epoch: 33100, acc: 0.800, loss: 0.470\n", - "epoch: 33200, acc: 
0.800, loss: 0.469\n", - "epoch: 33300, acc: 0.800, loss: 0.469\n", - "epoch: 33400, acc: 0.800, loss: 0.469\n", - "epoch: 33500, acc: 0.800, loss: 0.469\n", - "epoch: 33600, acc: 0.800, loss: 0.468\n", - "epoch: 33700, acc: 0.800, loss: 0.468\n", - "epoch: 33800, acc: 0.800, loss: 0.468\n", - "epoch: 33900, acc: 0.800, loss: 0.468\n", - "epoch: 34000, acc: 0.800, loss: 0.467\n", - "epoch: 34100, acc: 0.800, loss: 0.467\n", - "epoch: 34200, acc: 0.800, loss: 0.467\n", - "epoch: 34300, acc: 0.800, loss: 0.467\n", - "epoch: 34400, acc: 0.800, loss: 0.466\n", - "epoch: 34500, acc: 0.800, loss: 0.466\n", - "epoch: 34600, acc: 0.800, loss: 0.466\n", - "epoch: 34700, acc: 0.800, loss: 0.465\n", - "epoch: 34800, acc: 0.800, loss: 0.465\n", - "epoch: 34900, acc: 0.800, loss: 0.465\n", - "epoch: 35000, acc: 0.800, loss: 0.464\n", - "epoch: 35100, acc: 0.800, loss: 0.464\n", - "epoch: 35200, acc: 0.800, loss: 0.464\n", - "epoch: 35300, acc: 0.800, loss: 0.464\n", - "epoch: 35400, acc: 0.800, loss: 0.463\n", - "epoch: 35500, acc: 0.800, loss: 0.463\n", - "epoch: 35600, acc: 0.800, loss: 0.463\n", - "epoch: 35700, acc: 0.800, loss: 0.462\n", - "epoch: 35800, acc: 0.800, loss: 0.462\n", - "epoch: 35900, acc: 0.800, loss: 0.462\n", - "epoch: 36000, acc: 0.800, loss: 0.462\n", - "epoch: 36100, acc: 0.800, loss: 0.461\n", - "epoch: 36200, acc: 0.800, loss: 0.461\n", - "epoch: 36300, acc: 0.800, loss: 0.461\n", - "epoch: 36400, acc: 0.800, loss: 0.460\n", - "epoch: 36500, acc: 0.800, loss: 0.460\n", - "epoch: 36600, acc: 0.800, loss: 0.460\n", - "epoch: 36700, acc: 0.800, loss: 0.460\n", - "epoch: 36800, acc: 0.800, loss: 0.459\n", - "epoch: 36900, acc: 0.800, loss: 0.459\n", - "epoch: 37000, acc: 0.800, loss: 0.459\n", - "epoch: 37100, acc: 0.800, loss: 0.458\n", - "epoch: 37200, acc: 0.797, loss: 0.458\n", - "epoch: 37300, acc: 0.800, loss: 0.458\n", - "epoch: 37400, acc: 0.800, loss: 0.457\n", - "epoch: 37500, acc: 0.800, loss: 0.457\n", - "epoch: 37600, acc: 0.800, loss: 
0.457\n", - "epoch: 37700, acc: 0.800, loss: 0.456\n", - "epoch: 37800, acc: 0.800, loss: 0.456\n", - "epoch: 37900, acc: 0.800, loss: 0.456\n", - "epoch: 38000, acc: 0.800, loss: 0.455\n", - "epoch: 38100, acc: 0.800, loss: 0.455\n", - "epoch: 38200, acc: 0.800, loss: 0.455\n", - "epoch: 38300, acc: 0.800, loss: 0.454\n", - "epoch: 38400, acc: 0.800, loss: 0.454\n", - "epoch: 38500, acc: 0.800, loss: 0.454\n", - "epoch: 38600, acc: 0.800, loss: 0.454\n", - "epoch: 38700, acc: 0.800, loss: 0.453\n", - "epoch: 38800, acc: 0.800, loss: 0.453\n", - "epoch: 38900, acc: 0.800, loss: 0.453\n", - "epoch: 39000, acc: 0.800, loss: 0.453\n", - "epoch: 39100, acc: 0.800, loss: 0.452\n", - "epoch: 39200, acc: 0.800, loss: 0.452\n", - "epoch: 39300, acc: 0.800, loss: 0.452\n", - "epoch: 39400, acc: 0.800, loss: 0.451\n", - "epoch: 39500, acc: 0.800, loss: 0.451\n", - "epoch: 39600, acc: 0.800, loss: 0.451\n", - "epoch: 39700, acc: 0.803, loss: 0.451\n", - "epoch: 39800, acc: 0.807, loss: 0.450\n", - "epoch: 39900, acc: 0.807, loss: 0.450\n", - "epoch: 40000, acc: 0.807, loss: 0.450\n", - "epoch: 40100, acc: 0.807, loss: 0.450\n", - "epoch: 40200, acc: 0.807, loss: 0.449\n", - "epoch: 40300, acc: 0.807, loss: 0.449\n", - "epoch: 40400, acc: 0.807, loss: 0.449\n", - "epoch: 40500, acc: 0.803, loss: 0.449\n", - "epoch: 40600, acc: 0.803, loss: 0.448\n", - "epoch: 40700, acc: 0.803, loss: 0.448\n", - "epoch: 40800, acc: 0.803, loss: 0.448\n", - "epoch: 40900, acc: 0.807, loss: 0.448\n", - "epoch: 41000, acc: 0.807, loss: 0.447\n", - "epoch: 41100, acc: 0.807, loss: 0.447\n", - "epoch: 41200, acc: 0.807, loss: 0.447\n", - "epoch: 41300, acc: 0.807, loss: 0.447\n", - "epoch: 41400, acc: 0.807, loss: 0.446\n", - "epoch: 41500, acc: 0.807, loss: 0.446\n", - "epoch: 41600, acc: 0.810, loss: 0.446\n", - "epoch: 41700, acc: 0.807, loss: 0.446\n", - "epoch: 41800, acc: 0.807, loss: 0.446\n", - "epoch: 41900, acc: 0.807, loss: 0.445\n", - "epoch: 42000, acc: 0.807, loss: 0.445\n", - "epoch: 
42100, acc: 0.807, loss: 0.445\n", - "epoch: 42200, acc: 0.807, loss: 0.445\n", - "epoch: 42300, acc: 0.807, loss: 0.444\n", - "epoch: 42400, acc: 0.807, loss: 0.444\n", - "epoch: 42500, acc: 0.807, loss: 0.444\n", - "epoch: 42600, acc: 0.807, loss: 0.444\n", - "epoch: 42700, acc: 0.807, loss: 0.444\n", - "epoch: 42800, acc: 0.807, loss: 0.443\n", - "epoch: 42900, acc: 0.807, loss: 0.443\n", - "epoch: 43000, acc: 0.807, loss: 0.443\n", - "epoch: 43100, acc: 0.807, loss: 0.443\n", - "epoch: 43200, acc: 0.807, loss: 0.443\n", - "epoch: 43300, acc: 0.807, loss: 0.442\n", - "epoch: 43400, acc: 0.807, loss: 0.442\n", - "epoch: 43500, acc: 0.807, loss: 0.442\n", - "epoch: 43600, acc: 0.807, loss: 0.442\n", - "epoch: 43700, acc: 0.807, loss: 0.442\n", - "epoch: 43800, acc: 0.807, loss: 0.441\n", - "epoch: 43900, acc: 0.807, loss: 0.441\n", - "epoch: 44000, acc: 0.807, loss: 0.441\n", - "epoch: 44100, acc: 0.807, loss: 0.441\n", - "epoch: 44200, acc: 0.807, loss: 0.441\n", - "epoch: 44300, acc: 0.807, loss: 0.440\n", - "epoch: 44400, acc: 0.807, loss: 0.440\n", - "epoch: 44500, acc: 0.807, loss: 0.440\n", - "epoch: 44600, acc: 0.807, loss: 0.440\n", - "epoch: 44700, acc: 0.807, loss: 0.440\n", - "epoch: 44800, acc: 0.807, loss: 0.439\n", - "epoch: 44900, acc: 0.810, loss: 0.439\n", - "epoch: 45000, acc: 0.807, loss: 0.439\n", - "epoch: 45100, acc: 0.807, loss: 0.439\n", - "epoch: 45200, acc: 0.810, loss: 0.439\n", - "epoch: 45300, acc: 0.810, loss: 0.438\n", - "epoch: 45400, acc: 0.810, loss: 0.438\n", - "epoch: 45500, acc: 0.810, loss: 0.438\n", - "epoch: 45600, acc: 0.810, loss: 0.438\n", - "epoch: 45700, acc: 0.810, loss: 0.438\n", - "epoch: 45800, acc: 0.810, loss: 0.437\n", - "epoch: 45900, acc: 0.810, loss: 0.437\n", - "epoch: 46000, acc: 0.810, loss: 0.437\n", - "epoch: 46100, acc: 0.810, loss: 0.437\n", - "epoch: 46200, acc: 0.810, loss: 0.437\n", - "epoch: 46300, acc: 0.810, loss: 0.437\n", - "epoch: 46400, acc: 0.810, loss: 0.436\n", - "epoch: 46500, acc: 0.810, 
loss: 0.436\n", - "epoch: 46600, acc: 0.810, loss: 0.436\n", - "epoch: 46700, acc: 0.810, loss: 0.436\n", - "epoch: 46800, acc: 0.810, loss: 0.436\n", - "epoch: 46900, acc: 0.810, loss: 0.435\n", - "epoch: 47000, acc: 0.810, loss: 0.435\n", - "epoch: 47100, acc: 0.810, loss: 0.435\n", - "epoch: 47200, acc: 0.813, loss: 0.435\n", - "epoch: 47300, acc: 0.813, loss: 0.435\n", - "epoch: 47400, acc: 0.813, loss: 0.435\n", - "epoch: 47500, acc: 0.813, loss: 0.434\n", - "epoch: 47600, acc: 0.813, loss: 0.434\n", - "epoch: 47700, acc: 0.813, loss: 0.434\n", - "epoch: 47800, acc: 0.813, loss: 0.434\n", - "epoch: 47900, acc: 0.817, loss: 0.434\n", - "epoch: 48000, acc: 0.817, loss: 0.433\n", - "epoch: 48100, acc: 0.817, loss: 0.433\n", - "epoch: 48200, acc: 0.813, loss: 0.433\n", - "epoch: 48300, acc: 0.817, loss: 0.433\n", - "epoch: 48400, acc: 0.817, loss: 0.433\n", - "epoch: 48500, acc: 0.813, loss: 0.433\n", - "epoch: 48600, acc: 0.817, loss: 0.432\n", - "epoch: 48700, acc: 0.817, loss: 0.432\n", - "epoch: 48800, acc: 0.817, loss: 0.432\n", - "epoch: 48900, acc: 0.817, loss: 0.432\n", - "epoch: 49000, acc: 0.817, loss: 0.432\n", - "epoch: 49100, acc: 0.817, loss: 0.432\n", - "epoch: 49200, acc: 0.817, loss: 0.431\n", - "epoch: 49300, acc: 0.817, loss: 0.431\n", - "epoch: 49400, acc: 0.817, loss: 0.431\n", - "epoch: 49500, acc: 0.817, loss: 0.431\n", - "epoch: 49600, acc: 0.817, loss: 0.431\n", - "epoch: 49700, acc: 0.817, loss: 0.431\n", - "epoch: 49800, acc: 0.817, loss: 0.430\n", - "epoch: 49900, acc: 0.817, loss: 0.430\n", - "epoch: 50000, acc: 0.817, loss: 0.430\n", - "epoch: 50100, acc: 0.820, loss: 0.430\n", - "epoch: 50200, acc: 0.820, loss: 0.430\n", - "epoch: 50300, acc: 0.820, loss: 0.429\n", - "epoch: 50400, acc: 0.820, loss: 0.429\n", - "epoch: 50500, acc: 0.820, loss: 0.429\n", - "epoch: 50600, acc: 0.820, loss: 0.429\n", - "epoch: 50700, acc: 0.820, loss: 0.429\n", - "epoch: 50800, acc: 0.820, loss: 0.429\n", - "epoch: 50900, acc: 0.820, loss: 0.428\n", - 
"epoch: 51000, acc: 0.820, loss: 0.428\n", - "epoch: 51100, acc: 0.820, loss: 0.428\n", - "epoch: 51200, acc: 0.820, loss: 0.428\n", - "epoch: 51300, acc: 0.820, loss: 0.428\n", - "epoch: 51400, acc: 0.820, loss: 0.428\n", - "epoch: 51500, acc: 0.820, loss: 0.427\n", - "epoch: 51600, acc: 0.820, loss: 0.427\n", - "epoch: 51700, acc: 0.820, loss: 0.427\n", - "epoch: 51800, acc: 0.820, loss: 0.427\n", - "epoch: 51900, acc: 0.820, loss: 0.427\n", - "epoch: 52000, acc: 0.820, loss: 0.427\n", - "epoch: 52100, acc: 0.820, loss: 0.426\n", - "epoch: 52200, acc: 0.820, loss: 0.426\n", - "epoch: 52300, acc: 0.820, loss: 0.426\n", - "epoch: 52400, acc: 0.820, loss: 0.426\n", - "epoch: 52500, acc: 0.820, loss: 0.426\n", - "epoch: 52600, acc: 0.820, loss: 0.426\n", - "epoch: 52700, acc: 0.820, loss: 0.426\n", - "epoch: 52800, acc: 0.820, loss: 0.425\n", - "epoch: 52900, acc: 0.820, loss: 0.425\n", - "epoch: 53000, acc: 0.820, loss: 0.425\n", - "epoch: 53100, acc: 0.820, loss: 0.425\n", - "epoch: 53200, acc: 0.820, loss: 0.425\n", - "epoch: 53300, acc: 0.820, loss: 0.425\n", - "epoch: 53400, acc: 0.820, loss: 0.424\n", - "epoch: 53500, acc: 0.820, loss: 0.424\n", - "epoch: 53600, acc: 0.820, loss: 0.424\n", - "epoch: 53700, acc: 0.820, loss: 0.424\n", - "epoch: 53800, acc: 0.820, loss: 0.424\n", - "epoch: 53900, acc: 0.820, loss: 0.424\n", - "epoch: 54000, acc: 0.820, loss: 0.424\n", - "epoch: 54100, acc: 0.820, loss: 0.423\n", - "epoch: 54200, acc: 0.820, loss: 0.423\n", - "epoch: 54300, acc: 0.820, loss: 0.423\n", - "epoch: 54400, acc: 0.820, loss: 0.423\n", - "epoch: 54500, acc: 0.820, loss: 0.423\n", - "epoch: 54600, acc: 0.823, loss: 0.423\n", - "epoch: 54700, acc: 0.823, loss: 0.422\n", - "epoch: 54800, acc: 0.823, loss: 0.422\n", - "epoch: 54900, acc: 0.823, loss: 0.422\n", - "epoch: 55000, acc: 0.823, loss: 0.422\n", - "epoch: 55100, acc: 0.823, loss: 0.422\n", - "epoch: 55200, acc: 0.823, loss: 0.422\n", - "epoch: 55300, acc: 0.823, loss: 0.422\n", - "epoch: 55400, acc: 
0.823, loss: 0.421\n", - "epoch: 55500, acc: 0.823, loss: 0.421\n", - "epoch: 55600, acc: 0.823, loss: 0.421\n", - "epoch: 55700, acc: 0.823, loss: 0.421\n", - "epoch: 55800, acc: 0.823, loss: 0.421\n", - "epoch: 55900, acc: 0.823, loss: 0.421\n", - "epoch: 56000, acc: 0.823, loss: 0.421\n", - "epoch: 56100, acc: 0.823, loss: 0.420\n", - "epoch: 56200, acc: 0.823, loss: 0.420\n", - "epoch: 56300, acc: 0.823, loss: 0.420\n", - "epoch: 56400, acc: 0.823, loss: 0.420\n", - "epoch: 56500, acc: 0.823, loss: 0.420\n", - "epoch: 56600, acc: 0.823, loss: 0.420\n", - "epoch: 56700, acc: 0.823, loss: 0.420\n", - "epoch: 56800, acc: 0.823, loss: 0.419\n", - "epoch: 56900, acc: 0.820, loss: 0.419\n", - "epoch: 57000, acc: 0.820, loss: 0.419\n", - "epoch: 57100, acc: 0.820, loss: 0.419\n", - "epoch: 57200, acc: 0.820, loss: 0.419\n", - "epoch: 57300, acc: 0.820, loss: 0.419\n", - "epoch: 57400, acc: 0.820, loss: 0.419\n", - "epoch: 57500, acc: 0.820, loss: 0.418\n", - "epoch: 57600, acc: 0.820, loss: 0.418\n", - "epoch: 57700, acc: 0.820, loss: 0.418\n", - "epoch: 57800, acc: 0.820, loss: 0.418\n", - "epoch: 57900, acc: 0.820, loss: 0.418\n", - "epoch: 58000, acc: 0.820, loss: 0.418\n", - "epoch: 58100, acc: 0.820, loss: 0.418\n", - "epoch: 58200, acc: 0.820, loss: 0.417\n", - "epoch: 58300, acc: 0.820, loss: 0.417\n", - "epoch: 58400, acc: 0.820, loss: 0.417\n", - "epoch: 58500, acc: 0.820, loss: 0.417\n", - "epoch: 58600, acc: 0.820, loss: 0.417\n", - "epoch: 58700, acc: 0.823, loss: 0.417\n", - "epoch: 58800, acc: 0.823, loss: 0.417\n", - "epoch: 58900, acc: 0.823, loss: 0.417\n", - "epoch: 59000, acc: 0.823, loss: 0.416\n", - "epoch: 59100, acc: 0.823, loss: 0.416\n", - "epoch: 59200, acc: 0.823, loss: 0.416\n", - "epoch: 59300, acc: 0.823, loss: 0.416\n", - "epoch: 59400, acc: 0.823, loss: 0.416\n", - "epoch: 59500, acc: 0.823, loss: 0.416\n", - "epoch: 59600, acc: 0.823, loss: 0.416\n", - "epoch: 59700, acc: 0.823, loss: 0.415\n", - "epoch: 59800, acc: 0.823, loss: 
0.415\n", - "epoch: 59900, acc: 0.823, loss: 0.415\n", - "epoch: 60000, acc: 0.823, loss: 0.415\n", - "epoch: 60100, acc: 0.823, loss: 0.415\n", - "epoch: 60200, acc: 0.823, loss: 0.415\n", - "epoch: 60300, acc: 0.820, loss: 0.415\n", - "epoch: 60400, acc: 0.820, loss: 0.415\n", - "epoch: 60500, acc: 0.820, loss: 0.414\n", - "epoch: 60600, acc: 0.820, loss: 0.414\n", - "epoch: 60700, acc: 0.823, loss: 0.414\n", - "epoch: 60800, acc: 0.820, loss: 0.414\n", - "epoch: 60900, acc: 0.823, loss: 0.414\n", - "epoch: 61000, acc: 0.823, loss: 0.414\n", - "epoch: 61100, acc: 0.823, loss: 0.414\n", - "epoch: 61200, acc: 0.823, loss: 0.414\n", - "epoch: 61300, acc: 0.827, loss: 0.413\n", - "epoch: 61400, acc: 0.827, loss: 0.413\n", - "epoch: 61500, acc: 0.827, loss: 0.413\n", - "epoch: 61600, acc: 0.827, loss: 0.413\n", - "epoch: 61700, acc: 0.827, loss: 0.413\n", - "epoch: 61800, acc: 0.827, loss: 0.413\n", - "epoch: 61900, acc: 0.827, loss: 0.413\n", - "epoch: 62000, acc: 0.827, loss: 0.413\n", - "epoch: 62100, acc: 0.827, loss: 0.412\n", - "epoch: 62200, acc: 0.827, loss: 0.412\n", - "epoch: 62300, acc: 0.827, loss: 0.412\n", - "epoch: 62400, acc: 0.827, loss: 0.412\n", - "epoch: 62500, acc: 0.827, loss: 0.412\n", - "epoch: 62600, acc: 0.827, loss: 0.412\n", - "epoch: 62700, acc: 0.827, loss: 0.412\n", - "epoch: 62800, acc: 0.827, loss: 0.412\n", - "epoch: 62900, acc: 0.827, loss: 0.412\n", - "epoch: 63000, acc: 0.827, loss: 0.411\n", - "epoch: 63100, acc: 0.827, loss: 0.411\n", - "epoch: 63200, acc: 0.827, loss: 0.411\n", - "epoch: 63300, acc: 0.827, loss: 0.411\n", - "epoch: 63400, acc: 0.827, loss: 0.411\n", - "epoch: 63500, acc: 0.830, loss: 0.411\n", - "epoch: 63600, acc: 0.830, loss: 0.411\n", - "epoch: 63700, acc: 0.830, loss: 0.411\n", - "epoch: 63800, acc: 0.830, loss: 0.410\n", - "epoch: 63900, acc: 0.830, loss: 0.410\n", - "epoch: 64000, acc: 0.830, loss: 0.410\n", - "epoch: 64100, acc: 0.830, loss: 0.410\n", - "epoch: 64200, acc: 0.830, loss: 0.410\n", - "epoch: 
64300, acc: 0.830, loss: 0.410\n", - "epoch: 64400, acc: 0.830, loss: 0.410\n", - "epoch: 64500, acc: 0.830, loss: 0.410\n", - "epoch: 64600, acc: 0.830, loss: 0.410\n", - "epoch: 64700, acc: 0.833, loss: 0.409\n", - "epoch: 64800, acc: 0.833, loss: 0.409\n", - "epoch: 64900, acc: 0.833, loss: 0.409\n", - "epoch: 65000, acc: 0.833, loss: 0.409\n", - "epoch: 65100, acc: 0.833, loss: 0.409\n", - "epoch: 65200, acc: 0.833, loss: 0.409\n", - "epoch: 65300, acc: 0.833, loss: 0.409\n", - "epoch: 65400, acc: 0.833, loss: 0.409\n", - "epoch: 65500, acc: 0.837, loss: 0.409\n", - "epoch: 65600, acc: 0.837, loss: 0.408\n", - "epoch: 65700, acc: 0.837, loss: 0.408\n", - "epoch: 65800, acc: 0.837, loss: 0.408\n", - "epoch: 65900, acc: 0.837, loss: 0.408\n", - "epoch: 66000, acc: 0.837, loss: 0.408\n", - "epoch: 66100, acc: 0.837, loss: 0.408\n", - "epoch: 66200, acc: 0.833, loss: 0.408\n", - "epoch: 66300, acc: 0.833, loss: 0.408\n", - "epoch: 66400, acc: 0.833, loss: 0.408\n", - "epoch: 66500, acc: 0.833, loss: 0.408\n", - "epoch: 66600, acc: 0.833, loss: 0.407\n", - "epoch: 66700, acc: 0.833, loss: 0.407\n", - "epoch: 66800, acc: 0.833, loss: 0.407\n", - "epoch: 66900, acc: 0.833, loss: 0.407\n", - "epoch: 67000, acc: 0.830, loss: 0.407\n", - "epoch: 67100, acc: 0.833, loss: 0.407\n", - "epoch: 67200, acc: 0.830, loss: 0.407\n", - "epoch: 67300, acc: 0.830, loss: 0.407\n", - "epoch: 67400, acc: 0.830, loss: 0.407\n", - "epoch: 67500, acc: 0.830, loss: 0.406\n", - "epoch: 67600, acc: 0.830, loss: 0.406\n", - "epoch: 67700, acc: 0.830, loss: 0.406\n", - "epoch: 67800, acc: 0.830, loss: 0.406\n", - "epoch: 67900, acc: 0.830, loss: 0.406\n", - "epoch: 68000, acc: 0.830, loss: 0.406\n", - "epoch: 68100, acc: 0.830, loss: 0.406\n", - "epoch: 68200, acc: 0.830, loss: 0.406\n", - "epoch: 68300, acc: 0.830, loss: 0.406\n", - "epoch: 68400, acc: 0.833, loss: 0.406\n", - "epoch: 68500, acc: 0.833, loss: 0.405\n", - "epoch: 68600, acc: 0.830, loss: 0.405\n", - "epoch: 68700, acc: 0.833, 
loss: 0.405\n", - "epoch: 68800, acc: 0.837, loss: 0.405\n", - "epoch: 68900, acc: 0.837, loss: 0.405\n", - "epoch: 69000, acc: 0.837, loss: 0.405\n", - "epoch: 69100, acc: 0.837, loss: 0.405\n", - "epoch: 69200, acc: 0.837, loss: 0.405\n", - "epoch: 69300, acc: 0.837, loss: 0.405\n", - "epoch: 69400, acc: 0.833, loss: 0.405\n", - "epoch: 69500, acc: 0.837, loss: 0.404\n", - "epoch: 69600, acc: 0.837, loss: 0.404\n", - "epoch: 69700, acc: 0.837, loss: 0.404\n", - "epoch: 69800, acc: 0.837, loss: 0.404\n", - "epoch: 69900, acc: 0.837, loss: 0.404\n", - "epoch: 70000, acc: 0.837, loss: 0.404\n", - "epoch: 70100, acc: 0.837, loss: 0.404\n", - "epoch: 70200, acc: 0.837, loss: 0.404\n", - "epoch: 70300, acc: 0.840, loss: 0.404\n", - "epoch: 70400, acc: 0.840, loss: 0.404\n", - "epoch: 70500, acc: 0.840, loss: 0.404\n", - "epoch: 70600, acc: 0.840, loss: 0.403\n", - "epoch: 70700, acc: 0.840, loss: 0.403\n", - "epoch: 70800, acc: 0.840, loss: 0.403\n", - "epoch: 70900, acc: 0.840, loss: 0.403\n", - "epoch: 71000, acc: 0.840, loss: 0.403\n", - "epoch: 71100, acc: 0.840, loss: 0.403\n", - "epoch: 71200, acc: 0.840, loss: 0.403\n", - "epoch: 71300, acc: 0.840, loss: 0.403\n", - "epoch: 71400, acc: 0.840, loss: 0.403\n", - "epoch: 71500, acc: 0.840, loss: 0.403\n", - "epoch: 71600, acc: 0.840, loss: 0.402\n", - "epoch: 71700, acc: 0.840, loss: 0.402\n", - "epoch: 71800, acc: 0.840, loss: 0.402\n", - "epoch: 71900, acc: 0.840, loss: 0.402\n", - "epoch: 72000, acc: 0.840, loss: 0.402\n", - "epoch: 72100, acc: 0.840, loss: 0.402\n", - "epoch: 72200, acc: 0.840, loss: 0.402\n", - "epoch: 72300, acc: 0.840, loss: 0.402\n", - "epoch: 72400, acc: 0.840, loss: 0.402\n", - "epoch: 72500, acc: 0.840, loss: 0.402\n", - "epoch: 72600, acc: 0.840, loss: 0.401\n", - "epoch: 72700, acc: 0.840, loss: 0.401\n", - "epoch: 72800, acc: 0.840, loss: 0.401\n", - "epoch: 72900, acc: 0.840, loss: 0.401\n", - "epoch: 73000, acc: 0.840, loss: 0.401\n", - "epoch: 73100, acc: 0.840, loss: 0.401\n", - 
"epoch: 73200, acc: 0.840, loss: 0.401\n", - "epoch: 73300, acc: 0.840, loss: 0.401\n", - "epoch: 73400, acc: 0.840, loss: 0.401\n", - "epoch: 73500, acc: 0.840, loss: 0.401\n", - "epoch: 73600, acc: 0.840, loss: 0.401\n", - "epoch: 73700, acc: 0.840, loss: 0.400\n", - "epoch: 73800, acc: 0.840, loss: 0.400\n", - "epoch: 73900, acc: 0.840, loss: 0.400\n", - "epoch: 74000, acc: 0.837, loss: 0.400\n", - "epoch: 74100, acc: 0.837, loss: 0.400\n", - "epoch: 74200, acc: 0.837, loss: 0.400\n", - "epoch: 74300, acc: 0.837, loss: 0.400\n", - "epoch: 74400, acc: 0.837, loss: 0.400\n", - "epoch: 74500, acc: 0.837, loss: 0.400\n", - "epoch: 74600, acc: 0.833, loss: 0.399\n", - "epoch: 74700, acc: 0.833, loss: 0.399\n", - "epoch: 74800, acc: 0.833, loss: 0.399\n", - "epoch: 74900, acc: 0.837, loss: 0.399\n", - "epoch: 75000, acc: 0.837, loss: 0.399\n", - "epoch: 75100, acc: 0.837, loss: 0.399\n", - "epoch: 75200, acc: 0.837, loss: 0.399\n", - "epoch: 75300, acc: 0.837, loss: 0.399\n", - "epoch: 75400, acc: 0.837, loss: 0.399\n", - "epoch: 75500, acc: 0.837, loss: 0.399\n", - "epoch: 75600, acc: 0.837, loss: 0.398\n", - "epoch: 75700, acc: 0.837, loss: 0.398\n", - "epoch: 75800, acc: 0.837, loss: 0.398\n", - "epoch: 75900, acc: 0.837, loss: 0.398\n", - "epoch: 76000, acc: 0.837, loss: 0.398\n", - "epoch: 76100, acc: 0.837, loss: 0.398\n", - "epoch: 76200, acc: 0.837, loss: 0.398\n", - "epoch: 76300, acc: 0.837, loss: 0.398\n", - "epoch: 76400, acc: 0.837, loss: 0.398\n", - "epoch: 76500, acc: 0.837, loss: 0.398\n", - "epoch: 76600, acc: 0.837, loss: 0.397\n", - "epoch: 76700, acc: 0.837, loss: 0.397\n", - "epoch: 76800, acc: 0.837, loss: 0.397\n", - "epoch: 76900, acc: 0.837, loss: 0.397\n", - "epoch: 77000, acc: 0.837, loss: 0.397\n", - "epoch: 77100, acc: 0.837, loss: 0.397\n", - "epoch: 77200, acc: 0.837, loss: 0.397\n", - "epoch: 77300, acc: 0.837, loss: 0.397\n", - "epoch: 77400, acc: 0.837, loss: 0.397\n", - "epoch: 77500, acc: 0.837, loss: 0.397\n", - "epoch: 77600, acc: 
0.837, loss: 0.397\n", - "epoch: 77700, acc: 0.837, loss: 0.396\n", - "epoch: 77800, acc: 0.837, loss: 0.396\n", - "epoch: 77900, acc: 0.837, loss: 0.396\n", - "epoch: 78000, acc: 0.837, loss: 0.396\n", - "epoch: 78100, acc: 0.837, loss: 0.396\n", - "epoch: 78200, acc: 0.837, loss: 0.396\n", - "epoch: 78300, acc: 0.837, loss: 0.396\n", - "epoch: 78400, acc: 0.837, loss: 0.396\n", - "epoch: 78500, acc: 0.837, loss: 0.396\n", - "epoch: 78600, acc: 0.837, loss: 0.396\n", - "epoch: 78700, acc: 0.837, loss: 0.396\n", - "epoch: 78800, acc: 0.837, loss: 0.396\n", - "epoch: 78900, acc: 0.837, loss: 0.395\n", - "epoch: 79000, acc: 0.837, loss: 0.395\n", - "epoch: 79100, acc: 0.837, loss: 0.395\n", - "epoch: 79200, acc: 0.837, loss: 0.395\n", - "epoch: 79300, acc: 0.837, loss: 0.395\n", - "epoch: 79400, acc: 0.837, loss: 0.395\n", - "epoch: 79500, acc: 0.837, loss: 0.395\n", - "epoch: 79600, acc: 0.837, loss: 0.395\n", - "epoch: 79700, acc: 0.837, loss: 0.395\n", - "epoch: 79800, acc: 0.837, loss: 0.395\n", - "epoch: 79900, acc: 0.837, loss: 0.395\n", - "epoch: 80000, acc: 0.837, loss: 0.394\n", - "epoch: 80100, acc: 0.837, loss: 0.394\n", - "epoch: 80200, acc: 0.837, loss: 0.394\n", - "epoch: 80300, acc: 0.837, loss: 0.394\n", - "epoch: 80400, acc: 0.837, loss: 0.394\n", - "epoch: 80500, acc: 0.837, loss: 0.394\n", - "epoch: 80600, acc: 0.837, loss: 0.394\n", - "epoch: 80700, acc: 0.837, loss: 0.394\n", - "epoch: 80800, acc: 0.837, loss: 0.394\n", - "epoch: 80900, acc: 0.837, loss: 0.394\n", - "epoch: 81000, acc: 0.837, loss: 0.394\n", - "epoch: 81100, acc: 0.837, loss: 0.394\n", - "epoch: 81200, acc: 0.837, loss: 0.393\n", - "epoch: 81300, acc: 0.837, loss: 0.393\n", - "epoch: 81400, acc: 0.837, loss: 0.393\n", - "epoch: 81500, acc: 0.837, loss: 0.393\n", - "epoch: 81600, acc: 0.837, loss: 0.393\n", - "epoch: 81700, acc: 0.837, loss: 0.393\n", - "epoch: 81800, acc: 0.837, loss: 0.393\n", - "epoch: 81900, acc: 0.837, loss: 0.393\n", - "epoch: 82000, acc: 0.837, loss: 
0.393\n", - "epoch: 82100, acc: 0.837, loss: 0.393\n", - "epoch: 82200, acc: 0.837, loss: 0.393\n", - "epoch: 82300, acc: 0.837, loss: 0.393\n", - "epoch: 82400, acc: 0.837, loss: 0.392\n", - "epoch: 82500, acc: 0.837, loss: 0.392\n", - "epoch: 82600, acc: 0.837, loss: 0.392\n", - "epoch: 82700, acc: 0.837, loss: 0.392\n", - "epoch: 82800, acc: 0.837, loss: 0.392\n", - "epoch: 82900, acc: 0.837, loss: 0.392\n", - "epoch: 83000, acc: 0.837, loss: 0.392\n", - "epoch: 83100, acc: 0.837, loss: 0.392\n", - "epoch: 83200, acc: 0.837, loss: 0.392\n", - "epoch: 83300, acc: 0.837, loss: 0.392\n", - "epoch: 83400, acc: 0.837, loss: 0.392\n", - "epoch: 83500, acc: 0.837, loss: 0.392\n", - "epoch: 83600, acc: 0.837, loss: 0.392\n", - "epoch: 83700, acc: 0.837, loss: 0.391\n", - "epoch: 83800, acc: 0.837, loss: 0.391\n", - "epoch: 83900, acc: 0.837, loss: 0.391\n", - "epoch: 84000, acc: 0.837, loss: 0.391\n", - "epoch: 84100, acc: 0.837, loss: 0.391\n", - "epoch: 84200, acc: 0.837, loss: 0.391\n", - "epoch: 84300, acc: 0.837, loss: 0.391\n", - "epoch: 84400, acc: 0.837, loss: 0.391\n", - "epoch: 84500, acc: 0.837, loss: 0.391\n", - "epoch: 84600, acc: 0.837, loss: 0.391\n", - "epoch: 84700, acc: 0.837, loss: 0.391\n", - "epoch: 84800, acc: 0.837, loss: 0.391\n", - "epoch: 84900, acc: 0.837, loss: 0.391\n", - "epoch: 85000, acc: 0.837, loss: 0.390\n", - "epoch: 85100, acc: 0.837, loss: 0.390\n", - "epoch: 85200, acc: 0.837, loss: 0.390\n", - "epoch: 85300, acc: 0.837, loss: 0.390\n", - "epoch: 85400, acc: 0.837, loss: 0.390\n", - "epoch: 85500, acc: 0.837, loss: 0.390\n", - "epoch: 85600, acc: 0.837, loss: 0.390\n", - "epoch: 85700, acc: 0.837, loss: 0.390\n", - "epoch: 85800, acc: 0.837, loss: 0.390\n", - "epoch: 85900, acc: 0.837, loss: 0.390\n", - "epoch: 86000, acc: 0.837, loss: 0.390\n", - "epoch: 86100, acc: 0.837, loss: 0.390\n", - "epoch: 86200, acc: 0.837, loss: 0.390\n", - "epoch: 86300, acc: 0.837, loss: 0.390\n", - "epoch: 86400, acc: 0.837, loss: 0.389\n", - "epoch: 
86500, acc: 0.837, loss: 0.389\n", - "epoch: 86600, acc: 0.837, loss: 0.389\n", - "epoch: 86700, acc: 0.837, loss: 0.389\n", - "epoch: 86800, acc: 0.837, loss: 0.389\n", - "epoch: 86900, acc: 0.837, loss: 0.389\n", - "epoch: 87000, acc: 0.837, loss: 0.389\n", - "epoch: 87100, acc: 0.837, loss: 0.389\n", - "epoch: 87200, acc: 0.837, loss: 0.389\n", - "epoch: 87300, acc: 0.837, loss: 0.389\n", - "epoch: 87400, acc: 0.837, loss: 0.389\n", - "epoch: 87500, acc: 0.837, loss: 0.389\n", - "epoch: 87600, acc: 0.837, loss: 0.389\n", - "epoch: 87700, acc: 0.837, loss: 0.389\n", - "epoch: 87800, acc: 0.837, loss: 0.388\n", - "epoch: 87900, acc: 0.837, loss: 0.388\n", - "epoch: 88000, acc: 0.837, loss: 0.388\n", - "epoch: 88100, acc: 0.837, loss: 0.388\n", - "epoch: 88200, acc: 0.837, loss: 0.388\n", - "epoch: 88300, acc: 0.837, loss: 0.388\n", - "epoch: 88400, acc: 0.837, loss: 0.388\n", - "epoch: 88500, acc: 0.837, loss: 0.388\n", - "epoch: 88600, acc: 0.837, loss: 0.388\n", - "epoch: 88700, acc: 0.837, loss: 0.388\n", - "epoch: 88800, acc: 0.837, loss: 0.388\n", - "epoch: 88900, acc: 0.837, loss: 0.388\n", - "epoch: 89000, acc: 0.837, loss: 0.388\n", - "epoch: 89100, acc: 0.837, loss: 0.388\n", - "epoch: 89200, acc: 0.837, loss: 0.388\n", - "epoch: 89300, acc: 0.837, loss: 0.387\n", - "epoch: 89400, acc: 0.837, loss: 0.387\n", - "epoch: 89500, acc: 0.837, loss: 0.387\n", - "epoch: 89600, acc: 0.837, loss: 0.387\n", - "epoch: 89700, acc: 0.837, loss: 0.387\n", - "epoch: 89800, acc: 0.837, loss: 0.387\n", - "epoch: 89900, acc: 0.837, loss: 0.387\n", - "epoch: 90000, acc: 0.837, loss: 0.387\n", - "epoch: 90100, acc: 0.837, loss: 0.387\n", - "epoch: 90200, acc: 0.837, loss: 0.387\n", - "epoch: 90300, acc: 0.837, loss: 0.387\n", - "epoch: 90400, acc: 0.840, loss: 0.387\n", - "epoch: 90500, acc: 0.840, loss: 0.387\n", - "epoch: 90600, acc: 0.840, loss: 0.387\n", - "epoch: 90700, acc: 0.843, loss: 0.387\n", - "epoch: 90800, acc: 0.843, loss: 0.386\n", - "epoch: 90900, acc: 0.843, 
loss: 0.386\n", - "epoch: 91000, acc: 0.843, loss: 0.386\n", - "epoch: 91100, acc: 0.843, loss: 0.386\n", - "epoch: 91200, acc: 0.843, loss: 0.386\n", - "epoch: 91300, acc: 0.843, loss: 0.386\n", - "epoch: 91400, acc: 0.843, loss: 0.386\n", - "epoch: 91500, acc: 0.843, loss: 0.386\n", - "epoch: 91600, acc: 0.843, loss: 0.386\n", - "epoch: 91700, acc: 0.843, loss: 0.386\n", - "epoch: 91800, acc: 0.843, loss: 0.386\n", - "epoch: 91900, acc: 0.843, loss: 0.386\n", - "epoch: 92000, acc: 0.843, loss: 0.386\n", - "epoch: 92100, acc: 0.843, loss: 0.386\n", - "epoch: 92200, acc: 0.840, loss: 0.386\n", - "epoch: 92300, acc: 0.840, loss: 0.386\n", - "epoch: 92400, acc: 0.840, loss: 0.385\n", - "epoch: 92500, acc: 0.840, loss: 0.385\n", - "epoch: 92600, acc: 0.840, loss: 0.385\n", - "epoch: 92700, acc: 0.840, loss: 0.385\n", - "epoch: 92800, acc: 0.840, loss: 0.385\n", - "epoch: 92900, acc: 0.840, loss: 0.385\n", - "epoch: 93000, acc: 0.840, loss: 0.385\n", - "epoch: 93100, acc: 0.840, loss: 0.385\n", - "epoch: 93200, acc: 0.840, loss: 0.385\n", - "epoch: 93300, acc: 0.840, loss: 0.385\n", - "epoch: 93400, acc: 0.840, loss: 0.385\n", - "epoch: 93500, acc: 0.840, loss: 0.385\n", - "epoch: 93600, acc: 0.840, loss: 0.385\n", - "epoch: 93700, acc: 0.840, loss: 0.385\n", - "epoch: 93800, acc: 0.840, loss: 0.385\n", - "epoch: 93900, acc: 0.840, loss: 0.385\n", - "epoch: 94000, acc: 0.840, loss: 0.384\n", - "epoch: 94100, acc: 0.840, loss: 0.384\n", - "epoch: 94200, acc: 0.840, loss: 0.384\n", - "epoch: 94300, acc: 0.840, loss: 0.384\n", - "epoch: 94400, acc: 0.840, loss: 0.384\n", - "epoch: 94500, acc: 0.840, loss: 0.384\n", - "epoch: 94600, acc: 0.840, loss: 0.384\n", - "epoch: 94700, acc: 0.840, loss: 0.384\n", - "epoch: 94800, acc: 0.840, loss: 0.384\n", - "epoch: 94900, acc: 0.840, loss: 0.384\n", - "epoch: 95000, acc: 0.840, loss: 0.384\n", - "epoch: 95100, acc: 0.840, loss: 0.384\n", - "epoch: 95200, acc: 0.840, loss: 0.384\n", - "epoch: 95300, acc: 0.840, loss: 0.384\n", - 
"epoch: 95400, acc: 0.840, loss: 0.384\n", - "epoch: 95500, acc: 0.840, loss: 0.384\n", - "epoch: 95600, acc: 0.840, loss: 0.384\n", - "epoch: 95700, acc: 0.840, loss: 0.383\n", - "epoch: 95800, acc: 0.840, loss: 0.383\n", - "epoch: 95900, acc: 0.840, loss: 0.383\n", - "epoch: 96000, acc: 0.840, loss: 0.383\n", - "epoch: 96100, acc: 0.840, loss: 0.383\n", - "epoch: 96200, acc: 0.840, loss: 0.383\n", - "epoch: 96300, acc: 0.840, loss: 0.383\n", - "epoch: 96400, acc: 0.840, loss: 0.383\n", - "epoch: 96500, acc: 0.840, loss: 0.383\n", - "epoch: 96600, acc: 0.840, loss: 0.383\n", - "epoch: 96700, acc: 0.840, loss: 0.383\n", - "epoch: 96800, acc: 0.840, loss: 0.383\n", - "epoch: 96900, acc: 0.840, loss: 0.383\n", - "epoch: 97000, acc: 0.840, loss: 0.383\n", - "epoch: 97100, acc: 0.840, loss: 0.383\n", - "epoch: 97200, acc: 0.840, loss: 0.383\n", - "epoch: 97300, acc: 0.840, loss: 0.383\n", - "epoch: 97400, acc: 0.840, loss: 0.382\n", - "epoch: 97500, acc: 0.840, loss: 0.382\n", - "epoch: 97600, acc: 0.840, loss: 0.382\n", - "epoch: 97700, acc: 0.840, loss: 0.382\n", - "epoch: 97800, acc: 0.840, loss: 0.382\n", - "epoch: 97900, acc: 0.840, loss: 0.382\n", - "epoch: 98000, acc: 0.840, loss: 0.382\n", - "epoch: 98100, acc: 0.843, loss: 0.382\n", - "epoch: 98200, acc: 0.843, loss: 0.382\n", - "epoch: 98300, acc: 0.843, loss: 0.382\n", - "epoch: 98400, acc: 0.843, loss: 0.382\n", - "epoch: 98500, acc: 0.843, loss: 0.382\n", - "epoch: 98600, acc: 0.843, loss: 0.382\n", - "epoch: 98700, acc: 0.843, loss: 0.382\n", - "epoch: 98800, acc: 0.843, loss: 0.382\n", - "epoch: 98900, acc: 0.843, loss: 0.382\n", - "epoch: 99000, acc: 0.843, loss: 0.382\n", - "epoch: 99100, acc: 0.843, loss: 0.382\n", - "epoch: 99200, acc: 0.843, loss: 0.381\n", - "epoch: 99300, acc: 0.843, loss: 0.381\n", - "epoch: 99400, acc: 0.843, loss: 0.381\n", - "epoch: 99500, acc: 0.843, loss: 0.381\n", - "epoch: 99600, acc: 0.843, loss: 0.381\n", - "epoch: 99700, acc: 0.843, loss: 0.381\n", - "epoch: 99800, acc: 
0.843, loss: 0.381\n", - "epoch: 99900, acc: 0.843, loss: 0.381\n", - "epoch: 100000, acc: 0.843, loss: 0.381\n" + "epoch: 500, acc: 0.403, loss: 1.074, lr: 0.66711140760507\n", + "epoch: 600, acc: 0.403, loss: 1.072, lr: 0.6253908692933083\n", + "epoch: 700, acc: 0.410, loss: 1.070, lr: 0.5885815185403178\n", + "epoch: 800, acc: 0.413, loss: 1.068, lr: 0.5558643690939411\n", + "epoch: 900, acc: 0.427, loss: 1.066, lr: 0.526592943654555\n", + "epoch: 1000, acc: 0.440, loss: 1.063, lr: 0.5002501250625312\n", + "epoch: 1100, acc: 0.437, loss: 1.059, lr: 0.4764173415912339\n", + "epoch: 1200, acc: 0.447, loss: 1.056, lr: 0.45475216007276037\n", + "epoch: 1300, acc: 0.440, loss: 1.052, lr: 0.43497172683775553\n", + "epoch: 1400, acc: 0.427, loss: 1.048, lr: 0.4168403501458941\n", + "epoch: 1500, acc: 0.417, loss: 1.041, lr: 0.4001600640256102\n", + "epoch: 1600, acc: 0.427, loss: 1.032, lr: 0.3847633705271258\n", + "epoch: 1700, acc: 0.440, loss: 1.025, lr: 0.3705075954057058\n", + "epoch: 1800, acc: 0.473, loss: 1.017, lr: 0.35727045373347627\n", + "epoch: 1900, acc: 0.460, loss: 1.008, lr: 0.3449465332873405\n", + "epoch: 2000, acc: 0.463, loss: 1.000, lr: 0.33344448149383127\n", + "epoch: 2100, acc: 0.493, loss: 1.006, lr: 0.32268473701193934\n", + "epoch: 2200, acc: 0.463, loss: 1.014, lr: 0.31259768677711786\n", + "epoch: 2300, acc: 0.480, loss: 1.015, lr: 0.3031221582297666\n", + "epoch: 2400, acc: 0.497, loss: 1.012, lr: 0.29420417769932333\n", + "epoch: 2500, acc: 0.493, loss: 1.010, lr: 0.2857959416976279\n", + "epoch: 2600, acc: 0.503, loss: 1.006, lr: 0.2778549597110308\n", + "epoch: 2700, acc: 0.487, loss: 1.003, lr: 0.2703433360367667\n", + "epoch: 2800, acc: 0.483, loss: 0.998, lr: 0.26322716504343247\n", + "epoch: 2900, acc: 0.483, loss: 0.996, lr: 0.25647601949217746\n", + "epoch: 3000, acc: 0.490, loss: 0.991, lr: 0.25006251562890724\n", + "epoch: 3100, acc: 0.490, loss: 0.988, lr: 0.2439619419370578\n", + "epoch: 3200, acc: 0.490, loss: 0.983, lr: 
0.23815194093831865\n", + "epoch: 3300, acc: 0.490, loss: 0.980, lr: 0.23261223540358225\n", + "epoch: 3400, acc: 0.493, loss: 0.974, lr: 0.22732439190725165\n", + "epoch: 3500, acc: 0.510, loss: 0.969, lr: 0.22227161591464767\n", + "epoch: 3600, acc: 0.517, loss: 0.966, lr: 0.21743857360295715\n", + "epoch: 3700, acc: 0.520, loss: 0.961, lr: 0.21281123643328367\n", + "epoch: 3800, acc: 0.520, loss: 0.957, lr: 0.20837674515524068\n", + "epoch: 3900, acc: 0.520, loss: 0.951, lr: 0.20412329046744235\n", + "epoch: 4000, acc: 0.523, loss: 0.947, lr: 0.2000400080016003\n", + "epoch: 4100, acc: 0.537, loss: 0.942, lr: 0.19611688566385566\n", + "epoch: 4200, acc: 0.543, loss: 0.938, lr: 0.19234468166955185\n", + "epoch: 4300, acc: 0.543, loss: 0.934, lr: 0.18871485185884126\n", + "epoch: 4400, acc: 0.553, loss: 0.929, lr: 0.18521948508983144\n", + "epoch: 4500, acc: 0.553, loss: 0.924, lr: 0.18185124568103292\n", + "epoch: 4600, acc: 0.557, loss: 0.920, lr: 0.1786033220217896\n", + "epoch: 4700, acc: 0.567, loss: 0.916, lr: 0.1754693805930865\n", + "epoch: 4800, acc: 0.573, loss: 0.911, lr: 0.17244352474564578\n", + "epoch: 4900, acc: 0.577, loss: 0.907, lr: 0.16952025767079165\n", + "epoch: 5000, acc: 0.583, loss: 0.903, lr: 0.16669444907484582\n", + "epoch: 5100, acc: 0.580, loss: 0.898, lr: 0.16396130513198884\n", + "epoch: 5200, acc: 0.587, loss: 0.894, lr: 0.16131634134537828\n", + "epoch: 5300, acc: 0.590, loss: 0.890, lr: 0.15875535799333226\n", + "epoch: 5400, acc: 0.593, loss: 0.886, lr: 0.1562744178777934\n", + "epoch: 5500, acc: 0.600, loss: 0.882, lr: 0.15386982612709646\n", + "epoch: 5600, acc: 0.603, loss: 0.877, lr: 0.15153811183512653\n", + "epoch: 5700, acc: 0.617, loss: 0.873, lr: 0.14927601134497687\n", + "epoch: 5800, acc: 0.620, loss: 0.869, lr: 0.14708045300779526\n", + "epoch: 5900, acc: 0.627, loss: 0.865, lr: 0.14494854326714016\n", + "epoch: 6000, acc: 0.637, loss: 0.860, lr: 0.1428775539362766\n", + "epoch: 6100, acc: 0.640, loss: 0.857, lr: 
0.1408649105507818\n", + "epoch: 6200, acc: 0.637, loss: 0.852, lr: 0.13890818169190167\n", + "epoch: 6300, acc: 0.640, loss: 0.848, lr: 0.13700506918755992\n", + "epoch: 6400, acc: 0.647, loss: 0.844, lr: 0.13515339910798757\n", + "epoch: 6500, acc: 0.653, loss: 0.840, lr: 0.13335111348179757\n", + "epoch: 6600, acc: 0.643, loss: 0.836, lr: 0.13159626266614027\n", + "epoch: 6700, acc: 0.647, loss: 0.833, lr: 0.12988699831146902\n", + "epoch: 6800, acc: 0.653, loss: 0.830, lr: 0.12822156686754713\n", + "epoch: 6900, acc: 0.657, loss: 0.827, lr: 0.126598303582732\n", + "epoch: 7000, acc: 0.657, loss: 0.823, lr: 0.12501562695336915\n", + "epoch: 7100, acc: 0.647, loss: 0.821, lr: 0.12347203358439313\n", + "epoch: 7200, acc: 0.647, loss: 0.818, lr: 0.12196609342602757\n", + "epoch: 7300, acc: 0.660, loss: 0.816, lr: 0.12049644535486204\n", + "epoch: 7400, acc: 0.663, loss: 0.813, lr: 0.11906179307060363\n", + "epoch: 7500, acc: 0.677, loss: 0.810, lr: 0.11766090128250381\n", + "epoch: 7600, acc: 0.670, loss: 0.806, lr: 0.11629259216187929\n", + "epoch: 7700, acc: 0.670, loss: 0.804, lr: 0.11495574203931487\n", + "epoch: 7800, acc: 0.670, loss: 0.803, lr: 0.11364927832708263\n", + "epoch: 7900, acc: 0.693, loss: 0.804, lr: 0.11237217664906168\n", + "epoch: 8000, acc: 0.687, loss: 0.803, lr: 0.11112345816201799\n", + "epoch: 8100, acc: 0.687, loss: 0.801, lr: 0.10990218705352237\n", + "epoch: 8200, acc: 0.680, loss: 0.799, lr: 0.10870746820306555\n", + "epoch: 8300, acc: 0.677, loss: 0.796, lr: 0.1075384449940854\n", + "epoch: 8400, acc: 0.670, loss: 0.793, lr: 0.10639429726566654\n", + "epoch: 8500, acc: 0.667, loss: 0.791, lr: 0.10527423939362038\n", + "epoch: 8600, acc: 0.667, loss: 0.789, lr: 0.10417751849150952\n", + "epoch: 8700, acc: 0.670, loss: 0.787, lr: 0.10310341272296113\n", + "epoch: 8800, acc: 0.667, loss: 0.785, lr: 0.1020512297173181\n", + "epoch: 8900, acc: 0.667, loss: 0.783, lr: 0.10102030508132134\n", + "epoch: 9000, acc: 0.670, loss: 0.781, lr: 
0.1000100010001\n", + "epoch: 9100, acc: 0.670, loss: 0.778, lr: 0.09901970492127933\n", + "epoch: 9200, acc: 0.663, loss: 0.776, lr: 0.09804882831650162\n", + "epoch: 9300, acc: 0.663, loss: 0.774, lr: 0.09709680551509856\n", + "epoch: 9400, acc: 0.673, loss: 0.772, lr: 0.09616309260505818\n", + "epoch: 9500, acc: 0.680, loss: 0.769, lr: 0.09524716639679968\n", + "epoch: 9600, acc: 0.680, loss: 0.768, lr: 0.09434852344560807\n", + "epoch: 9700, acc: 0.680, loss: 0.766, lr: 0.09346667912889055\n", + "epoch: 9800, acc: 0.683, loss: 0.765, lr: 0.09260116677470137\n", + "epoch: 9900, acc: 0.680, loss: 0.763, lr: 0.09175153683824203\n", + "epoch: 10000, acc: 0.680, loss: 0.761, lr: 0.09091735612328393\n" ] } ], @@ -1325,7 +425,8 @@ " if not epoch % 100:\n", " print(f'epoch: {epoch}, ' +\n", " f'acc: {accuracy:.3f}, ' +\n", - " f'loss: {loss:.3f}')\n", + " f'loss: {loss:.3f}, ' +\n", + " f'lr: {optimizer.current_learning_rate}')\n", " \n", " # Backward pass\n", " loss_activation.backward(loss_activation.output, y)\n", @@ -1352,7 +453,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -1408,114 +509,114 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "epoch: 0, acc: 0.317, loss: 1.099\n", - "epoch: 100, acc: 0.437, loss: 1.033\n", - "epoch: 200, acc: 0.483, loss: 0.913\n", - "epoch: 300, acc: 0.750, loss: 0.622\n", - "epoch: 400, acc: 0.810, loss: 0.471\n", - "epoch: 500, acc: 0.850, loss: 0.403\n", - "epoch: 600, acc: 0.873, loss: 0.380\n", - "epoch: 700, acc: 0.843, loss: 0.378\n", - "epoch: 800, acc: 0.897, loss: 0.289\n", - "epoch: 900, acc: 0.910, loss: 0.264\n", - "epoch: 1000, acc: 0.883, loss: 0.293\n", - "epoch: 1100, acc: 0.923, loss: 0.236\n", - "epoch: 1200, acc: 0.927, loss: 0.227\n", - "epoch: 1300, acc: 0.903, loss: 0.238\n", - "epoch: 1400, acc: 0.927, loss: 0.215\n", 
- "epoch: 1500, acc: 0.927, loss: 0.204\n", - "epoch: 1600, acc: 0.930, loss: 0.198\n", - "epoch: 1700, acc: 0.927, loss: 0.195\n", - "epoch: 1800, acc: 0.927, loss: 0.191\n", - "epoch: 1900, acc: 0.930, loss: 0.190\n", - "epoch: 2000, acc: 0.930, loss: 0.185\n", - "epoch: 2100, acc: 0.930, loss: 0.183\n", - "epoch: 2200, acc: 0.930, loss: 0.181\n", - "epoch: 2300, acc: 0.930, loss: 0.179\n", - "epoch: 2400, acc: 0.933, loss: 0.177\n", - "epoch: 2500, acc: 0.930, loss: 0.175\n", - "epoch: 2600, acc: 0.930, loss: 0.174\n", - "epoch: 2700, acc: 0.930, loss: 0.172\n", - "epoch: 2800, acc: 0.930, loss: 0.171\n", - "epoch: 2900, acc: 0.930, loss: 0.170\n", - "epoch: 3000, acc: 0.933, loss: 0.168\n", - "epoch: 3100, acc: 0.930, loss: 0.167\n", - "epoch: 3200, acc: 0.933, loss: 0.167\n", - "epoch: 3300, acc: 0.937, loss: 0.166\n", - "epoch: 3400, acc: 0.937, loss: 0.165\n", - "epoch: 3500, acc: 0.940, loss: 0.164\n", - "epoch: 3600, acc: 0.940, loss: 0.163\n", - "epoch: 3700, acc: 0.940, loss: 0.162\n", - "epoch: 3800, acc: 0.943, loss: 0.161\n", - "epoch: 3900, acc: 0.940, loss: 0.161\n", - "epoch: 4000, acc: 0.940, loss: 0.160\n", - "epoch: 4100, acc: 0.940, loss: 0.159\n", - "epoch: 4200, acc: 0.940, loss: 0.159\n", - "epoch: 4300, acc: 0.940, loss: 0.158\n", - "epoch: 4400, acc: 0.940, loss: 0.158\n", - "epoch: 4500, acc: 0.940, loss: 0.157\n", - "epoch: 4600, acc: 0.940, loss: 0.157\n", - "epoch: 4700, acc: 0.940, loss: 0.156\n", - "epoch: 4800, acc: 0.943, loss: 0.156\n", - "epoch: 4900, acc: 0.940, loss: 0.155\n", - "epoch: 5000, acc: 0.940, loss: 0.155\n", - "epoch: 5100, acc: 0.940, loss: 0.154\n", - "epoch: 5200, acc: 0.940, loss: 0.154\n", - "epoch: 5300, acc: 0.940, loss: 0.154\n", - "epoch: 5400, acc: 0.940, loss: 0.153\n", - "epoch: 5500, acc: 0.940, loss: 0.153\n", - "epoch: 5600, acc: 0.940, loss: 0.153\n", - "epoch: 5700, acc: 0.940, loss: 0.152\n", - "epoch: 5800, acc: 0.940, loss: 0.152\n", - "epoch: 5900, acc: 0.940, loss: 0.152\n", - "epoch: 6000, 
acc: 0.940, loss: 0.151\n", - "epoch: 6100, acc: 0.940, loss: 0.151\n", - "epoch: 6200, acc: 0.940, loss: 0.151\n", - "epoch: 6300, acc: 0.940, loss: 0.151\n", - "epoch: 6400, acc: 0.940, loss: 0.150\n", - "epoch: 6500, acc: 0.940, loss: 0.150\n", - "epoch: 6600, acc: 0.940, loss: 0.150\n", - "epoch: 6700, acc: 0.940, loss: 0.150\n", - "epoch: 6800, acc: 0.940, loss: 0.150\n", - "epoch: 6900, acc: 0.940, loss: 0.149\n", - "epoch: 7000, acc: 0.940, loss: 0.149\n", - "epoch: 7100, acc: 0.940, loss: 0.149\n", - "epoch: 7200, acc: 0.940, loss: 0.149\n", - "epoch: 7300, acc: 0.940, loss: 0.149\n", - "epoch: 7400, acc: 0.940, loss: 0.148\n", - "epoch: 7500, acc: 0.940, loss: 0.148\n", - "epoch: 7600, acc: 0.943, loss: 0.148\n", - "epoch: 7700, acc: 0.943, loss: 0.148\n", - "epoch: 7800, acc: 0.940, loss: 0.147\n", - "epoch: 7900, acc: 0.943, loss: 0.147\n", - "epoch: 8000, acc: 0.943, loss: 0.147\n", - "epoch: 8100, acc: 0.943, loss: 0.147\n", - "epoch: 8200, acc: 0.940, loss: 0.147\n", - "epoch: 8300, acc: 0.943, loss: 0.147\n", - "epoch: 8400, acc: 0.943, loss: 0.146\n", - "epoch: 8500, acc: 0.943, loss: 0.146\n", - "epoch: 8600, acc: 0.943, loss: 0.146\n", - "epoch: 8700, acc: 0.943, loss: 0.146\n", - "epoch: 8800, acc: 0.943, loss: 0.146\n", - "epoch: 8900, acc: 0.943, loss: 0.146\n", - "epoch: 9000, acc: 0.943, loss: 0.146\n", - "epoch: 9100, acc: 0.943, loss: 0.145\n", - "epoch: 9200, acc: 0.943, loss: 0.145\n", - "epoch: 9300, acc: 0.943, loss: 0.145\n", - "epoch: 9400, acc: 0.943, loss: 0.145\n", - "epoch: 9500, acc: 0.943, loss: 0.145\n", - "epoch: 9600, acc: 0.943, loss: 0.145\n", - "epoch: 9700, acc: 0.943, loss: 0.145\n", - "epoch: 9800, acc: 0.943, loss: 0.145\n", - "epoch: 9900, acc: 0.943, loss: 0.145\n", - "epoch: 10000, acc: 0.943, loss: 0.144\n" + "epoch: 0, acc: 0.343, loss: 1.099, lr: 1.0\n", + "epoch: 100, acc: 0.443, loss: 1.047, lr: 0.9099181073703367\n", + "epoch: 200, acc: 0.397, loss: 1.013, lr: 0.8340283569641367\n", + "epoch: 300, acc: 0.540, 
loss: 0.905, lr: 0.7698229407236336\n", + "epoch: 400, acc: 0.553, loss: 0.844, lr: 0.7147962830593281\n", + "epoch: 500, acc: 0.560, loss: 0.813, lr: 0.66711140760507\n", + "epoch: 600, acc: 0.567, loss: 0.792, lr: 0.6253908692933083\n", + "epoch: 700, acc: 0.570, loss: 0.776, lr: 0.5885815185403178\n", + "epoch: 800, acc: 0.570, loss: 0.768, lr: 0.5558643690939411\n", + "epoch: 900, acc: 0.573, loss: 0.762, lr: 0.526592943654555\n", + "epoch: 1000, acc: 0.573, loss: 0.757, lr: 0.5002501250625312\n", + "epoch: 1100, acc: 0.573, loss: 0.753, lr: 0.4764173415912339\n", + "epoch: 1200, acc: 0.573, loss: 0.751, lr: 0.45475216007276037\n", + "epoch: 1300, acc: 0.577, loss: 0.749, lr: 0.43497172683775553\n", + "epoch: 1400, acc: 0.573, loss: 0.747, lr: 0.4168403501458941\n", + "epoch: 1500, acc: 0.577, loss: 0.745, lr: 0.4001600640256102\n", + "epoch: 1600, acc: 0.577, loss: 0.744, lr: 0.3847633705271258\n", + "epoch: 1700, acc: 0.580, loss: 0.743, lr: 0.3705075954057058\n", + "epoch: 1800, acc: 0.583, loss: 0.742, lr: 0.35727045373347627\n", + "epoch: 1900, acc: 0.583, loss: 0.741, lr: 0.3449465332873405\n", + "epoch: 2000, acc: 0.580, loss: 0.740, lr: 0.33344448149383127\n", + "epoch: 2100, acc: 0.583, loss: 0.740, lr: 0.32268473701193934\n", + "epoch: 2200, acc: 0.583, loss: 0.739, lr: 0.31259768677711786\n", + "epoch: 2300, acc: 0.583, loss: 0.738, lr: 0.3031221582297666\n", + "epoch: 2400, acc: 0.583, loss: 0.738, lr: 0.29420417769932333\n", + "epoch: 2500, acc: 0.583, loss: 0.737, lr: 0.2857959416976279\n", + "epoch: 2600, acc: 0.583, loss: 0.737, lr: 0.2778549597110308\n", + "epoch: 2700, acc: 0.583, loss: 0.737, lr: 0.2703433360367667\n", + "epoch: 2800, acc: 0.583, loss: 0.736, lr: 0.26322716504343247\n", + "epoch: 2900, acc: 0.583, loss: 0.736, lr: 0.25647601949217746\n", + "epoch: 3000, acc: 0.583, loss: 0.736, lr: 0.25006251562890724\n", + "epoch: 3100, acc: 0.583, loss: 0.735, lr: 0.2439619419370578\n", + "epoch: 3200, acc: 0.583, loss: 0.735, lr: 
0.23815194093831865\n", + "epoch: 3300, acc: 0.583, loss: 0.735, lr: 0.23261223540358225\n", + "epoch: 3400, acc: 0.583, loss: 0.735, lr: 0.22732439190725165\n", + "epoch: 3500, acc: 0.583, loss: 0.734, lr: 0.22227161591464767\n", + "epoch: 3600, acc: 0.583, loss: 0.734, lr: 0.21743857360295715\n", + "epoch: 3700, acc: 0.580, loss: 0.734, lr: 0.21281123643328367\n", + "epoch: 3800, acc: 0.583, loss: 0.734, lr: 0.20837674515524068\n", + "epoch: 3900, acc: 0.583, loss: 0.734, lr: 0.20412329046744235\n", + "epoch: 4000, acc: 0.583, loss: 0.734, lr: 0.2000400080016003\n", + "epoch: 4100, acc: 0.583, loss: 0.733, lr: 0.19611688566385566\n", + "epoch: 4200, acc: 0.583, loss: 0.733, lr: 0.19234468166955185\n", + "epoch: 4300, acc: 0.580, loss: 0.733, lr: 0.18871485185884126\n", + "epoch: 4400, acc: 0.580, loss: 0.733, lr: 0.18521948508983144\n", + "epoch: 4500, acc: 0.583, loss: 0.733, lr: 0.18185124568103292\n", + "epoch: 4600, acc: 0.583, loss: 0.733, lr: 0.1786033220217896\n", + "epoch: 4700, acc: 0.583, loss: 0.733, lr: 0.1754693805930865\n", + "epoch: 4800, acc: 0.583, loss: 0.732, lr: 0.17244352474564578\n", + "epoch: 4900, acc: 0.580, loss: 0.732, lr: 0.16952025767079165\n", + "epoch: 5000, acc: 0.580, loss: 0.732, lr: 0.16669444907484582\n", + "epoch: 5100, acc: 0.580, loss: 0.732, lr: 0.16396130513198884\n", + "epoch: 5200, acc: 0.583, loss: 0.732, lr: 0.16131634134537828\n", + "epoch: 5300, acc: 0.583, loss: 0.731, lr: 0.15875535799333226\n", + "epoch: 5400, acc: 0.583, loss: 0.731, lr: 0.1562744178777934\n", + "epoch: 5500, acc: 0.583, loss: 0.731, lr: 0.15386982612709646\n", + "epoch: 5600, acc: 0.583, loss: 0.731, lr: 0.15153811183512653\n", + "epoch: 5700, acc: 0.583, loss: 0.731, lr: 0.14927601134497687\n", + "epoch: 5800, acc: 0.583, loss: 0.731, lr: 0.14708045300779526\n", + "epoch: 5900, acc: 0.580, loss: 0.730, lr: 0.14494854326714016\n", + "epoch: 6000, acc: 0.580, loss: 0.730, lr: 0.1428775539362766\n", + "epoch: 6100, acc: 0.583, loss: 0.730, lr: 
0.1408649105507818\n", + "epoch: 6200, acc: 0.580, loss: 0.730, lr: 0.13890818169190167\n", + "epoch: 6300, acc: 0.583, loss: 0.730, lr: 0.13700506918755992\n", + "epoch: 6400, acc: 0.580, loss: 0.730, lr: 0.13515339910798757\n", + "epoch: 6500, acc: 0.583, loss: 0.729, lr: 0.13335111348179757\n", + "epoch: 6600, acc: 0.580, loss: 0.729, lr: 0.13159626266614027\n", + "epoch: 6700, acc: 0.583, loss: 0.729, lr: 0.12988699831146902\n", + "epoch: 6800, acc: 0.583, loss: 0.729, lr: 0.12822156686754713\n", + "epoch: 6900, acc: 0.580, loss: 0.729, lr: 0.126598303582732\n", + "epoch: 7000, acc: 0.580, loss: 0.729, lr: 0.12501562695336915\n", + "epoch: 7100, acc: 0.583, loss: 0.728, lr: 0.12347203358439313\n", + "epoch: 7200, acc: 0.580, loss: 0.728, lr: 0.12196609342602757\n", + "epoch: 7300, acc: 0.580, loss: 0.728, lr: 0.12049644535486204\n", + "epoch: 7400, acc: 0.583, loss: 0.728, lr: 0.11906179307060363\n", + "epoch: 7500, acc: 0.580, loss: 0.728, lr: 0.11766090128250381\n", + "epoch: 7600, acc: 0.580, loss: 0.728, lr: 0.11629259216187929\n", + "epoch: 7700, acc: 0.580, loss: 0.727, lr: 0.11495574203931487\n", + "epoch: 7800, acc: 0.580, loss: 0.727, lr: 0.11364927832708263\n", + "epoch: 7900, acc: 0.583, loss: 0.727, lr: 0.11237217664906168\n", + "epoch: 8000, acc: 0.580, loss: 0.727, lr: 0.11112345816201799\n", + "epoch: 8100, acc: 0.580, loss: 0.727, lr: 0.10990218705352237\n", + "epoch: 8200, acc: 0.580, loss: 0.727, lr: 0.10870746820306555\n", + "epoch: 8300, acc: 0.580, loss: 0.727, lr: 0.1075384449940854\n", + "epoch: 8400, acc: 0.580, loss: 0.726, lr: 0.10639429726566654\n", + "epoch: 8500, acc: 0.583, loss: 0.726, lr: 0.10527423939362038\n", + "epoch: 8600, acc: 0.580, loss: 0.726, lr: 0.10417751849150952\n", + "epoch: 8700, acc: 0.580, loss: 0.726, lr: 0.10310341272296113\n", + "epoch: 8800, acc: 0.580, loss: 0.726, lr: 0.1020512297173181\n", + "epoch: 8900, acc: 0.580, loss: 0.726, lr: 0.10102030508132134\n", + "epoch: 9000, acc: 0.580, loss: 0.726, lr: 
0.1000100010001\n", + "epoch: 9100, acc: 0.580, loss: 0.726, lr: 0.09901970492127933\n", + "epoch: 9200, acc: 0.580, loss: 0.725, lr: 0.09804882831650162\n", + "epoch: 9300, acc: 0.580, loss: 0.725, lr: 0.09709680551509856\n", + "epoch: 9400, acc: 0.580, loss: 0.725, lr: 0.09616309260505818\n", + "epoch: 9500, acc: 0.580, loss: 0.725, lr: 0.09524716639679968\n", + "epoch: 9600, acc: 0.583, loss: 0.725, lr: 0.09434852344560807\n", + "epoch: 9700, acc: 0.583, loss: 0.725, lr: 0.09346667912889055\n", + "epoch: 9800, acc: 0.587, loss: 0.725, lr: 0.09260116677470137\n", + "epoch: 9900, acc: 0.587, loss: 0.725, lr: 0.09175153683824203\n", + "epoch: 10000, acc: 0.587, loss: 0.724, lr: 0.09091735612328393\n" ] } ], @@ -1566,7 +667,8 @@ " if not epoch % 100:\n", " print(f'epoch: {epoch}, ' +\n", " f'acc: {accuracy:.3f}, ' +\n", - " f'loss: {loss:.3f}')\n", + " f'loss: {loss:.3f}, ' +\n", + " f'lr: {optimizer.current_learning_rate}')\n", " \n", " # Backward pass\n", " loss_activation.backward(loss_activation.output, y)\n", diff --git a/lecture23_24/notes_23.pdf b/lecture23_24/notes_23.pdf index 49ef51c..d0846b4 100644 Binary files a/lecture23_24/notes_23.pdf and b/lecture23_24/notes_23.pdf differ diff --git a/lecture23_24/notes_23.py b/lecture23_24/notes_23.py index 6c39e3e..e37ecb1 100644 --- a/lecture23_24/notes_23.py +++ b/lecture23_24/notes_23.py @@ -221,7 +221,7 @@ class Optimizer_SGD(): self.iterations += 1 # %% [markdown] -# # Testing the Learning Rate Decay +# ## Testing the Learning Rate Decay # %% # Create dataset @@ -270,7 +270,8 @@ for epoch in range(10001): if not epoch % 100: print(f'epoch: {epoch}, ' + f'acc: {accuracy:.3f}, ' + - f'loss: {loss:.3f}') + f'loss: {loss:.3f}, ' + + f'lr: {optimizer.current_learning_rate}') # Backward pass loss_activation.backward(loss_activation.output, y) @@ -335,7 +336,7 @@ class Optimizer_SGD(): self.iterations += 1 # %% [markdown] -# # Testing the Gradient Optimizer with Momentum +# ## Testing the Gradient Optimizer with 
Momentum # %% # Create dataset @@ -384,7 +385,8 @@ for epoch in range(10001): if not epoch % 100: print(f'epoch: {epoch}, ' + f'acc: {accuracy:.3f}, ' + - f'loss: {loss:.3f}') + f'loss: {loss:.3f}, ' + + f'lr: {optimizer.current_learning_rate}') # Backward pass loss_activation.backward(loss_activation.output, y) diff --git a/lecture25_27/notes_25.ipynb b/lecture25_27/notes_25.ipynb index cefffce..d73463f 100644 --- a/lecture25_27/notes_25.ipynb +++ b/lecture25_27/notes_25.ipynb @@ -254,12 +254,14 @@ "metadata": {}, "source": [ "# AdaGrad Optimizer\n", - "Different weights should have different learning rates. If one weight affects the loss much more strongly than the other, then consider using smaller learning rates with it. We can do this by maintaining a \"cache\" of the last gradients and normalizing based on this." + "Different weights should have different learning rates. If one weight affects the loss much more strongly than the other, then consider using smaller learning rates with it. We can do this by maintaining a \"cache\" of the last gradients and normalizing based on this.\n", + "\n", + "A downside is that as the cache keeps accumulating, some neurons will have such a small learning rate that the neuron basically becomes fixed." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -269,7 +271,7 @@ " self.current_learning_rate = self.initial_learning_rate\n", " self.decay = decay\n", " self.iterations = 0\n", - " self.epsilon = 0\n", + " self.epsilon = epsilon\n", "\n", " def pre_update_params(self):\n", " if self.decay:\n", @@ -299,114 +301,114 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "epoch: 0, acc: 0.360, loss: 1.099\n", - "epoch: 100, acc: 0.477, loss: 0.992\n", - "epoch: 200, acc: 0.560, loss: 0.934\n", - "epoch: 300, acc: 0.600, loss: 0.881\n", - "epoch: 400, acc: 0.637, loss: 0.826\n", - "epoch: 500, acc: 0.617, loss: 0.793\n", - "epoch: 600, acc: 0.643, loss: 0.748\n", - "epoch: 700, acc: 0.657, loss: 0.726\n", - "epoch: 800, acc: 0.663, loss: 0.698\n", - "epoch: 900, acc: 0.680, loss: 0.681\n", - "epoch: 1000, acc: 0.683, loss: 0.664\n", - "epoch: 1100, acc: 0.693, loss: 0.653\n", - "epoch: 1200, acc: 0.693, loss: 0.642\n", - "epoch: 1300, acc: 0.707, loss: 0.632\n", - "epoch: 1400, acc: 0.720, loss: 0.625\n", - "epoch: 1500, acc: 0.717, loss: 0.615\n", - "epoch: 1600, acc: 0.723, loss: 0.608\n", - "epoch: 1700, acc: 0.730, loss: 0.600\n", - "epoch: 1800, acc: 0.740, loss: 0.588\n", - "epoch: 1900, acc: 0.740, loss: 0.581\n", - "epoch: 2000, acc: 0.743, loss: 0.576\n", - "epoch: 2100, acc: 0.740, loss: 0.570\n", - "epoch: 2200, acc: 0.750, loss: 0.563\n", - "epoch: 2300, acc: 0.747, loss: 0.562\n", - "epoch: 2400, acc: 0.757, loss: 0.557\n", - "epoch: 2500, acc: 0.760, loss: 0.554\n", - "epoch: 2600, acc: 0.770, loss: 0.550\n", - "epoch: 2700, acc: 0.777, loss: 0.546\n", - "epoch: 2800, acc: 0.777, loss: 0.543\n", - "epoch: 2900, acc: 0.780, loss: 0.540\n", - "epoch: 3000, acc: 0.777, loss: 0.537\n", - "epoch: 3100, acc: 0.773, loss: 0.533\n", - "epoch: 3200, acc: 0.783, loss: 0.531\n", - 
"epoch: 3300, acc: 0.777, loss: 0.528\n", - "epoch: 3400, acc: 0.777, loss: 0.526\n", - "epoch: 3500, acc: 0.773, loss: 0.523\n", - "epoch: 3600, acc: 0.780, loss: 0.522\n", - "epoch: 3700, acc: 0.773, loss: 0.520\n", - "epoch: 3800, acc: 0.777, loss: 0.518\n", - "epoch: 3900, acc: 0.780, loss: 0.516\n", - "epoch: 4000, acc: 0.780, loss: 0.515\n", - "epoch: 4100, acc: 0.773, loss: 0.513\n", - "epoch: 4200, acc: 0.770, loss: 0.511\n", - "epoch: 4300, acc: 0.773, loss: 0.510\n", - "epoch: 4400, acc: 0.777, loss: 0.509\n", - "epoch: 4500, acc: 0.777, loss: 0.508\n", - "epoch: 4600, acc: 0.780, loss: 0.507\n", - "epoch: 4700, acc: 0.777, loss: 0.506\n", - "epoch: 4800, acc: 0.777, loss: 0.504\n", - "epoch: 4900, acc: 0.783, loss: 0.503\n", - "epoch: 5000, acc: 0.783, loss: 0.503\n", - "epoch: 5100, acc: 0.787, loss: 0.502\n", - "epoch: 5200, acc: 0.790, loss: 0.501\n", - "epoch: 5300, acc: 0.787, loss: 0.500\n", - "epoch: 5400, acc: 0.787, loss: 0.498\n", - "epoch: 5500, acc: 0.783, loss: 0.497\n", - "epoch: 5600, acc: 0.787, loss: 0.496\n", - "epoch: 5700, acc: 0.780, loss: 0.495\n", - "epoch: 5800, acc: 0.780, loss: 0.495\n", - "epoch: 5900, acc: 0.783, loss: 0.494\n", - "epoch: 6000, acc: 0.790, loss: 0.494\n", - "epoch: 6100, acc: 0.777, loss: 0.493\n", - "epoch: 6200, acc: 0.783, loss: 0.492\n", - "epoch: 6300, acc: 0.783, loss: 0.491\n", - "epoch: 6400, acc: 0.790, loss: 0.490\n", - "epoch: 6500, acc: 0.780, loss: 0.488\n", - "epoch: 6600, acc: 0.780, loss: 0.487\n", - "epoch: 6700, acc: 0.777, loss: 0.485\n", - "epoch: 6800, acc: 0.780, loss: 0.483\n", - "epoch: 6900, acc: 0.783, loss: 0.482\n", - "epoch: 7000, acc: 0.790, loss: 0.480\n", - "epoch: 7100, acc: 0.790, loss: 0.479\n", - "epoch: 7200, acc: 0.797, loss: 0.477\n", - "epoch: 7300, acc: 0.803, loss: 0.476\n", - "epoch: 7400, acc: 0.813, loss: 0.475\n", - "epoch: 7500, acc: 0.813, loss: 0.474\n", - "epoch: 7600, acc: 0.813, loss: 0.472\n", - "epoch: 7700, acc: 0.813, loss: 0.471\n", - "epoch: 7800, acc: 
0.810, loss: 0.470\n", - "epoch: 7900, acc: 0.810, loss: 0.469\n", - "epoch: 8000, acc: 0.810, loss: 0.468\n", - "epoch: 8100, acc: 0.810, loss: 0.465\n", - "epoch: 8200, acc: 0.807, loss: 0.463\n", - "epoch: 8300, acc: 0.803, loss: 0.462\n", - "epoch: 8400, acc: 0.803, loss: 0.461\n", - "epoch: 8500, acc: 0.807, loss: 0.459\n", - "epoch: 8600, acc: 0.810, loss: 0.458\n", - "epoch: 8700, acc: 0.813, loss: 0.458\n", - "epoch: 8800, acc: 0.810, loss: 0.456\n", - "epoch: 8900, acc: 0.810, loss: 0.455\n", - "epoch: 9000, acc: 0.813, loss: 0.452\n", - "epoch: 9100, acc: 0.813, loss: 0.450\n", - "epoch: 9200, acc: 0.817, loss: 0.448\n", - "epoch: 9300, acc: 0.810, loss: 0.447\n", - "epoch: 9400, acc: 0.810, loss: 0.446\n", - "epoch: 9500, acc: 0.813, loss: 0.444\n", - "epoch: 9600, acc: 0.813, loss: 0.441\n", - "epoch: 9700, acc: 0.817, loss: 0.440\n", - "epoch: 9800, acc: 0.817, loss: 0.438\n", - "epoch: 9900, acc: 0.813, loss: 0.436\n", - "epoch: 10000, acc: 0.813, loss: 0.435\n" + "epoch: 0, acc: 0.353, loss: 1.099, lr: 1.0\n", + "epoch: 100, acc: 0.497, loss: 0.986, lr: 0.9901970492127933\n", + "epoch: 200, acc: 0.527, loss: 0.936, lr: 0.9804882831650161\n", + "epoch: 300, acc: 0.513, loss: 0.918, lr: 0.9709680551509855\n", + "epoch: 400, acc: 0.580, loss: 0.904, lr: 0.9616309260505818\n", + "epoch: 500, acc: 0.550, loss: 0.910, lr: 0.9524716639679969\n", + "epoch: 600, acc: 0.563, loss: 0.860, lr: 0.9434852344560807\n", + "epoch: 700, acc: 0.600, loss: 0.838, lr: 0.9346667912889054\n", + "epoch: 800, acc: 0.603, loss: 0.815, lr: 0.9260116677470135\n", + "epoch: 900, acc: 0.643, loss: 0.787, lr: 0.9175153683824203\n", + "epoch: 1000, acc: 0.617, loss: 0.810, lr: 0.9091735612328392\n", + "epoch: 1100, acc: 0.637, loss: 0.750, lr: 0.9009820704567978\n", + "epoch: 1200, acc: 0.677, loss: 0.744, lr: 0.892936869363336\n", + "epoch: 1300, acc: 0.670, loss: 0.722, lr: 0.8850340738118416\n", + "epoch: 1400, acc: 0.693, loss: 0.704, lr: 0.8772699359592947\n", + "epoch: 1500, 
acc: 0.680, loss: 0.708, lr: 0.8696408383337683\n", + "epoch: 1600, acc: 0.697, loss: 0.664, lr: 0.8621432882145013\n", + "epoch: 1700, acc: 0.650, loss: 0.699, lr: 0.8547739123001966\n", + "epoch: 1800, acc: 0.707, loss: 0.646, lr: 0.8475294516484448\n", + "epoch: 1900, acc: 0.693, loss: 0.633, lr: 0.8404067568703253\n", + "epoch: 2000, acc: 0.713, loss: 0.620, lr: 0.8334027835652972\n", + "epoch: 2100, acc: 0.710, loss: 0.610, lr: 0.8265145879824779\n", + "epoch: 2200, acc: 0.710, loss: 0.599, lr: 0.8197393228953193\n", + "epoch: 2300, acc: 0.713, loss: 0.588, lr: 0.8130742336775347\n", + "epoch: 2400, acc: 0.733, loss: 0.576, lr: 0.8065166545689169\n", + "epoch: 2500, acc: 0.763, loss: 0.579, lr: 0.8000640051204096\n", + "epoch: 2600, acc: 0.800, loss: 0.558, lr: 0.7937137868084768\n", + "epoch: 2700, acc: 0.803, loss: 0.551, lr: 0.7874635798094338\n", + "epoch: 2800, acc: 0.800, loss: 0.545, lr: 0.7813110399249941\n", + "epoch: 2900, acc: 0.807, loss: 0.541, lr: 0.7752538956508256\n", + "epoch: 3000, acc: 0.757, loss: 0.538, lr: 0.7692899453804138\n", + "epoch: 3100, acc: 0.757, loss: 0.532, lr: 0.7634170547370028\n", + "epoch: 3200, acc: 0.740, loss: 0.532, lr: 0.7576331540268202\n", + "epoch: 3300, acc: 0.763, loss: 0.514, lr: 0.7519362358072035\n", + "epoch: 3400, acc: 0.770, loss: 0.512, lr: 0.7463243525636241\n", + "epoch: 3500, acc: 0.757, loss: 0.507, lr: 0.7407956144899621\n", + "epoch: 3600, acc: 0.780, loss: 0.496, lr: 0.735348187366718\n", + "epoch: 3700, acc: 0.787, loss: 0.497, lr: 0.7299802905321557\n", + "epoch: 3800, acc: 0.767, loss: 0.491, lr: 0.7246901949416624\n", + "epoch: 3900, acc: 0.783, loss: 0.483, lr: 0.7194762213108857\n", + "epoch: 4000, acc: 0.790, loss: 0.484, lr: 0.7143367383384527\n", + "epoch: 4100, acc: 0.783, loss: 0.477, lr: 0.7092701610043266\n", + "epoch: 4200, acc: 0.780, loss: 0.487, lr: 0.7042749489400663\n", + "epoch: 4300, acc: 0.787, loss: 0.467, lr: 0.6993496048674733\n", + "epoch: 4400, acc: 0.793, loss: 0.465, lr: 
0.6944926731022988\n", + "epoch: 4500, acc: 0.787, loss: 0.460, lr: 0.6897027381198704\n", + "epoch: 4600, acc: 0.777, loss: 0.477, lr: 0.6849784231796698\n", + "epoch: 4700, acc: 0.783, loss: 0.454, lr: 0.6803183890060548\n", + "epoch: 4800, acc: 0.800, loss: 0.448, lr: 0.6757213325224677\n", + "epoch: 4900, acc: 0.800, loss: 0.441, lr: 0.6711859856366199\n", + "epoch: 5000, acc: 0.797, loss: 0.437, lr: 0.6667111140742716\n", + "epoch: 5100, acc: 0.800, loss: 0.433, lr: 0.6622955162593549\n", + "epoch: 5200, acc: 0.813, loss: 0.429, lr: 0.6579380222383051\n", + "epoch: 5300, acc: 0.810, loss: 0.426, lr: 0.6536374926465782\n", + "epoch: 5400, acc: 0.813, loss: 0.423, lr: 0.649392817715436\n", + "epoch: 5500, acc: 0.823, loss: 0.420, lr: 0.6452029163171817\n", + "epoch: 5600, acc: 0.820, loss: 0.416, lr: 0.6410667350471184\n", + "epoch: 5700, acc: 0.820, loss: 0.413, lr: 0.6369832473405949\n", + "epoch: 5800, acc: 0.820, loss: 0.410, lr: 0.6329514526235838\n", + "epoch: 5900, acc: 0.820, loss: 0.407, lr: 0.6289703754953141\n", + "epoch: 6000, acc: 0.823, loss: 0.404, lr: 0.6250390649415589\n", + "epoch: 6100, acc: 0.823, loss: 0.401, lr: 0.6211565935772407\n", + "epoch: 6200, acc: 0.827, loss: 0.398, lr: 0.6173220569170937\n", + "epoch: 6300, acc: 0.833, loss: 0.395, lr: 0.6135345726731701\n", + "epoch: 6400, acc: 0.830, loss: 0.390, lr: 0.6097932800780536\n", + "epoch: 6500, acc: 0.827, loss: 0.387, lr: 0.6060973392326807\n", + "epoch: 6600, acc: 0.827, loss: 0.384, lr: 0.6024459304777396\n", + "epoch: 6700, acc: 0.830, loss: 0.381, lr: 0.5988382537876519\n", + "epoch: 6800, acc: 0.830, loss: 0.378, lr: 0.5952735281862016\n", + "epoch: 6900, acc: 0.830, loss: 0.375, lr: 0.5917509911829102\n", + "epoch: 7000, acc: 0.833, loss: 0.373, lr: 0.5882698982293076\n", + "epoch: 7100, acc: 0.837, loss: 0.370, lr: 0.5848295221942803\n", + "epoch: 7200, acc: 0.833, loss: 0.368, lr: 0.5814291528577243\n", + "epoch: 7300, acc: 0.833, loss: 0.366, lr: 0.5780680964217585\n", + 
"epoch: 7400, acc: 0.837, loss: 0.364, lr: 0.5747456750387954\n", + "epoch: 7500, acc: 0.833, loss: 0.362, lr: 0.5714612263557918\n", + "epoch: 7600, acc: 0.833, loss: 0.360, lr: 0.5682141030740383\n", + "epoch: 7700, acc: 0.837, loss: 0.358, lr: 0.5650036725238714\n", + "epoch: 7800, acc: 0.840, loss: 0.357, lr: 0.5618293162537221\n", + "epoch: 7900, acc: 0.840, loss: 0.355, lr: 0.5586904296329404\n", + "epoch: 8000, acc: 0.840, loss: 0.353, lr: 0.5555864214678593\n", + "epoch: 8100, acc: 0.843, loss: 0.351, lr: 0.5525167136305873\n", + "epoch: 8200, acc: 0.843, loss: 0.350, lr: 0.5494807407000385\n", + "epoch: 8300, acc: 0.843, loss: 0.348, lr: 0.5464779496147331\n", + "epoch: 8400, acc: 0.843, loss: 0.346, lr: 0.5435077993369205\n", + "epoch: 8500, acc: 0.847, loss: 0.345, lr: 0.5405697605275961\n", + "epoch: 8600, acc: 0.847, loss: 0.343, lr: 0.5376633152320017\n", + "epoch: 8700, acc: 0.850, loss: 0.342, lr: 0.5347879565752179\n", + "epoch: 8800, acc: 0.847, loss: 0.340, lr: 0.5319431884674717\n", + "epoch: 8900, acc: 0.847, loss: 0.338, lr: 0.5291285253188\n", + "epoch: 9000, acc: 0.847, loss: 0.337, lr: 0.5263434917627243\n", + "epoch: 9100, acc: 0.850, loss: 0.336, lr: 0.5235876223886068\n", + "epoch: 9200, acc: 0.850, loss: 0.335, lr: 0.5208604614823689\n", + "epoch: 9300, acc: 0.847, loss: 0.334, lr: 0.5181615627752734\n", + "epoch: 9400, acc: 0.847, loss: 0.333, lr: 0.5154904892004742\n", + "epoch: 9500, acc: 0.850, loss: 0.331, lr: 0.5128468126570593\n", + "epoch: 9600, acc: 0.850, loss: 0.330, lr: 0.5102301137813153\n", + "epoch: 9700, acc: 0.850, loss: 0.329, lr: 0.5076399817249606\n", + "epoch: 9800, acc: 0.850, loss: 0.328, lr: 0.5050760139400979\n", + "epoch: 9900, acc: 0.850, loss: 0.326, lr: 0.5025378159706518\n", + "epoch: 10000, acc: 0.850, loss: 0.325, lr: 0.5000250012500626\n" ] } ], @@ -457,7 +459,239 @@ " if not epoch % 100:\n", " print(f'epoch: {epoch}, ' +\n", " f'acc: {accuracy:.3f}, ' +\n", - " f'loss: {loss:.3f}')\n", + " f'loss: 
{loss:.3f}, ' +\n", + " f'lr: {optimizer.current_learning_rate}')\n", + " \n", + " # Backward pass\n", + " loss_activation.backward(loss_activation.output, y)\n", + " dense2.backward(loss_activation.dinputs)\n", + " activation1.backward(dense2.dinputs)\n", + " dense1.backward(activation1.dinputs)\n", + " \n", + " # Update weights and biases\n", + " optimizer.pre_update_params()\n", + " optimizer.update_params(dense1)\n", + " optimizer.update_params(dense2)\n", + " optimizer.post_update_params()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RMSProp Optimizer\n", + "Root Mean Square Propagation optimizer. It is similar to AdaGrad in that you apply different learning rates to different weights. However, the cache is updated as a moving average weighted by `rho`, so the way you change the learning rate focuses more on the past cache than on the current gradient." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "class Optimizer_RMSProp():\n", + " def __init__(self, learning_rate=1e-3, decay=0.0, epsilon=1e-7, rho=0.9):\n", + " self.initial_learning_rate = learning_rate\n", + " self.current_learning_rate = self.initial_learning_rate\n", + " self.decay = decay\n", + " self.iterations = 0\n", + " self.epsilon = epsilon\n", + " self.rho = rho\n", + "\n", + " def pre_update_params(self):\n", + " if self.decay:\n", + " self.current_learning_rate = self.initial_learning_rate / (1 + self.decay * self.iterations)\n", + "\n", + " def update_params(self, layer):\n", + " if not hasattr(layer, 'weight_cache'):\n", + " layer.weight_cache = np.zeros_like(layer.weights)\n", + " layer.bias_cache = np.zeros_like(layer.biases)\n", + "\n", + " layer.weight_cache = self.rho * layer.weight_cache + (1 - self.rho) * layer.dweights**2\n", + " layer.bias_cache = self.rho * layer.bias_cache + (1 - self.rho) * layer.dbiases**2\n", + "\n", + " layer.weights += -self.current_learning_rate * layer.dweights / (np.sqrt(layer.weight_cache) + self.epsilon)\n", + " 
layer.biases += -self.current_learning_rate * layer.dbiases / (np.sqrt(layer.bias_cache) + self.epsilon)\n", + "\n", + " def post_update_params(self):\n", + " self.iterations += 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing the RMSProp Optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch: 0, acc: 0.413, loss: 1.099, lr: 0.02\n", + "epoch: 100, acc: 0.467, loss: 0.980, lr: 0.01998021958261321\n", + "epoch: 200, acc: 0.480, loss: 0.904, lr: 0.019960279044701046\n", + "epoch: 300, acc: 0.507, loss: 0.864, lr: 0.019940378268975763\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch: 400, acc: 0.490, loss: 0.861, lr: 0.01992051713662487\n", + "epoch: 500, acc: 0.510, loss: 0.843, lr: 0.01990069552930875\n", + "epoch: 600, acc: 0.613, loss: 0.779, lr: 0.019880913329158343\n", + "epoch: 700, acc: 0.633, loss: 0.741, lr: 0.019861170418772778\n", + "epoch: 800, acc: 0.577, loss: 0.722, lr: 0.019841466681217078\n", + "epoch: 900, acc: 0.627, loss: 0.698, lr: 0.01982180200001982\n", + "epoch: 1000, acc: 0.630, loss: 0.675, lr: 0.019802176259170884\n", + "epoch: 1100, acc: 0.657, loss: 0.661, lr: 0.01978258934311912\n", + "epoch: 1200, acc: 0.623, loss: 0.654, lr: 0.01976304113677013\n", + "epoch: 1300, acc: 0.663, loss: 0.640, lr: 0.019743531525483964\n", + "epoch: 1400, acc: 0.627, loss: 0.672, lr: 0.01972406039507293\n", + "epoch: 1500, acc: 0.663, loss: 0.618, lr: 0.019704627631799327\n", + "epoch: 1600, acc: 0.693, loss: 0.607, lr: 0.019685233122373254\n", + "epoch: 1700, acc: 0.657, loss: 0.658, lr: 0.019665876753950384\n", + "epoch: 1800, acc: 0.730, loss: 0.587, lr: 0.019646558414129805\n", + "epoch: 1900, acc: 0.690, loss: 0.623, lr: 0.019627277990951823\n", + "epoch: 2000, acc: 0.730, loss: 0.573, lr: 0.019608035372895814\n", + "epoch: 2100, acc: 0.743, loss: 0.576, lr: 
0.019588830448878047\n", + "epoch: 2200, acc: 0.740, loss: 0.560, lr: 0.019569663108249594\n", + "epoch: 2300, acc: 0.710, loss: 0.567, lr: 0.019550533240794143\n", + "epoch: 2400, acc: 0.740, loss: 0.548, lr: 0.019531440736725945\n", + "epoch: 2500, acc: 0.687, loss: 0.576, lr: 0.019512385486687673\n", + "epoch: 2600, acc: 0.710, loss: 0.550, lr: 0.01949336738174836\n", + "epoch: 2700, acc: 0.707, loss: 0.573, lr: 0.019474386313401298\n", + "epoch: 2800, acc: 0.760, loss: 0.523, lr: 0.019455442173562\n", + "epoch: 2900, acc: 0.770, loss: 0.525, lr: 0.019436534854566128\n", + "epoch: 3000, acc: 0.787, loss: 0.512, lr: 0.01941766424916747\n", + "epoch: 3100, acc: 0.763, loss: 0.517, lr: 0.019398830250535893\n", + "epoch: 3200, acc: 0.750, loss: 0.551, lr: 0.019380032752255354\n", + "epoch: 3300, acc: 0.803, loss: 0.498, lr: 0.01936127164832186\n", + "epoch: 3400, acc: 0.780, loss: 0.493, lr: 0.01934254683314152\n", + "epoch: 3500, acc: 0.780, loss: 0.514, lr: 0.019323858201528515\n", + "epoch: 3600, acc: 0.793, loss: 0.522, lr: 0.019305205648703173\n", + "epoch: 3700, acc: 0.780, loss: 0.516, lr: 0.01928658907028997\n", + "epoch: 3800, acc: 0.790, loss: 0.508, lr: 0.01926800836231563\n", + "epoch: 3900, acc: 0.750, loss: 0.523, lr: 0.019249463421207133\n", + "epoch: 4000, acc: 0.763, loss: 0.516, lr: 0.019230954143789846\n", + "epoch: 4100, acc: 0.787, loss: 0.499, lr: 0.019212480427285565\n", + "epoch: 4200, acc: 0.770, loss: 0.512, lr: 0.019194042169310647\n", + "epoch: 4300, acc: 0.790, loss: 0.496, lr: 0.019175639267874092\n", + "epoch: 4400, acc: 0.777, loss: 0.501, lr: 0.019157271621375684\n", + "epoch: 4500, acc: 0.810, loss: 0.479, lr: 0.0191389391286041\n", + "epoch: 4600, acc: 0.783, loss: 0.482, lr: 0.019120641688735073\n", + "epoch: 4700, acc: 0.797, loss: 0.463, lr: 0.019102379201329525\n", + "epoch: 4800, acc: 0.787, loss: 0.469, lr: 0.01908415156633174\n", + "epoch: 4900, acc: 0.777, loss: 0.497, lr: 0.01906595868406753\n", + "epoch: 5000, acc: 0.810, 
loss: 0.445, lr: 0.01904780045524243\n", + "epoch: 5100, acc: 0.793, loss: 0.450, lr: 0.019029676780939874\n", + "epoch: 5200, acc: 0.810, loss: 0.438, lr: 0.019011587562619416\n", + "epoch: 5300, acc: 0.797, loss: 0.452, lr: 0.01899353270211493\n", + "epoch: 5400, acc: 0.800, loss: 0.453, lr: 0.018975512101632844\n", + "epoch: 5500, acc: 0.820, loss: 0.419, lr: 0.018957525663750367\n", + "epoch: 5600, acc: 0.817, loss: 0.433, lr: 0.018939573291413745\n", + "epoch: 5700, acc: 0.763, loss: 0.533, lr: 0.018921654887936498\n", + "epoch: 5800, acc: 0.820, loss: 0.411, lr: 0.018903770356997703\n", + "epoch: 5900, acc: 0.817, loss: 0.424, lr: 0.01888591960264025\n", + "epoch: 6000, acc: 0.810, loss: 0.419, lr: 0.018868102529269144\n", + "epoch: 6100, acc: 0.827, loss: 0.403, lr: 0.018850319041649778\n", + "epoch: 6200, acc: 0.820, loss: 0.413, lr: 0.018832569044906263\n", + "epoch: 6300, acc: 0.820, loss: 0.414, lr: 0.018814852444519702\n", + "epoch: 6400, acc: 0.810, loss: 0.410, lr: 0.018797169146326564\n", + "epoch: 6500, acc: 0.830, loss: 0.389, lr: 0.018779519056516963\n", + "epoch: 6600, acc: 0.823, loss: 0.407, lr: 0.018761902081633038\n", + "epoch: 6700, acc: 0.823, loss: 0.403, lr: 0.018744318128567278\n", + "epoch: 6800, acc: 0.820, loss: 0.405, lr: 0.018726767104560903\n", + "epoch: 6900, acc: 0.823, loss: 0.386, lr: 0.018709248917202218\n", + "epoch: 7000, acc: 0.827, loss: 0.393, lr: 0.018691763474424996\n", + "epoch: 7100, acc: 0.800, loss: 0.428, lr: 0.018674310684506857\n", + "epoch: 7200, acc: 0.833, loss: 0.378, lr: 0.018656890456067686\n", + "epoch: 7300, acc: 0.820, loss: 0.382, lr: 0.01863950269806802\n", + "epoch: 7400, acc: 0.810, loss: 0.438, lr: 0.018622147319807447\n", + "epoch: 7500, acc: 0.833, loss: 0.379, lr: 0.018604824230923078\n", + "epoch: 7600, acc: 0.837, loss: 0.351, lr: 0.01858753334138793\n", + "epoch: 7700, acc: 0.827, loss: 0.397, lr: 0.018570274561509396\n", + "epoch: 7800, acc: 0.837, loss: 0.369, lr: 0.018553047801927663\n", + 
"epoch: 7900, acc: 0.787, loss: 0.458, lr: 0.018535852973614212\n", + "epoch: 8000, acc: 0.840, loss: 0.369, lr: 0.01851868998787026\n", + "epoch: 8100, acc: 0.863, loss: 0.336, lr: 0.018501558756325222\n", + "epoch: 8200, acc: 0.840, loss: 0.366, lr: 0.01848445919093522\n", + "epoch: 8300, acc: 0.833, loss: 0.369, lr: 0.018467391203981567\n", + "epoch: 8400, acc: 0.837, loss: 0.355, lr: 0.01845035470806926\n", + "epoch: 8500, acc: 0.840, loss: 0.357, lr: 0.018433349616125496\n", + "epoch: 8600, acc: 0.857, loss: 0.329, lr: 0.018416375841398172\n", + "epoch: 8700, acc: 0.843, loss: 0.352, lr: 0.018399433297454436\n", + "epoch: 8800, acc: 0.843, loss: 0.356, lr: 0.01838252189817921\n", + "epoch: 8900, acc: 0.797, loss: 0.447, lr: 0.018365641557773718\n", + "epoch: 9000, acc: 0.847, loss: 0.354, lr: 0.018348792190754044\n", + "epoch: 9100, acc: 0.840, loss: 0.349, lr: 0.0183319737119497\n", + "epoch: 9200, acc: 0.853, loss: 0.337, lr: 0.018315186036502167\n", + "epoch: 9300, acc: 0.847, loss: 0.350, lr: 0.018298429079863496\n", + "epoch: 9400, acc: 0.823, loss: 0.383, lr: 0.018281702757794862\n", + "epoch: 9500, acc: 0.853, loss: 0.338, lr: 0.018265006986365174\n", + "epoch: 9600, acc: 0.780, loss: 0.522, lr: 0.018248341681949654\n", + "epoch: 9700, acc: 0.840, loss: 0.335, lr: 0.018231706761228456\n", + "epoch: 9800, acc: 0.853, loss: 0.334, lr: 0.01821510214118526\n", + "epoch: 9900, acc: 0.723, loss: 0.696, lr: 0.018198527739105907\n", + "epoch: 10000, acc: 0.860, loss: 0.314, lr: 0.018181983472577025\n" + ] + } + ], + "source": [ + "# Create dataset\n", + "X, y = spiral_data(samples=100, classes=3)\n", + "\n", + "# Create Dense layer with 2 input features and 64 output values\n", + "dense1 = Layer_Dense(2, 64)\n", + "\n", + "# Create ReLU activation (to be used with Dense layer)\n", + "activation1 = Activation_ReLU()\n", + "\n", + "# Create second Dense layer with 64 input features (as we take output\n", + "# of previous layer here) and 3 output values (output 
values)\n", + "dense2 = Layer_Dense(64, 3)\n", + "\n", + "# Create Softmax classifier's combined loss and activation\n", + "loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()\n", + "\n", + "# Create optimizer\n", + "optimizer = Optimizer_RMSProp(learning_rate=0.02, decay=1e-5, rho=0.999)\n", + "\n", + "# Train in loop\n", + "for epoch in range(10001):\n", + " # Perform a forward pass of our training data through this layer\n", + " dense1.forward(X)\n", + " \n", + " # Perform a forward pass through activation function\n", + " # takes the output of first dense layer here\n", + " activation1.forward(dense1.output)\n", + " \n", + " # Perform a forward pass through second Dense layer\n", + " # takes outputs of activation function of first layer as inputs\n", + " dense2.forward(activation1.output)\n", + " \n", + " # Perform a forward pass through the activation/loss function\n", + " # takes the output of second dense layer here and returns loss\n", + " loss = loss_activation.forward(dense2.output, y)\n", + " \n", + " # Calculate accuracy from output of activation2 and targets\n", + " # calculate values along first axis\n", + " predictions = np.argmax(loss_activation.output, axis=1)\n", + " if len(y.shape) == 2:\n", + " y = np.argmax(y, axis=1)\n", + " accuracy = np.mean(predictions == y)\n", + " \n", + " if not epoch % 100:\n", + " print(f'epoch: {epoch}, ' +\n", + " f'acc: {accuracy:.3f}, ' +\n", + " f'loss: {loss:.3f}, ' +\n", + " f'lr: {optimizer.current_learning_rate}')\n", " \n", " # Backward pass\n", " loss_activation.backward(loss_activation.output, y)\n",