commit f76fa8709d14bb3549ac422aaf32403e9097bcf6 Author: judsonupchurch Date: Sat Feb 1 20:34:19 2025 +0000 Inverse pendulum testing diff --git a/clamped_inverse_time_penalty/1_validation.png b/clamped_inverse_time_penalty/1_validation.png new file mode 100644 index 0000000..ce4653a Binary files /dev/null and b/clamped_inverse_time_penalty/1_validation.png differ diff --git a/clamped_inverse_time_penalty/2_validation.png b/clamped_inverse_time_penalty/2_validation.png new file mode 100644 index 0000000..6f7b968 Binary files /dev/null and b/clamped_inverse_time_penalty/2_validation.png differ diff --git a/clamped_inverse_time_penalty/3_validation.png b/clamped_inverse_time_penalty/3_validation.png new file mode 100644 index 0000000..8e9f4ba Binary files /dev/null and b/clamped_inverse_time_penalty/3_validation.png differ diff --git a/clamped_inverse_time_penalty/4_validation.png b/clamped_inverse_time_penalty/4_validation.png new file mode 100644 index 0000000..c42cd8e Binary files /dev/null and b/clamped_inverse_time_penalty/4_validation.png differ diff --git a/clamped_inverse_time_penalty/5_validation.png b/clamped_inverse_time_penalty/5_validation.png new file mode 100644 index 0000000..cff2adf Binary files /dev/null and b/clamped_inverse_time_penalty/5_validation.png differ diff --git a/clamped_inverse_time_penalty/6_validation.png b/clamped_inverse_time_penalty/6_validation.png new file mode 100644 index 0000000..ce0a639 Binary files /dev/null and b/clamped_inverse_time_penalty/6_validation.png differ diff --git a/clamped_inverse_time_penalty/trainer.py b/clamped_inverse_time_penalty/trainer.py new file mode 100644 index 0000000..dc40681 --- /dev/null +++ b/clamped_inverse_time_penalty/trainer.py @@ -0,0 +1,175 @@ +import torch +import torch.nn as nn +import torch.optim as optim +from torchdiffeq import odeint +import numpy as np +import matplotlib.pyplot as plt + +# ---------------------------------------------------------------- +# 1) 3D Controller: [theta, omega, alpha] -> torque +# ---------------------------------------------------------------- +class PendulumController3D(nn.Module): + def __init__(self): + super().__init__() + self.net = nn.Sequential( + nn.Linear(3, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1) + ) + + def forward(self, x_3d): + """ + x_4d: shape (batch_size, 4) => [theta, cos(theta), omega, alpha]. + Returns shape: (batch_size, 1) => torque. + """ + raw_torque = self.net(x_3d) + clamped_torque = torch.clamp(raw_torque, -250, 250) # Clamp torque within [-250, 250] + return clamped_torque + + +# ---------------------------------------------------------------- +# 2) Define ODE System Using `odeint` +# ---------------------------------------------------------------- +m = 10.0 +g = 9.81 +R = 1.0 + +class PendulumDynamics3D(nn.Module): + """ + Defines the ODE system for [theta, omega, alpha] with torque tracking. + """ + + def __init__(self, controller): + super().__init__() + self.controller = controller + + def forward(self, t, state): + """ + state: (batch_size, 4) => [theta, omega, alpha, tau_prev] + Returns: (batch_size, 4) => [dtheta/dt, domega/dt, dalpha/dt, dtau/dt] + """ + + theta = state[:, 0] + omega = state[:, 1] + alpha = state[:, 2] + tau_prev = state[:, 3] + + + # Create tensor input for controller: [theta, omega, alpha] + input_3d = torch.stack([theta, omega, alpha], dim=1) # shape (batch_size, 3) + + # Compute torque using the controller + tau = self.controller(input_3d).squeeze(-1) # shape (batch_size,) + + # Compute desired alpha + alpha_desired = (g / R) * torch.sin(theta) + tau / (m * R**2) + + # Define ODE system + dtheta = omega + domega = alpha + dalpha = alpha_desired - alpha # Relaxation term + dtau = tau - tau_prev # Keep track of torque evolution + + return torch.stack([dtheta, domega, dalpha, dtau], dim=1) # (batch_size, 4) + +# ---------------------------------------------------------------- +# 3) Loss Function +# ---------------------------------------------------------------- +def loss_fn(state_traj, t_span): + """ + Computes loss based on the trajectory with inverse time weighting (1/t) for theta and omega. + + Args: + state_traj: Tensor of shape (time_steps, batch_size, 4). + t_span: Tensor of time steps (time_steps,). + + Returns: + total_loss, (loss_theta, loss_omega) + """ + theta = state_traj[:, :, 0] # (time_steps, batch_size) + omega = state_traj[:, :, 1] # (time_steps, batch_size) + torque = state_traj[:, :, 3] + + # Inverse time weights w(t) = 1 / t + # Add a small epsilon to avoid division by zero + epsilon = 1e-6 + inverse_time_weights = 1.0 / (t_span + epsilon).unsqueeze(1) # Shape: (time_steps, 1) + linear_time_weights = t_span.unsqueeze(1) + + # Apply inverse time weighting for theta and omega + loss_theta = 1e-1 * torch.mean(inverse_time_weights * theta**2) # Weighted theta loss + loss_omega = 1e-2 * torch.mean(inverse_time_weights * omega**2) # Weighted omega loss + loss_torque = 1e-2 * torch.mean(linear_time_weights * torque**2) + + # Combine the losses + total_loss = loss_theta #+ loss_torque + + return total_loss, (loss_theta, loss_omega, loss_torque) + +# ---------------------------------------------------------------- +# 4) Training Setup +# ---------------------------------------------------------------- +device = torch.device("cpu" if torch.cuda.is_available() else "cpu") + +# Create the controller and pendulum dynamics model +controller = PendulumController3D().to(device) +pendulum_dynamics = PendulumDynamics3D(controller).to(device) + +# Define optimizer +optimizer = optim.Adam(controller.parameters(), lr=1e-1) + +# Initial conditions: [theta, omega, alpha, tau_prev] +initial_conditions = [ + [0.1, 0.0, 0.0, 0.0], # Small perturbation + [-0.5, 0.0, 0.0, 0.0], + [6.28, 6.28, 0.0, 0.0], + [1.57, 0.5, 0.0, 0.0], + [0.0, -6.28, 0.0, 0.0], + [1.57, -6.28, 0.0, 0.0], +] + +# Convert to torch tensor (batch_size, 4) +state_0 = torch.tensor(initial_conditions, dtype=torch.float32, device=device) + +# Time grid +t_span = torch.linspace(0, 10, 1000, device=device) + +num_epochs = 100_000 +print_every = 25 + +# ---------------------------------------------------------------- +# 5) Training Loop +# ---------------------------------------------------------------- +for epoch in range(num_epochs): + optimizer.zero_grad() + + # Integrate the ODE + state_traj = odeint(pendulum_dynamics, state_0, t_span, method='rk4') + # state_traj shape: (time_steps, batch_size, 4) + + # Compute loss + total_loss, (l_theta, l_omega, l_torque) = loss_fn(state_traj, t_span) + + # Check for NaN values + if torch.isnan(total_loss): + print(f"NaN detected at epoch {epoch}. Skipping step.") + optimizer.zero_grad() + continue # Skip this iteration + + # Backprop + total_loss.backward() + optimizer.step() + + + # Print progress + if epoch % print_every == 0: + print(f"Epoch {epoch:4d}/{num_epochs} | " + f"Total: {total_loss.item():.6f} | " + f"Theta: {l_theta.item():.6f} | " + f"Omega: {l_omega.item():.6f} | " + f"Torque: {l_torque.item():.6f}") + + torch.save(controller.state_dict(), "controller_cpu_clamped_inverse_time_punish.pth") + print("Model saved as 'controller_cpu_clamped_inverse_time_punish.pth'.") diff --git a/clamped_inverse_time_penalty/validator.py b/clamped_inverse_time_penalty/validator.py new file mode 100644 index 0000000..327de0c --- /dev/null +++ b/clamped_inverse_time_penalty/validator.py @@ -0,0 +1,116 @@ +import torch +import torch.nn as nn +import numpy as np +from scipy.integrate import solve_ivp +import matplotlib.pyplot as plt + +# ---------------------------------------------------------------- +# 1) 3D Controller: [theta, omega, alpha] -> torque +# ---------------------------------------------------------------- +class PendulumController3D(nn.Module): + def __init__(self): + super(PendulumController3D, self).__init__() + self.net = nn.Sequential( + nn.Linear(3, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1) + ) + + def forward(self, x_3d): + return self.net(x_3d) + +# Load the trained 3D model +controller = PendulumController3D() +controller.load_state_dict(torch.load("controller_cpu_clamped_inverse_time_penalty.pth")) +# controller.load_state_dict(torch.load("controller_cpu_clamped.pth")) +controller.eval() +print("3D Controller loaded.") + +# ---------------------------------------------------------------- +# 2) ODE: State = [theta, omega, alpha]. +# ---------------------------------------------------------------- +m = 10.0 +g = 9.81 +R = 1.0 + +def pendulum_ode_3d(t, state): + theta, omega, alpha = state + + # Evaluate NN -> torque + inp = torch.tensor([[theta, omega, alpha]], dtype=torch.float32) + with torch.no_grad(): + torque = controller(inp).item() + # Clamp torque to ±250 for consistency with training + torque = np.clip(torque, -250, 250) + + alpha_des = (g/R)*np.sin(theta) + torque/(m*(R**2)) + + dtheta = omega + domega = alpha + dalpha = alpha_des - alpha + return [dtheta, domega, dalpha] + +# ---------------------------------------------------------------- +# 3) Validate for multiple initial conditions +# ---------------------------------------------------------------- +initial_conditions_3d = [ + (0.1, 0.0, 0.0), + (0.5, 0.0, 0.0), + (1.0, 0.0, 0.0), + (1.57, 0.5, 0.0), + (0.0, -6.28, 0.0), + (6.28, 6.28, 0.0), +] + +t_span = (0, 20) +t_eval = np.linspace(0, 20, 2000) + +for idx, (theta0, omega0, alpha0) in enumerate(initial_conditions_3d): + sol = solve_ivp( + pendulum_ode_3d, + t_span, + [theta0, omega0, alpha0], + t_eval=t_eval, + method='RK45' + ) + + t = sol.t + theta = sol.y[0] + omega = sol.y[1] + alpha_arr = sol.y[2] + + # Recompute torque over time + torques = [] + alpha_des_vals = [] + for (th, om, al) in zip(theta, omega, alpha_arr): + with torch.no_grad(): + torque_val = controller(torch.tensor([[th, om, al]], dtype=torch.float32)).item() + torque_val = np.clip(torque_val, -250, 250) + torques.append(torque_val) + alpha_des_vals.append( (g/R)*np.sin(th) + torque_val/(m*(R**2)) ) + torques = np.array(torques) + + # Plot + fig, ax1 = plt.subplots(figsize=(10,6)) + ax1.plot(t, theta, label="theta", color="blue") + ax1.plot(t, omega, label="omega", color="green") + ax1.plot(t, alpha_arr, label="alpha", color="red") + # optional: ax1.plot(t, alpha_des_vals, label="alpha_des", color="red", linestyle="--") + + ax1.set_xlabel("time [s]") + ax1.set_ylabel("theta, omega, alpha") + ax1.grid(True) + ax1.legend(loc="upper left") + + ax2 = ax1.twinx() + ax2.plot(t, torques, label="torque", color="purple", linestyle="--") + ax2.set_ylabel("Torque [Nm]") + ax2.legend(loc="upper right") + + plt.title(f"IC (theta={theta0}, omega={omega0}, alpha={alpha0})") + plt.tight_layout() + plt.savefig(f"{idx+1}_validation.png") + plt.close() + print(f"Saved {idx+1}_validation.png")