diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..ca89950
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python Debugger: Current File",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "cwd": "${fileDirname}",
+            "console": "integratedTerminal"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/batch_trainer_cpu.py b/batch_trainer_cpu.py
new file mode 100644
index 0000000..7ee2f3e
--- /dev/null
+++ b/batch_trainer_cpu.py
@@ -0,0 +1,155 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchdiffeq import odeint
+
+# Define the neural network controller
+class PendulumController(nn.Module):
+    def __init__(self):
+        super(PendulumController, self).__init__()
+        self.fc = nn.Sequential(
+            nn.Linear(4, 64),
+            nn.ReLU(),
+            nn.Linear(64, 64),
+            nn.ReLU(),
+            nn.Linear(64, 1)
+        )
+
+    def forward(self, x):
+        return self.fc(x)
+
+# Define pendulum dynamics
+class PendulumDynamics(nn.Module):
+    def __init__(self, m=10, g=9.81, R=1.0):
+        super(PendulumDynamics, self).__init__()
+        self.m = m
+        self.g = g
+        self.R = R
+        self.torque_fn = None  # Set later, before calling odeint
+
+    def set_torque_fn(self, torque_fn):
+        """ Set the neural network-based torque function """
+        self.torque_fn = torque_fn
+
+    def forward(self, t, state):
+        # state: (..., 2) => [theta, omega]. Indexing the last dimension works
+        # both for (batch_size, 2) during integration and for
+        # (time_steps, batch_size, 2) when re-evaluated on a trajectory.
+        theta, omega = state[..., 0], state[..., 1]
+
+        # Ensure torque is correctly shaped
+        torque = self.torque_fn(state)  # Neural network predicts torque
+        torque = torch.clamp(torque.squeeze(-1), -250, 250)  # Limit torque
+
+        dtheta_dt = omega
+        domega_dt = (self.g / self.R) * torch.sin(theta) + torque / (self.m * self.R**2)
+
+        return torch.stack([dtheta_dt, domega_dt], dim=-1)
+
+# Loss function with angle wrapping
+def loss_fn(state, target_theta, torques):
+    theta = state[..., 0]  # Extract theta trajectory, (time_steps, batch_size)
+    omega = state[..., 1]  # Extract omega trajectory
+
+    # Wrap theta to be within [-π, π]
+    theta_wrapped = ((theta + torch.pi) % (2 * torch.pi)) - torch.pi
+
+    alpha = 10.0   # Heavier weight for theta
+    beta = 0.1     # Lighter weight for omega
+    gamma = 0.01   # Regularization weight for motor torque
+    delta = 100.0  # Large penalty for exceeding the torque limit
+
+    # Compute summation of squared differences for wrapped theta
+    loss_theta = alpha * torch.sum((theta_wrapped - target_theta) ** 2)
+
+    # Add penalty for omega (mean remains to avoid scaling issues)
+    loss_omega = beta * torch.mean(omega ** 2)
+
+    # Add penalty for excessive torque usage (sum-based)
+    loss_torque = gamma * torch.sum(torques ** 2)
+
+    # Add penalty for torque exceeding 250. Note: torque_fn already clamps its
+    # output to [-250, 250], so this term stays zero unless that clamp is removed.
+    over_limit_penalty = delta * torch.sum((torques.abs() > 250) * (torques.abs() - 250) ** 2)
+
+    # Combine all losses
+    loss = loss_theta + loss_omega + loss_torque + over_limit_penalty
+    return loss
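+
+# Quick check of the wrapping formula above (illustrative only, not used in
+# training): values a full turn apart map to the same wrapped angle, so the
+# controller is not penalized for reaching the target after a net rotation.
+#
+#   >>> theta = torch.tensor([3.5, -3.5, 2 * torch.pi])
+#   >>> ((theta + torch.pi) % (2 * torch.pi)) - torch.pi
+#   tensor([-2.7832,  2.7832,  0.0000])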
+
+# Define batch of initial conditions
+initial_conditions = [
+    (0.1, 0.0),   # Small angle, zero velocity
+    (0.5, 0.0),   # Medium angle, zero velocity
+    (1.0, 0.0),   # Large angle, zero velocity
+    (1.57, 0.5),
+    (0.0, -6.28),
+]
+
+# Convert initial conditions to tensors
+batch_size = len(initial_conditions)
+theta_0 = torch.tensor([[ic[0]] for ic in initial_conditions], dtype=torch.float32)  # Shape: (batch_size, 1)
+omega_0 = torch.tensor([[ic[1]] for ic in initial_conditions], dtype=torch.float32)  # Shape: (batch_size, 1)
+state_0 = torch.cat([theta_0, omega_0], dim=1)  # Shape: (batch_size, 2)
+
+# Simulation parameters
+T_initial = torch.zeros((batch_size, 1), dtype=torch.float32)  # Initial applied torque, all zeros; shape (batch_size, 1)
+t_span = torch.linspace(0, 10, 200)  # Simulate for 10 seconds
+target_theta = torch.zeros(batch_size, dtype=torch.float32)  # Upright position; broadcasts over time
+
+# Define the controller and optimizer
+controller = PendulumController()
+optimizer = optim.Adam(controller.parameters(), lr=0.01)
+pendulum = PendulumDynamics()
+
+# Training loop
+num_epochs = 10_000
+losses = []
+
+for epoch in range(num_epochs):
+    optimizer.zero_grad()
+
+    # Torque function based on the neural network. Indexes the last dimension
+    # so it works both inside the ODE solve and on the full trajectory afterwards.
+    def torque_fn(state):
+        theta = state[..., 0:1]
+        omega = state[..., 1:2]
+
+        # Initial-torque feature, matching T_initial (all zeros), broadcast to
+        # the shape of theta
+        T_init = torch.zeros_like(theta)
+
+        # Angular acceleration from gravity plus the initial torque, used as an
+        # extra input feature
+        theta_ddot = (pendulum.g / pendulum.R) * torch.sin(theta) + T_init / (pendulum.m * pendulum.R**2)
+
+        inputs = torch.cat([theta, omega, theta_ddot, T_init], dim=-1)  # Shape: (..., 4)
+
+        # Pass through controller (neural network) and apply torque limit
+        torque = controller(inputs)              # Predicted torque
+        torque = torch.clamp(torque, -250, 250)  # Limit torque
+
+        return torque
+
+    # Set the torque function in the pendulum class
+    pendulum.set_torque_fn(torque_fn)
+
+    # Solve the forward dynamics for the entire batch at once
+    state_traj = odeint(pendulum, state_0, t_span, method='rk4')  # (time_steps, batch_size, 2)
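+
+    # Memory note: backprop through odeint stores every RK4 stage, so memory
+    # grows with len(t_span). torchdiffeq also provides odeint_adjoint (same
+    # call signature) with O(1) memory in the step count. Using it here is a
+    # sketch only: the adjoint pass collects gradients from the parameters
+    # registered on `pendulum`, so the controller would first have to be
+    # attached as a submodule rather than reached through the closure:
+    #
+    #   from torchdiffeq import odeint_adjoint
+    #   pendulum.controller = controller  # register so its parameters are visible
+    #   state_traj = odeint_adjoint(pendulum, state_0, t_span, method='rk4')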
+
+    # Compute torques along the trajectory
+    torques = torque_fn(state_traj)  # Shape: (time_steps, batch_size, 1)
+
+    # Compute the loss over all initial conditions
+    loss = loss_fn(state_traj, target_theta, torques)
+
+    # Backpropagation and optimization
+    loss.backward()
+    optimizer.step()
+    losses.append(loss.item())
+
+    # Print loss every 50 epochs
+    if epoch % 50 == 0:
+        print(f"Epoch {epoch}/{num_epochs}, Loss: {loss.item()}")
+
+# Save the trained model
+torch.save(controller.state_dict(), "controller_batch_training.pth")
+print("Trained model saved as 'controller_batch_training.pth'.")
\ No newline at end of file
diff --git a/clamped_quadratic_time_penalty/1_validation.png b/clamped_quadratic_time_penalty/1_validation.png
new file mode 100644
index 0000000..dcebcb9
Binary files /dev/null and b/clamped_quadratic_time_penalty/1_validation.png differ
diff --git a/clamped_quadratic_time_penalty/2_validation.png b/clamped_quadratic_time_penalty/2_validation.png
new file mode 100644
index 0000000..ae7f877
Binary files /dev/null and b/clamped_quadratic_time_penalty/2_validation.png differ
diff --git a/clamped_quadratic_time_penalty/3_validation.png b/clamped_quadratic_time_penalty/3_validation.png
new file mode 100644
index 0000000..49b8dfd
Binary files /dev/null and b/clamped_quadratic_time_penalty/3_validation.png differ
diff --git a/clamped_quadratic_time_penalty/4_validation.png b/clamped_quadratic_time_penalty/4_validation.png
new file mode 100644
index 0000000..688f4d8
Binary files /dev/null and b/clamped_quadratic_time_penalty/4_validation.png differ
diff --git a/clamped_quadratic_time_penalty/5_validation.png b/clamped_quadratic_time_penalty/5_validation.png
new file mode 100644
index 0000000..581ac45
Binary files /dev/null and b/clamped_quadratic_time_penalty/5_validation.png differ
diff --git a/clamped_quadratic_time_penalty/6_validation.png b/clamped_quadratic_time_penalty/6_validation.png
new file mode 100644
index 0000000..d83c4cf
Binary files /dev/null and b/clamped_quadratic_time_penalty/6_validation.png differ
diff --git a/clamped_quadratic_time_penalty/trainer.py b/clamped_quadratic_time_penalty/trainer.py
new file mode 100644
index 0000000..4a11f3d
--- /dev/null
+++ b/clamped_quadratic_time_penalty/trainer.py
@@ -0,0 +1,180 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchdiffeq import odeint
+import numpy as np
+import matplotlib.pyplot as plt
+
+# ----------------------------------------------------------------
+# 1) 3D Controller: [theta, omega, alpha] -> torque
+# ----------------------------------------------------------------
+class PendulumController3D(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(3, 64),
+            nn.ReLU(),
+            nn.Linear(64, 64),
+            nn.ReLU(),
+            nn.Linear(64, 1)
+        )
+
+    def forward(self, x_3d):
+        """
+        x_3d: shape (batch_size, 3) => [theta, omega, alpha].
+        Returns shape: (batch_size, 1) => torque.
+        """
+        raw_torque = self.net(x_3d)
+        clamped_torque = torch.clamp(raw_torque, -250, 250)  # Clamp torque within [-250, 250]
+        return clamped_torque
+
+
+# ----------------------------------------------------------------
+# 2) Define ODE System Using `odeint`
+# ----------------------------------------------------------------
+m = 10.0
+g = 9.81
+R = 1.0
+
+class PendulumDynamics3D(nn.Module):
+    """
+    Defines the ODE system for [theta, omega, alpha] with torque tracking.
+    """
+
+    def __init__(self, controller):
+        super().__init__()
+        self.controller = controller
+
+    def forward(self, t, state):
+        """
+        state: (batch_size, 4) => [theta, omega, alpha, tau_prev]
+        Returns: (batch_size, 4) => [dtheta/dt, domega/dt, dalpha/dt, dtau/dt]
+        """
+        theta = state[:, 0]
+        omega = state[:, 1]
+        alpha = state[:, 2]
+        tau_prev = state[:, 3]
+
+        # Create tensor input for controller: [theta, omega, alpha]
+        input_3d = torch.stack([theta, omega, alpha], dim=1)  # shape (batch_size, 3)
+
+        # Compute torque using the controller
+        tau = self.controller(input_3d).squeeze(-1)  # shape (batch_size,)
+
+        # Compute desired alpha
+        alpha_desired = (g / R) * torch.sin(theta) + tau / (m * R**2)
+
+        # Define ODE system
+        dtheta = omega
+        domega = alpha
+        dalpha = alpha_desired - alpha  # Relaxation term
+        dtau = tau - tau_prev  # Keep track of torque evolution
+
+        return torch.stack([dtheta, domega, dalpha, dtau], dim=1)  # (batch_size, 4)
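+
+# Note on the two extra states: alpha is not set to alpha_desired directly;
+# dalpha/dt = alpha_desired - alpha relaxes it toward the physical
+# acceleration with a unit time constant, which keeps the controller input
+# smooth and differentiable. Likewise dtau/dt = tau - tau_prev makes tau_prev
+# a low-pass-filtered copy of the commanded torque, and it is this filtered
+# value that the loss below reads back out of the state.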
+
+# ----------------------------------------------------------------
+# 3) Loss Function
+# ----------------------------------------------------------------
+def loss_fn(state_traj, t_span):
+    """
+    Computes loss based on the trajectory: a quadratically time-weighted
+    (cos(theta) - 1)^2 penalty, so deviations late in the rollout cost more.
+
+    Args:
+        state_traj: Tensor of shape (time_steps, batch_size, 4)
+        t_span: Tensor of time steps (time_steps,)
+
+    Returns:
+        total_loss, (loss_theta, loss_omega, loss_torque, loss_final_theta)
+    """
+    theta = state_traj[:, :, 0]  # (time_steps, batch_size)
+    omega = state_traj[:, :, 1]
+    torque = state_traj[:, :, 3]  # tau_prev is stored in state
+
+    # Quadratic weight factor lambda * t**2
+    lambda_factor = 0.5  # Increase for stronger late-time punishment
+    time_weights = (lambda_factor * t_span**2).unsqueeze(1)  # Shape: (time_steps, 1)
+
+    # Apply increasing penalty over time
+    loss_theta = 1e2 * torch.mean(time_weights * (torch.cos(theta) - 1)**2)
+    #loss_theta = 1e-1 * torch.mean(time_weights * theta**2)
+    loss_omega = 1e-1 * torch.mean(omega**2)
+    loss_torque = 1e-5 * torch.mean(torque**2)
+
+    # Extract the final theta value from the trajectory
+    final_theta = state_traj[-1, :, 0]  # (batch_size,)
+
+    # Compute the loss as the squared error from the target theta
+    loss_final_theta = torch.mean(final_theta ** 2)  # Mean squared error
+
+    total_loss = loss_theta  #+ loss_omega + loss_torque
+    return total_loss, (loss_theta, loss_omega, loss_torque, loss_final_theta)
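+
+# Scale check for the weights above (arithmetic only): (cos(theta) - 1)^2 is
+# 2*pi-periodic, so theta = 0 and theta = 2*pi (the [6.28, 6.28, ...] initial
+# condition) both count as on-target and cost nothing, while theta = pi costs
+# the maximum (cos(pi) - 1)^2 = 4. The time weight 0.5 * t^2 runs from 0 at
+# t = 0 to 50 at t = 10 s, so late deviations dominate the loss.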
Skipping step.") + optimizer.zero_grad() + continue # Skip this iteration + + # Backprop + total_loss.backward() + #torch.nn.utils.clip_grad_norm_(controller.parameters(), max_norm=1.0) # Fix NaNs + optimizer.step() + + + # Print progress + if epoch % print_every == 0: + print(f"Epoch {epoch:4d}/{num_epochs} | " + f"Total: {total_loss.item():.6f} | " + f"Theta: {l_theta.item():.6f} | " + f"Omega: {l_omega.item():.6f} | " + f"Torque: {l_torque.item():.6f} | " + f"Final Theta: {l_final_theta.item():.6f}") + + torch.save(controller.state_dict(), "controller_cpu_clamped_quadratic_time_punish.pth") + print("Model saved as 'controller_cpu_clamped_quadratic_time_punish.pth'.") diff --git a/clamped_quadratic_time_penalty/validator.py b/clamped_quadratic_time_penalty/validator.py new file mode 100644 index 0000000..7be459d --- /dev/null +++ b/clamped_quadratic_time_penalty/validator.py @@ -0,0 +1,116 @@ +import torch +import torch.nn as nn +import numpy as np +from scipy.integrate import solve_ivp +import matplotlib.pyplot as plt + +# ---------------------------------------------------------------- +# 1) 3D Controller: [theta, omega, alpha] -> torque +# ---------------------------------------------------------------- +class PendulumController3D(nn.Module): + def __init__(self): + super(PendulumController3D, self).__init__() + self.net = nn.Sequential( + nn.Linear(3, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1) + ) + + def forward(self, x_3d): + return self.net(x_3d) + +# Load the trained 3D model +controller = PendulumController3D() +controller.load_state_dict(torch.load("controller_cpu_clamped_quadratic_time_penalty.pth")) +# controller.load_state_dict(torch.load("controller_cpu_clamped.pth")) +controller.eval() +print("3D Controller loaded.") + +# ---------------------------------------------------------------- +# 2) ODE: State = [theta, omega, alpha]. 
+
+# ----------------------------------------------------------------
+# 2) ODE: State = [theta, omega, alpha].
+# ----------------------------------------------------------------
+m = 10.0
+g = 9.81
+R = 1.0
+
+def pendulum_ode_3d(t, state):
+    theta, omega, alpha = state
+
+    # Evaluate NN -> torque
+    inp = torch.tensor([[theta, omega, alpha]], dtype=torch.float32)
+    with torch.no_grad():
+        torque = controller(inp).item()
+    # Clamp torque to ±250 for consistency with training
+    torque = np.clip(torque, -250, 250)
+
+    alpha_des = (g/R)*np.sin(theta) + torque/(m*(R**2))
+
+    dtheta = omega
+    domega = alpha
+    dalpha = alpha_des - alpha
+    return [dtheta, domega, dalpha]
+
+# ----------------------------------------------------------------
+# 3) Validate for multiple initial conditions
+# ----------------------------------------------------------------
+initial_conditions_3d = [
+    (0.1, 0.0, 0.0),
+    (0.5, 0.0, 0.0),
+    (1.0, 0.0, 0.0),
+    (1.57, 0.5, 0.0),
+    (0.0, -6.28, 0.0),
+    (6.28, 6.28, 0.0),
+]
+
+t_span = (0, 20)
+t_eval = np.linspace(0, 20, 2000)
+
+for idx, (theta0, omega0, alpha0) in enumerate(initial_conditions_3d):
+    sol = solve_ivp(
+        pendulum_ode_3d,
+        t_span,
+        [theta0, omega0, alpha0],
+        t_eval=t_eval,
+        method='RK45'
+    )
+
+    t = sol.t
+    theta = sol.y[0]
+    omega = sol.y[1]
+    alpha_arr = sol.y[2]
+
+    # Recompute torque over time
+    torques = []
+    alpha_des_vals = []
+    for (th, om, al) in zip(theta, omega, alpha_arr):
+        with torch.no_grad():
+            torque_val = controller(torch.tensor([[th, om, al]], dtype=torch.float32)).item()
+        torque_val = np.clip(torque_val, -250, 250)
+        torques.append(torque_val)
+        alpha_des_vals.append((g/R)*np.sin(th) + torque_val/(m*(R**2)))
+    torques = np.array(torques)
+
+    # Plot
+    fig, ax1 = plt.subplots(figsize=(10,6))
+    ax1.plot(t, theta, label="theta", color="blue")
+    ax1.plot(t, omega, label="omega", color="green")
+    ax1.plot(t, alpha_arr, label="alpha", color="red")
+    # optional: ax1.plot(t, alpha_des_vals, label="alpha_des", color="red", linestyle="--")
+
+    ax1.set_xlabel("time [s]")
+    ax1.set_ylabel("theta, omega, alpha")
+    ax1.grid(True)
+    ax1.legend(loc="upper left")
+
+    ax2 = ax1.twinx()
+    ax2.plot(t, torques, label="torque", color="purple", linestyle="--")
+    ax2.set_ylabel("Torque [Nm]")
+    ax2.legend(loc="upper right")
+
+    plt.title(f"IC (theta={theta0}, omega={omega0}, alpha={alpha0})")
+    plt.tight_layout()
+    plt.savefig(f"{idx+1}_validation.png")
+    plt.close()
+    print(f"Saved {idx+1}_validation.png")
diff --git a/trainer.py b/trainer.py
new file mode 100644
index 0000000..dc40681
--- /dev/null
+++ b/trainer.py
@@ -0,0 +1,175 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchdiffeq import odeint
+import numpy as np
+import matplotlib.pyplot as plt
+
+# ----------------------------------------------------------------
+# 1) 3D Controller: [theta, omega, alpha] -> torque
+# ----------------------------------------------------------------
+class PendulumController3D(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(3, 64),
+            nn.ReLU(),
+            nn.Linear(64, 64),
+            nn.ReLU(),
+            nn.Linear(64, 1)
+        )
+
+    def forward(self, x_3d):
+        """
+        x_3d: shape (batch_size, 3) => [theta, omega, alpha].
+        Returns shape: (batch_size, 1) => torque.
+        """
+        raw_torque = self.net(x_3d)
+        clamped_torque = torch.clamp(raw_torque, -250, 250)  # Clamp torque within [-250, 250]
+        return clamped_torque
+ """ + raw_torque = self.net(x_3d) + clamped_torque = torch.clamp(raw_torque, -250, 250) # Clamp torque within [-250, 250] + return clamped_torque + + +# ---------------------------------------------------------------- +# 2) Define ODE System Using `odeint` +# ---------------------------------------------------------------- +m = 10.0 +g = 9.81 +R = 1.0 + +class PendulumDynamics3D(nn.Module): + """ + Defines the ODE system for [theta, omega, alpha] with torque tracking. + """ + + def __init__(self, controller): + super().__init__() + self.controller = controller + + def forward(self, t, state): + """ + state: (batch_size, 4) => [theta, omega, alpha, tau_prev] + Returns: (batch_size, 4) => [dtheta/dt, domega/dt, dalpha/dt, dtau/dt] + """ + + theta = state[:, 0] + omega = state[:, 1] + alpha = state[:, 2] + tau_prev = state[:, 3] + + + # Create tensor input for controller: [theta, omega, alpha] + input_3d = torch.stack([theta, omega, alpha], dim=1) # shape (batch_size, 3) + + # Compute torque using the controller + tau = self.controller(input_3d).squeeze(-1) # shape (batch_size,) + + # Compute desired alpha + alpha_desired = (g / R) * torch.sin(theta) + tau / (m * R**2) + + # Define ODE system + dtheta = omega + domega = alpha + dalpha = alpha_desired - alpha # Relaxation term + dtau = tau - tau_prev # Keep track of torque evolution + + return torch.stack([dtheta, domega, dalpha, dtau], dim=1) # (batch_size, 4) + +# ---------------------------------------------------------------- +# 3) Loss Function +# ---------------------------------------------------------------- +def loss_fn(state_traj, t_span): + """ + Computes loss based on the trajectory with inverse time weighting (1/t) for theta and omega. + + Args: + state_traj: Tensor of shape (time_steps, batch_size, 4). + t_span: Tensor of time steps (time_steps,). 
+
+    Returns:
+        total_loss, (loss_theta, loss_omega, loss_torque)
+    """
+    theta = state_traj[:, :, 0]  # (time_steps, batch_size)
+    omega = state_traj[:, :, 1]  # (time_steps, batch_size)
+    torque = state_traj[:, :, 3]
+
+    # Inverse time weights w(t) = 1 / t
+    # Add a small epsilon to avoid division by zero
+    epsilon = 1e-6
+    inverse_time_weights = 1.0 / (t_span + epsilon).unsqueeze(1)  # Shape: (time_steps, 1)
+    linear_time_weights = t_span.unsqueeze(1)
+
+    # Apply inverse time weighting for theta and omega
+    loss_theta = 1e-1 * torch.mean(inverse_time_weights * theta**2)  # Weighted theta loss
+    loss_omega = 1e-2 * torch.mean(inverse_time_weights * omega**2)  # Weighted omega loss
+    loss_torque = 1e-2 * torch.mean(linear_time_weights * torque**2)
+
+    # Combine the losses
+    total_loss = loss_theta  #+ loss_torque
+
+    return total_loss, (loss_theta, loss_omega, loss_torque)
+
+# ----------------------------------------------------------------
+# 4) Training Setup
+# ----------------------------------------------------------------
+device = torch.device("cpu")  # CPU-only training
+
+# Create the controller and pendulum dynamics model
+controller = PendulumController3D().to(device)
+pendulum_dynamics = PendulumDynamics3D(controller).to(device)
+
+# Define optimizer
+optimizer = optim.Adam(controller.parameters(), lr=1e-1)
+
+# Initial conditions: [theta, omega, alpha, tau_prev]
+initial_conditions = [
+    [0.1, 0.0, 0.0, 0.0],  # Small perturbation
+    [-0.5, 0.0, 0.0, 0.0],
+    [6.28, 6.28, 0.0, 0.0],
+    [1.57, 0.5, 0.0, 0.0],
+    [0.0, -6.28, 0.0, 0.0],
+    [1.57, -6.28, 0.0, 0.0],
+]
+
+# Convert to torch tensor (batch_size, 4)
+state_0 = torch.tensor(initial_conditions, dtype=torch.float32, device=device)
+
+# Time grid
+t_span = torch.linspace(0, 10, 1000, device=device)
+
+num_epochs = 100_000
+print_every = 25
+
+# ----------------------------------------------------------------
+# 5) Training Loop
+# ----------------------------------------------------------------
+for epoch in range(num_epochs):
+    optimizer.zero_grad()
+
+    # Integrate the ODE
+    state_traj = odeint(pendulum_dynamics, state_0, t_span, method='rk4')
+    # state_traj shape: (time_steps, batch_size, 4)
+
+    # Compute loss
+    total_loss, (l_theta, l_omega, l_torque) = loss_fn(state_traj, t_span)
+
+    # Check for NaN values
+    if torch.isnan(total_loss):
+        print(f"NaN detected at epoch {epoch}. Skipping step.")
Skipping step.") + optimizer.zero_grad() + continue # Skip this iteration + + # Backprop + total_loss.backward() + optimizer.step() + + + # Print progress + if epoch % print_every == 0: + print(f"Epoch {epoch:4d}/{num_epochs} | " + f"Total: {total_loss.item():.6f} | " + f"Theta: {l_theta.item():.6f} | " + f"Omega: {l_omega.item():.6f} | " + f"Torque: {l_torque.item():.6f}") + + torch.save(controller.state_dict(), "controller_cpu_clamped_inverse_time_punish.pth") + print("Model saved as 'controller_cpu_clamped_inverse_time_punish.pth'.") diff --git a/validator.py b/validator.py new file mode 100644 index 0000000..6958f31 --- /dev/null +++ b/validator.py @@ -0,0 +1,116 @@ +import torch +import torch.nn as nn +import numpy as np +from scipy.integrate import solve_ivp +import matplotlib.pyplot as plt + +# ---------------------------------------------------------------- +# 1) 3D Controller: [theta, omega, alpha] -> torque +# ---------------------------------------------------------------- +class PendulumController3D(nn.Module): + def __init__(self): + super(PendulumController3D, self).__init__() + self.net = nn.Sequential( + nn.Linear(3, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1) + ) + + def forward(self, x_3d): + return self.net(x_3d) + +# Load the trained 3D model +controller = PendulumController3D() +controller.load_state_dict(torch.load("controller_cpu_clamped_inverse_time_punish.pth")) +# controller.load_state_dict(torch.load("controller_cpu_clamped.pth")) +controller.eval() +print("3D Controller loaded.") + +# ---------------------------------------------------------------- +# 2) ODE: State = [theta, omega, alpha]. +# ---------------------------------------------------------------- +m = 10.0 +g = 9.81 +R = 1.0 + +def pendulum_ode_3d(t, state): + theta, omega, alpha = state + + # Evaluate NN -> torque + inp = torch.tensor([[theta, omega, alpha]], dtype=torch.float32) + with torch.no_grad(): + torque = controller(inp).item() + # Clamp torque to ±250 for consistency with training + torque = np.clip(torque, -250, 250) + + alpha_des = (g/R)*np.sin(theta) + torque/(m*(R**2)) + + dtheta = omega + domega = alpha + dalpha = alpha_des - alpha + return [dtheta, domega, dalpha] + +# ---------------------------------------------------------------- +# 3) Validate for multiple initial conditions +# ---------------------------------------------------------------- +initial_conditions_3d = [ + (0.1, 0.0, 0.0), + (0.5, 0.0, 0.0), + (1.0, 0.0, 0.0), + (1.57, 0.5, 0.0), + (0.0, -6.28, 0.0), + (6.28, 6.28, 0.0), +] + +t_span = (0, 20) +t_eval = np.linspace(0, 20, 2000) + +for idx, (theta0, omega0, alpha0) in enumerate(initial_conditions_3d): + sol = solve_ivp( + pendulum_ode_3d, + t_span, + [theta0, omega0, alpha0], + t_eval=t_eval, + method='RK45' + ) + + t = sol.t + theta = sol.y[0] + omega = sol.y[1] + alpha_arr = sol.y[2] + + # Recompute torque over time + torques = [] + alpha_des_vals = [] + for (th, om, al) in zip(theta, omega, alpha_arr): + with torch.no_grad(): + torque_val = controller(torch.tensor([[th, om, al]], dtype=torch.float32)).item() + torque_val = np.clip(torque_val, -250, 250) + torques.append(torque_val) + alpha_des_vals.append( (g/R)*np.sin(th) + torque_val/(m*(R**2)) ) + torques = np.array(torques) + + # Plot + fig, ax1 = plt.subplots(figsize=(10,6)) + ax1.plot(t, theta, label="theta", color="blue") + ax1.plot(t, omega, label="omega", color="green") + ax1.plot(t, alpha_arr, label="alpha", color="red") + # optional: ax1.plot(t, alpha_des_vals, 
label="alpha_des", color="red", linestyle="--") + + ax1.set_xlabel("time [s]") + ax1.set_ylabel("theta, omega, alpha") + ax1.grid(True) + ax1.legend(loc="upper left") + + ax2 = ax1.twinx() + ax2.plot(t, torques, label="torque", color="purple", linestyle="--") + ax2.set_ylabel("Torque [Nm]") + ax2.legend(loc="upper right") + + plt.title(f"IC (theta={theta0}, omega={omega0}, alpha={alpha0})") + plt.tight_layout() + plt.savefig(f"{idx+1}_validation.png") + plt.close() + print(f"Saved {idx+1}_validation.png")