diff --git a/check_for_cuda.py b/check_for_cuda.py new file mode 100644 index 0000000..dbbc901 --- /dev/null +++ b/check_for_cuda.py @@ -0,0 +1,8 @@ +import torch + +if torch.cuda.is_available(): + print("CUDA is available") + print("Number of CUDA devices:", torch.cuda.device_count()) + print("Device name:", torch.cuda.get_device_name(0)) +else: + print("CUDA is not available") \ No newline at end of file diff --git a/clamped_no_time_penalty/1_validation.png b/clamped_no_time_penalty/1_validation.png new file mode 100644 index 0000000..5026a6a Binary files /dev/null and b/clamped_no_time_penalty/1_validation.png differ diff --git a/clamped_no_time_penalty/2_validation.png b/clamped_no_time_penalty/2_validation.png new file mode 100644 index 0000000..bf80b0b Binary files /dev/null and b/clamped_no_time_penalty/2_validation.png differ diff --git a/clamped_no_time_penalty/3_validation.png b/clamped_no_time_penalty/3_validation.png new file mode 100644 index 0000000..66c5950 Binary files /dev/null and b/clamped_no_time_penalty/3_validation.png differ diff --git a/clamped_no_time_penalty/4_validation.png b/clamped_no_time_penalty/4_validation.png new file mode 100644 index 0000000..8947843 Binary files /dev/null and b/clamped_no_time_penalty/4_validation.png differ diff --git a/clamped_no_time_penalty/5_validation.png b/clamped_no_time_penalty/5_validation.png new file mode 100644 index 0000000..3939a41 Binary files /dev/null and b/clamped_no_time_penalty/5_validation.png differ diff --git a/clamped_no_time_penalty/6_validation.png b/clamped_no_time_penalty/6_validation.png new file mode 100644 index 0000000..6a0b6c0 Binary files /dev/null and b/clamped_no_time_penalty/6_validation.png differ diff --git a/clamped_no_time_penalty/trainer.py b/clamped_no_time_penalty/trainer.py new file mode 100644 index 0000000..18247a9 --- /dev/null +++ b/clamped_no_time_penalty/trainer.py @@ -0,0 +1,161 @@ +import torch +import torch.nn as nn +import torch.optim as optim +from torchdiffeq import odeint +import numpy as np +import matplotlib.pyplot as plt + +# ---------------------------------------------------------------- +# 1) 3D Controller: [theta, omega, alpha] -> torque +# ---------------------------------------------------------------- +class PendulumController3D(nn.Module): + def __init__(self): + super().__init__() + self.net = nn.Sequential( + nn.Linear(3, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1) + ) + + def forward(self, x_3d): + """ + x_4d: shape (batch_size, 4) => [theta, cos(theta), omega, alpha]. + Returns shape: (batch_size, 1) => torque. + """ + raw_torque = self.net(x_3d) + clamped_torque = torch.clamp(raw_torque, -250, 250) # Clamp torque within [-250, 250] + return clamped_torque + + +# ---------------------------------------------------------------- +# 2) Define ODE System Using `odeint` +# ---------------------------------------------------------------- +m = 10.0 +g = 9.81 +R = 1.0 + +class PendulumDynamics3D(nn.Module): + """ + Defines the ODE system for [theta, omega, alpha] with torque tracking. + """ + + def __init__(self, controller): + super().__init__() + self.controller = controller + + def forward(self, t, state): + """ + state: (batch_size, 4) => [theta, omega, alpha, tau_prev] + Returns: (batch_size, 4) => [dtheta/dt, domega/dt, dalpha/dt, dtau/dt] + """ + + theta = state[:, 0] + omega = state[:, 1] + alpha = state[:, 2] + tau_prev = state[:, 3] + + # Create tensor input for controller: [theta, omega, alpha] + input_3d = torch.stack([theta, omega, alpha], dim=1) # shape (batch_size, 3) + + # Compute torque using the controller + tau = self.controller(input_3d).squeeze(-1) # shape (batch_size,) + + # Compute desired alpha + alpha_desired = (g / R) * torch.sin(theta) + tau / (m * R**2) + + # Define ODE system + dtheta = omega + domega = alpha + dalpha = alpha_desired - alpha # Relaxation term + dtau = tau - tau_prev # Keep track of torque evolution + + return torch.stack([dtheta, domega, dalpha, dtau], dim=1) # (batch_size, 4) + +# ---------------------------------------------------------------- +# 3) Loss Function +# ---------------------------------------------------------------- +def loss_fn(state_traj, t_span): + """ + Computes loss based on the trajectory with inverse time weighting (1/t) for theta and omega. + + Args: + state_traj: Tensor of shape (time_steps, batch_size, 4). + t_span: Tensor of time steps (time_steps,). + + Returns: + total_loss, (loss_theta, loss_omega) + """ + theta = state_traj[:, :, 0] # (time_steps, batch_size) + + loss_theta = 1e3 * torch.mean(theta**2) + + # Combine the losses + total_loss = loss_theta + + return total_loss, (loss_theta) + +# ---------------------------------------------------------------- +# 4) Training Setup +# ---------------------------------------------------------------- +device = torch.device("cpu") + +# Create the controller and pendulum dynamics model +controller = PendulumController3D().to(device) +pendulum_dynamics = PendulumDynamics3D(controller).to(device) + +# Define optimizer +optimizer = optim.Adam(controller.parameters(), lr=1e-1) + +# Initial conditions: [theta, omega, alpha, tau_prev] +initial_conditions = [ + [0.1, 0.0, 0.0, 0.0], # Small perturbation + [-0.5, 0.0, 0.0, 0.0], + [6.28, 6.28, 0.0, 0.0], + [1.57, 0.5, 0.0, 0.0], + [0.0, -6.28, 0.0, 0.0], + [1.57, -6.28, 0.0, 0.0], +] + +# Convert to torch tensor (batch_size, 4) +state_0 = torch.tensor(initial_conditions, dtype=torch.float32, device=device) + +# Time grid +t_span = torch.linspace(0, 10, 200, device=device) + +num_epochs = 100_000 +print_every = 25 + +# ---------------------------------------------------------------- +# 5) Training Loop +# ---------------------------------------------------------------- +for epoch in range(num_epochs): + optimizer.zero_grad() + + # Integrate the ODE + state_traj = odeint(pendulum_dynamics, state_0, t_span, method='rk4') + # state_traj shape: (time_steps, batch_size, 4) + + # Compute loss + total_loss, (l_theta) = loss_fn(state_traj, t_span) + + # Check for NaN values + if torch.isnan(total_loss): + print(f"NaN detected at epoch {epoch}. Skipping step.") + optimizer.zero_grad() + continue # Skip this iteration + + # Backprop + total_loss.backward() + optimizer.step() + + + # Print progress + if epoch % print_every == 0: + print(f"Epoch {epoch:4d}/{num_epochs} | " + f"Total: {total_loss.item():.6f} | " + f"Theta: {l_theta.item():.6f}") + + torch.save(controller.state_dict(), "controller_cpu_clamped_no_time_penalty.pth") + print("Model saved as 'controller_cpu_clamped_no_time_penalty.pth'.") diff --git a/clamped_no_time_penalty/validator.py b/clamped_no_time_penalty/validator.py new file mode 100644 index 0000000..9b618d6 --- /dev/null +++ b/clamped_no_time_penalty/validator.py @@ -0,0 +1,118 @@ +import torch +import torch.nn as nn +import numpy as np +from scipy.integrate import solve_ivp +import matplotlib.pyplot as plt + + +controller_file_name = "controller_cpu_clamped_no_time_penalty.pth" + +# ---------------------------------------------------------------- +# 1) 3D Controller: [theta, omega, alpha] -> torque +# ---------------------------------------------------------------- +class PendulumController3D(nn.Module): + def __init__(self): + super(PendulumController3D, self).__init__() + self.net = nn.Sequential( + nn.Linear(3, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1) + ) + + def forward(self, x_3d): + return self.net(x_3d) + +# Load the trained 3D model +controller = PendulumController3D() +controller.load_state_dict(torch.load(controller_file_name)) +controller.eval() +print(f"{controller_file_name} loaded.") + +# ---------------------------------------------------------------- +# 2) ODE: State = [theta, omega, alpha]. +# ---------------------------------------------------------------- +m = 10.0 +g = 9.81 +R = 1.0 + +def pendulum_ode_3d(t, state): + theta, omega, alpha = state + + # Evaluate NN -> torque + inp = torch.tensor([[theta, omega, alpha]], dtype=torch.float32) + with torch.no_grad(): + torque = controller(inp).item() + # Clamp torque to ±250 for consistency with training + torque = np.clip(torque, -250, 250) + + alpha_des = (g/R)*np.sin(theta) + torque/(m*(R**2)) + + dtheta = omega + domega = alpha + dalpha = alpha_des - alpha + return [dtheta, domega, dalpha] + +# ---------------------------------------------------------------- +# 3) Validate for multiple initial conditions +# ---------------------------------------------------------------- +initial_conditions_3d = [ + (0.1, 0.0, 0.0), + (0.5, 0.0, 0.0), + (1.0, 0.0, 0.0), + (1.57, 0.5, 0.0), + (0.0, -6.28, 0.0), + (6.28, 6.28, 0.0), +] + +t_span = (0, 20) +t_eval = np.linspace(0, 20, 2000) + +for idx, (theta0, omega0, alpha0) in enumerate(initial_conditions_3d): + sol = solve_ivp( + pendulum_ode_3d, + t_span, + [theta0, omega0, alpha0], + t_eval=t_eval, + method='RK45' + ) + + t = sol.t + theta = sol.y[0] + omega = sol.y[1] + alpha_arr = sol.y[2] + + # Recompute torque over time + torques = [] + alpha_des_vals = [] + for (th, om, al) in zip(theta, omega, alpha_arr): + with torch.no_grad(): + torque_val = controller(torch.tensor([[th, om, al]], dtype=torch.float32)).item() + torque_val = np.clip(torque_val, -250, 250) + torques.append(torque_val) + alpha_des_vals.append( (g/R)*np.sin(th) + torque_val/(m*(R**2)) ) + torques = np.array(torques) + + # Plot + fig, ax1 = plt.subplots(figsize=(10,6)) + ax1.plot(t, theta, label="theta", color="blue") + ax1.plot(t, omega, label="omega", color="green") + ax1.plot(t, alpha_arr, label="alpha", color="red") + # optional: ax1.plot(t, alpha_des_vals, label="alpha_des", color="red", linestyle="--") + + ax1.set_xlabel("time [s]") + ax1.set_ylabel("theta, omega, alpha") + ax1.grid(True) + ax1.legend(loc="upper left") + + ax2 = ax1.twinx() + ax2.plot(t, torques, label="torque", color="purple", linestyle="--") + ax2.set_ylabel("Torque [Nm]") + ax2.legend(loc="upper right") + + plt.title(f"IC (theta={theta0}, omega={omega0}, alpha={alpha0})") + plt.tight_layout() + plt.savefig(f"{idx+1}_validation.png") + plt.close() + print(f"Saved {idx+1}_validation.png") diff --git a/trainer.py b/trainer.py index dc40681..e1089b7 100644 --- a/trainer.py +++ b/trainer.py @@ -3,173 +3,134 @@ import torch.nn as nn import torch.optim as optim from torchdiffeq import odeint import numpy as np -import matplotlib.pyplot as plt +import random -# ---------------------------------------------------------------- -# 1) 3D Controller: [theta, omega, alpha] -> torque -# ---------------------------------------------------------------- -class PendulumController3D(nn.Module): +# Generate a random seed +random_seed = random.randint(0, 10000) + +# Set the seeds for reproducibility +torch.manual_seed(random_seed) +np.random.seed(random_seed) + +# Print the chosen random seed +print(f"Random seed for torch and numpy: {random_seed}") + +class PendulumController(nn.Module): def __init__(self): super().__init__() self.net = nn.Sequential( - nn.Linear(3, 64), + nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 1) ) - def forward(self, x_3d): - """ - x_4d: shape (batch_size, 4) => [theta, cos(theta), omega, alpha]. - Returns shape: (batch_size, 1) => torque. - """ - raw_torque = self.net(x_3d) - clamped_torque = torch.clamp(raw_torque, -250, 250) # Clamp torque within [-250, 250] - return clamped_torque + def forward(self, x): + raw_torque = self.net(x) + return torch.clamp(raw_torque, -250, 250) # Clamp torque - -# ---------------------------------------------------------------- -# 2) Define ODE System Using `odeint` -# ---------------------------------------------------------------- +# Constants m = 10.0 g = 9.81 R = 1.0 -class PendulumDynamics3D(nn.Module): - """ - Defines the ODE system for [theta, omega, alpha] with torque tracking. - """ - +class PendulumDynamics(nn.Module): def __init__(self, controller): super().__init__() self.controller = controller def forward(self, t, state): - """ - state: (batch_size, 4) => [theta, omega, alpha, tau_prev] - Returns: (batch_size, 4) => [dtheta/dt, domega/dt, dalpha/dt, dtau/dt] - """ - theta = state[:, 0] omega = state[:, 1] alpha = state[:, 2] - tau_prev = state[:, 3] + desired_theta = state[:, 3] # Extract desired theta + # Pass desired_theta as input to the controller + input = torch.stack([theta, omega, alpha, desired_theta], dim=1) + tau = self.controller(input).squeeze(-1) - # Create tensor input for controller: [theta, omega, alpha] - input_3d = torch.stack([theta, omega, alpha], dim=1) # shape (batch_size, 3) - - # Compute torque using the controller - tau = self.controller(input_3d).squeeze(-1) # shape (batch_size,) - - # Compute desired alpha alpha_desired = (g / R) * torch.sin(theta) + tau / (m * R**2) - # Define ODE system dtheta = omega domega = alpha - dalpha = alpha_desired - alpha # Relaxation term - dtau = tau - tau_prev # Keep track of torque evolution + dalpha = alpha_desired - alpha - return torch.stack([dtheta, domega, dalpha, dtau], dim=1) # (batch_size, 4) + return torch.stack([dtheta, domega, dalpha, torch.zeros_like(desired_theta)], dim=1) -# ---------------------------------------------------------------- -# 3) Loss Function -# ---------------------------------------------------------------- -def loss_fn(state_traj, t_span): - """ - Computes loss based on the trajectory with inverse time weighting (1/t) for theta and omega. +def loss_fn(state_traj): + theta = state_traj[:, :, 0] # Extract theta + desired_theta = state_traj[:, :, 3] # Extract desired theta - Args: - state_traj: Tensor of shape (time_steps, batch_size, 4). - t_span: Tensor of time steps (time_steps,). + loss_theta = 1e3 * torch.mean((theta - desired_theta)**2) + return loss_theta - Returns: - total_loss, (loss_theta, loss_omega) - """ - theta = state_traj[:, :, 0] # (time_steps, batch_size) - omega = state_traj[:, :, 1] # (time_steps, batch_size) - torque = state_traj[:, :, 3] +# Device setup +device = torch.device("cpu") - # Inverse time weights w(t) = 1 / t - # Add a small epsilon to avoid division by zero - epsilon = 1e-6 - inverse_time_weights = 1.0 / (t_span + epsilon).unsqueeze(1) # Shape: (time_steps, 1) - linear_time_weights = t_span.unsqueeze(1) +# Initial conditions (theta0, omega0, alpha0, desired_theta) +state_0 = torch.tensor([ + # Theta perturbations + [1/6 * torch.pi, 0.0, 0.0, 0.0], + [-1/6 * torch.pi, 0.0, 0.0, 0.0], + [2/3 * torch.pi, 0.0, 0.0, 0.0], + [-2/3 * torch.pi, 0.0, 0.0, 0.0], - # Apply inverse time weighting for theta and omega - loss_theta = 1e-1 * torch.mean(inverse_time_weights * theta**2) # Weighted theta loss - loss_omega = 1e-2 * torch.mean(inverse_time_weights * omega**2) # Weighted omega loss - loss_torque = 1e-2 * torch.mean(linear_time_weights * torque**2) + # Omega perturbations + [0.0, 1/3 * torch.pi, 0.0, 0.0], + [0.0, -1/3 * torch.pi, 0.0, 0.0], + [0.0, 2 * torch.pi, 0.0, 0.0], + [0.0, -2 * torch.pi, 0.0, 0.0], - # Combine the losses - total_loss = loss_theta #+ loss_torque + # Return to non-zero theta + [0.0, 0.0, 0.0, 2*torch.pi], + [0.0, 0.0, 0.0, -2*torch.pi], + [0.0, 0.0, 0.0, 1/2 * torch.pi], + [0.0, 0.0, 0.0, -1/2 *torch.pi], + [0.0, 0.0, 0.0, 1/3 * torch.pi], + [0.0, 0.0, 0.0, -1/3 *torch.pi], - return total_loss, (loss_theta, loss_omega, loss_torque) + # Mix cases + [1/4 * torch.pi, 1 * torch.pi, 0.0, 0.0], + [-1/4 * torch.pi, -1 * torch.pi, 0.0, 0.0], + [1/2 * torch.pi, -1 * torch.pi, 0.0, 1/3 * torch.pi], + [-1/2 * torch.pi, 1 * torch.pi, 0.0, -1/3 *torch.pi], + [1/4 * torch.pi, 1 * torch.pi, 0.0, 2 * torch.pi], + [-1/4 * torch.pi, -1 * torch.pi, 0.0, 2 * torch.pi], + [1/2 * torch.pi, -1 * torch.pi, 0.0, 4 * torch.pi], + [-1/2 * torch.pi, 1 * torch.pi, 0.0, -4 *torch.pi], -# ---------------------------------------------------------------- -# 4) Training Setup -# ---------------------------------------------------------------- -device = torch.device("cpu" if torch.cuda.is_available() else "cpu") - -# Create the controller and pendulum dynamics model -controller = PendulumController3D().to(device) -pendulum_dynamics = PendulumDynamics3D(controller).to(device) - -# Define optimizer -optimizer = optim.Adam(controller.parameters(), lr=1e-1) - -# Initial conditions: [theta, omega, alpha, tau_prev] -initial_conditions = [ - [0.1, 0.0, 0.0, 0.0], # Small perturbation - [-0.5, 0.0, 0.0, 0.0], - [6.28, 6.28, 0.0, 0.0], - [1.57, 0.5, 0.0, 0.0], - [0.0, -6.28, 0.0, 0.0], - [1.57, -6.28, 0.0, 0.0], -] - -# Convert to torch tensor (batch_size, 4) -state_0 = torch.tensor(initial_conditions, dtype=torch.float32, device=device) +], dtype=torch.float32, device=device) # Time grid t_span = torch.linspace(0, 10, 1000, device=device) -num_epochs = 100_000 +# Initialize controller and dynamics +controller = PendulumController().to(device) +pendulum_dynamics = PendulumDynamics(controller).to(device) + +# Optimizer +optimizer = optim.Adam(controller.parameters(), lr=1e-1, weight_decay=0) + +# Training parameters +num_epochs = 10_000 print_every = 25 -# ---------------------------------------------------------------- -# 5) Training Loop -# ---------------------------------------------------------------- for epoch in range(num_epochs): optimizer.zero_grad() + + state_traj = odeint(pendulum_dynamics, state_0, t_span, method='rk4') + loss = loss_fn(state_traj) - # Integrate the ODE - state_traj = odeint(pendulum_dynamics, state_0, t_span, method='rk4') - # state_traj shape: (time_steps, batch_size, 4) - - # Compute loss - total_loss, (l_theta, l_omega, l_torque) = loss_fn(state_traj, t_span) - - # Check for NaN values - if torch.isnan(total_loss): + if torch.isnan(loss): print(f"NaN detected at epoch {epoch}. Skipping step.") optimizer.zero_grad() - continue # Skip this iteration + continue - # Backprop - total_loss.backward() + loss.backward() optimizer.step() - - # Print progress if epoch % print_every == 0: - print(f"Epoch {epoch:4d}/{num_epochs} | " - f"Total: {total_loss.item():.6f} | " - f"Theta: {l_theta.item():.6f} | " - f"Omega: {l_omega.item():.6f} | " - f"Torque: {l_torque.item():.6f}") - - torch.save(controller.state_dict(), "controller_cpu_clamped_inverse_time_punish.pth") - print("Model saved as 'controller_cpu_clamped_inverse_time_punish.pth'.") + print(f"Epoch {epoch}/{num_epochs} | Loss: {loss.item():.6f}") + torch.save(controller.state_dict(), "controller_with_desired_theta.pth") + print("Model saved as 'controller_with_desired_theta.pth'.") diff --git a/validator.py b/validator.py index 6958f31..d67dc7b 100644 --- a/validator.py +++ b/validator.py @@ -1,103 +1,174 @@ import torch import torch.nn as nn import numpy as np -from scipy.integrate import solve_ivp import matplotlib.pyplot as plt +import pandas as pd -# ---------------------------------------------------------------- -# 1) 3D Controller: [theta, omega, alpha] -> torque -# ---------------------------------------------------------------- -class PendulumController3D(nn.Module): +# Load Controller +controller_file_name = "controller_with_desired_theta.pth" + +class PendulumController(nn.Module): def __init__(self): - super(PendulumController3D, self).__init__() + super().__init__() self.net = nn.Sequential( - nn.Linear(3, 64), + nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 1) ) - def forward(self, x_3d): - return self.net(x_3d) + def forward(self, x): + return self.net(x) -# Load the trained 3D model -controller = PendulumController3D() -controller.load_state_dict(torch.load("controller_cpu_clamped_inverse_time_punish.pth")) -# controller.load_state_dict(torch.load("controller_cpu_clamped.pth")) +controller = PendulumController() +controller.load_state_dict(torch.load(controller_file_name)) controller.eval() -print("3D Controller loaded.") +print(f"{controller_file_name} loaded.") -# ---------------------------------------------------------------- -# 2) ODE: State = [theta, omega, alpha]. -# ---------------------------------------------------------------- +# Constants m = 10.0 g = 9.81 R = 1.0 -def pendulum_ode_3d(t, state): +# Time step settings +dt = 0.01 # Fixed time step +T = 20.0 # Total simulation time +num_steps = int(T / dt) +t_eval = np.linspace(0, T, num_steps) + +# Define ODE System (RK4) - Also Returns Torque +def pendulum_ode_step(state, dt, desired_theta): theta, omega, alpha = state - # Evaluate NN -> torque - inp = torch.tensor([[theta, omega, alpha]], dtype=torch.float32) - with torch.no_grad(): - torque = controller(inp).item() - # Clamp torque to ±250 for consistency with training - torque = np.clip(torque, -250, 250) + def compute_torque(th, om, al): + # Evaluate NN -> torque + inp = torch.tensor([[th, om, al, desired_theta]], dtype=torch.float32) + with torch.no_grad(): + torque = controller(inp) + torque = torch.clamp(torque, -250, 250) + return float(torque) - alpha_des = (g/R)*np.sin(theta) + torque/(m*(R**2)) + def derivatives(state, torque): + th, om, al = state + a = (g / R) * np.sin(th) + torque / (m * R**2) + return np.array([om, a, 0]) # dtheta, domega, dalpha - dtheta = omega - domega = alpha - dalpha = alpha_des - alpha - return [dtheta, domega, dalpha] + # Compute k1 + torque1 = compute_torque(theta, omega, alpha) + k1 = dt * derivatives(state, torque1) -# ---------------------------------------------------------------- -# 3) Validate for multiple initial conditions -# ---------------------------------------------------------------- -initial_conditions_3d = [ - (0.1, 0.0, 0.0), - (0.5, 0.0, 0.0), - (1.0, 0.0, 0.0), - (1.57, 0.5, 0.0), - (0.0, -6.28, 0.0), - (6.28, 6.28, 0.0), + # Compute k2 + state_k2 = state + 0.5 * k1 + torque2 = compute_torque(state_k2[0], state_k2[1], state_k2[2]) + k2 = dt * derivatives(state_k2, torque2) + + # Compute k3 + state_k3 = state + 0.5 * k2 + torque3 = compute_torque(state_k3[0], state_k3[1], state_k3[2]) + k3 = dt * derivatives(state_k3, torque3) + + # Compute k4 + state_k4 = state + k3 + torque4 = compute_torque(state_k4[0], state_k4[1], state_k4[2]) + k4 = dt * derivatives(state_k4, torque4) + + # Update state using RK4 formula + new_state = state + (k1 + 2*k2 + 2*k3 + k4) / 6.0 + + # Compute weighted final torque + final_torque = (torque1 + 2*torque2 + 2*torque3 + torque4) / 6.0 + + return new_state, final_torque + + + + +# Run Simulations for Different Initial Conditions +# [theta0, omega0, alpha0, desired_theta] +in_sample_cases = [ + # Theta perturbations + (1/6 * np.pi, 0.0, 0.0, 0.0), + (-1/6 * np.pi, 0.0, 0.0, 0.0), + (2/3 * np.pi, 0.0, 0.0, 0.0), + (-2/3 * np.pi, 0.0, 0.0, 0.0), + + # Omega perturbations + (0.0, 1/3 * np.pi, 0.0, 0.0), + (0.0, -1/3 * np.pi, 0.0, 0.0), + (0.0, 2 * np.pi, 0.0, 0.0), + (0.0, -2 * np.pi, 0.0, 0.0), + + # Return to non-zero theta + (0.0, 0.0, 0.0, 2*np.pi), + (0.0, 0.0, 0.0, -2*np.pi), + (0.0, 0.0, 0.0, 1/2 * np.pi), + (0.0, 0.0, 0.0, -1/2 *np.pi), + (0.0, 0.0, 0.0, 1/3 * np.pi), + (0.0, 0.0, 0.0, -1/3 *np.pi), + + # Mix cases + (1/4 * np.pi, 1 * np.pi, 0.0, 0.0), + (-1/4 * np.pi, -1 * np.pi, 0.0, 0.0), + (1/2 * np.pi, -1 * np.pi, 0.0, 1/3 * np.pi), + (-1/2 * np.pi, 1 * np.pi, 0.0, -1/3 *np.pi), + (1/4 * np.pi, 1 * np.pi, 0.0, 2 * np.pi), + (-1/4 * np.pi, -1 * np.pi, 0.0, 2 * np.pi), + (1/2 * np.pi, -1 * np.pi, 0.0, 4 * np.pi), + (-1/2 * np.pi, 1 * np.pi, 0.0, -4 *np.pi), ] -t_span = (0, 20) -t_eval = np.linspace(0, 20, 2000) +# Validation in-sample cases +print("Performing in-sample validation") -for idx, (theta0, omega0, alpha0) in enumerate(initial_conditions_3d): - sol = solve_ivp( - pendulum_ode_3d, - t_span, - [theta0, omega0, alpha0], - t_eval=t_eval, - method='RK45' - ) +losses = [] +final_thetas = [] - t = sol.t - theta = sol.y[0] - omega = sol.y[1] - alpha_arr = sol.y[2] +for idx, (theta0, omega0, alpha0, desired_theta) in enumerate(in_sample_cases): + state = np.array([theta0, omega0, alpha0]) - # Recompute torque over time - torques = [] - alpha_des_vals = [] - for (th, om, al) in zip(theta, omega, alpha_arr): - with torch.no_grad(): - torque_val = controller(torch.tensor([[th, om, al]], dtype=torch.float32)).item() - torque_val = np.clip(torque_val, -250, 250) - torques.append(torque_val) - alpha_des_vals.append( (g/R)*np.sin(th) + torque_val/(m*(R**2)) ) - torques = np.array(torques) + theta_vals, omega_vals, alpha_vals, torque_vals = [], [], [], [] - # Plot - fig, ax1 = plt.subplots(figsize=(10,6)) - ax1.plot(t, theta, label="theta", color="blue") - ax1.plot(t, omega, label="omega", color="green") - ax1.plot(t, alpha_arr, label="alpha", color="red") - # optional: ax1.plot(t, alpha_des_vals, label="alpha_des", color="red", linestyle="--") + for _ in range(num_steps): + # Save values + theta_vals.append(state[0]) + omega_vals.append(state[1]) + alpha_vals.append(state[2]) + + # Compute ODE step with real state + state, torque = pendulum_ode_step(state, dt, desired_theta) + + # Store torque + torque_vals.append(torque) + + # Convert lists to arrays + theta_vals = np.array(theta_vals) + omega_vals = np.array(omega_vals) + alpha_vals = np.array(alpha_vals) + torque_vals = np.array(torque_vals) + + # Get the final theta of the system at t=t_final + final_theta = theta_vals[-1] + final_thetas.append(final_theta) + + # Calculate this specific condition's loss + loss = 1e3 * np.mean((theta_vals - desired_theta)**2) + losses.append(loss) + + # Plot Results + fig, ax1 = plt.subplots(figsize=(10, 6)) + ax1.plot(t_eval, theta_vals, label="theta", color="blue") + ax1.plot(t_eval, omega_vals, label="omega", color="green") + ax1.plot(t_eval, alpha_vals, label="alpha", color="red") + ax1.axhline(desired_theta, label="Desired Theta", color="black") + + # Draw horizontal lines at theta = 2n*pi (as many as fit within range) + y_min = min(np.min(theta_vals), np.min(omega_vals), np.min(alpha_vals)) + y_max = max(np.max(theta_vals), np.max(omega_vals), np.max(alpha_vals)) + + n_min = int(np.ceil(y_min / (2 * np.pi))) + n_max = int(np.floor(y_max / (2 * np.pi))) + theta_lines = [2 * n * np.pi for n in range(n_min, n_max + 1)] ax1.set_xlabel("time [s]") ax1.set_ylabel("theta, omega, alpha") @@ -105,12 +176,133 @@ for idx, (theta0, omega0, alpha0) in enumerate(initial_conditions_3d): ax1.legend(loc="upper left") ax2 = ax1.twinx() - ax2.plot(t, torques, label="torque", color="purple", linestyle="--") + ax2.plot(t_eval, torque_vals, label="torque", color="purple", linestyle="--") ax2.set_ylabel("Torque [Nm]") ax2.legend(loc="upper right") plt.title(f"IC (theta={theta0}, omega={omega0}, alpha={alpha0})") plt.tight_layout() - plt.savefig(f"{idx+1}_validation.png") + + filename = f"{idx+1}_theta0_{theta0:.3f}_omega0_{omega0:.3f}_alpha0_{alpha0:.3f}_desiredtheta_{desired_theta:.3f}_finaltheta_{final_theta:.3f}.png" + plt.savefig(f"validation/in-sample/{filename}") plt.close() - print(f"Saved {idx+1}_validation.png") + + print(f"Saved in-sample validation case {idx+1}") + +# Create a DataFrame for tabular representation +df_losses = pd.DataFrame(in_sample_cases, columns=["theta0", "omega0", "alpha0", "desired_theta"]) +df_losses["final_theta"] = final_theta +df_losses["loss"] = losses + +# Add run # column +df_losses.insert(0, "Run #", range(1, len(in_sample_cases) + 1)) + +# Print the table +print(df_losses.to_string(index=False)) + + +# Out-of-sample validation +print("\nPerforming out-of-sample validation") + +# Out of sample cases previously generated by numpy +out_sample_cases = [ + (-2.198958, -4.428501, 0.450833, 0.000000), + (1.714196, -0.769896, 0.202738, 0.000000), + (0.241195, -5.493715, 0.438996, 0.000000), + (0.030605, 4.901513, -0.479243, 0.000000), + (1.930445, -1.301926, -0.454050, 0.000000), + (-0.676063, 4.246865, 0.036303, 0.000000), + (0.734920, -5.925202, 0.047097, 0.000000), + (-3.074471, -3.535424, 0.315438, 0.000000), + (-0.094486, 6.111091, 0.150525, 0.000000), + (-1.647671, 5.720526, 0.334181, 0.000000), + (-2.611260, 5.087704, 0.045460, -3.610785), + (1.654137, 0.982081, -0.192725, 1.003872), + (-2.394899, 3.550547, -0.430938, 3.261897), + (0.474917, 0.555166, -0.285173, 1.866752), + (-0.640369, -4.678490, -0.340663, 3.150098), + (1.747517, -3.248204, -0.001520, 1.221787), + (2.505283, -2.875006, -0.065617, -3.690269), + (1.337244, 2.221707, 0.044979, -2.459730), + (1.531012, 2.230981, -0.291206, -1.924535), + (-1.065792, 4.320740, 0.075405, -1.550644), +] + + + +for idx, (theta0, omega0, alpha0, desired_theta) in enumerate(out_sample_cases): + state = np.array([theta0, omega0, alpha0]) + + theta_vals, omega_vals, alpha_vals, torque_vals = [], [], [], [] + + for _ in range(num_steps): + # Save values + theta_vals.append(state[0]) + omega_vals.append(state[1]) + alpha_vals.append(state[2]) + + # Compute ODE step with real state + state, torque = pendulum_ode_step(state, dt, desired_theta) + + # Store torque + torque_vals.append(torque) + + # Convert lists to arrays + theta_vals = np.array(theta_vals) + omega_vals = np.array(omega_vals) + alpha_vals = np.array(alpha_vals) + torque_vals = np.array(torque_vals) + + # Get the final theta of the system at t=t_final + final_theta = theta_vals[-1] + final_thetas.append(final_theta) + + # Calculate this specific condition's loss + loss = 1e3 * np.mean((theta_vals - desired_theta)**2) + losses.append(loss) + + # Plot Results + fig, ax1 = plt.subplots(figsize=(10, 6)) + ax1.plot(t_eval, theta_vals, label="theta", color="blue") + ax1.plot(t_eval, omega_vals, label="omega", color="green") + ax1.plot(t_eval, alpha_vals, label="alpha", color="red") + ax1.axhline(desired_theta, label="Desired Theta", color="black") + + # Draw horizontal lines at theta = 2n*pi (as many as fit within range) + y_min = min(np.min(theta_vals), np.min(omega_vals), np.min(alpha_vals)) + y_max = max(np.max(theta_vals), np.max(omega_vals), np.max(alpha_vals)) + + n_min = int(np.ceil(y_min / (2 * np.pi))) + n_max = int(np.floor(y_max / (2 * np.pi))) + theta_lines = [2 * n * np.pi for n in range(n_min, n_max + 1)] + + ax1.set_xlabel("time [s]") + ax1.set_ylabel("theta, omega, alpha") + ax1.grid(True) + ax1.legend(loc="upper left") + + ax2 = ax1.twinx() + ax2.plot(t_eval, torque_vals, label="torque", color="purple", linestyle="--") + ax2.set_ylabel("Torque [Nm]") + ax2.legend(loc="upper right") + + plt.title(f"IC (theta={theta0}, omega={omega0}, alpha={alpha0})") + plt.tight_layout() + + filename = f"{idx+1}_theta0_{theta0:.3f}_omega0_{omega0:.3f}_alpha0_{alpha0:.3f}_desiredtheta_{desired_theta:.3f}_finaltheta_{final_theta:.3f}.png" + plt.savefig(f"validation/out-of-sample/{filename}") + plt.close() + + print(f"Saved out-of-sample validation case {idx+1}") + + +# Create a DataFrame for tabular representation +df_losses = pd.DataFrame(out_sample_cases, columns=["theta0", "omega0", "alpha0", "desired_theta"]) +df_losses["final_theta"] = final_theta +df_losses["loss"] = losses + +# Add run # column +df_losses.insert(0, "Run #", range(1, len(out_sample_cases) + 1)) + +# Print the table +print(df_losses.to_string(index=False)) \ No newline at end of file