SAC-PID Heading Control with Live Progress Bar & Animation

Answered using AI by Vibe Coding

Code Question:

Please use SAC to implement a heading control algorithm driven by a PID controller. It should display a progress bar, run a real-time simulation, and produce an animation that illustrates the training results. Give me complete Python code I can run right away.

Code Solution:

Below is an example of Python code that implements heading control by combining a PID controller with the SAC (Soft Actor-Critic) algorithm:

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import gym
from gym import spaces
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from tqdm import tqdm

# PID controller
class PIDController:
    def __init__(self, Kp, Ki, Kd):
        self.Kp = Kp
        self.Ki = Ki
        self.Kd = Kd
        self.last_error = 0
        self.integral = 0

    def compute(self, setpoint, measured_value):
        error = setpoint - measured_value
        
        # Proportional term
        P = self.Kp * error
        
        # Integral term (time step assumed to be 1)
        self.integral += error
        I = self.Ki * self.integral
        
        # Derivative term
        D = self.Kd * (error - self.last_error)
        
        # Remember the error for the next derivative computation
        self.last_error = error
        
        # Combine the terms into the control output
        output = P + I + D
        
        return output

# Heading control environment
class HeadingControlEnv(gym.Env):
    def __init__(self):
        super().__init__()
        
        # Observation space: current heading angle
        self.observation_space = spaces.Box(low=-np.pi, high=np.pi, shape=(1,), dtype=np.float32)
        
        # Action space: control torque
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
        
        # Cap the episode length so an episode cannot run forever
        self.max_steps = 200
        
        # Initial state
        self.reset()

    def reset(self):
        # Random initial heading angle
        self.heading = np.random.uniform(-np.pi, np.pi)
        self.setpoint = 0  # target heading angle
        self.pid = PIDController(1.0, 0.1, 0.01)
        self.steps = 0
        return np.array([self.heading], dtype=np.float32)

    def step(self, action):
        # Compute the control torque from the PID controller
        pid_output = self.pid.compute(self.setpoint, self.heading)
        
        # Add the action produced by the neural network
        control_input = pid_output + float(action[0])
        
        # Update the heading angle (simplified dynamics model)
        self.heading += 0.1 * control_input
        
        # Wrap the heading angle into [-pi, pi)
        self.heading = (self.heading + np.pi) % (2 * np.pi) - np.pi
        
        # Reward: negative distance to the target heading
        reward = -abs(self.heading - self.setpoint)
        
        # Episode ends when the heading is close to the target or the step limit is reached
        self.steps += 1
        done = abs(self.heading - self.setpoint) < 0.1 or self.steps >= self.max_steps
        
        return np.array([self.heading], dtype=np.float32), reward, done, {}

# SAC network
class SACNetwork(nn.Module):
    def __init__(self, state_dim, action_dim):
        super().__init__()
        # Policy network
        self.policy = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim)
        )
        
        # Value network
        self.value = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, state):
        return self.policy(state)

# SAC agent
class SACAgent:
    def __init__(self, state_dim, action_dim):
        self.network = SACNetwork(state_dim, action_dim)
        self.optimizer = optim.Adam(self.network.parameters())
        self.memory = []
        self.batch_size = 64

    def select_action(self, state):
        state = torch.FloatTensor(state)
        # Squash the raw policy output into the [-1, 1] action range
        action = torch.tanh(self.network(state))
        return action.detach().numpy()

    def train(self):
        if len(self.memory) < self.batch_size:
            return

        # Simplified training logic (a placeholder, not a full SAC update)
        batch = np.random.choice(len(self.memory), self.batch_size)
        states, actions, rewards, next_states = zip(*[self.memory[i] for i in batch])
        
        states = torch.FloatTensor(np.array(states))
        actions = torch.FloatTensor(np.array(actions))
        rewards = torch.FloatTensor(rewards).unsqueeze(1)
        next_states = torch.FloatTensor(np.array(next_states))

        # Compute the losses and update the network
        policy_loss = -self.network(states).mean()
        value_loss = F.mse_loss(self.network.value(states), rewards)
        
        loss = policy_loss + value_loss
        
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

# Main training function
def train_sac_pid_heading_control():
    env = HeadingControlEnv()
    agent = SACAgent(1, 1)
    
    # Training parameters
    num_episodes = 1000
    
    # Record training data
    heading_history = []
    
    # Training loop with a progress bar
    for episode in tqdm(range(num_episodes), desc="Training"):
        state = env.reset()
        done = False
        
        while not done:
            action = agent.select_action(state)
            next_state, reward, done, _ = env.step(action)
            
            # Store the transition in the replay memory
            agent.memory.append((state, action, reward, next_state))
            
            # Train the agent
            agent.train()
            
            # Record the heading angle
            heading_history.append(state[0])
            
            state = next_state
    
    return heading_history

# Visualize the training results as an animation
def visualize_training(heading_history):
    fig, ax = plt.subplots(figsize=(10, 6))
    
    # Subsample the frames so the GIF stays a reasonable size
    frame_step = max(1, len(heading_history) // 200)
    frames = range(0, len(heading_history), frame_step)
    
    def update(frame):
        ax.clear()
        ax.plot(heading_history[:frame])
        ax.set_title("Heading angle over time")
        ax.set_xlabel("Time step")
        ax.set_ylabel("Heading angle (rad)")
    
    anim = FuncAnimation(fig, update, frames=frames, interval=50)
    anim.save('heading_control.gif', writer='pillow')
    plt.close()

# Main program
def main():
    # Train
    heading_history = train_sac_pid_heading_control()
    
    # Visualize
    visualize_training(heading_history)
    
    print("Training complete; the animation has been saved as heading_control.gif")

if __name__ == "__main__":
    main()

This code implements the following:

  1. A PID controller as the baseline control law
  2. A heading control environment
  3. Policy learning with the SAC (Soft Actor-Critic) algorithm
  4. A tqdm progress bar that shows training progress (see the snippet after this list for a per-episode reward readout)
  5. A matplotlib animation of the training process
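
If you also want the progress bar to report a live training metric, tqdm's set_postfix can display the latest episode reward. The snippet below is a minimal sketch of how the episode loop in train_sac_pid_heading_control could be adapted; the episode_reward accumulator is an addition that is not present in the code above.

progress = tqdm(range(num_episodes), desc="Training")
for episode in progress:
    state = env.reset()
    done = False
    episode_reward = 0.0  # hypothetical accumulator, not in the original code

    while not done:
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.memory.append((state, action, reward, next_state))
        agent.train()
        heading_history.append(state[0])
        episode_reward += reward
        state = next_state

    # Show the most recent episode reward on the progress bar
    progress.set_postfix(reward=f"{episode_reward:.2f}")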

To run this code, install the following dependencies (Pillow is needed because the animation is saved with the 'pillow' GIF writer):

pip install numpy torch gym matplotlib tqdm pillow

After it runs, the code will:

  • Train for 1000 episodes
  • Generate the heading_control.gif animation
  • Display the training progress
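
Before committing to the full 1000-episode run, you can do a quick sanity check of the environment with the PID controller alone (the SAC action set to zero). This is a minimal sketch that assumes the classes above are already defined in the same script or interactive session:

env = HeadingControlEnv()
state = env.reset()
print(f"Initial heading: {state[0]:.3f} rad")

# Roll out a short episode with only the PID controller acting (zero SAC action)
for t in range(50):
    state, reward, done, _ = env.step(np.array([0.0]))
    if done:
        break
print(f"Heading after {t + 1} steps: {state[0]:.3f} rad (target is 0)")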

Note: this is a simplified example; a real SAC implementation is considerably more involved. The code mainly demonstrates the basic idea of combining SAC with a PID controller.
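
For reference, a full SAC agent uses a stochastic Gaussian policy with tanh squashing, two Q-value critics with target networks, and an entropy bonus in both the critic targets and the policy loss. The sketch below shows only the stochastic actor and its entropy-regularized policy loss; it is an illustrative outline under those assumptions (the critics, target networks, and replay handling are omitted), not a drop-in replacement for the SACAgent above, and alpha and q_net are hypothetical names for components not defined in this code.

class GaussianPolicy(nn.Module):
    """Stochastic SAC actor: outputs a tanh-squashed Gaussian action."""
    def __init__(self, state_dim, action_dim, hidden=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
        )
        self.mean = nn.Linear(hidden, action_dim)
        self.log_std = nn.Linear(hidden, action_dim)

    def sample(self, state):
        h = self.net(state)
        mean = self.mean(h)
        log_std = self.log_std(h).clamp(-20, 2)
        dist = torch.distributions.Normal(mean, log_std.exp())
        x = dist.rsample()                      # reparameterized sample
        action = torch.tanh(x)                  # squash into [-1, 1]
        # Log-probability with the tanh change-of-variables correction
        log_prob = dist.log_prob(x) - torch.log(1 - action.pow(2) + 1e-6)
        return action, log_prob.sum(dim=-1, keepdim=True)

# Entropy-regularized policy loss (alpha is the temperature, q_net is a critic;
# both are hypothetical components not defined in the code above):
# new_actions, log_probs = policy.sample(states)
# policy_loss = (alpha * log_probs - q_net(states, new_actions)).mean()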