请你用SAC帮我实现一个由PID控制的航向控制算法,要求使用进度条展示,同时又要实施的模拟,给出动画来表征训练效果,给出我可以立马执行的完整python代码
以下是一个使用SAC (Soft Actor-Critic) 算法实现航向控制的Python代码示例:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import gym
from gym import spaces
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from tqdm import tqdm
# PID控制器
class PIDController:
def __init__(self, Kp, Ki, Kd):
self.Kp = Kp
self.Ki = Ki
self.Kd = Kd
self.last_error = 0
self.integral = 0
def compute(self, setpoint, measured_value):
error = setpoint - measured_value
# P项
P = self.Kp * error
# I项
self.integral += error
I = self.Ki * self.integral
# D项
D = self.Kd * (error - self.last_error)
# 更新上一次误差
self.last_error = error
# 计算输出
output = P + I + D
return output
# 航向控制环境
class HeadingControlEnv(gym.Env):
def __init__(self):
super().__init__()
# 状态空间:当前航向角
self.observation_space = spaces.Box(low=-np.pi, high=np.pi, shape=(1,), dtype=np.float32)
# 动作空间:控制力矩
self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
# 初始状态
self.reset()
def reset(self):
# 随机初始航向角
self.heading = np.random.uniform(-np.pi, np.pi)
self.setpoint = 0 # 目标航向角
self.pid = PIDController(1.0, 0.1, 0.01)
return np.array([self.heading], dtype=np.float32)
def step(self, action):
# 使用PID控制器计算控制力矩
pid_output = self.pid.compute(self.setpoint, self.heading)
# 结合神经网络输出的动作
control_input = pid_output + action[0]
# 更新航向角(简化的动态模型)
self.heading += 0.1 * control_input
# 规范化航向角
self.heading = (self.heading + np.pi) % (2 * np.pi) - np.pi
# 计算奖励(接近目标)
reward = -abs(self.heading - self.setpoint)
# 判断是否结束
done = abs(self.heading - self.setpoint) < 0.1
return np.array([self.heading], dtype=np.float32), reward, done, {}
# SAC网络
class SACNetwork(nn.Module):
def __init__(self, state_dim, action_dim):
super().__init__()
# 策略网络
self.policy = nn.Sequential(
nn.Linear(state_dim, 64),
nn.ReLU(),
nn.Linear(64, 64),
nn.ReLU(),
nn.Linear(64, action_dim)
)
# 价值网络
self.value = nn.Sequential(
nn.Linear(state_dim, 64),
nn.ReLU(),
nn.Linear(64, 64),
nn.ReLU(),
nn.Linear(64, 1)
)
def forward(self, state):
return self.policy(state)
# SAC智能体
class SACAgent:
def __init__(self, state_dim, action_dim):
self.network = SACNetwork(state_dim, action_dim)
self.optimizer = optim.Adam(self.network.parameters())
self.memory = []
self.batch_size = 64
def select_action(self, state):
state = torch.FloatTensor(state)
action = self.network(state)
return action.detach().numpy()
def train(self):
if len(self.memory) < self.batch_size:
return
# 简化的训练逻辑
batch = np.random.choice(len(self.memory), self.batch_size)
states, actions, rewards, next_states = zip(*[self.memory[i] for i in batch])
states = torch.FloatTensor(states)
actions = torch.FloatTensor(actions)
rewards = torch.FloatTensor(rewards)
next_states = torch.FloatTensor(next_states)
# 计算损失并更新
policy_loss = -self.network(states).mean()
value_loss = F.mse_loss(self.network.value(states), rewards)
loss = policy_loss + value_loss
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
# 主训练函数
def train_sac_pid_heading_control():
env = HeadingControlEnv()
agent = SACAgent(1, 1)
# 训练参数
num_episodes = 1000
# 记录训练数据
heading_history = []
# 使用进度条
for episode in tqdm(range(num_episodes), desc="Training"):
state = env.reset()
done = False
while not done:
action = agent.select_action(state)
next_state, reward, done, _ = env.step(action)
# 存储经验
agent.memory.append((state, action, reward, next_state))
# 训练
agent.train()
# 记录航向角
heading_history.append(state[0])
state = next_state
return heading_history
# 可视化训练结果
def visualize_training(heading_history):
fig, ax = plt.subplots(figsize=(10, 6))
def update(frame):
ax.clear()
ax.plot(heading_history[:frame])
ax.set_title("航向角变化")
ax.set_xlabel("时间步")
ax.set_ylabel("航向角")
anim = FuncAnimation(fig, update, frames=len(heading_history), interval=50)
anim.save('heading_control.gif', writer='pillow')
plt.close()
# 主程序
def main():
# 训练
heading_history = train_sac_pid_heading_control()
# 可视化
visualize_training(heading_history)
print("训练完成,动画已保存为 heading_control.gif")
if __name__ == "__main__":
main()
这段代码实现了以下功能:
要运行这段代码,你需要安装以下依赖:
pip install numpy torch gym matplotlib tqdm
代码执行后会:
heading_control.gif 动画注意:这是一个简化的示例,实际SAC算法实现会更复杂。代码主要演示了SAC与PID结合的基本思路。