嗨,我正在尝试训练 DQN 来解决健身房的 Cartpole 问题。出于某种原因,损失看起来像这样(橙色线)。你们都可以看看我的代码并帮助解决这个问题吗?我已经对超参数进行了相当多的研究,所以我认为它们不是这里的问题。
class DQN(nn.Module):
def __init__(self, input_dim, output_dim):
super(DQN, self).__init__()
self.linear1 = nn.Linear(input_dim, 16)
self.linear2 = nn.Linear(16, 32)
self.linear3 = nn.Linear(32, 32)
self.linear4 = nn.Linear(32, output_dim)
def forward(self, x):
x = F.relu(self.linear1(x))
x = F.relu(self.linear2(x))
x = F.relu(self.linear3(x))
return self.linear4(x)
final_epsilon = 0.05
initial_epsilon = 1
epsilon_decay = 5000
global steps_done
steps_done = 0
def select_action(state):
global steps_done
sample = random.random()
eps_threshold = final_epsilon + (initial_epsilon - final_epsilon) * \
math.exp(-1. * steps_done / epsilon_decay) …
Run Code Online (Sandbox Code Playgroud)