Commit 04cd0dcf authored by Szymon Sidor, committed by GitHub
Browse files

Merge pull request #52 from farbeiza/patch-1

Effectively apply weights from the replay buffer
parents 248aad1c d76cd129
......@@ -89,6 +89,7 @@ def learn(env,
gamma=1.0,
target_network_update_freq=500,
prioritized_replay=False,
prioritized_importance_sampling=False,
prioritized_replay_alpha=0.6,
prioritized_replay_beta0=0.4,
prioritized_replay_beta_iters=None,
......@@ -232,6 +233,9 @@ def learn(env,
else:
obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(batch_size)
weights, batch_idxes = np.ones_like(rewards), None
if prioritized_importance_sampling:
td_errors = train(obses_t, actions, rewards, obses_tp1, dones, weights)
else:
td_errors = train(obses_t, actions, rewards, obses_tp1, dones, np.ones_like(rewards))
if prioritized_replay:
new_priorities = np.abs(td_errors) + prioritized_replay_eps
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment