Commit 4862140c authored by Jan Humplik
Browse files

Use standardized advantages in trpo.

parent df82a15f
@@ -207,7 +207,7 @@ def learn(env, policy_func, *,
 if hasattr(pi, "ret_rms"): pi.ret_rms.update(tdlamret)
 if hasattr(pi, "ob_rms"): pi.ob_rms.update(ob) # update running mean/std for policy
-args = seg["ob"], seg["ac"], seg["adv"]
+args = seg["ob"], seg["ac"], atarg
 fvpargs = [arr[::5] for arr in args]
 def fisher_vector_product(p):
 return allmean(compute_fvp(p, *fvpargs)) + cg_damping * p
...@@ -288,4 +288,4 @@ def learn(env, policy_func, *, ...@@ -288,4 +288,4 @@ def learn(env, policy_func, *,
logger.dump_tabular() logger.dump_tabular()
def flatten_lists(listoflists): def flatten_lists(listoflists):
return [el for list_ in listoflists for el in list_] return [el for list_ in listoflists for el in list_]
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment