Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
13530934
baselines
Commits
f8663eaf
Commit
f8663eaf
authored
Sep 30, 2017
by
Elman Mansimov
Browse files
fixes acktr_cont issues
parent
699919f1
Changes
3
Show whitespace changes
Inline
Side-by-side
baselines/acktr/acktr_cont.py
View file @
f8663eaf
...
...
@@ -46,7 +46,7 @@ def rollout(env, policy, max_pathlength, animate=False, obfilter=None):
"action_dist"
:
np
.
array
(
ac_dists
),
"logp"
:
np
.
array
(
logps
)}
def
learn
(
env
,
policy
,
vf
,
gamma
,
lam
,
timesteps_per_batch
,
num_timesteps
,
animate
=
False
,
callback
=
None
,
optimizer
=
"adam"
,
desired_kl
=
0.002
):
animate
=
False
,
callback
=
None
,
desired_kl
=
0.002
):
obfilter
=
ZFilter
(
env
.
observation_space
.
shape
)
...
...
@@ -117,14 +117,16 @@ def learn(env, policy, vf, gamma, lam, timesteps_per_batch, num_timesteps,
# Policy update
do_update
(
ob_no
,
action_na
,
standardized_adv_n
)
min_stepsize
=
np
.
float32
(
1e-8
)
max_stepsize
=
np
.
float32
(
1e0
)
# Adjust stepsize
kl
=
policy
.
compute_kl
(
ob_no
,
oldac_dist
)
if
kl
>
desired_kl
*
2
:
logger
.
log
(
"kl too high"
)
U
.
eval
(
tf
.
assign
(
stepsize
,
stepsize
/
1.5
))
U
.
eval
(
tf
.
assign
(
stepsize
,
tf
.
maximum
(
min_stepsize
,
stepsize
/
1.5
))
)
elif
kl
<
desired_kl
/
2
:
logger
.
log
(
"kl too low"
)
U
.
eval
(
tf
.
assign
(
stepsize
,
stepsize
*
1.5
))
U
.
eval
(
tf
.
assign
(
stepsize
,
tf
.
minimum
(
max_stepsize
,
stepsize
*
1.5
))
)
else
:
logger
.
log
(
"kl just right!"
)
...
...
baselines/acktr/run_mujoco.py
View file @
f8663eaf
...
...
@@ -39,4 +39,4 @@ if __name__ == "__main__":
parser
.
add_argument
(
'--seed'
,
help
=
'RNG seed'
,
type
=
int
,
default
=
0
)
parser
.
add_argument
(
'--env'
,
help
=
'environment ID'
,
type
=
str
,
default
=
"Reacher-v1"
)
args
=
parser
.
parse_args
()
train
(
args
.
env
_id
,
num_timesteps
=
1e6
,
seed
=
args
.
seed
)
train
(
args
.
env
,
num_timesteps
=
1e6
,
seed
=
args
.
seed
)
baselines/acktr/value_functions.py
View file @
f8663eaf
...
...
@@ -13,7 +13,7 @@ class NeuralNetValueFunction(object):
wd_dict
=
{}
h1
=
tf
.
nn
.
elu
(
dense
(
X
,
64
,
"h1"
,
weight_init
=
U
.
normc_initializer
(
1.0
),
bias_init
=
0
,
weight_loss_dict
=
wd_dict
))
h2
=
tf
.
nn
.
elu
(
dense
(
h1
,
64
,
"h2"
,
weight_init
=
U
.
normc_initializer
(
1.0
),
bias_init
=
0
,
weight_loss_dict
=
wd_dict
))
vpred_n
=
dense
(
h2
,
1
,
"hfinal"
,
weight_init
=
U
.
normc_initializer
(
1.0
)
,
bias_init
=
0
,
weight_loss_dict
=
wd_dict
)[:,
0
]
vpred_n
=
dense
(
h2
,
1
,
"hfinal"
,
weight_init
=
None
,
bias_init
=
0
,
weight_loss_dict
=
wd_dict
)[:,
0
]
sample_vpred_n
=
vpred_n
+
tf
.
random_normal
(
tf
.
shape
(
vpred_n
))
wd_loss
=
tf
.
get_collection
(
"vf_losses"
,
None
)
loss
=
U
.
mean
(
tf
.
square
(
vpred_n
-
vtarg_n
))
+
tf
.
add_n
(
wd_loss
)
...
...
@@ -22,7 +22,7 @@ class NeuralNetValueFunction(object):
optim
=
kfac
.
KfacOptimizer
(
learning_rate
=
0.001
,
cold_lr
=
0.001
*
(
1
-
0.9
),
momentum
=
0.9
,
\
clip_kl
=
0.3
,
epsilon
=
0.1
,
stats_decay
=
0.95
,
\
async
=
1
,
kfac_update
=
2
,
cold_iter
=
50
,
\
weight_decay_dict
=
wd_dict
,
max_grad_norm
=
None
)
weight_decay_dict
=
wd_dict
,
max_grad_norm
=
1.0
)
vf_var_list
=
[]
for
var
in
tf
.
trainable_variables
():
if
"vf"
in
var
.
name
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment