Commit 902ffcb7 authored by John Schulman's avatar John Schulman Committed by GitHub
Browse files

Merge pull request #120 from hamzamerzic/tensorflow_global_variable

Deprecated VARIABLES -> GLOBAL_VARIABLES.
parents 4e2a570e a7320b80
......@@ -18,7 +18,7 @@ class CnnPolicy(object):
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
x = ob / 255.0
if kind == 'small': # from A3C paper
x = tf.nn.relu(U.conv2d(x, 16, "l1", [8, 8], [4, 4], pad="VALID"))
......@@ -49,7 +49,7 @@ class CnnPolicy(object):
ac1, vpred1 = self._act(stochastic, ob[None])
return ac1[0], vpred1[0]
def get_variables(self):
return tf.get_collection(tf.GraphKeys.VARIABLES, self.scope)
return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.scope)
def get_trainable_variables(self):
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
def get_initial_state(self):
......
......@@ -18,7 +18,7 @@ class MlpPolicy(object):
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
with tf.variable_scope("obfilter"):
self.ob_rms = RunningMeanStd(shape=ob_space.shape)
......@@ -27,12 +27,12 @@ class MlpPolicy(object):
for i in range(num_hid_layers):
last_out = tf.nn.tanh(U.dense(last_out, hid_size, "vffc%i"%(i+1), weight_init=U.normc_initializer(1.0)))
self.vpred = U.dense(last_out, 1, "vffinal", weight_init=U.normc_initializer(1.0))[:,0]
last_out = obz
for i in range(num_hid_layers):
last_out = tf.nn.tanh(U.dense(last_out, hid_size, "polfc%i"%(i+1), weight_init=U.normc_initializer(1.0)))
if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
mean = U.dense(last_out, pdtype.param_shape()[0]//2, "polfinal", U.normc_initializer(0.01))
mean = U.dense(last_out, pdtype.param_shape()[0]//2, "polfinal", U.normc_initializer(0.01))
logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2], initializer=tf.zeros_initializer())
pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
else:
......@@ -51,7 +51,7 @@ class MlpPolicy(object):
ac1, vpred1 = self._act(stochastic, ob[None])
return ac1[0], vpred1[0]
def get_variables(self):
return tf.get_collection(tf.GraphKeys.VARIABLES, self.scope)
return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.scope)
def get_trainable_variables(self):
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
def get_initial_state(self):
......
......@@ -18,7 +18,7 @@ class CnnPolicy(object):
sequence_length = None
ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))
obscaled = ob / 255.0
with tf.variable_scope("pol"):
......@@ -49,7 +49,7 @@ class CnnPolicy(object):
ac1, vpred1 = self._act(stochastic, ob[None])
return ac1[0], vpred1[0]
def get_variables(self):
return tf.get_collection(tf.GraphKeys.VARIABLES, self.scope)
return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.scope)
def get_trainable_variables(self):
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
def get_initial_state(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment