Commit f13c1c7f authored by Jayant Khatkar

simulation put on hold

parent 38fed5df
@@ -233,47 +233,49 @@ class Tree:
         self._expansion(start_node)
         ### SIMULATION
-        avg_reward = 0
-        best_reward = float("-Inf")
-        best_rollout = None
-        for i in range(nsims):
-            temp_state = self.graph.node[start_node]["state"]
-
-            d = 0 # depth
-            while d < depth: # also breaks at no available options
-                d += 1
-
-                # Get the available actions
-                options = self.available_actions(
-                        self.data,
-                        temp_state,
-                        self.id
-                        )
-
-                # If no actions possible, simulation complete
-                if len(options) == 0:
-                    break
-
-                # "randomly" choose 1 - function provided by user
-                state[self.id] = temp_state
-                sim_action = self.sim_selection_func(self.data, options, state)
-
-                # add that to the actions of the current robot
-                temp_state = self.state_store(self.data, temp_state, sim_action)
-
-            # calculate the reward at the end of simulation
-            rew = self.reward(self.data, temp_state) \
-                    - self.reward(self.data, self._null_state(temp_state))
-            avg_reward += rew/nsims
-
-            # if best reward so far, store the rollout in the new node
-            if rew > best_reward:
-                best_reward = rew
-                best_rollout = temp_state
+        # avg_reward = 0
+        # best_reward = float("-Inf")
+        # best_rollout = None
+        # for i in range(nsims):
+        #     temp_state = self.graph.node[start_node]["state"]
+        #
+        #     d = 0 # depth
+        #     while d < depth: # also breaks at no available options
+        #         d += 1
+        #
+        #         # Get the available actions
+        #         options = self.available_actions(
+        #                 self.data,
+        #                 temp_state,
+        #                 self.id
+        #                 )
+        #
+        #         # If no actions possible, simulation complete
+        #         if len(options) == 0:
+        #             break
+        #
+        #         # "randomly" choose 1 - function provided by user
+        #         state[self.id] = temp_state
+        #         sim_action = self.sim_selection_func(self.data, options, state)
+        #
+        #         # add that to the actions of the current robot
+        #         temp_state = self.state_store(self.data, temp_state, sim_action)
+        #
+        #     # calculate the reward at the end of simulation
+        #     rew = self.reward(self.data, temp_state) \
+        #             - self.reward(self.data, self._null_state(temp_state))
+        #     avg_reward += rew/nsims
+        #
+        #     # if best reward so far, store the rollout in the new node
+        #     if rew > best_reward:
+        #         best_reward = rew
+        #         best_rollout = temp_state
+        # evaluate the expanded node's state directly while rollouts are on hold
+        temp_state = self.graph.node[start_node]["state"]
+        avg_reward = self.reward(self.data, temp_state) \
+                - self.reward(self.data, self._null_state(temp_state))
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
self.graph.node[start_node]["best_rollout"] = best_rollout
+        # self.graph.node[start_node]["best_rollout"] = best_rollout
         ### BACKPROPAGATION
         while start_node != 1: # while not root node
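
For reference, the rollout being put on hold follows the standard MCTS simulation pattern: from the expanded node's state, repeatedly apply one of the available actions (chosen by a user-supplied selection function) up to a fixed depth, then score the terminal state against a null baseline, averaging over `nsims` runs and keeping the best rollout seen. Below is a minimal self-contained sketch of that pattern; `available_actions`, `sim_selection_func`, `state_store`, and `reward` here are hypothetical toy stubs, not the real user-supplied callbacks, which also take `self.data` and a robot id.

```python
import random

# Hypothetical stand-ins for the user-supplied callbacks on Tree.
def available_actions(state):
    # actions shrink as the rollout deepens, so rollouts can end early
    return [a for a in range(5) if a not in state]

def sim_selection_func(options):
    # "randomly" choose 1, as in the on-hold loop
    return random.choice(options)

def state_store(state, action):
    # a state here is just the list of actions taken so far
    return state + [action]

def reward(state):
    return float(sum(state))

def rollout(start_state, nsims=10, depth=3):
    """Average and best reward over nsims random rollouts of bounded depth."""
    avg_reward = 0.0
    best_reward = float("-inf")
    best_rollout = None
    for _ in range(nsims):
        temp_state = list(start_state)
        for _ in range(depth):
            options = available_actions(temp_state)
            if not options:          # no actions possible: simulation complete
                break
            action = sim_selection_func(options)
            temp_state = state_store(temp_state, action)
        # score against the empty ("null") state, as the Tree code does
        rew = reward(temp_state) - reward([])
        avg_reward += rew / nsims
        if rew > best_reward:        # keep the best rollout for reuse
            best_reward = rew
            best_rollout = temp_state
    return avg_reward, best_rollout

if __name__ == "__main__":
    print(rollout([0]))
```

With the loop on hold, the commit instead scores the expanded state once against the null baseline, dropping both the averaging over `nsims` and the `best_rollout` bookkeeping.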
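
The hunk cuts off at backpropagation. Since a freshly expanded node stores `mu` (its mean reward estimate) and `N = 1` (its visit count), the `while start_node != 1` loop is presumably the usual MCTS backup that walks ancestors up to the root, bumping each visit count and folding the new reward into the running mean. A sketch under that assumption; the plain `nodes` and `parent` dicts are hypothetical stand-ins for the networkx node attributes used in the real code.

```python
def backpropagate(nodes, parent, leaf, reward):
    """Incremental-mean backup from leaf to root (node 1), assuming each
    node dict holds a visit count N and a mean reward mu."""
    node = leaf
    while node != 1:                 # while not root node, as in the diff
        node = parent[node]          # step up one level
        n = nodes[node]
        n["N"] += 1
        # incremental mean update: mu += (reward - mu) / N
        n["mu"] += (reward - n["mu"]) / n["N"]

# usage: a three-node chain 1 -> 2 -> 3, backing up a reward from node 3
nodes = {1: {"mu": 0.0, "N": 2}, 2: {"mu": 0.5, "N": 1}, 3: {"mu": 2.0, "N": 1}}
parent = {2: 1, 3: 2}
backpropagate(nodes, parent, leaf=3, reward=2.0)
print(nodes[1], nodes[2])
```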