Commit 60855847 authored by Jayant Khatkar's avatar Jayant Khatkar

added simulation

parent 5f2d10ab
......@@ -89,7 +89,7 @@ class Tree:
reward_func,
avail_actions_func,
state_store_func,
#sim_selection_func,
sim_selection_func,
comm_n,
robot_id,
c_p=1):
......@@ -99,7 +99,7 @@ class Tree:
self.reward = reward_func
self.available_actions = avail_actions_func
self.state_store = state_store_func
#self.sim_selection_func = sim_selection_func
self.sim_selection_func = sim_selection_func
self.c_p = c_p
self.id = robot_id
self.comms = {} # Plan with no robots initially
......@@ -238,48 +238,47 @@ class Tree:
self._expansion(start_node)
### SIMULATION
# avg_reward = 0
# best_reward = float("-Inf")
# best_rollout = None
# for i in range(nsims):
# temp_state = self.graph.node[start_node]["state"]
#
# d = 0 # depth
# while d<depth: # also breaks at no available options
# d += 1
#
# # Get the available actions
# options = self.available_actions(
# self.data,
# temp_state,
# self.id
# )
#
# # If no actions possible, simulation complete
# if len(options)==0:
# break
#
# # "randomly" choose 1 - function provided by user
# state[self.id] = temp_state
# sim_action = self.sim_selection_func(self.data, options, state)
#
# # add that to the actions of the current robot
# temp_state = self.state_store(self.data, temp_state, sim_action, self.id)
#
# # calculate the reward at the end of simulation
# rew = self.reward(self.data, temp_state) \
#
# # if best reward so far, store the rollout in the new node
# if rew > best_reward:
# best_reward = rew
# best_rollout = temp_state
temp_state = self._get_system_state(start_node)
avg_reward = self.reward(self.data, temp_state) \
- self.reward(self.data, self._null_state(temp_state))
self.graph.node[start_node]["mu"] = avg_reward
avg_reward = 0
best_reward = float("-Inf")
best_rollout = None
for i in range(nsims):
temp_state = self.graph.node[start_node]["state"]
d = 0 # depth
while d<depth: # also breaks at no available options
d += 1
# Get the available actions
options = self.available_actions(
self.data,
temp_state,
self.id
)
# If no actions possible, simulation complete
if len(options)==0:
break
# "randomly" choose 1 - function provided by user
#state[self.id] = temp_state
sim_action = self.sim_selection_func(self.data, options, temp_state)
# add that to the actions of the current robot
temp_state = self.state_store(self.data, temp_state, sim_action, self.id)
# calculate the reward at the end of simulation
state[self.id] = temp_state
rew = self.reward(self.data, state) \
- self.reward(self.data, self._null_state(state))
# if best reward so far, store the rollout in the new node
if rew > best_reward:
best_reward = rew
best_rollout = copy(temp_state)
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
# self.graph.node[start_node]["best_rollout"] = best_rollout
self.graph.node[start_node]["best_rollout"] = copy(best_rollout)
### BACKPROPAGATION
while start_node!=1: #while not root node
......@@ -287,7 +286,8 @@ class Tree:
start_node = self._parent(start_node)
self.graph.node[start_node]["mu"] = \
(gamma * self.graph.node[start_node]["mu"]*self.graph.node[start_node]["N"] + avg_reward)\
(gamma * self.graph.node[start_node]["mu"] * \
self.graph.node[start_node]["N"] + avg_reward) \
/(self.graph.node[start_node]["N"] + 1)
self.graph.node[start_node]["N"] = \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment