Skip to content
Snippets Groups Projects
Commit 60855847 authored by Jayant Khatkar
Browse files

added simulation

parent 5f2d10ab
No related merge requests found
...@@ -89,7 +89,7 @@ class Tree: ...@@ -89,7 +89,7 @@ class Tree:
reward_func, reward_func,
avail_actions_func, avail_actions_func,
state_store_func, state_store_func,
#sim_selection_func, sim_selection_func,
comm_n, comm_n,
robot_id, robot_id,
c_p=1): c_p=1):
...@@ -99,7 +99,7 @@ class Tree: ...@@ -99,7 +99,7 @@ class Tree:
self.reward = reward_func self.reward = reward_func
self.available_actions = avail_actions_func self.available_actions = avail_actions_func
self.state_store = state_store_func self.state_store = state_store_func
#self.sim_selection_func = sim_selection_func self.sim_selection_func = sim_selection_func
self.c_p = c_p self.c_p = c_p
self.id = robot_id self.id = robot_id
self.comms = {} # Plan with no robots initially self.comms = {} # Plan with no robots initially
...@@ -238,48 +238,47 @@ class Tree: ...@@ -238,48 +238,47 @@ class Tree:
self._expansion(start_node) self._expansion(start_node)
### SIMULATION ### SIMULATION
# avg_reward = 0 avg_reward = 0
# best_reward = float("-Inf") best_reward = float("-Inf")
# best_rollout = None best_rollout = None
# for i in range(nsims): for i in range(nsims):
# temp_state = self.graph.node[start_node]["state"] temp_state = self.graph.node[start_node]["state"]
#
# d = 0 # depth d = 0 # depth
# while d<depth: # also breaks at no available options while d<depth: # also breaks at no available options
# d += 1 d += 1
#
# # Get the available actions # Get the available actions
# options = self.available_actions( options = self.available_actions(
# self.data, self.data,
# temp_state, temp_state,
# self.id self.id
# ) )
#
# # If no actions possible, simulation complete # If no actions possible, simulation complete
# if len(options)==0: if len(options)==0:
# break break
#
# # "randomly" choose 1 - function provided by user # "randomly" choose 1 - function provided by user
# state[self.id] = temp_state #state[self.id] = temp_state
# sim_action = self.sim_selection_func(self.data, options, state) sim_action = self.sim_selection_func(self.data, options, temp_state)
#
# # add that to the actions of the current robot # add that to the actions of the current robot
# temp_state = self.state_store(self.data, temp_state, sim_action, self.id) temp_state = self.state_store(self.data, temp_state, sim_action, self.id)
#
# # calculate the reward at the end of simulation # calculate the reward at the end of simulation
# rew = self.reward(self.data, temp_state) \ state[self.id] = temp_state
# rew = self.reward(self.data, state) \
# # if best reward so far, store the rollout in the new node - self.reward(self.data, self._null_state(state))
# if rew > best_reward:
# best_reward = rew # if best reward so far, store the rollout in the new node
# best_rollout = temp_state if rew > best_reward:
temp_state = self._get_system_state(start_node) best_reward = rew
avg_reward = self.reward(self.data, temp_state) \ best_rollout = copy(temp_state)
- self.reward(self.data, self._null_state(temp_state)) self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1 self.graph.node[start_node]["N"] = 1
# self.graph.node[start_node]["best_rollout"] = best_rollout self.graph.node[start_node]["best_rollout"] = copy(best_rollout)
### BACKPROPAGATION ### BACKPROPAGATION
while start_node!=1: #while not root node while start_node!=1: #while not root node
...@@ -287,7 +286,8 @@ class Tree: ...@@ -287,7 +286,8 @@ class Tree:
start_node = self._parent(start_node) start_node = self._parent(start_node)
self.graph.node[start_node]["mu"] = \ self.graph.node[start_node]["mu"] = \
(gamma * self.graph.node[start_node]["mu"]*self.graph.node[start_node]["N"] + avg_reward)\ (gamma * self.graph.node[start_node]["mu"] * \
self.graph.node[start_node]["N"] + avg_reward) \
/(self.graph.node[start_node]["N"] + 1) /(self.graph.node[start_node]["N"] + 1)
self.graph.node[start_node]["N"] = \ self.graph.node[start_node]["N"] = \
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment