Commit 27782e65 authored by Jayant Khatkar

simulation with different available actions function

parent f9afb912
@@ -90,6 +90,7 @@ class Tree:
             avail_actions_func,
             state_store_func,
             sim_selection_func,
+            sim_avail_actions_func,
             comm_n,
             robot_id,
             c_p=1):
@@ -98,6 +99,7 @@ class Tree:
         self.graph = nx.DiGraph()
         self.reward = reward_func
         self.available_actions = avail_actions_func
+        self.sim_available_actions = sim_avail_actions_func
         self.state_store = state_store_func
         self.sim_selection_func = sim_selection_func
         self.c_p = c_p
@@ -243,15 +245,16 @@ class Tree:
         best_rollout = None
         for i in range(nsims):
             temp_state = self.graph.node[start_node]["state"]
+            state[self.id] = temp_state
             d = 0 # depth
             while d<depth: # also breaks at no available options
                 d += 1
                 # Get the available actions
-                options = self.available_actions(
+                options = self.sim_available_actions(
                         self.data,
-                        temp_state,
+                        state,
                         self.id
                         )
@@ -265,9 +268,9 @@
                 # add that to the actions of the current robot
                 temp_state = self.state_store(self.data, temp_state, sim_action, self.id)
+                state[self.id] = temp_state
             # calculate the reward at the end of simulation
-            state[self.id] = temp_state
             rew = self.reward(self.data, state) \
                 - self.reward(self.data, self._null_state(state))