Commit f0d8d4e0 authored by Jayant Khatkar

Roughly coded up; need to design a new test for the new interface

parent bac0caa0
@@ -17,7 +17,7 @@ class ActionDistribution:
Working with action sequences and their respective probability
To initialise, Inputs:
- X: list of action sequences
- NOTE: X is simply a state object returned by state_store.
You are expected to store the action sequence in this object
- q: probability of each action sequence (normalised in initialisation)
@@ -25,7 +25,7 @@
"""
def __init__(self, X, q):
# Action sequence as provided
self.X = X
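A minimal usage sketch of this interface (the sequences and weights below are invented; per the docstring, q is normalised in __init__, and random_action is the sampling helper used later by _get_system_state):

```python
# Hypothetical example, not part of the commit: X holds state objects
# (plain lists stand in for action sequences here), q holds raw weights.
seqs = [["a1", "a2"], ["a1", "a3"], ["a2"]]
dist = ActionDistribution(X=seqs, q=[2, 1, 1])  # q normalised to [0.5, 0.25, 0.25]
sample = dist.random_action()                   # one sequence, drawn according to q
```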
@@ -55,7 +55,7 @@ class Tree:
DecMCTS tree
To initialise, Inputs:
- data
- data required to calculate reward and available options
- reward
- This is a function which has inputs (data, state) and
returns the GLOBAL reward to be maximised
@@ -68,6 +68,9 @@ class Tree:
(data, parent_state, action) and returns an object to
store in the node.
- Root Node has parent state None and action None.
- sim_selection_func
- This is a function which chooses one of the available options
during simulation (can be random or more advanced; see the
sketch after this hunk)
- c_p
- exploration multiplier (number between 0 and 1)
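To make the expected signatures concrete, here is an illustrative set of user-supplied callbacks. The bodies are invented placeholders; only the argument lists follow the docstring and the call sites visible in this diff:

```python
import random

def reward_func(data, state):
    # global reward over the joint state {robot_id: stored state object}
    return float(sum(len(seq) for seq in state.values()))

def avail_actions_func(data, state, robot_id):
    # options this robot can take from `state`; fixed set as a placeholder
    return ["a", "b", "c"]

def state_store_func(data, parent_state, action):
    # object stored in a node; the root has parent_state None and action None
    if parent_state is None:
        return []
    return parent_state + [action]

def sim_selection_func(data, options, state):
    # the callback added by this commit: pick one option during rollout
    return random.choice(options)
```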
@@ -81,13 +84,14 @@ class Tree:
reward/available actions in coordination with others
"""
def __init__(self,
data,
reward_func,
avail_actions_func,
state_store_func,
sim_selection_func,
comm_n,
robot_id,
c_p=1):
self.data = data
@@ -95,6 +99,7 @@ class Tree:
self.reward = reward_func
self.available_actions = avail_actions_func
self.state_store = state_store_func
self.sim_selection_func = sim_selection_func
self.c_p = c_p
self.id = robot_id
self.comms = {} # Plan with no robots initially
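For context, instantiating the tree with the new argument might look like this. The callbacks are the stubs sketched earlier; the comm_n and robot_id values are invented, and reading comm_n as the number of communicated action sequences is an assumption:

```python
# Hypothetical instantiation showing where sim_selection_func now slots in
tree = Tree(data={},
            reward_func=reward_func,
            avail_actions_func=avail_actions_func,
            state_store_func=state_store_func,
            sim_selection_func=sim_selection_func,
            comm_n=5,    # assumed: number of action sequences to communicate
            robot_id=0,
            c_p=1)
```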
@@ -163,7 +168,7 @@ class Tree:
def _get_system_state(self, node_id):
"""
Randomly select 1 path taken by every other robot & path taken by
this robot to get to this node
Returns tuple where first element is path of current robot,
@@ -173,7 +178,7 @@ class Tree:
system_state = {k:self.comms[k].random_action() for k in self.comms}
system_state[self.id] = self.graph.node[node_id]["state"]
return system_state
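The returned system_state is a dict keyed by robot id; a hypothetical example for three robots (ids and per-robot states invented, values are whatever state_store returns):

```python
# Illustrative shape of system_state for self.id == 0
system_state = {
    1: ["c"],        # drawn from robot 1's communicated distribution
    2: ["a", "b"],   # drawn from robot 2's communicated distribution
    0: ["a", "d"],   # this robot's state stored at node_id
}
```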
def _null_state(self, state):
@@ -225,23 +230,48 @@ class Tree:
start_node = self._select(self._childNodes(start_node))
### EXPANSION
self._expansion(start_node)
### SIMULATION
avg_reward = 0
best_reward = float("-Inf")
best_rollout = None
for i in range(nsims):
temp_state = self.graph.node[start_node]["state"]
simulation_complete = False
while not simulation_complete:
# TODO: HOW TO CHECK IF SIMULATION IS COMPLETE
simulation_complete = True  # placeholder: rollout currently takes one step
# Get the available actions
options = self.available_actions(
self.data,
temp_state,
self.id
)
# "randomly" choose 1 - function provided by user
state[self.id] = temp_state
sim_action = self.sim_selection_func(self.data, options, state)
# add that to the actions of the current robot
temp_state = self.state_store(self.data, temp_state, sim_action)
# calculate the reward at the end of simulation
state[self.id] = temp_state  # evaluate the joint state, incl. this rollout
rew = self.reward(self.data, state) \
- self.reward(self.data, self._null_state(state))
avg_reward += rew / nsims
# if best reward so far, store the rollout in the new node
if rew > best_reward:
best_reward = rew
# TODO: STORE THE BEST ROLLOUT IN THE NODE
best_rollout = temp_state
#state[self.id] = self.graph.node[start_node]["state"]
#avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
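One possible resolution for the termination TODO in the loop above (an assumption, not something this commit decides): treat the rollout as complete once avail_actions_func returns no options:

```python
# Hypothetical termination check inside the rollout loop
options = self.available_actions(self.data, temp_state, self.id)
if not options:
    simulation_complete = True  # the action sequence cannot be extended
else:
    state[self.id] = temp_state
    sim_action = self.sim_selection_func(self.data, options, state)
    temp_state = self.state_store(self.data, temp_state, sim_action)
```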
@@ -269,7 +299,7 @@ class Tree:
def receive_comms(self, comms_in, robot_id):
"""
Save data which has been communicated to this tree
Only receives from one robot at a time, call once
for each robot
Inputs:
......
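Per the docstring, communication is one call per peer; a hypothetical exchange round (the `incoming` mapping name is invented):

```python
# incoming: {robot_id: ActionDistribution} received from each peer robot
for rid, dist in incoming.items():
    tree.receive_comms(dist, rid)
```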