Commit 5824ee43 authored by Jayant Khatkar

updated state interface so user has more control and can store whatever

parent d934c926
@@ -17,7 +17,9 @@ class ActionDistribution:
    Working with action sequences and their respective probabilities
    To initialise, Inputs:
-   - X: list of action sequences
+   - X: list of action sequences
+       - NOTE: X is simply a state object returned by state_store.
+         You are expected to store the action sequence in this object
    - q: probability of each action sequence (normalised in initialisation)
    """
@@ -57,14 +59,15 @@ class Tree:
    - reward
        - This is a function which has inputs (data, state) and
          returns the GLOBAL reward to be maximised
        - It may be the case that the reward function can only be
          calculated once the simulation is complete, in which
          case it should return "None" while the simulation is
          incomplete (see the sketch below)
        - MUST RETURN A POSITIVE VALUE
    - available_actions
        - This is a function which has inputs (data, state) and
          returns the possible actions which can be taken
+   - state_store
+       - This is a function which has inputs
+         (data, parent_state, action) and returns an object to
+         store in the node.
+       - The root node has parent_state None and action None.
    - c_p
        - exploration multiplier (number between 0 and 1)
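
The "return None while incomplete" clause is easy to miss, so here is a hedged sketch of a reward function honouring it. The state attributes and the data["plan_length"] key are assumptions borrowed from the example script at the bottom of this commit, not part of the API:

    def makespan_reward(data, states):
        # states: dict mapping robot_id -> state object (see state_store)
        if any(len(s.action_seq) < data["plan_length"] for s in states.values()):
            return None                         # simulation incomplete
        # MUST RETURN A POSITIVE VALUE: shorter cumulative plans score higher
        return 1.0 / (1.0 + max(s.cumulative_sum for s in states.values()))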
@@ -82,16 +85,16 @@
            data,
            reward_func,
            avail_actions_func,
+           state_store_func,
            comm_n,
            robot_id,
-           c_p=1,
-           time_func=None):
+           c_p=1):
        self.data = data
        self.graph = nx.DiGraph()
        self.reward = reward_func
        self.available_actions = avail_actions_func
-       self.time_func = time_func
+       self.state_store = state_store_func
        self.c_p = c_p
        self.id = robot_id
        self.comms = {} # Plan with no robots initially
@@ -104,8 +107,7 @@
        self.graph.add_node(1,
                mu=0,
                N=0,
-               action_seq=[],
-               time=0
+               state=self.state_store(self.data, None, None)
                )
        self._expansion(1)
@@ -153,13 +155,13 @@
        temp = nx.get_node_attributes(self.graph, "mu")
        top_n_nodes = sorted(temp, key=temp.get, reverse=True)[:self.comm_n]
-       X = [self.graph.node[n]["action_seq"] for n in top_n_nodes]
+       X = [self.graph.node[n]["state"] for n in top_n_nodes]
        q = [self.graph.node[n]["mu"]**2 for n in top_n_nodes]
        self.my_act_dist = ActionDistribution(X, q)
        return True

-   def _get_state(self, node_id):
+   def _get_system_state(self, node_id):
        """
        Randomly select 1 path taken by every other robot & the path taken
        by this robot to get to this node
@@ -168,16 +170,16 @@
        and the second element is a dictionary of the other paths
        """
-       node_path = self.graph.node[node_id]["action_seq"]
-       all_paths = {k: self.comms[k].random_action() for k in self.comms}
-       all_paths[self.id] = node_path
-       return all_paths
+       robot_state = self.graph.node[node_id]["state"]
+       system_state = {k: self.comms[k].random_action() for k in self.comms}
+       system_state[self.id] = robot_state
+       return system_state

    def _null_state(self, state):
        temp = deepcopy(state)
-       temp[self.id] = []
+       temp[self.id] = self.graph.node[1]["state"]  # null state: robot still at the root node
        return temp
@@ -188,7 +190,7 @@
        Init step.
        """
-       state = self._get_state(start_node)
+       state = self._get_system_state(start_node)
        options = self.available_actions(self.data, state, self.id)
        # create empty nodes underneath the node being expanded
@@ -197,20 +199,16 @@
        self.graph.add_node(len(self.graph)+1,
                mu = 0,
                N = 0,
-               action_seq=self.graph.node[start_node]["action_seq"] + [o],
-               time=0
+               state=self.state_store(self.data, self.graph.node[start_node]["state"], o)
                )
        self.graph.add_edge(start_node, len(self.graph))
-       if self.time_func != None:
-           # TODO Do the time calculations
-           pass
        return True

-   def grow(self, nsims=10, ntime=None, gamma = 0.9):
+   def grow(self, nsims=10, gamma = 0.9):
        """
        Grow Tree by one node
        """
@@ -231,7 +229,7 @@
            # SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
            pass
-       state = self._get_state(start_node)
+       state = self._get_system_state(start_node)
        avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
        self.graph.node[start_node]["mu"] = avg_reward
        self.graph.node[start_node]["N"] = 1
......
from DecMCTS import Tree
from copy import deepcopy

data = {}

+class State:
+    def __init__(self, act_seq, cum_sum):
+        self.action_seq = act_seq
+        self.cumulative_sum = cum_sum
+
+def state_storer(data, parent_state, action):
+    if parent_state is None:
+        return State([], 0)  # this state is also used as the null state when calculating local reward
+    state = deepcopy(parent_state)
+    state.action_seq.append(action)
+    state.cumulative_sum = state.cumulative_sum + action
+    return state
+
-def avail_actions(data, state, robot_id):
+def avail_actions(data, states, robot_id):
    return [1,2,3,4,5]

-def reward(dat, state):
-    each_robot_sum = [sum(state[a]) for a in state]
+def reward(dat, states):
+    print(states)
+    each_robot_sum = [states[robot].cumulative_sum for robot in states]
    return sum(each_robot_sum)

comm_n = 5
-tree1 = Tree(data, reward, avail_actions, comm_n, 1)
+tree1 = Tree(data, reward, avail_actions, state_storer, comm_n, 1)
-tree2 = Tree(data, reward, avail_actions, comm_n, 2)
+tree2 = Tree(data, reward, avail_actions, state_storer, comm_n, 2)

for i in range(350):
    tree1.grow()
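
As a quick, hypothetical sanity check of the two contracts exercised above — the root-state convention of state_storer and the marginal ("null state") utility computed in Tree.grow():

    root = state_storer(data, None, None)    # root node: parent_state=None, action=None
    s1 = state_storer(data, root, 3)
    s2 = state_storer(data, s1, 5)
    assert s2.action_seq == [3, 5] and s2.cumulative_sum == 8

    # Local utility of robot 1 = global reward minus the reward with robot 1
    # held at the root ("null") state, mirroring the subtraction in grow()
    states = {1: s2, 2: state_storer(data, root, 4)}
    null_states = dict(states)
    null_states[1] = root
    local_utility = reward(data, states) - reward(data, null_states)   # 12 - 4 = 8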