Commit 5824ee43 authored by Jayant Khatkar

updated state interface so user has more control and can store whatever

parent d934c926
......@@ -17,7 +17,9 @@ class ActionDistribution:
Working with action sequences and their respective probability
To initialise, Inputs:
- X: list of action sequences
- NOTE: X is simply a state object returned by state_store.
You are expected to store the action sequence in this object
- q: probability of each action sequence (normalised in initialisation)
"""
......@@ -57,14 +59,15 @@ class Tree:
- reward
- This is a function which has inputs (data, state) and
returns the GLOBAL reward to be maximised
- It may be the case that the reward function can only
be calculated once the simulation is complete, in which
case it should return "None" while the simulation is incomplete
- MUST RETURN POSITIVE VALUE
- available_actions
- This is a function which has inputs (data, state, robot_id) and
returns the possible actions which can be taken
- state_store
- This is a function which has inputs
(data, parent_state, action) and returns an object to
store in the node.
- The root node has parent_state None and action None.
- c_p
- exploration multiplier (number between 0 and 1)
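A rough sketch of the three user-supplied callbacks described above (signatures follow this docstring and the example script at the bottom of the commit; the bodies are placeholders, not part of the commit):

def reward(data, state):
    # GLOBAL reward to be maximised; must be positive.
    # May return None while the simulation is incomplete.
    return 1.0

def available_actions(data, state, robot_id):
    # Possible actions for this robot from the given state.
    return [0, 1, 2]

def state_store(data, parent_state, action):
    # Object stored in each node; the root is created with parent_state=None, action=None.
    if parent_state is None:
        return []
    return parent_state + [action]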
......@@ -82,16 +85,16 @@ class Tree:
data,
reward_func,
avail_actions_func,
state_store_func,
comm_n,
robot_id,
c_p=1,
time_func=None):
c_p=1):
self.data = data
self.graph = nx.DiGraph()
self.reward = reward_func
self.available_actions = avail_actions_func
self.time_func = time_func
self.state_store = state_store_func
self.c_p = c_p
self.id = robot_id
self.comms = {} # Plan with no robots initially
......@@ -104,8 +107,7 @@ class Tree:
self.graph.add_node(1,
mu=0,
N=0,
action_seq=[],
time=0
state=self.state_store(self.data, None, None)
)
self._expansion(1)
......@@ -153,13 +155,13 @@ class Tree:
temp=nx.get_node_attributes(self.graph, "mu")
top_n_nodes = sorted(temp, key=temp.get, reverse=True)[:self.comm_n]
X = [self.graph.node[n]["action_seq"] for n in top_n_nodes]
X = [self.graph.node[n]["state"] for n in top_n_nodes]
q = [self.graph.node[n]["mu"]**2 for n in top_n_nodes]
self.my_act_dist = ActionDistribution(X,q)
return True
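For clarity, the q weights above are squared node values (mu**2), which ActionDistribution then normalises in its initialiser. A quick numeric illustration with made-up values:

mu = [2.0, 1.0, 1.0]               # hypothetical node values
q = [m**2 for m in mu]             # [4.0, 1.0, 1.0]
q_norm = [x / sum(q) for x in q]   # [0.666..., 0.166..., 0.166...] after normalisation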
def _get_state(self, node_id):
def _get_system_state(self, node_id):
"""
Randomly select 1 path taken by every other robot & path taken by
this robot to get to this node
......@@ -168,16 +170,16 @@ class Tree:
and second element is a dictionary of the other paths
"""
node_path = self.graph.node[node_id]["action_seq"]
all_paths = {k:self.comms[k].random_action() for k in self.comms}
all_paths[self.id] = node_path
robot_state = self.graph.node[node_id]["state"]
system_state = {k:self.comms[k].random_action() for k in self.comms}
system_state[self.id] = robot_state
return all_paths
return system_state
def _null_state(self, state):
temp = deepcopy(state)
temp[self.id] = []
temp[self.id] = self.graph.node[1]["state"] # Null state: as if this robot were still at the root node
return temp
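To make the data shapes concrete, a small illustration (not part of the commit) of what _get_system_state returns and how _null_state modifies it, using plain lists as stand-in states for a robot with id 1:

system_state = {
    1: ["a", "b"],   # this robot's state at the selected node
    2: ["c"],        # a state sampled from robot 2's communicated ActionDistribution
}
null_state = dict(system_state)
null_state[1] = []   # reset to the root-node state, as if this robot had taken no actions yet
print(null_state)    # {1: [], 2: ['c']}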
......@@ -188,7 +190,7 @@ class Tree:
Init step.
"""
state = self._get_state(start_node)
state = self._get_system_state(start_node)
options = self.available_actions(self.data, state, self.id)
# create empty nodes underneath the node being expanded
......@@ -197,20 +199,16 @@ class Tree:
self.graph.add_node(len(self.graph)+1,
mu = 0,
N = 0,
action_seq=self.graph.node[start_node]["action_seq"] + [o],
time=0
state=self.state_store(self.data, self.graph.node[start_node]["state"], o)
)
self.graph.add_edge(start_node, len(self.graph))
if self.time_func != None:
# TODO Do the time calculations
pass
return True
def grow(self, nsims=10, ntime=None, gamma = 0.9):
def grow(self, nsims=10, gamma = 0.9):
"""
Grow Tree by one node
"""
......@@ -231,7 +229,7 @@ class Tree:
# SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
pass
state = self._get_state(start_node)
state = self._get_system_state(start_node)
avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
......
from DecMCTS import Tree
from copy import deepcopy
data = {}
def avail_actions(data, state, robot_id):
class State:
    def __init__(self, act_seq, cum_sum):
        self.action_seq = act_seq
        self.cumulative_sum = cum_sum
def state_storer(data, parent_state, action):
    if parent_state == None:
        return State([],0) # This state is also used as the null action when calculating local reward
    state = deepcopy(parent_state)
    state.action_seq.append(action)
    state.cumulative_sum = state.cumulative_sum + action
    return state
def avail_actions(data, states, robot_id):
    return [1,2,3,4,5]
def reward(dat, state):
    each_robot_sum = [sum(state[a]) for a in state]
def reward(dat, states):
    print(states)
    each_robot_sum = [states[robot].cumulative_sum for robot in states]
    return sum(each_robot_sum)
comm_n = 5
tree1 = Tree(data, reward, avail_actions, state_storer, comm_n, 1)
tree1 = Tree(data, reward, avail_actions, comm_n, 1)
tree2 = Tree(data, reward, avail_actions, comm_n, 2)
tree2 = Tree(data, reward, avail_actions, state_storer, comm_n, 2)
for i in range(350):
    tree1.grow()
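For clarity, a quick manual walk through state_storer (illustrative only) showing how each node's stored object accumulates its action sequence and cumulative sum:

s0 = state_storer(data, None, None)   # root state: State([], 0)
s1 = state_storer(data, s0, 3)        # s1.action_seq == [3],    s1.cumulative_sum == 3
s2 = state_storer(data, s1, 5)        # s2.action_seq == [3, 5], s2.cumulative_sum == 8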
......