Commit 5824ee43 authored by Jayant Khatkar

updated state interface so user has more control and can store whatever

parent d934c926
......@@ -17,7 +17,9 @@ class ActionDistribution:
Working with action sequences and their respective probability
To initialise, Inputs:
- X: list of action sequences
- NOTE: X is simply a state object returned by state_store.
You are expected to store the action sequence in this object
- q: probability of each action sequence (normalised in initialisation)
"""
......@@ -57,14 +59,15 @@ class Tree:
- reward
- This is a function which has inputs (data, state) and
returns the GLOBAL reward to be maximised
- It may be the case that the reward function can only
be calculated once the simulation is complete, in which
case it should return "None" while the simulation is incomplete
- MUST RETURN POSITIVE VALUE
- available_actions
- This is a function which has inputs (data, state, robot_id) and
returns the possible actions which can be taken
- state_store
- This is a function which has inputs
(data, parent_state, action) and returns an object to
store in the node.
- The root node has parent_state None and action None.
- c_p
- exploration multiplier (number between 0 and 1)
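A rough sketch of the three user-supplied callbacks described above (signatures follow this docstring and the example script at the bottom of the commit; the bodies are placeholders, not part of the commit):

def reward(data, state):
    # GLOBAL reward to be maximised; must be positive.
    # May return None while the simulation is incomplete.
    return 1.0

def available_actions(data, state, robot_id):
    # Possible actions for this robot from the given state.
    return [0, 1, 2]

def state_store(data, parent_state, action):
    # Object stored in each node; the root is created with parent_state=None, action=None.
    if parent_state is None:
        return []
    return parent_state + [action]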
......@@ -82,16 +85,16 @@ class Tree:
data,
reward_func,
avail_actions_func,
state_store_func,
comm_n,
robot_id,
c_p=1,
time_func=None):
c_p=1):
self.data = data
self.graph = nx.DiGraph()
self.reward = reward_func
self.available_actions = avail_actions_func
self.time_func = time_func
self.state_store = state_store_func
self.c_p = c_p
self.id = robot_id
self.comms = {} # Plan with no robots initially
......@@ -104,8 +107,7 @@ class Tree:
self.graph.add_node(1,
mu=0,
N=0,
action_seq=[],
time=0
state=self.state_store(self.data, None, None)
)
self._expansion(1)
......@@ -153,13 +155,13 @@ class Tree:
temp=nx.get_node_attributes(self.graph, "mu")
top_n_nodes = sorted(temp, key=temp.get, reverse=True)[:self.comm_n]
X = [self.graph.node[n]["action_seq"] for n in top_n_nodes]
X = [self.graph.node[n]["state"] for n in top_n_nodes]
q = [self.graph.node[n]["mu"]**2 for n in top_n_nodes]
self.my_act_dist = ActionDistribution(X,q)
return True
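For clarity, the q weights above are squared node values (mu**2), which ActionDistribution then normalises in its initialiser. A quick numeric illustration with made-up values:

mu = [2.0, 1.0, 1.0]               # hypothetical node values
q = [m**2 for m in mu]             # [4.0, 1.0, 1.0]
q_norm = [x / sum(q) for x in q]   # [0.666..., 0.166..., 0.166...] after normalisation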
def _get_state(self, node_id):
def _get_system_state(self, node_id):
"""
Randomly select 1 path taken by every other robot & path taken by
this robot to get to this node
......@@ -168,16 +170,16 @@ class Tree:
and second element is a dictionary of the other paths
"""
node_path = self.graph.node[node_id]["action_seq"]
all_paths = {k:self.comms[k].random_action() for k in self.comms}
all_paths[self.id] = node_path
robot_state = self.graph.node[node_id]["state"]
system_state = {k:self.comms[k].random_action() for k in self.comms}
system_state[self.id] = robot_state
return all_paths
return system_state
def _null_state(self, state):
temp = deepcopy(state)
temp[self.id] = []
temp[self.id] = self.graph.node[1]["state"] # Null state: as if this robot were still at the root node
return temp
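To make the data shapes concrete, a small illustration (not part of the commit) of what _get_system_state returns and how _null_state modifies it, using plain lists as stand-in states for a robot with id 1:

system_state = {
    1: ["a", "b"],   # this robot's state at the selected node
    2: ["c"],        # a state sampled from robot 2's communicated ActionDistribution
}
null_state = dict(system_state)
null_state[1] = []   # reset to the root-node state, as if this robot had taken no actions yet
print(null_state)    # {1: [], 2: ['c']}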
......@@ -188,7 +190,7 @@ class Tree:
Init step.
"""
state = self._get_state(start_node)
state = self._get_system_state(start_node)
options = self.available_actions(self.data, state, self.id)
# create empty nodes underneath the node being expanded
......@@ -197,20 +199,16 @@ class Tree:
self.graph.add_node(len(self.graph)+1,
mu = 0,
N = 0,
action_seq=self.graph.node[start_node]["action_seq"] + [o],
time=0
state=self.state_store(self.data, self.graph.node[start_node]["state"], o)
)
self.graph.add_edge(start_node, len(self.graph))
if self.time_func != None:
# TODO Do the time calculations
pass
return True
def grow(self, nsims=10, ntime=None, gamma = 0.9):
def grow(self, nsims=10, gamma = 0.9):
"""
Grow Tree by one node
"""
......@@ -231,7 +229,7 @@ class Tree:
# SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
pass
state = self._get_state(start_node)
state = self._get_system_state(start_node)
avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
......
from DecMCTS import Tree
from copy import deepcopy
data = {}
def avail_actions(data, state, robot_id):
class State:
    def __init__(self, act_seq, cum_sum):
        self.action_seq = act_seq
        self.cumulative_sum = cum_sum
def state_storer(data, parent_state, action):
    if parent_state == None:
        return State([],0) # This state is also used as the null action when calculating local reward
    state = deepcopy(parent_state)
    state.action_seq.append(action)
    state.cumulative_sum = state.cumulative_sum + action
    return state
def avail_actions(data, states, robot_id):
    return [1,2,3,4,5]
def reward(dat, state):
    each_robot_sum = [sum(state[a]) for a in state]
def reward(dat, states):
    print(states)
    each_robot_sum = [states[robot].cumulative_sum for robot in states]
    return sum(each_robot_sum)
comm_n = 5
tree1 = Tree(data, reward, avail_actions, state_storer, comm_n, 1)
tree1 = Tree(data, reward, avail_actions, comm_n, 1)
tree2 = Tree(data, reward, avail_actions, comm_n, 2)
tree2 = Tree(data, reward, avail_actions, state_storer, comm_n, 2)
for i in range(350):
    tree1.grow()
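For clarity, a quick manual walk through state_storer (illustrative only) showing how each node's stored object accumulates its action sequence and cumulative sum:

s0 = state_storer(data, None, None)   # root state: State([], 0)
s1 = state_storer(data, s0, 3)        # s1.action_seq == [3],    s1.cumulative_sum == 3
s2 = state_storer(data, s1, 5)        # s2.action_seq == [3, 5], s2.cumulative_sum == 8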
......