Commit bac0caa0 authored by Jayant Khatkar's avatar Jayant Khatkar

outline for simulation in decmcts

parent f6ef8e88
......@@ -23,7 +23,7 @@ class ActionDistribution:
- q: probability of each action sequence (normalised in initialisation)
"""
def __init__(self, X, q):
# Action sequence as provided
......@@ -215,25 +215,36 @@ class Tree:
### SELECTION
start_node = 1
# Sample actions of other robots
# NOTE: Sampling done at the beginning for dependency graph reasons
state = self._get_system_state(start_node)
# Propagate down the tree
while len(self._childNodes(start_node))>0:
start_node = self._select(self._childNodes(start_node))
### EXPANSION
self._expansion(start_node)
### SIMULATION
avg_reward = 0
best_reward = float("-Inf")
for i in range(nsims):
# TODO
# SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
# TODO TODO TODO
# Get the available actions
# "randomly" choose 1 - function provided by user
# add that to the actions of the current robot
# calculate the reward for that state
# avg reward += reward for sim/nsims
# if best reward so far, store the rollout in the new node
pass
state = self._get_system_state(start_node)
state[self.id] = self.graph.node[start_node]["state"]
avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
### BACKPROPAGATION
while start_node!=1: #while not root node
......@@ -245,9 +256,9 @@ class Tree:
self.graph.node[start_node]["N"] = \
gamma * self.graph.node[start_node]["N"] + 1
self._update_distribution()
return avg_reward
......@@ -260,7 +271,7 @@ class Tree:
Save data which has been communicated to this tree
Only receives from one robot at a time, call once
for each robot
Inputs:
- comms_in
- An Action distribution object
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment