storing rollout, depth added to simulation

a6264978 · Jayant Khatkar · f0d8d4e0 · a6264978
Commit a6264978 authored 5 years ago by Jayant Khatkar
--- a/DecMCTS.py
+++ b/DecMCTS.py
@@ -213,7 +213,7 @@ class Tree:
        return True


-    def grow(self, nsims=10, gamma = 0.9):
+    def grow(self, nsims=10, gamma = 0.9, depth=10):
        """
        Grow Tree by one node
        """
@@ -239,10 +239,9 @@ class Tree:
        for i in range(nsims):
            temp_state = self.graph.node[start_node]["state"]

-            simulation_complete = False
-            while not simulation_complete:
-                # TODO HOW TO CHECK IF SIMULATION IS COMPLETE
-                simulation_complete = True
+            d = 0 # depth
+            while d<depth: # also breaks at no available options
+                d += 1

                # Get the available actions
                options = self.available_actions(
@@ -251,6 +250,10 @@ class Tree:
                            self.id
                        )

+                # If no actions possible, simulation complete
+                if len(options)==0:
+                    break
+
                # "randomly" choose 1 - function provided by user
                state[self.id] = temp_state
                sim_action = self.sim_selection_func(self.data, options, state)
@@ -266,14 +269,11 @@ class Tree:
            # if best reward so far, store the rollout in the new node
            if rew > best_reward:
                best_reward = rew
-                # TODO: STORE THE BEST ROLLOUT IN THE NODE
                best_rollout = temp_state

-
-        #state[self.id] = self.graph.node[start_node]["state"]
-        #avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
        self.graph.node[start_node]["mu"] = avg_reward
        self.graph.node[start_node]["N"] = 1
+        self.graph.node[start_node]["best_rollout"] = best_rollout

        ### BACKPROPOGATION
        while start_node!=1: #while not root node