Commit 60855847 authored by Jayant Khatkar

added simulation

parent 5f2d10ab
@@ -89,7 +89,7 @@ class Tree:
                 reward_func,
                 avail_actions_func,
                 state_store_func,
-                #sim_selection_func,
+                sim_selection_func,
                 comm_n,
                 robot_id,
                 c_p=1):
@@ -99,7 +99,7 @@ class Tree:
         self.reward = reward_func
         self.available_actions = avail_actions_func
         self.state_store = state_store_func
-        #self.sim_selection_func = sim_selection_func
+        self.sim_selection_func = sim_selection_func
         self.c_p = c_p
         self.id = robot_id
         self.comms = {} # Plan with no robots initially
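
The `sim_selection_func` activated above is the user-supplied rollout policy; the simulation loop below calls it as `sim_selection_func(data, options, state)` and expects one of `options` back. A minimal sketch of such a callback, assuming nothing about the problem beyond that call signature (a uniformly random policy; the name `random_sim_selection` is invented for illustration):

import random

def random_sim_selection(data, options, state):
    # Rollout policy: pick one of the currently available actions
    # uniformly at random. `data` and `state` are ignored here, but a
    # problem-specific heuristic could use either to bias the choice.
    return random.choice(list(options))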
@@ -238,48 +238,47 @@ class Tree:
         self._expansion(start_node)
         ### SIMULATION
-        # avg_reward = 0
-        # best_reward = float("-Inf")
-        # best_rollout = None
-        # for i in range(nsims):
-        #     temp_state = self.graph.node[start_node]["state"]
-        #
-        #     d = 0 # depth
-        #     while d<depth: # also breaks at no available options
-        #         d += 1
-        #
-        #         # Get the available actions
-        #         options = self.available_actions(
-        #             self.data,
-        #             temp_state,
-        #             self.id
-        #         )
-        #
-        #         # If no actions possible, simulation complete
-        #         if len(options)==0:
-        #             break
-        #
-        #         # "randomly" choose 1 - function provided by user
-        #         state[self.id] = temp_state
-        #         sim_action = self.sim_selection_func(self.data, options, state)
-        #
-        #         # add that to the actions of the current robot
-        #         temp_state = self.state_store(self.data, temp_state, sim_action, self.id)
-        #
-        #     # calculate the reward at the end of simulation
-        #     rew = self.reward(self.data, temp_state) \
-        #
-        #     # if best reward so far, store the rollout in the new node
-        #     if rew > best_reward:
-        #         best_reward = rew
-        #         best_rollout = temp_state
-        temp_state = self._get_system_state(start_node)
-        avg_reward = self.reward(self.data, temp_state) \
-            - self.reward(self.data, self._null_state(temp_state))
-        self.graph.node[start_node]["mu"] = avg_reward
+        avg_reward = 0
+        best_reward = float("-Inf")
+        best_rollout = None
+        for i in range(nsims):
+            temp_state = self.graph.node[start_node]["state"]
+            d = 0 # depth
+            while d<depth: # also breaks at no available options
+                d += 1
+                # Get the available actions
+                options = self.available_actions(
+                    self.data,
+                    temp_state,
+                    self.id
+                )
+                # If no actions possible, simulation complete
+                if len(options)==0:
+                    break
+                # "randomly" choose 1 - function provided by user
+                #state[self.id] = temp_state
+                sim_action = self.sim_selection_func(self.data, options, temp_state)
+                # add that to the actions of the current robot
+                temp_state = self.state_store(self.data, temp_state, sim_action, self.id)
+            # calculate the reward at the end of simulation
+            state[self.id] = temp_state
+            rew = self.reward(self.data, state) \
+                - self.reward(self.data, self._null_state(state))
+            # if best reward so far, store the rollout in the new node
+            if rew > best_reward:
+                best_reward = rew
+                best_rollout = copy(temp_state)
+        self.graph.node[start_node]["mu"] = avg_reward
         self.graph.node[start_node]["N"] = 1
-        # self.graph.node[start_node]["best_rollout"] = best_rollout
+        self.graph.node[start_node]["best_rollout"] = copy(best_rollout)
         ### BACKPROPOGATION
         while start_node!=1: #while not root node
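
Note how each rollout above is scored: the system reward of the joint state is compared against the same state with this robot's plan nulled out. Reading the code, with $R$ the user's reward function, $s$ the joint state containing robot $i$'s simulated plan, and $s_{-i}$ the corresponding null state from `self._null_state`, each rollout earns a difference reward

\[ r_i = R(s) - R(s_{-i}), \]

so a rollout is credited only with its marginal contribution to the system reward, not the raw system reward.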
@@ -287,7 +286,8 @@ class Tree:
             start_node = self._parent(start_node)
             self.graph.node[start_node]["mu"] = \
-                (gamma * self.graph.node[start_node]["mu"]*self.graph.node[start_node]["N"] + avg_reward)\
+                (gamma * self.graph.node[start_node]["mu"] * \
+                self.graph.node[start_node]["N"] + avg_reward) \
                 /(self.graph.node[start_node]["N"] + 1)
             self.graph.node[start_node]["N"] = \
......
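
This last hunk only rewraps the continuation lines; the update it computes is, in LaTeX form,

\[ \mu_v \leftarrow \frac{\gamma\, \mu_v N_v + \bar{r}}{N_v + 1}, \]

where $\mu_v$ is the node's running mean reward ("mu"), $N_v$ its visit count ("N"), $\bar{r}$ the avg_reward from the new simulation, and $\gamma$ a discount applied to older estimates; the matching update to "N" is truncated in this view.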