Fixed node reward: use the local contribution instead of the raw global reward

bcaefc81 · Jayant Khatkar · ed11e79b · bcaefc81 · bcaefc81
Commit bcaefc81 authored 5 years ago by Jayant Khatkar
--- a/src/DecMCTS.py
+++ b/src/DecMCTS.py
@@ -55,7 +55,7 @@ class Tree:
        - data required to calculate reward, available options 
    - reward
        - This is a function which has inputs (data, state) and
-            returns the global reward to be maximised
+            returns the GLOBAL reward to be maximised
        - It may be the case that the reward function can only be
            calculated once simulation is complete, in which 
            case it should return "None" while simulation is
@@ -164,6 +164,10 @@ class Tree:
        return (node_path, other_paths)
+    def _null_state(self, state):
+        return ([], state[1])
    def _expansion(self, start_node):
        """
@@ -216,7 +220,7 @@ class Tree:
            pass
        state = self._get_state(start_node)
-        avg_reward = self.reward(self.data, state)
+        avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
        self.graph.node[start_node]["mu"] = avg_reward
        self.graph.node[start_node]["N"] = 1

--- a/src/test.py
+++ b/src/test.py
@@ -8,8 +8,8 @@ def avail_actions(data, state):
 def reward(dat, state):
    other_robots = [sum(state[1][a]) for a in state[1]]
-    if sum(other_robots) + sum(state[0]) >25:
+    #if sum(other_robots) + sum(state[0]) >25:
-        return 0
+    #    return 0
    return sum(state[0]) + sum(other_robots)
 comm_n = 5
@@ -19,7 +19,7 @@ tree1 = Tree(data, reward, avail_actions, comm_n)
 tree2 = Tree(data, reward, avail_actions, comm_n)
-for i in range(150):
+for i in range(350):
    tree1.grow()
    tree2.grow()
    tree1.receive_comms(tree2.send_comms(), 2)