Skip to content
Snippets Groups Projects
Commit bcaefc81 authored by Jayant Khatkar
Browse files

fixed reward from global to local contribution

parent ed11e79b
No related merge requests found
...@@ -55,7 +55,7 @@ class Tree: ...@@ -55,7 +55,7 @@ class Tree:
- data required to calculate reward, available options - data required to calculate reward, available options
- reward - reward
- This is a function which has inputs (data, state) and - This is a function which has inputs (data, state) and
returns the global reward to be maximised returns the GLOBAL reward to be maximised
- It may be the case that the reward function can only - It may be the case that the reward function can only
calculated once simulation is complete, in which calculated once simulation is complete, in which
case it should return "None" while simulation is case it should return "None" while simulation is
...@@ -164,6 +164,10 @@ class Tree: ...@@ -164,6 +164,10 @@ class Tree:
return (node_path, other_paths) return (node_path, other_paths)
def _null_state(self, state):
return ([], state[1])
def _expansion(self, start_node): def _expansion(self, start_node):
""" """
...@@ -216,7 +220,7 @@ class Tree: ...@@ -216,7 +220,7 @@ class Tree:
pass pass
state = self._get_state(start_node) state = self._get_state(start_node)
avg_reward = self.reward(self.data, state) avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
self.graph.node[start_node]["mu"] = avg_reward self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1 self.graph.node[start_node]["N"] = 1
......
...@@ -8,8 +8,8 @@ def avail_actions(data, state): ...@@ -8,8 +8,8 @@ def avail_actions(data, state):
def reward(dat, state): def reward(dat, state):
other_robots = [sum(state[1][a]) for a in state[1]] other_robots = [sum(state[1][a]) for a in state[1]]
if sum(other_robots) + sum(state[0]) >25: #if sum(other_robots) + sum(state[0]) >25:
return 0 # return 0
return sum(state[0]) + sum(other_robots) return sum(state[0]) + sum(other_robots)
comm_n = 5 comm_n = 5
...@@ -19,7 +19,7 @@ tree1 = Tree(data, reward, avail_actions, comm_n) ...@@ -19,7 +19,7 @@ tree1 = Tree(data, reward, avail_actions, comm_n)
tree2 = Tree(data, reward, avail_actions, comm_n) tree2 = Tree(data, reward, avail_actions, comm_n)
for i in range(150): for i in range(350):
tree1.grow() tree1.grow()
tree2.grow() tree2.grow()
tree1.receive_comms(tree2.send_comms(), 2) tree1.receive_comms(tree2.send_comms(), 2)
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment