Commit bcaefc81 authored by Jayant Khatkar

fixed reward from global to local contribution

parent ed11e79b
......@@ -55,7 +55,7 @@ class Tree:
- data required to calculate reward, available options
- reward
- This is a function which has inputs (data, state) and
returns the global reward to be maximised
returns the GLOBAL reward to be maximised
- It may be the case that the reward function can only
be calculated once simulation is complete, in which
case it should return "None" while simulation is
......@@ -164,6 +164,10 @@ class Tree:
return (node_path, other_paths)
# Build the "null" counterpart of ``state``: a 2-tuple whose first element is
# an empty list and whose second element is ``state[1]``, passed through
# unchanged. The input ``state`` itself is not modified.
# The reward of this null state is subtracted from the reward of the full
# state when a freshly expanded node's initial "mu" is computed.
# NOTE(review): presumably state[0] is this tree's own action sequence and
# state[1] holds the other agents' contributions -- confirm against _get_state.
def _null_state(self, state):
return ([], state[1])
def _expansion(self, start_node):
"""
......@@ -216,7 +220,7 @@ class Tree:
pass
state = self._get_state(start_node)
avg_reward = self.reward(self.data, state)
avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
......
......@@ -8,8 +8,8 @@ def avail_actions(data, state):
def reward(dat, state):
other_robots = [sum(state[1][a]) for a in state[1]]
if sum(other_robots) + sum(state[0]) >25:
return 0
#if sum(other_robots) + sum(state[0]) >25:
# return 0
return sum(state[0]) + sum(other_robots)
comm_n = 5
......@@ -19,7 +19,7 @@ tree1 = Tree(data, reward, avail_actions, comm_n)
tree2 = Tree(data, reward, avail_actions, comm_n)
for i in range(150):
for i in range(350):
tree1.grow()
tree2.grow()
tree1.receive_comms(tree2.send_comms(), 2)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment