diff --git a/DecMCTS.py b/DecMCTS.py
index 3f13de2568a82d80dfa8c83941d774929fe8c91d..dd739262b89fe6591e7e686bb72af1d889ad8176 100644
--- a/DecMCTS.py
+++ b/DecMCTS.py
@@ -23,7 +23,7 @@ class ActionDistribution:
     - q: probability of each action sequence (normalised in intialisation)
     """
-    
+
     def __init__(self, X, q):
 
         # Action sequence as provided
@@ -215,25 +215,36 @@ class Tree:
 
         ### SELECTION
         start_node = 1
+
+        # Sample actions of other robots
+        # NOTE: Sampling done at the begining for dependency graph reasons
+        state = self._get_system_state(start_node)
+
+        # Propagate down the tree
         while len(self._childNodes(start_node))>0:
             start_node = self._select(self._childNodes(start_node))
 
         ### EXPANSION
         self._expansion(start_node)
-        
+
         ### SIMULATION
         avg_reward = 0
         best_reward = float("-Inf")
         for i in range(nsims):
-            # TODO
-            # SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
+            # TODO TODO TODO
+            # Get the available actions
+            # "randomly" choose 1 - function provided by user
+            # add that to the actions of the current robot
+            # calculate the reward for that state
+            # avg reward += reward for sim/nsims
+            # if best reward so far, store the rollout in the new node
             pass
 
-        state = self._get_system_state(start_node)
+        state[self.id] = self.graph.node[start_node]["state"]
         avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
         self.graph.node[start_node]["mu"] = avg_reward
         self.graph.node[start_node]["N"] = 1
-        
+
         ### BACKPROPOGATION
         while start_node!=1: #while not root node
@@ -245,9 +256,9 @@ class Tree:
             self.graph.node[start_node]["N"] = \
                 gamma * self.graph.node[start_node]["N"] + 1
-        
+
         self._update_distribution()
-        
+
         return avg_reward
@@ -260,7 +271,7 @@ class Tree:
         Save data which has been communicated to this tree
         Only receives from one robot at a time, call once for each robot
-        
+
         Inputs:
         - comms_in
             - An Action distribution object
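The `# TODO` comments added in the SIMULATION block describe a random rollout that is not yet implemented (the loop body is still `pass`). Below is a minimal standalone sketch of what that loop could look like, assuming the user supplies `available_actions(data, state)` and `reward(data, state)` callables and that the joint state maps each robot id to its action sequence; the function name, signature, and `depth` parameter are hypothetical, not part of the current code:

```python
import random

def rollout_sketch(data, state, robot_id, available_actions, reward,
                   nsims=10, depth=5):
    """Sketch of the rollout outlined by the TODO comments.

    available_actions / reward are user-provided callables; returns the
    average reward over nsims rollouts and the best rollout found.
    """
    avg_reward = 0.0
    best_reward = float("-Inf")
    best_rollout = None

    for _ in range(nsims):
        # Copy the joint state so each simulation starts fresh
        sim_state = {rid: list(seq) for rid, seq in state.items()}
        for _ in range(depth):
            actions = available_actions(data, sim_state)
            if not actions:
                break
            # "Randomly" choose one action and append it to this robot's sequence
            sim_state[robot_id].append(random.choice(actions))
        # Calculate the reward for the resulting state
        r = reward(data, sim_state)
        avg_reward += r / nsims
        # If best reward so far, remember the rollout
        if r > best_reward:
            best_reward = r
            best_rollout = sim_state[robot_id]

    return avg_reward, best_rollout
```

Inside `Tree.grow()` the returned average could replace the heuristic `avg_reward` computation, and the best rollout could be stored on the new node, per the comment "if best reward so far, store the rollout in the new node".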