Commit bac0caa0 by Jayant Khatkar

### Outline for the simulation step in decmcts

parent f6ef8e88
 ... ... @@ -23,7 +23,7 @@ class ActionDistribution: - q: probability of each action sequence (normalised in intialisation) """ def __init__(self, X, q): # Action sequence as provided ... ... @@ -215,25 +215,36 @@ class Tree: ### SELECTION start_node = 1 # Sample actions of other robots # NOTE: Sampling done at the begining for dependency graph reasons state = self._get_system_state(start_node) # Propagate down the tree while len(self._childNodes(start_node))>0: start_node = self._select(self._childNodes(start_node)) ### EXPANSION self._expansion(start_node) ### SIMULATION avg_reward = 0 best_reward = float("-Inf") for i in range(nsims): # TODO # SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD # TODO TODO TODO # Get the available actions # "randomly" choose 1 - function provided by user # add that to the actions of the current robot # calculate the reward for that state # avg reward += reward for sim/nsims # if best reward so far, store the rollout in the new node pass state = self._get_system_state(start_node) state[self.id] = self.graph.node[start_node]["state"] avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state)) self.graph.node[start_node]["mu"] = avg_reward self.graph.node[start_node]["N"] = 1 ### BACKPROPOGATION while start_node!=1: #while not root node ... ... @@ -245,9 +256,9 @@ class Tree: self.graph.node[start_node]["N"] = \ gamma * self.graph.node[start_node]["N"] + 1 self._update_distribution() return avg_reward ... ... @@ -260,7 +271,7 @@ class Tree: Save data which has been communicated to this tree Only receives from one robot at a time, call once for each robot Inputs: - comms_in - An Action distribution object ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!