Commit bac0caa0 authored by Jayant Khatkar's avatar Jayant Khatkar

outline for simulation in decmcts

parent f6ef8e88
......@@ -23,7 +23,7 @@ class ActionDistribution:
- q: probability of each action sequence (normalised in initialisation)
"""
def __init__(self, X, q):
# Action sequence as provided
......@@ -215,25 +215,36 @@ class Tree:
### SELECTION
start_node = 1
# Sample actions of other robots
# NOTE: Sampling done at the beginning for dependency graph reasons
state = self._get_system_state(start_node)
# Propagate down the tree
while len(self._childNodes(start_node))>0:
start_node = self._select(self._childNodes(start_node))
### EXPANSION
self._expansion(start_node)
### SIMULATION
avg_reward = 0
best_reward = float("-Inf")
for i in range(nsims):
# TODO
# SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
# TODO TODO TODO
# Get the available actions
# "randomly" choose 1 - function provided by user
# add that to the actions of the current robot
# calculate the reward for that state
# avg reward += reward for sim/nsims
# if best reward so far, store the rollout in the new node
pass
state = self._get_system_state(start_node)
state[self.id] = self.graph.node[start_node]["state"]
avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
### BACKPROPAGATION
while start_node!=1: #while not root node
......@@ -245,9 +256,9 @@ class Tree:
self.graph.node[start_node]["N"] = \
gamma * self.graph.node[start_node]["N"] + 1
self._update_distribution()
return avg_reward
......@@ -260,7 +271,7 @@ class Tree:
Save data which has been communicated to this tree
Only receives from one robot at a time, call once
for each robot
Inputs:
- comms_in
- An Action distribution object
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment