Commit c883a38a authored by Jayant Khatkar

state_storer needs to be corrected

parent f0218158
@@ -89,7 +89,7 @@ class Tree:
reward_func,
avail_actions_func,
state_store_func,
sim_selection_func,
#sim_selection_func,
comm_n,
robot_id,
c_p=1):
@@ -99,7 +99,7 @@ class Tree:
self.reward = reward_func
self.available_actions = avail_actions_func
self.state_store = state_store_func
self.sim_selection_func = sim_selection_func
#self.sim_selection_func = sim_selection_func
self.c_p = c_p
self.id = robot_id
self.comms = {} # Plan with no robots initially
@@ -263,13 +263,12 @@ class Tree:
#
# # calculate the reward at the end of simulation
# rew = self.reward(self.data, temp_state) \
# - self.reward(self.data, self._null_state(temp_state))
# avg_reward += rew/nsims
#
# # if best reward so far, store the rollout in the new node
# if rew > best_reward:
# best_reward = rew
# best_rollout = temp_state
temp_state = self._get_system_state(start_node)
avg_reward = self.reward(self.data, temp_state) \
- self.reward(self.data, self._null_state(temp_state))
from DecMCTS import Tree
from copy import deepcopy
import numpy as np
import numpy.random as rand
from planning_utils import *
import matplotlib.pyplot as plt
# Create a bunch of jobs
# Each job has a time to do (in seconds?)
@@ -24,10 +26,10 @@ actions = [Action(job_locs[i], job_locs[i], job_times[i], job=i)
for i in range(n_jobs)]
# Preemptively calculate pairwise travel times
travel_time = {(j1,j2):actions[j1].travel_to(actions[j2])/vel
for j1 in range(n_jobs)
for j2 in range(n_jobs)
if j1!=j2}
travel_time = {(j1,j2):j1.travel_to(j2)/vel
for j1 in actions
for j2 in actions
if j1 is not j2}
# Data needed for any calculations
data = {
"actions": set(actions),
@@ -41,30 +43,35 @@ class State:
"""
def __init__(self, start_pos):
self.jobs = []
self.actions = []
self.time_so_far = 0
self.time_wasted = 0
self.current_pos = start_pos
def append(self, new_job):
self.jobs.append(new_job)
self.time_so_far += data["tt"][self.jobs[-1], self.jobs[-2]] \
+ job_times[new_job]
self.time_wasted += data["tt"][self.jobs[-1], self.jobs[-2]]
self.current_pos = job_locs[new_job]
self.actions.append(new_job)
if len(self.actions)>1:
self.time_so_far += data["tt"][self.actions[-1], self.actions[-2]] \
+ new_job.time
self.time_wasted += data["tt"][self.actions[-1], self.actions[-2]]
else:
self.time_wasted = \
Action(Position(0,0,0),Position(0,0,0),0).travel_to(new_job)/vel
self.time_so_far = new_job.time + self.time_wasted
self.current_pos = new_job.end_location
def state_storer(data, parent_state, action):
if parent_state is None:
return State(Position(0,0,0)) # This state is also used as the null action when calculating local reward
state = deepcopy(parent_state) # NOTE THIS WILL NEED TO CHANGE WITH CONTOURS
state = State(action.start_location) # NOTE THIS WILL NEED TO CHANGE WITH CONTOURS
state.append(action)
return state
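The commit message notes that state_storer still needs to be corrected: the added line rebuilds the state from action.start_location, which discards the parent's accumulated plan and timing. A minimal sketch of one possible correction, assuming the intent is to copy the parent state and extend it with the chosen action (hypothetical, not the author's final fix; State and Position are the classes defined in this script / planning_utils):

```python
from copy import deepcopy  # already imported at the top of this script

def state_storer(data, parent_state, action):
    # Root of the tree: start at the origin; this state also serves as the
    # null action when the local reward is computed
    if parent_state is None:
        return State(Position(0, 0, 0))
    # Copy the parent's plan so time_so_far / time_wasted keep accumulating,
    # then append the newly selected action
    state = deepcopy(parent_state)
    state.append(action)
    return state
```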
# All actions not done by the current robot are considered available
# actions of other robots are not considered
def avail_actions(data, states, robot_id):
def avail_actions(data, state, robot_id):
# NOTE THIS WILL NEED TO CHANGE WITH DEPENDENCY GRAPH
return [j for j in list(range(n_jobs)) if j not in states[robot_id].jobs]
return [j for j in data["actions"] if j not in state.actions]
# reward decreases as total wasted time increases
def reward(dat, states):
@@ -75,18 +82,42 @@ def reward(dat, states):
# 2- estimated time wasted in doing the remaining jobs
# This is the number of remaining jobs * mean travel time
# 3- time diff between each robot and the robot with longest plan
done_jobs = set(sum([states[robot].jobs for robot in states], []))
# 4- Duplicate executions of jobs
done_jobs = sum([states[robot].actions for robot in states], [])
time_wasted_1 = sum([states[robot].time_wasted for robot in states])
time_wasted_2 = len(data["actions"] - done_jobs)*data["mean_tt"]
time_wasted_2 = len(data["actions"] - set(done_jobs))*data["mean_tt"]
t_so_far= [states[robot].time_so_far for robot in states]
time_wasted_3 = sum(max(t_so_far) - np.array(t_so_far))
return 1/(time_wasted_1 + time_wasted_2 + time_wasted_3)
time_wasted_4 = sum([a.time for a in done_jobs if done_jobs.count(a)>1])/2
return -(time_wasted_1 + time_wasted_2 + time_wasted_3 + time_wasted_4)
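For intuition on the new duplicate-execution term: if both robots execute the same 10-second job, that job appears twice in done_jobs, so time_wasted_4 = (10 + 10) / 2 = 10, i.e. one redundant execution is charged in full. The division by 2 implicitly assumes the two-robot setup created below.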
# Communicate top n nodes of the MCTS tree
comm_n = 5
# Create the trees
n_robots = 2
trees = [None]*n_robots
for i in range(n_robots):
trees[i] = Tree(data, reward, avail_actions, state_storer, comm_n, i)
# Plot function
def plot_actions(actions, plan = None):
"""
Plot the actions' start locations
- actions - list of Action objects; marker size scales with job time
"""
xs = [p.start_location[0] for p in actions]
ys = [p.start_location[1] for p in actions]
max_size = 50
min_size = 10
ss = [p.time for p in actions]
ss = [(p-min(ss))/(max(ss)-min(ss))*(max_size-min_size)+min_size for p in ss]
plt.scatter(xs, ys, s = ss)
plt.show()
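A possible quick check of the helper above, called on the actions list built earlier (hypothetical usage, not part of the commit):

```python
# Scatter the job start locations, marker size scaled by each job's duration
plot_actions(actions)
```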
if __name__=="__main__":
trees = [None]*n_robots
for i in range(n_robots):
trees[i] = Tree(data, reward, avail_actions, state_storer, comm_n, i)
for i in range(200):
trees[0].grow()
trees[1].grow()
trees[0].receive_comms(trees[1].send_comms(), 1)
trees[1].receive_comms(trees[0].send_comms(), 0)