Commit f0218158 authored by Jayant Khatkar

reward function for scheduler testing

parent f13c1c7f
from DecMCTS import Tree
import numpy as np
import numpy.random as rand
from planning_utils import *
@@ -29,8 +30,9 @@ travel_time = {(j1,j2):actions[j1].travel_to(actions[j2])/vel
if j1!=j2}
# Data needed for any calculations
data = {
-    "actions": actions
-    "tt": travel_time
+    "actions": set(actions),
+    "tt": travel_time,
+    "mean_tt": sum(travel_time.values())/len(travel_time)
}
class State:
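For context, a small sketch of what the new "mean_tt" entry evaluates to, using made-up job names and travel times (everything below is hypothetical and only illustrates the arithmetic):

# Hypothetical travel times between pairs of jobs (seconds).
travel_time = {("j1", "j2"): 4.0, ("j2", "j1"): 4.0,
               ("j1", "j3"): 6.0, ("j3", "j1"): 6.0}

# "mean_tt" is the average travel time over all job pairs; the reward below
# uses it as a per-job estimate of the travel cost of jobs not yet scheduled.
mean_tt = sum(travel_time.values()) / len(travel_time)  # (4 + 4 + 6 + 6) / 4 = 5.0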
@@ -53,7 +55,7 @@ class State:
def state_storer(data, parent_state, action):
    if parent_state == None:
-        return State((0,0)) # This state is also used as the Null action when calculating local reward
+        return State(Position(0,0,0)) # This state is also used as the Null action when calculating local reward
    state = deepcopy(parent_state) # NOTE THIS WILL NEED TO CHANGE WITH CONTOURS
    state.append(action)
    return state
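The State class body is elided by this diff. Based only on how state_storer and the new reward use it, a minimal sketch might look like the following; the attribute names (jobs, time_so_far, time_wasted) come from the reward function below, while the update logic in append is an assumption, not the repository's actual implementation:

from copy import deepcopy  # state_storer deep-copies the parent state before appending

class State:
    # Hypothetical minimal State, inferred from its usage in this file.
    def __init__(self, start_pos):
        self.pos = start_pos       # where the robot's plan currently starts/ends
        self.jobs = []             # jobs appended to this robot's plan so far
        self.time_so_far = 0.0     # total duration of the plan
        self.time_wasted = 0.0     # non-productive (travel/idle) time in the plan

    def append(self, job, travel=0.0, duration=0.0):
        # Assumption: travel between jobs counts as wasted time, job execution does not.
        self.jobs.append(job)
        self.time_wasted += travel
        self.time_so_far += travel + duration

state_storer calls append(action) with just the job; how the travel and execution times would be looked up (e.g. from the "tt" entry of the data dict) is left out of this sketch.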
@@ -66,9 +68,19 @@ def avail_actions(data, states, robot_id):
# reward is inversely proportional to total time taken
def reward(dat, states):
-    # TODO WILL NEED TO DO GREEDY ROLLOUT TO CALC REWARD
-    time_wasted = [states[robot].time_wasted for robot in states]
-    return 1/sum(each_robot_sum)
+    # TODO DO GREEDY ROLLOUT TO CALC REWARD ??
+    # Reward is 1/(estimation of total time wasted)
+    # the estimation is a sum of:
+    # 1- time wasted so far in the plan
+    # 2- estimated time wasted in doing the remaining jobs
+    #    This is the number of remaining jobs * mean travel time
+    # 3- time diff between each robot and the robot with the longest plan
+    done_jobs = set(sum([states[robot].jobs for robot in states], []))
+    time_wasted_1 = sum([states[robot].time_wasted for robot in states])
+    time_wasted_2 = len(data["actions"] - done_jobs)*data["mean_tt"]
+    t_so_far = [states[robot].time_so_far for robot in states]
+    time_wasted_3 = sum(max(t_so_far) - np.array(t_so_far))
+    return 1/(time_wasted_1 + time_wasted_2 + time_wasted_3)
# Communicate top n nodes of the MCTS tree
comm_n = 5
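As a rough sanity check of the new reward above, here is a worked example with two hypothetical robot states; SimpleNamespace stands in for the real State objects, the job names and times are invented, and note that the parameter is named dat while the body reads the module-level data dict:

from types import SimpleNamespace

# Two made-up robot plans: robot 1 has scheduled jobs a and b, robot 2 only job c.
states = {
    1: SimpleNamespace(jobs=["a", "b"], time_wasted=3.0, time_so_far=10.0),
    2: SimpleNamespace(jobs=["c"],      time_wasted=1.0, time_so_far=6.0),
}

# With data = {"actions": {"a", "b", "c", "d"}, "mean_tt": 5.0, ...} the terms are:
#   time_wasted_1 = 3.0 + 1.0              = 4.0  (waste already accumulated in the plans)
#   time_wasted_2 = len({"d"}) * 5.0       = 5.0  (one unscheduled job * mean travel time)
#   time_wasted_3 = (10 - 10) + (10 - 6)   = 4.0  (idle time of the shorter plan)
#   reward        = 1 / (4.0 + 5.0 + 4.0)  = 1/13 ≈ 0.077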