Working mono with comments #1

Open
wants to merge 2 commits into base: working_mono
32 changes: 24 additions & 8 deletions env_pkg/envs/fooEnvPY.py
@@ -41,12 +41,24 @@
np.random.seed(10)

class FooEnv(gym.Env):
def __init__(self, n_cars=3 , n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feats=11, ob_radius=10):
def __init__(self, n_cars=3, n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feats=11, ob_radius=10, verbose=False):

self.tree_obs = TreeObsForRailEnv(max_depth=n_nodes, predictor=ShortestPathPredictorForRailEnv(30))
self.total_feats = n_feats * sum([4**i for i in range(n_nodes+1)])
self.total_feats = n_feats * (4**(n_nodes+1) - 1) // 3  # == n_feats * sum(4**i for i in range(n_nodes+1))
"""Maximum number of possible features in the environment.
Every observed node expands into 4 child nodes, so the tree contains
sum(4**i for i in range(n_nodes+1)) nodes in total, and the number of
features is the same for each car.
"""
self.action_space = spaces.MultiDiscrete([n_acts]*n_cars)
""" discrete action space for multiple agents
every car can do 5 acts, so any action is vector where number of elements equal number of cars
and element is integer in [0; 5)
https://github.com/openai/gym/blob/master/gym/spaces/multi_discrete.py
"""
self.observation_space = spaces.Box(low=min_obs, high=max_obs, shape=(n_cars, self.total_feats), dtype=np.float32)
"""It's all possible points in a box(including bound) in R**n space, see
https://github.com/openai/gym/blob/master/gym/spaces/box.py
"""
self.n_cars = n_cars
self.n_nodes = n_nodes
self.ob_radius = ob_radius
@@ -70,7 +82,7 @@ def __init__(self, n_cars=3 , n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feat
self.info = dict()
self.updates = dict()
self.old_obs = dict()

self.verbose = verbose

def step(self, action):
"""
@@ -80,7 +92,7 @@ def step(self, action):
see https://gym.openai.com/docs/#observations
"""

print(action)
if self.verbose: print(action)

for agent_id in range(self._rail_env.get_num_agents()):

@@ -89,9 +101,9 @@
self.updates[agent_id] = True
else:
self.updates[agent_id] = False
action[agent_id] = 0
action[agent_id] = 0  # tell the agent to do nothing
self.action_dict.update({agent_id: action[agent_id]})
print(self.action_dict)
if self.verbose: print(self.action_dict)
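# e.g. with n_cars=3 the resulting dict might look like {0: 2, 1: 0, 2: 4}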
next_obs, all_rewards, done, self.info = self._rail_env.step(self.action_dict)

# if done['__all__']:
@@ -117,6 +129,10 @@ def reset(self):
return obs: initial observation of the space
"""
obs, self.info = self._rail_env.reset(True, True)
"""maybe? obs, self.info = self._rail_env.reset()
regenerating rails and regenerating schedule
https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/rail_env.py#L287
"""
for agent_id in range(self._rail_env.get_num_agents()):
obs[agent_id] = normalize_observation(obs[agent_id], self.n_nodes, self.ob_radius)
feats = [f.reshape(1,-1) for f in obs.values()]
@@ -128,5 +144,5 @@ def render(self, mode=None):

self.renderer.render_env()
image = self.renderer.get_image()
cv2.imshow('Render', image)
cv2.waitKey(20)
cv2.imshow('Render', image)  # 'Render' is the name of the window
cv2.waitKey(20)  # display the window for 20 ms
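A minimal usage sketch of the wrapped environment, for review purposes only; the import path is inferred from the file location and the standard gym reset/step return values are assumed, so treat it as illustrative rather than part of this diff:

from env_pkg.envs.fooEnvPY import FooEnv   # import path inferred from the file location

env = FooEnv(n_cars=3, n_acts=5, verbose=True)
obs = env.reset()                              # normalized tree features, shape (n_cars, total_feats)
for _ in range(10):
    action = env.action_space.sample()         # one random action per car
    obs, reward, done, info = env.step(action) # assumes the standard gym return tuple
    env.render()                               # shows the 'Render' OpenCV window for 20 ms per frame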
38 changes: 21 additions & 17 deletions env_pkg/envs/observation_utils.py
@@ -7,28 +7,28 @@ def max_lt(seq, val):
Return greatest item in seq for which item < val applies.
None is returned if seq was empty or all items in seq were >= val.
"""
max = 0
idx = len(seq) - 1
while idx >= 0:
if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max:
max = seq[idx]
idx -= 1
return max
if list(seq):
np_seq = np.array(seq)
condition_array = np.logical_and(np_seq < val, np_seq >= 0)
if condition_array.any():
return np.max(np_seq[condition_array])
else: return None
else: return None


def min_gt(seq, val):
"""
Return smallest item in seq for which item > val applies.
None is returned if seq was empty or all items in seq were >= val.
None is returned if seq was empty or all items in seq were <= val.
"""
min = np.inf
idx = len(seq) - 1
while idx >= 0:
if seq[idx] >= val and seq[idx] < min:
min = seq[idx]
idx -= 1
return min

if list(seq):
np_seq = np.array(seq)
condition_array = np_seq > val
if condition_array.any():
return np.min(np_seq[condition_array])
else: return None
else: return None
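# worked example of the rewritten helpers (assumed inputs, for illustration only):
#   max_lt([3, 7, -1, 5], 6) -> 5     greatest non-negative item strictly below 6
#   min_gt([3, 7, -1, 5], 4) -> 5     smallest item strictly above 4
#   max_lt([], 6)            -> None  empty sequence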

def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_range=False):
"""
@@ -59,7 +59,11 @@ def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray
distance = np.zeros(1)
agent_data = np.zeros(4)
if node is None:
print('FUCK ME, NO OBSERVATION?')
print('DUCK ME, NO OBSERVATION?')
"""maybe?
raise ValueError("No Node have been given to observe")

"""
data[0] = node.dist_own_target_encountered
data[1] = node.dist_other_target_encountered
data[2] = node.dist_other_agent_encountered