From bde440877c77361a33f39499d014cf79aa8e84f7 Mon Sep 17 00:00:00 2001 From: Isaev Kimal Date: Sat, 13 Jun 2020 03:38:01 +0300 Subject: [PATCH 1/2] added comments for FooEnv --- env_pkg/envs/fooEnvPY.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/env_pkg/envs/fooEnvPY.py b/env_pkg/envs/fooEnvPY.py index a10ed76..bbd7934 100644 --- a/env_pkg/envs/fooEnvPY.py +++ b/env_pkg/envs/fooEnvPY.py @@ -41,12 +41,24 @@ np.random.seed(10) class FooEnv(gym.Env): - def __init__(self, n_cars=3 , n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feats=11, ob_radius=10): + def __init__(self, n_cars=3 , n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feats=11, ob_radius=10, verbose = False): self.tree_obs = TreeObsForRailEnv(max_depth=n_nodes, predictor=ShortestPathPredictorForRailEnv(30)) - self.total_feats = n_feats * sum([4**i for i in range(n_nodes+1)]) + self.total_feats = n_feats * (4**(n_nodes+1) - 1) // 3  # closed form of n_feats * sum(4**i for i in range(n_nodes+1)); the geometric sum needs the exact division by (4 - 1) + """maximum of possible features in evironment + 4 is number of observed nodes from each node already observed + number of total features is equal for each car. 
+ """ self.action_space = spaces.MultiDiscrete([n_acts]*n_cars) + """ discrete action space for multiple agents + every car can do 5 acts, so any action is vector where number of elements equal number of cars + and element is integer in [0; 5) + https://github.com/openai/gym/blob/master/gym/spaces/multi_discrete.py + """ self.observation_space = spaces.Box(low=min_obs, high=max_obs, shape=(n_cars, self.total_feats), dtype=np.float32) + """It's all possible points in a box(including bound) in R**n space, see + https://github.com/openai/gym/blob/master/gym/spaces/box.py + """ self.n_cars = n_cars self.n_nodes = n_nodes self.ob_radius = ob_radius @@ -70,7 +82,7 @@ def __init__(self, n_cars=3 , n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feat self.info = dict() self.updates = dict() self.old_obs = dict() - + self.verbose = verbose def step(self, action): """ @@ -80,7 +92,7 @@ def step(self, action): see https://gym.openai.com/docs/#observations """ - print(action) + if self.verbose: print(action) for agent_id in range(self._rail_env.get_num_agents()): @@ -89,9 +101,9 @@ def step(self, action): self.updates[agent_id] = True else: self.updates[agent_id] = False - action[agent_id] = 0 + action[agent_id] = 0 # tell the agent to do nothing self.action_dict.update({agent_id: action[agent_id]}) - print(self.action_dict) + if self.verbose: print(self.action_dict) next_obs, all_rewards, done, self.info = self._rail_env.step(self.action_dict) # if done['__all__']: @@ -117,6 +129,10 @@ def reset(self): return obs: initial observation of the space """ obs, self.info = self._rail_env.reset(True, True) + """maybe? 
+ obs, self.info = self._rail_env.reset() + regenerating rails and regenerating schedule + https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/rail_env.py#L287 + """ for agent_id in range(self._rail_env.get_num_agents()): obs[agent_id] = normalize_observation(obs[agent_id], self.n_nodes, self.ob_radius) feats = [f.reshape(1,-1) for f in obs.values()] @@ -128,5 +144,5 @@ def render(self, mode=None): self.renderer.render_env() image = self.renderer.get_image() - cv2.imshow('Render', image) - cv2.waitKey(20) + cv2.imshow('Render', image)#'Render' is name of the window + cv2.waitKey(20)#shows window for 20 ms From 84e32d39b7e72893b6d3897e93bbfa7f37b97aa1 Mon Sep 17 00:00:00 2001 From: Isaev Kimal Date: Sat, 13 Jun 2020 06:42:13 +0300 Subject: [PATCH 2/2] Added comments and optimization to observation_utils --- env_pkg/envs/observation_utils.py | 38 +++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/env_pkg/envs/observation_utils.py b/env_pkg/envs/observation_utils.py index 19f3116..1133493 100644 --- a/env_pkg/envs/observation_utils.py +++ b/env_pkg/envs/observation_utils.py @@ -7,28 +7,28 @@ def max_lt(seq, val): Return greatest item in seq for which item < val applies. None is returned if seq was empty or all items in seq were >= val. """ - max = 0 - idx = len(seq) - 1 - while idx >= 0: - if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max: - max = seq[idx] - idx -= 1 - return max + if list(seq): + np_seq = np.array(seq) + condition_array = np.logical_and(np_seq < val, np_seq >=0) + if condition_array.any(): + return np.max(np_seq[condition_array]) + else: return None + else: return None def min_gt(seq, val): """ Return smallest item in seq for which item > val applies. - None is returned if seq was empty or all items in seq were >= val. + None is returned if seq was empty or all items in seq were <= val. 
""" - min = np.inf - idx = len(seq) - 1 - while idx >= 0: - if seq[idx] >= val and seq[idx] < min: - min = seq[idx] - idx -= 1 - return min - + if list(seq): + np_seq = np.array(seq) + condition_array = np_seq > val + if condition_array.any(): + return np.min(np_seq[condition_array]) + else: None + else: None + def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_range=False): """ @@ -59,7 +59,11 @@ def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray distance = np.zeros(1) agent_data = np.zeros(4) if node is None: - print('FUCK ME, NO OBSERVATION?') + print('DUCK ME, NO OBSERVATION?') + """maybe? + raise ValueError("No Node have been given to observe") + + """ data[0] = node.dist_own_target_encountered data[1] = node.dist_other_target_encountered data[2] = node.dist_other_agent_encountered