Working mono with comments #1

Open
wants to merge 2 commits into base: working_mono
32 changes: 24 additions & 8 deletions env_pkg/envs/fooEnvPY.py
@@ -41,12 +41,24 @@
np.random.seed(10)

class FooEnv(gym.Env):
def __init__(self, n_cars=3 , n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feats=11, ob_radius=10):
def __init__(self, n_cars=3, n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feats=11, ob_radius=10, verbose=False):

self.tree_obs = TreeObsForRailEnv(max_depth=n_nodes, predictor=ShortestPathPredictorForRailEnv(30))
self.total_feats = n_feats * sum([4**i for i in range(n_nodes+1)])
self.total_feats = n_feats * (4**(n_nodes+1) - 1) // 3  # == n_feats * sum(4**i for i in range(n_nodes+1))
"""Maximum number of possible features in the environment.
Every observed node expands into 4 child nodes, so the tree contains
sum(4**i for i in range(n_nodes+1)) nodes in total, and the number of
features is the same for each car.
"""
self.action_space = spaces.MultiDiscrete([n_acts]*n_cars)
""" discrete action space for multiple agents
every car can do 5 acts, so any action is vector where number of elements equal number of cars
and element is integer in [0; 5)
https://github.com/openai/gym/blob/master/gym/spaces/multi_discrete.py
"""
self.observation_space = spaces.Box(low=min_obs, high=max_obs, shape=(n_cars, self.total_feats), dtype=np.float32)
"""It's all possible points in a box(including bound) in R**n space, see
https://github.com/openai/gym/blob/master/gym/spaces/box.py
"""
self.n_cars = n_cars
self.n_nodes = n_nodes
self.ob_radius = ob_radius
@@ -70,7 +82,7 @@ def __init__(self, n_cars=3 , n_acts=5, min_obs=-1, max_obs=1, n_nodes=2, n_feat
self.info = dict()
self.updates = dict()
self.old_obs = dict()

self.verbose = verbose

def step(self, action):
"""
@@ -80,7 +92,7 @@ def step(self, action):
see https://gym.openai.com/docs/#observations
"""

print(action)
if self.verbose: print(action)

for agent_id in range(self._rail_env.get_num_agents()):

@@ -89,9 +101,9 @@
self.updates[agent_id] = True
else:
self.updates[agent_id] = False
action[agent_id] = 0
action[agent_id] = 0  # tell the agent to do nothing
self.action_dict.update({agent_id: action[agent_id]})
print(self.action_dict)
if self.verbose: print(self.action_dict)
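# e.g. with n_cars=3 the resulting dict might look like {0: 2, 1: 0, 2: 4}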
next_obs, all_rewards, done, self.info = self._rail_env.step(self.action_dict)

# if done['__all__']:
@@ -117,6 +129,10 @@ def reset(self):
return obs: initial observation of the space
"""
obs, self.info = self._rail_env.reset(True, True)
"""maybe? obs, self.info = self._rail_env.reset()
regenerating rails and regenerating schedule
https://gitlab.aicrowd.com/flatland/flatland/blob/master/flatland/envs/rail_env.py#L287
"""
for agent_id in range(self._rail_env.get_num_agents()):
obs[agent_id] = normalize_observation(obs[agent_id], self.n_nodes, self.ob_radius)
feats = [f.reshape(1,-1) for f in obs.values()]
@@ -128,5 +144,5 @@ def render(self, mode=None):

self.renderer.render_env()
image = self.renderer.get_image()
cv2.imshow('Render', image)
cv2.waitKey(20)
cv2.imshow('Render', image)  # 'Render' is the name of the window
cv2.waitKey(20)  # display the window for 20 ms
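A minimal usage sketch of the wrapped environment, for review purposes only; the import path is inferred from the file location and the standard gym reset/step return values are assumed, so treat it as illustrative rather than part of this diff:

from env_pkg.envs.fooEnvPY import FooEnv   # import path inferred from the file location

env = FooEnv(n_cars=3, n_acts=5, verbose=True)
obs = env.reset()                              # normalized tree features, shape (n_cars, total_feats)
for _ in range(10):
    action = env.action_space.sample()         # one random action per car
    obs, reward, done, info = env.step(action) # assumes the standard gym return tuple
    env.render()                               # shows the 'Render' OpenCV window for 20 ms per frame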
38 changes: 21 additions & 17 deletions env_pkg/envs/observation_utils.py
@@ -7,28 +7,28 @@ def max_lt(seq, val):
Return greatest item in seq for which item < val applies.
None is returned if seq was empty or all items in seq were >= val.
"""
max = 0
idx = len(seq) - 1
while idx >= 0:
if seq[idx] < val and seq[idx] >= 0 and seq[idx] > max:
max = seq[idx]
idx -= 1
return max
if list(seq):
np_seq = np.array(seq)
condition_array = np.logical_and(np_seq < val, np_seq >= 0)
if condition_array.any():
return np.max(np_seq[condition_array])
else: return None
else: return None


def min_gt(seq, val):
"""
Return smallest item in seq for which item > val applies.
None is returned if seq was empty or all items in seq were >= val.
None is returned if seq was empty or all items in seq were <= val.
"""
min = np.inf
idx = len(seq) - 1
while idx >= 0:
if seq[idx] >= val and seq[idx] < min:
min = seq[idx]
idx -= 1
return min

if list(seq):
np_seq = np.array(seq)
condition_array = np_seq > val
if condition_array.any():
return np.min(np_seq[condition_array])
else: return None
else: return None
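# worked example of the rewritten helpers (assumed inputs, for illustration only):
#   max_lt([3, 7, -1, 5], 6) -> 5     greatest non-negative item strictly below 6
#   min_gt([3, 7, -1, 5], 4) -> 5     smallest item strictly above 4
#   max_lt([], 6)            -> None  empty sequence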

def norm_obs_clip(obs, clip_min=-1, clip_max=1, fixed_radius=0, normalize_to_range=False):
"""
@@ -59,7 +59,11 @@ def _split_node_into_feature_groups(node: TreeObsForRailEnv.Node) -> (np.ndarray
distance = np.zeros(1)
agent_data = np.zeros(4)
if node is None:
print('FUCK ME, NO OBSERVATION?')
print('DUCK ME, NO OBSERVATION?')
"""maybe?
raise ValueError("No Node have been given to observe")

"""
data[0] = node.dist_own_target_encountered
data[1] = node.dist_other_target_encountered
data[2] = node.dist_other_agent_encountered