[Feature] IsaacGymEnvs integration (pytorch#1443)

vmoens · web-flow · commit 6316a57c8130 · 2023-08-09T16:55:27.000+01:00
diff --git a/test/test_libs.py b/test/test_libs.py
@@ -2,6 +2,16 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+import importlib.util
+
+_has_isaac = importlib.util.find_spec("isaacgym") is not None
+
+if _has_isaac:
+    # isaac gym asks to be imported before torch, hence this guarded early import
+    import isaacgym  # noqa
+    import isaacgymenvs  # noqa
+    from torchrl.envs.libs.isaacgym import IsaacGymEnv
+
 import argparse
 import importlib
 
@@ -18,6 +28,7 @@
     _make_multithreaded_env,
     CARTPOLE_VERSIONED,
     get_available_devices,
+    get_default_devices,
     HALFCHEETAH_VERSIONED,
     PENDULUM_VERSIONED,
     PONG_VERSIONED,
@@ -1534,6 +1545,76 @@ def test_data(self, dataset):
         assert len(data) // 2048 in (i, i - 1)
 
 
+@pytest.mark.skipif(not _has_isaac, reason="IsaacGym not found")
+@pytest.mark.parametrize(
+    "task",
+    [
+        "AllegroHand",
+        # "AllegroKuka",
+        # "AllegroKukaTwoArms",
+        # "AllegroHandManualDR",
+        # "AllegroHandADR",
+        "Ant",
+        # "Anymal",
+        # "AnymalTerrain",
+        # "BallBalance",
+        # "Cartpole",
+        # "FactoryTaskGears",
+        # "FactoryTaskInsertion",
+        # "FactoryTaskNutBoltPick",
+        # "FactoryTaskNutBoltPlace",
+        # "FactoryTaskNutBoltScrew",
+        # "FrankaCabinet",
+        # "FrankaCubeStack",
+        "Humanoid",
+        # "HumanoidAMP",
+        # "Ingenuity",
+        # "Quadcopter",
+        # "ShadowHand",
+        "Trifinger",
+    ],
+)
+@pytest.mark.parametrize("num_envs", [10, 20])
+@pytest.mark.parametrize("device", get_default_devices())
+class TestIsaacGym:
+    @classmethod
+    def _run_on_proc(cls, q, task, num_envs, device):
+        try:
+            env = IsaacGymEnv(task=task, num_envs=num_envs, device=device)
+            check_env_specs(env)
+            q.put(("succeeded!", None))
+        except Exception as err:
+            q.put(("failed!", err))
+            raise err
+
+    def test_env(self, task, num_envs, device):
+        from torch import multiprocessing as mp
+
+        q = mp.Queue(1)
+        proc = mp.Process(target=self._run_on_proc, args=(q, task, num_envs, device))
+        try:
+            proc.start()
+            msg, error = q.get()
+            if msg != "succeeded!":
+                raise error
+        finally:
+            q.close()
+            proc.join()
+
+    #
+    # def test_collector(self, task, num_envs, device):
+    #     env = IsaacGymEnv(task=task, num_envs=num_envs, device=device)
+    #     collector = SyncDataCollector(
+    #         env,
+    #         policy=SafeModule(nn.LazyLinear(out_features=env.observation_spec['obs'].shape[-1]), in_keys=["obs"], out_keys=["action"]),
+    #         frames_per_batch=20,
+    #         total_frames=-1
+    #     )
+    #     for c in collector:
+    #         assert c.shape == torch.Size([num_envs, 20])
+    #         break
+
+
 if __name__ == "__main__":
     args, unknown = argparse.ArgumentParser().parse_known_args()
     pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
diff --git a/torchrl/envs/gym_like.py b/torchrl/envs/gym_like.py
@@ -166,7 +166,6 @@ def read_obs(
 
         """
         if isinstance(observations, dict):
-            observations = {key: value for key, value in observations.items()}
             if "state" in observations and "observation" not in observations:
                 # we rename "state" in "observation" as "observation" is the conventional name
                 # for single observation in torchrl.
diff --git a/torchrl/envs/libs/gym.py b/torchrl/envs/libs/gym.py
@@ -528,8 +528,8 @@ def _set_seed_initial(self, seed: int) -> None:  # noqa: F811
             self._seed_calls_reset = False
             self._env.seed(seed=seed)
 
-    def _make_specs(self, env: "gym.Env") -> None:  # noqa: F821
-        self.action_spec = _gym_to_torchrl_spec_transform(
+    def _make_specs(self, env: "gym.Env", batch_size=None) -> None:  # noqa: F821
+        action_spec = _gym_to_torchrl_spec_transform(
             env.action_space,
             device=self.device,
             categorical_action_encoding=self._categorical_action_encoding,
@@ -544,18 +544,26 @@ def _make_specs(self, env: "gym.Env") -> None:  # noqa: F821
                 observation_spec = CompositeSpec(pixels=observation_spec)
             else:
                 observation_spec = CompositeSpec(observation=observation_spec)
-        self.observation_spec = observation_spec
         if hasattr(env, "reward_space") and env.reward_space is not None:
-            self.reward_spec = _gym_to_torchrl_spec_transform(
+            reward_spec = _gym_to_torchrl_spec_transform(
                 env.reward_space,
                 device=self.device,
                 categorical_action_encoding=self._categorical_action_encoding,
             )
         else:
-            self.reward_spec = UnboundedContinuousTensorSpec(
+            reward_spec = UnboundedContinuousTensorSpec(
                 shape=[1],
                 device=self.device,
             )
+        if batch_size is not None:
+            action_spec = action_spec.expand(*batch_size, *action_spec.shape)
+            reward_spec = reward_spec.expand(*batch_size, *reward_spec.shape)
+            observation_spec = observation_spec.expand(
+                *batch_size, *observation_spec.shape
+            )
+        self.action_spec = action_spec
+        self.reward_spec = reward_spec
+        self.observation_spec = observation_spec
 
     def _init_env(self):
         self.reset()
diff --git a/torchrl/envs/libs/isaacgym.py b/torchrl/envs/libs/isaacgym.py
@@ -0,0 +1,170 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import importlib.util
+
+import itertools
+import warnings
+from typing import Any, Dict, List, Union
+
+import numpy as np
+import torch
+
+from tensordict import TensorDictBase
+from torchrl.envs import make_composite_from_td
+from torchrl.envs.libs.gym import GymWrapper
+
+_has_isaac = importlib.util.find_spec("isaacgym") is not None
+
+
+class IsaacGymWrapper(GymWrapper):
+    """Wrapper for IsaacGymEnvs environments.
+
+    The original library can be found `here <https://github.com/NVIDIA-Omniverse/IsaacGymEnvs>`_
+    and is based on IsaacGym which can be downloaded `through NVIDIA's webpage <https://developer.nvidia.com/isaac-gym>`_.
+
+    .. note:: IsaacGym environments cannot be executed consecutively, i.e., instantiating one
+        environment after another (even if it has been cleared) will cause
+        CUDA memory issues. We recommend creating one environment per process only.
+        If you need more than one environment, the best way to achieve that is
+        to spawn them across processes.
+
+    .. note:: IsaacGym inherently runs on CUDA devices. Make sure your machine
+        has GPUs available and the required setup for IsaacGym (e.g., Ubuntu 20.04).
+
+    """
+
+    def __init__(
+        self, env: "isaacgymenvs.tasks.base.vec_task.Env", **kwargs  # noqa: F821
+    ):
+        warnings.warn(
+            "IsaacGym environment support is an experimental feature that may change in the future."
+        )
+        num_envs = env.num_envs
+        super().__init__(
+            env, torch.device(env.device), batch_size=torch.Size([num_envs]), **kwargs
+        )
+        if not hasattr(self, "task"):
+            # by convention in IsaacGymEnvs
+            self.task = env.__name__
+
+    def _make_specs(self, env: "gym.Env") -> None:  # noqa: F821
+        super()._make_specs(env, batch_size=self.batch_size)
+        self.done_spec = self.done_spec.squeeze(-1)
+        self.observation_spec["obs"] = self.observation_spec["observation"]
+        del self.observation_spec["observation"]
+
+        data = self.rollout(3).get("next")[..., 0]
+        del data[self.reward_key]
+        del data[self.done_key]
+        specs = make_composite_from_td(data)
+
+        obs_spec = self.observation_spec
+        obs_spec.unlock_()
+        obs_spec.update(specs)
+        obs_spec.lock_()
+        self.__dict__["_observation_spec"] = obs_spec
+
+    @classmethod
+    def _make_envs(cls, *, task, num_envs, device, seed=None, headless=True, **kwargs):
+        import isaacgym  # noqa
+        import isaacgymenvs  # noqa
+
+        envs = isaacgymenvs.make(
+            seed=seed,
+            task=task,
+            num_envs=num_envs,
+            sim_device=str(device),
+            rl_device=str(device),
+            headless=headless,
+            **kwargs,
+        )
+        return envs
+
+    def _set_seed(self, seed: int) -> int:
+        # as of #665c32170d84b4be66722eea405a1e08b6e7f761 the seed points nowhere in gym.make for IsaacGymEnvs
+        return seed
+
+    def read_action(self, action):
+        """Reads the action obtained from the input TensorDict and transforms it into the format expected by the contained environment.
+
+        Args:
+            action (Tensor or TensorDict): an action to be taken in the environment
+
+        Returns: an action in a format compatible with the contained environment.
+
+        """
+        return action
+
+    def read_done(self, done):
+        """Done state reader.
+
+        Reads a done state and returns a tuple containing:
+        - a done state to be set in the environment
+        - a boolean value indicating whether the frame_skip loop should be broken
+
+        Args:
+            done (np.ndarray, boolean or other format): done state obtained from the environment
+
+        """
+        return done.bool(), done.any()
+
+    def read_reward(self, total_reward, step_reward):
+        """Reads a reward and the total reward so far (in the frame skip loop) and returns a sum of the two.
+
+        Args:
+            total_reward (torch.Tensor or TensorDict): total reward so far in the step
+            step_reward (reward in the format provided by the inner env): reward of this particular step
+
+        """
+        return total_reward + step_reward
+
+    def read_obs(
+        self, observations: Union[Dict[str, Any], torch.Tensor, np.ndarray]
+    ) -> Dict[str, Any]:
+        """Reads an observation from the environment and returns an observation compatible with the output TensorDict.
+
+        Args:
+            observations (observation under a format dictated by the inner env): observation to be read.
+
+        """
+        if isinstance(observations, dict):
+            if "state" in observations and "observation" not in observations:
+                # we rename "state" in "observation" as "observation" is the conventional name
+                # for single observation in torchrl.
+                # naming it 'state' will result in envs that have a different name for the state vector
+                # when queried with and without pixels
+                observations["observation"] = observations.pop("state")
+        if not isinstance(observations, (TensorDictBase, dict)):
+            (key,) = itertools.islice(self.observation_spec.keys(True, True), 1)
+            observations = {key: observations}
+        return observations
+
+
+class IsaacGymEnv(IsaacGymWrapper):
+    """A TorchRL Env interface for IsaacGym environments.
+
+    See :class:`~.IsaacGymWrapper` for more information.
+
+    Examples:
+        >>> env = IsaacGymEnv(task="Ant", num_envs=2000, device="cuda:0")
+        >>> rollout = env.rollout(3)
+        >>> assert env.batch_size == (2000,)
+
+    """
+
+    @property
+    def available_envs(cls) -> List[str]:
+        import isaacgymenvs  # noqa
+
+        return list(isaacgymenvs.tasks.isaacgym_task_map.keys())
+
+    def __init__(self, task=None, *, env=None, num_envs, device, **kwargs):
+        if env is not None and task is not None:
+            raise RuntimeError("Cannot provide both `task` and `env` arguments.")
+        elif env is not None:
+            task = env
+        envs = self._make_envs(task=task, num_envs=num_envs, device=device, **kwargs)
+        self.task = task
+        super().__init__(envs, **kwargs)