Base¶

The base class for all MjxPlaygroundEnv environments used in Mujorax, together with its state and config classes.

`mujorax.MjxPlaygroundEnv` ¶

Bases: JaxEnv[Box, Box, MjxPlaygroundState, MjxPlaygroundConfig]

Base wrapper that exposes a mujoco_playground environment via Envrax's JaxEnv API.

Subclasses set _PLAYGROUND_NAME to a name accepted by mujoco_playground.registry.load. Override _reward, _done, or _info to customise per-env behaviour.

Parameters:

Name	Type	Description	Default
`config`	`MjxPlaygroundConfig | None`	Static configuration. Defaults to `MjxPlaygroundConfig()`.	`None`

Source code in mujorax/envs/_base.py

Python
class MjxPlaygroundEnv(JaxEnv[Box, Box, MjxPlaygroundState, MjxPlaygroundConfig]):
    """
    Adapter that presents a `mujoco_playground` environment through
    Envrax's `JaxEnv` interface.

    Concrete environments subclass this and assign `_PLAYGROUND_NAME` a
    name that `mujoco_playground.registry.load` accepts. Per-env behaviour
    is customised by overriding the `_reward`, `_done`, or `_info` hooks.

    Parameters
    ----------
    config : MjxPlaygroundConfig (optional)
        Static configuration. Defaults to `MjxPlaygroundConfig()`.
    """

    # Registry name of the wrapped Playground env; subclasses must set it.
    _PLAYGROUND_NAME: str = ""

    def __init__(self, config: MjxPlaygroundConfig | None = None) -> None:
        # Fail early: an empty name means the subclass forgot to set it.
        if not self._PLAYGROUND_NAME:
            raise ValueError(f"{type(self).__name__} must set `_PLAYGROUND_NAME`.")

        super().__init__(config)

        resolved_overrides = self._resolve_overrides()
        self._env = mujoco_playground.registry.load(
            self._PLAYGROUND_NAME,
            config_overrides=resolved_overrides,
        )

        # Evaluated purely for its side effect: the property raises
        # NotImplementedError when the env produces dict observations.
        _ = self.observation_space

    def _resolve_overrides(self) -> Dict[str, Any] | None:
        """
        Assemble the overrides handed to `mujoco_playground.registry.load`.

        Starts from a copy of `config.config_overrides`. When the user has
        not pinned `impl` and `_has_cuda()` reports no CUDA, `impl="jax"`
        is added, because Playground defaults to `impl="warp"` which
        requires a CUDA backend.

        Returns
        -------
        overrides : Dict[str, Any] | None
            The resolved overrides, or `None` if there are none.
        """
        user_overrides = self.config.config_overrides or {}
        resolved = dict(user_overrides)  # copy so the config is not mutated

        # `_has_cuda()` is only consulted when `impl` was not pinned.
        if not ("impl" in resolved or _has_cuda()):
            resolved["impl"] = "jax"

        return resolved if resolved else None

    def _extract_obs(self, pg_state: mjx_env.State) -> chex.Array:
        """
        Pull the observation array out of a Playground state.

        Construction already rejects dict observations; this narrows
        Playground's `Observation` union to one array and acts as a
        runtime guard should a non-array observation appear anyway.

        Parameters
        ----------
        pg_state : mjx_env.State
            Playground state to read the observation from

        Returns
        -------
        obs : chex.Array
            The observation array

        Raises
        ------
        error : TypeError
            If `pg_state.obs` is not a `jax.Array`.
        """
        obs = pg_state.obs
        if isinstance(obs, jax.Array):
            return obs

        raise TypeError(
            f"Expected `pg_state.obs` to be a `jax.Array`, got "
            f"{type(obs).__name__}. Dict observations are not supported "
            "in this release."
        )

    @property
    def observation_space(self) -> Box:
        """
        Unbounded float32 `Box` of shape `(observation_size,)`.

        Raises `NotImplementedError` when the wrapped env's
        `observation_size` is not an `int` (dict-shaped observations).
        """
        size = self._env.observation_size

        if isinstance(size, int):
            return Box(
                low=-jnp.inf,
                high=jnp.inf,
                shape=(size,),
                dtype=jnp.float32,
            )

        raise NotImplementedError(
            f"{type(self).__name__} produces dict-shaped observations "
            f"({size}); not supported in this release."
        )

    @property
    def action_space(self) -> Box:
        """float32 `Box` bounded to `[-1, 1]` with shape `(action_size,)`."""
        n_actions = self._env.action_size
        return Box(
            low=-1.0,
            high=1.0,
            shape=(n_actions,),
            dtype=jnp.float32,
        )

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, MjxPlaygroundState]:
        """
        Put the environment into a starting state.

        The key is split in two: one half seeds the Playground reset, the
        other is stored on the returned state.

        Parameters
        ----------
        rng : chex.PRNGKey
            JAX PRNG key

        Returns
        -------
        obs : chex.Array
            Initial observation
        state : MjxPlaygroundState
            Initial environment state carrying the remaining `rng`
        """
        carry_key, init_key = jax.random.split(rng)
        initial_pg = self._env.reset(init_key)

        initial_state = MjxPlaygroundState(
            rng=carry_key,
            step=jnp.int32(0),
            done=initial_pg.done.astype(jnp.bool_),
            pg_state=initial_pg,
        )

        initial_obs = self._extract_obs(initial_pg)
        return initial_obs, initial_state

    def step(
        self,
        state: MjxPlaygroundState,
        action: chex.Array,
    ) -> Tuple[chex.Array, MjxPlaygroundState, chex.Array, chex.Array, Dict[str, Any]]:
        """
        Advance the environment by one action.

        Steps the wrapped Playground env, then derives reward, done flag
        and info through the `_reward`, `_done` and `_info` hooks.

        Parameters
        ----------
        state : MjxPlaygroundState
            Environment state before the step
        action : chex.Array
            Action to apply

        Returns
        -------
        obs : chex.Array
            Observation after the step
        new_state : MjxPlaygroundState
            Environment state after the step
        reward : chex.Array
            Scalar reward
        done : chex.Array
            bool scalar — `True` when the episode has ended
        info : Dict[str, Any]
            Auxiliary diagnostic information
        """
        next_pg = self._env.step(state.pg_state, action)  # type: ignore
        next_step = state.step + jnp.int32(1)

        # Hooks receive the pre-step wrapper state and the post-step
        # Playground state.
        reward = self._reward(state, action, next_pg)
        done = self._done(state, next_pg, next_step)

        # Advance the stored key; the second half of the split is unused.
        next_rng, _ = jax.random.split(state.rng)

        next_state = state.__replace__(
            rng=next_rng,
            step=next_step,
            done=done,
            pg_state=next_pg,
        )
        info = self._info(state, next_pg, next_step)

        next_obs = self._extract_obs(next_pg)
        return next_obs, next_state, reward, done, info

    def render(
        self,
        state: MjxPlaygroundState,
        height: int = 240,
        width: int = 320,
    ) -> np.ndarray:
        """
        Produce an RGB frame for the given environment state.

        Parameters
        ----------
        state : MjxPlaygroundState
            Environment state to render
        height : int, default 240
            Output frame height in pixels
        width : int, default 320
            Output frame width in pixels

        Returns
        -------
        frame : np.ndarray
            uint8 RGB array of shape `(height, width, 3)`
        """
        # The wrapped renderer is called with a one-element list of states;
        # only the first returned frame is used.
        trajectory = [state.pg_state]
        rendered = self._env.render(trajectory, height=height, width=width)
        first_frame = rendered[0]
        return np.asarray(first_frame, dtype=np.uint8)

    def _reward(
        self,
        state: MjxPlaygroundState,
        action: chex.Array,
        new_pg: mjx_env.State,
    ) -> chex.Array:
        """
        Reward hook for the step that was just taken.

        The default passes Playground's own reward through unchanged;
        override it to add shaping.

        Parameters
        ----------
        state : MjxPlaygroundState
            State before the step
        action : chex.Array
            Action just taken
        new_pg : mjx_env.State
            Playground state after the step

        Returns
        -------
        reward : chex.Array
            Scalar reward
        """
        return new_pg.reward

    def _done(
        self,
        state: MjxPlaygroundState,
        new_pg: mjx_env.State,
        new_step: chex.Array,
    ) -> chex.Array:
        """
        Termination hook for the step that was just taken.

        The default is `new_pg.done OR new_step >= max_steps`.

        Parameters
        ----------
        state : MjxPlaygroundState
            State before the step
        new_pg : mjx_env.State
            Playground state after the step
        new_step : chex.Array
            Episode timestep after the step

        Returns
        -------
        done : chex.Array
            bool scalar — `True` when the episode has ended
        """
        playground_done = new_pg.done.astype(jnp.bool_)
        step_limit_hit = new_step >= self.config.max_steps
        return jnp.logical_or(playground_done, step_limit_hit)

    def _info(
        self,
        state: MjxPlaygroundState,
        new_pg: mjx_env.State,
        new_step: chex.Array,
    ) -> Dict[str, Any]:
        """
        Info hook: assemble the info dict that `step` returns.

        Contains `"current_step"` and `"metrics"`, followed by every entry
        of `new_pg.info`; an entry in `new_pg.info` with one of those two
        keys replaces the wrapper's value.

        Parameters
        ----------
        state : MjxPlaygroundState
            State before the step
        new_pg : mjx_env.State
            Playground state after the step
        new_step : chex.Array
            Episode timestep after the step

        Returns
        -------
        info : Dict[str, Any]
            Auxiliary diagnostic information
        """
        wrapper_entries = {
            "current_step": new_step,
            "metrics": new_pg.metrics,
        }
        return {**wrapper_entries, **new_pg.info}

`observation_space` `property` ¶

Returns the observation space.

`action_space` `property` ¶

Returns the action space.

`reset(rng)` ¶

Set the environment to a starting state.

Parameters:

Name	Type	Description	Default
`rng`	`PRNGKey`	JAX PRNG key	required

Returns:

Name	Type	Description
`obs`	`Array`	Initial observation
`state`	`MjxPlaygroundState`	Initial environment state with `rng` embedded

Source code in mujorax/envs/_base.py

Python
def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, MjxPlaygroundState]:
    """
    Put the environment into a starting state.

    The key is split in two: one half seeds the Playground reset, the
    other is stored on the returned state.

    Parameters
    ----------
    rng : chex.PRNGKey
        JAX PRNG key

    Returns
    -------
    obs : chex.Array
        Initial observation
    state : MjxPlaygroundState
        Initial environment state carrying the remaining `rng`
    """
    carry_key, init_key = jax.random.split(rng)
    initial_pg = self._env.reset(init_key)

    initial_state = MjxPlaygroundState(
        rng=carry_key,
        step=jnp.int32(0),
        done=initial_pg.done.astype(jnp.bool_),
        pg_state=initial_pg,
    )

    initial_obs = self._extract_obs(initial_pg)
    return initial_obs, initial_state

`step(state, action)` ¶

Take an action through the environment.

Parameters:

Name	Type	Description	Default
`state`	`MjxPlaygroundState`	Current environment state	required
`action`	`Array`	Action to take in the environment	required

Returns:

Name	Type	Description
`obs`	`Array`	Observation after the step
`new_state`	`MjxPlaygroundState`	Updated environment state
`reward`	`Array`	Scalar reward
`done`	`Array`	bool scalar — `True` when the episode has ended
`info`	`Dict[str, Any]`	Auxiliary diagnostic information

Source code in mujorax/envs/_base.py

Python
def step(
    self,
    state: MjxPlaygroundState,
    action: chex.Array,
) -> Tuple[chex.Array, MjxPlaygroundState, chex.Array, chex.Array, Dict[str, Any]]:
    """
    Advance the environment by one action.

    Steps the wrapped Playground env, then derives reward, done flag
    and info through the `_reward`, `_done` and `_info` hooks.

    Parameters
    ----------
    state : MjxPlaygroundState
        Environment state before the step
    action : chex.Array
        Action to apply

    Returns
    -------
    obs : chex.Array
        Observation after the step
    new_state : MjxPlaygroundState
        Environment state after the step
    reward : chex.Array
        Scalar reward
    done : chex.Array
        bool scalar — `True` when the episode has ended
    info : Dict[str, Any]
        Auxiliary diagnostic information
    """
    next_pg = self._env.step(state.pg_state, action)  # type: ignore
    next_step = state.step + jnp.int32(1)

    # Hooks receive the pre-step wrapper state and the post-step
    # Playground state.
    reward = self._reward(state, action, next_pg)
    done = self._done(state, next_pg, next_step)

    # Advance the stored key; the second half of the split is unused.
    next_rng, _ = jax.random.split(state.rng)

    next_state = state.__replace__(
        rng=next_rng,
        step=next_step,
        done=done,
        pg_state=next_pg,
    )
    info = self._info(state, next_pg, next_step)

    next_obs = self._extract_obs(next_pg)
    return next_obs, next_state, reward, done, info

`render(state, height=240, width=320)` ¶

Render the environment state as an RGB frame.

Parameters:

Name	Type	Description	Default
`state`	`MjxPlaygroundState`	Current environment state to render	required
`height`	`int`	Output frame height in pixels	`240`
`width`	`int`	Output frame width in pixels	`320`

Returns:

Name	Type	Description
`frame`	`ndarray`	uint8 RGB array of shape `(height, width, 3)`

Source code in mujorax/envs/_base.py

Python
def render(
    self,
    state: MjxPlaygroundState,
    height: int = 240,
    width: int = 320,
) -> np.ndarray:
    """
    Produce an RGB frame for the given environment state.

    Parameters
    ----------
    state : MjxPlaygroundState
        Environment state to render
    height : int, default 240
        Output frame height in pixels
    width : int, default 320
        Output frame width in pixels

    Returns
    -------
    frame : np.ndarray
        uint8 RGB array of shape `(height, width, 3)`
    """
    # The wrapped renderer is called with a one-element list of states;
    # only the first returned frame is used.
    trajectory = [state.pg_state]
    rendered = self._env.render(trajectory, height=height, width=width)
    first_frame = rendered[0]
    return np.asarray(first_frame, dtype=np.uint8)

`mujorax.MjxPlaygroundState` ¶

Bases: EnvState

Environment state for a wrapped MuJoCo Playground environment.

Parameters:

Name	Type	Description	Default
`rng`	`PRNGKey`	JAX PRNG key	required
`step`	`Array`	Current timestep within the episode	required
`done`	`Array`	bool scalar — episode termination flag	required
`pg_state`	`State`	Full Playground environment state	required

Source code in mujorax/envs/_base.py

Python
@chex.dataclass
class MjxPlaygroundState(EnvState):
    """
    Environment state for a wrapped MuJoCo Playground environment.

    Only `pg_state` is declared on this class. `rng`, `step` and `done`
    are not declared here; they are presumably inherited from `EnvState`
    (TODO confirm against Envrax).

    Parameters
    ----------
    rng : chex.PRNGKey
        JAX PRNG key
    step : chex.Array
        Current timestep within the episode
    done : chex.Array
        bool scalar — episode termination flag
    pg_state : mjx_env.State
        Full Playground environment state
    """

    # The wrapped Playground state, carried alongside the wrapper's own
    # bookkeeping fields.
    pg_state: mjx_env.State

`mujorax.MjxPlaygroundConfig` ¶

Bases: EnvConfig

Static configuration for a wrapped MuJoCo Playground environment.

Parameters:

Name	Type	Description	Default
`max_steps`	`int`	Maximum number of steps per episode. Default is 1000.	`1000`
`config_overrides`	`Dict[str, Any]`	Flat overrides forwarded to `mujoco_playground.registry.load`. Use dotted keys for nested fields (e.g. `"reward_config.scale"`).	`dict()`

Source code in mujorax/envs/_base.py

Python
@chex.dataclass
class MjxPlaygroundConfig(EnvConfig):
    """
    Static configuration for a wrapped MuJoCo Playground environment.

    Only `config_overrides` is declared on this class. `max_steps` is not
    declared here; it is presumably inherited from `EnvConfig`
    (TODO confirm against Envrax).

    Parameters
    ----------
    max_steps : int
        Maximum number of steps per episode. Default is 1000.
    config_overrides : Dict[str, Any]
        Flat overrides forwarded to `mujoco_playground.registry.load`.
        Use dotted keys for nested fields (e.g. `"reward_config.scale"`).
        Defaults to an empty dict.
    """

    # `default_factory` gives every config instance its own empty dict
    # rather than one shared mutable default.
    config_overrides: Dict[str, Any] = field(default_factory=dict)

Base¶

mujorax.MjxPlaygroundEnv ¶

observation_space property ¶

action_space property ¶

reset(rng) ¶

step(state, action) ¶

render(state, height=240, width=320) ¶

mujorax.MjxPlaygroundState ¶

mujorax.MjxPlaygroundConfig ¶

`mujorax.MjxPlaygroundEnv` ¶

`observation_space` `property` ¶

`action_space` `property` ¶

`reset(rng)` ¶

`step(state, action)` ¶

`render(state, height=240, width=320)` ¶

`mujorax.MjxPlaygroundState` ¶

`mujorax.MjxPlaygroundConfig` ¶