Base class
Note
This is the base class for all the other classes in this module. It provides the basic functionality for the other classes. It is not intended to be used directly.
Mixin and inheritance
from typing import Union, Mapping, Sequence, Tuple, Any
import gym
import torch
class CustomMixin:
    """Template mixin that keeps one ``clip_actions`` flag per role.

    The flags live in ``self._custom_clip_actions``, a dictionary keyed by role name.
    """

    def __init__(self, clip_actions: bool = False, role: str = "") -> None:
        """Register the ``clip_actions`` flag for ``role``

        :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: ``False``)
        :type clip_actions: bool, optional
        :param role: Role played by the model (default: ``""``)
        :type role: str, optional
        """
        # e.g. storage custom parameter
        # create the per-role registry only once so that an earlier registration
        # on the same instance is not discarded
        if not hasattr(self, "_custom_clip_actions"):
            self._custom_clip_actions = {}
        # store the flag (a bare subscript here would store nothing and raise KeyError)
        self._custom_clip_actions[role] = clip_actions

    def act(self,
            inputs: Mapping[str, Union[torch.Tensor, Any]],
            role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]:
        """Act according to the specified behavior

        :param inputs: Model inputs. The most common keys are:

                       - ``"states"``: state of the environment used to make the decision
                       - ``"taken_actions"``: actions taken by the policy for the given states
        :type inputs: dict where the values are typically torch.Tensor
        :param role: Role played by the model (default: ``""``)
        :type role: str, optional

        :raises KeyError: If neither ``role`` nor the default role ``""`` has been registered

        :return: Model output. The first component is the action to be taken by the agent.
                 The second component is the log of the probability density function for stochastic models
                 or None for deterministic models. The third component is a dictionary containing extra output values
        :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary
        """
        # e.g. retrieve clip actions according to role
        # (fall back to the flag registered under the default role "")
        registry = self._custom_clip_actions
        clip_actions = registry[role] if role in registry else registry[""]

        # ==============================
        # - act in response to the state
        # ==============================
from typing import Union, Mapping, Sequence, Tuple, Any
import gym
import torch
from skrl.models.torch import Model # from . import Model
class CustomModel(Model):
    """Template for a custom model built directly on top of the base ``Model`` class."""

    def __init__(self,
                 observation_space: Union[int, Sequence[int], gym.Space],
                 action_space: Union[int, Sequence[int], gym.Space],
                 device: Union[str, torch.device] = "cuda:0") -> None:
        """Forward the spaces and the device to the base class constructor

        :param observation_space: Observation/state space or shape.
                                  The ``num_observations`` property will contain the size of that space
        :type observation_space: int, sequence of int, gym.Space
        :param action_space: Action space or shape.
                             The ``num_actions`` property will contain the size of that space
        :type action_space: int, sequence of int, gym.Space
        :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``)
        :type device: str or torch.device, optional
        """
        super().__init__(observation_space=observation_space,
                         action_space=action_space,
                         device=device)

    def act(self,
            inputs: Mapping[str, Union[torch.Tensor, Any]],
            role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]:
        """Act according to the specified behavior

        :param inputs: Model inputs. The most common keys are:

                       - ``"states"``: state of the environment used to make the decision
                       - ``"taken_actions"``: actions taken by the policy for the given states
        :type inputs: dict where the values are typically torch.Tensor
        :param role: Role played by the model (default: ``""``)
        :type role: str, optional

        :return: Model output. The first component is the action to be taken by the agent.
                 The second component is the log of the probability density function for stochastic models
                 or None for deterministic models. The third component is a dictionary containing extra output values
        :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary
        """
        # ==============================
        # - act in response to the state
        # ==============================
API
- class skrl.models.torch.base.Model(observation_space: Union[int, Sequence[int], gym.spaces.space.Space, gymnasium.spaces.space.Space], action_space: Union[int, Sequence[int], gym.spaces.space.Space, gymnasium.spaces.space.Space], device: Optional[Union[str, torch.device]] = None)
Bases:
torch.nn.modules.module.Module
- __init__(observation_space: Union[int, Sequence[int], gym.spaces.space.Space, gymnasium.spaces.space.Space], action_space: Union[int, Sequence[int], gym.spaces.space.Space, gymnasium.spaces.space.Space], device: Optional[Union[str, torch.device]] = None) None
Base class representing a function approximator
The following properties are defined:
device (torch.device): Device to be used for the computations
observation_space (int, sequence of int, gym.Space, gymnasium.Space): Observation/state space
action_space (int, sequence of int, gym.Space, gymnasium.Space): Action space
num_observations (int): Number of elements in the observation/state space
num_actions (int): Number of elements in the action space
- Parameters
observation_space (int, sequence of int, gym.Space, gymnasium.Space) – Observation/state space or shape. The num_observations property will contain the size of that space
action_space (int, sequence of int, gym.Space, gymnasium.Space) – Action space or shape. The num_actions property will contain the size of that space
device (str or torch.device, optional) – Device on which a torch tensor is or will be allocated (default: None). If None, the device will be either "cuda:0" if available or "cpu"
Custom models should override the act method:

import torch
import torch.nn as nn
import torch.nn.functional as F
from skrl.models.torch import Model

class CustomModel(Model):
    def __init__(self, observation_space, action_space, device="cuda:0"):
        Model.__init__(self, observation_space, action_space, device)

        self.layer_1 = nn.Linear(self.num_observations, 64)
        self.layer_2 = nn.Linear(64, self.num_actions)

    def act(self, inputs, role=""):
        x = F.relu(self.layer_1(inputs["states"]))
        x = F.relu(self.layer_2(x))
        return x, None, {}
- property device
Device to be used for the computations
- property observation_space
Observation/state space. It is a replica of the class constructor parameter of the same name
- property action_space
Action space. It is a replica of the class constructor parameter of the same name
- property num_observations
Number of elements in the observation/state space
- property num_actions
Number of elements in the action space
- _get_space_size(space: Union[int, Sequence[int], gym.spaces.space.Space, gymnasium.spaces.space.Space], number_of_elements: bool = True) int
Get the size (number of elements) of a space
- Parameters
space (int, sequence of int, gym.Space, or gymnasium.Space) – Space or shape from which to obtain the number of elements
number_of_elements (bool, optional) – Whether the number of elements occupied by the space is returned (default: True). If False, the shape of the space is returned. It only affects Discrete spaces
- Raises
ValueError – If the space is not supported
- Returns
Size of the space (number of elements)
- Return type
int
Example:
# from int
>>> model._get_space_size(2)
2

# from sequence of int
>>> model._get_space_size([2, 3])
6

# Box space
>>> space = gym.spaces.Box(low=-1, high=1, shape=(2, 3))
>>> model._get_space_size(space)
6

# Discrete space
>>> space = gym.spaces.Discrete(4)
>>> model._get_space_size(space)
4
>>> model._get_space_size(space, number_of_elements=False)
1

# Dict space
>>> space = gym.spaces.Dict({'a': gym.spaces.Box(low=-1, high=1, shape=(2, 3)),
...                          'b': gym.spaces.Discrete(4)})
>>> model._get_space_size(space)
10
>>> model._get_space_size(space, number_of_elements=False)
7
- act(inputs: Mapping[str, Union[torch.Tensor, Any]], role: str = '') Tuple[torch.Tensor, Optional[torch.Tensor], Mapping[str, Union[torch.Tensor, Any]]]
Act according to the specified behavior (to be implemented by the inheriting classes)
Agents will call this method to obtain the decision to be taken given the state of the environment. This method is currently implemented by the helper models (GaussianModel, etc.). The classes that inherit from the latter must only implement the .compute() method
- Parameters
inputs (dict where the values are typically torch.Tensor) – Model inputs. The most common keys are:
"states": state of the environment used to make the decision
"taken_actions": actions taken by the policy for the given states
role (str, optional) – Role played by the model (default: "")
- Raises
NotImplementedError – Child class must implement this method
- Returns
Model output. The first component is the action to be taken by the agent. The second component is the log of the probability density function for stochastic models or None for deterministic models. The third component is a dictionary containing extra output values
- Return type
tuple of torch.Tensor, torch.Tensor or None, and dictionary
- compute(inputs: Mapping[str, Union[torch.Tensor, Any]], role: str = '') Tuple[Union[torch.Tensor, Mapping[str, Union[torch.Tensor, Any]]]]
Define the computation performed (to be implemented by the inheriting classes) by the models
- Parameters
inputs (dict where the values are typically torch.Tensor) – Model inputs. The most common keys are:
"states": state of the environment used to make the decision
"taken_actions": actions taken by the policy for the given states
role (str, optional) – Role played by the model (default: "")
- Raises
NotImplementedError – Child class must implement this method
- Returns
Computation performed by the models
- Return type
tuple of torch.Tensor and dictionary
- forward(inputs: Mapping[str, Union[torch.Tensor, Any]], role: str = '') Tuple[torch.Tensor, Optional[torch.Tensor], Mapping[str, Union[torch.Tensor, Any]]]
Forward pass of the model
This method calls the .act() method and returns its outputs
- Parameters
inputs (dict where the values are typically torch.Tensor) – Model inputs. The most common keys are:
"states": state of the environment used to make the decision
"taken_actions": actions taken by the policy for the given states
role (str, optional) – Role played by the model (default: "")
- Returns
Model output. The first component is the action to be taken by the agent. The second component is the log of the probability density function for stochastic models or None for deterministic models. The third component is a dictionary containing extra output values
- Return type
tuple of torch.Tensor, torch.Tensor or None, and dictionary
- freeze_parameters(freeze: bool = True) None
Freeze or unfreeze internal parameters
Freeze: disable gradient computation (parameters.requires_grad = False)
Unfreeze: enable gradient computation (parameters.requires_grad = True)
- Parameters
freeze (bool, optional) – Freeze the internal parameters if True, otherwise unfreeze them (default: True)
Example:
# freeze model parameters
>>> model.freeze_parameters(True)

# unfreeze model parameters
>>> model.freeze_parameters(False)
- get_specification() Mapping[str, Any]
Returns the specification of the model
The following keys are used by the agents for initialization:
"rnn": Recurrent Neural Network (RNN) specification for RNN, LSTM and GRU layers/cells
"sizes": List of RNN shapes (number of layers, number of environments, number of features in the RNN state). There must be as many tuples as there are states in the recurrent layer/cell. E.g., LSTM has 2 states (hidden and cell).
- Returns
Dictionary containing advanced specification of the model
- Return type
dict
Example:
# model with a LSTM layer.
# - number of layers: 1
# - number of environments: 4
# - number of features in the RNN state: 64
>>> model.get_specification()
{'rnn': {'sizes': [(1, 4, 64), (1, 4, 64)]}}
- init_biases(method_name: str = 'constant_', *args, **kwargs) None
Initialize the model biases according to the specified method name
Method names are from the torch.nn.init module. Allowed method names are uniform_, normal_, constant_, etc.
The following layers will be initialized: - torch.nn.Linear
- Parameters
method_name (str, optional) – torch.nn.init method name (default: "constant_")
args (tuple, optional) – Positional arguments of the method to be called
kwargs (dict, optional) – Key-value arguments of the method to be called
Example:
# initialize all biases with a constant value (0)
>>> model.init_biases(method_name="constant_", val=0)

# initialize all biases with normal distribution with mean 0 and standard deviation 0.25
>>> model.init_biases(method_name="normal_", mean=0.0, std=0.25)
- init_parameters(method_name: str = 'normal_', *args, **kwargs) None
Initialize the model parameters according to the specified method name
Method names are from the torch.nn.init module. Allowed method names are uniform_, normal_, constant_, etc.
- Parameters
method_name (str, optional) – torch.nn.init method name (default: "normal_")
args (tuple, optional) – Positional arguments of the method to be called
kwargs (dict, optional) – Key-value arguments of the method to be called
Example:
# initialize all parameters with an orthogonal distribution with a gain of 0.5
>>> model.init_parameters("orthogonal_", gain=0.5)

# initialize all parameters as a sparse matrix with a sparsity of 0.1
>>> model.init_parameters("sparse_", sparsity=0.1)
- init_weights(method_name: str = 'orthogonal_', *args, **kwargs) None
Initialize the model weights according to the specified method name
Method names are from the torch.nn.init module. Allowed method names are uniform_, normal_, constant_, etc.
The following layers will be initialized: - torch.nn.Linear
- Parameters
method_name (str, optional) – torch.nn.init method name (default: "orthogonal_")
args (tuple, optional) – Positional arguments of the method to be called
kwargs (dict, optional) – Key-value arguments of the method to be called
Example:
# initialize all weights with uniform distribution in range [-0.1, 0.1]
>>> model.init_weights(method_name="uniform_", a=-0.1, b=0.1)

# initialize all weights with normal distribution with mean 0 and standard deviation 0.25
>>> model.init_weights(method_name="normal_", mean=0.0, std=0.25)
- load(path: str) None
Load the model from the specified path
The final storage device is determined by the constructor of the model
- Parameters
path (str) – Path to load the model from
Example:
# load the model onto the CPU
>>> model = Model(observation_space, action_space, device="cpu")
>>> model.load("model.pt")

# load the model onto the GPU 1
>>> model = Model(observation_space, action_space, device="cuda:1")
>>> model.load("model.pt")
- migrate(state_dict: Optional[Mapping[str, torch.Tensor]] = None, path: Optional[str] = None, name_map: Mapping[str, str] = {}, auto_mapping: bool = True, verbose: bool = False) bool
Migrate the specified external model’s state dict to the current model
The final storage device is determined by the constructor of the model
Only one of state_dict or path can be specified. The path parameter allows automatic loading of the state_dict only from files generated by the rl_games and stable-baselines3 libraries at the moment
For ambiguous models (where 2 or more parameters, for source or current model, have equal shape) it is necessary to define the name_map, at least for those parameters, to perform the migration successfully
- Parameters
state_dict (Mapping[str, torch.Tensor], optional) – External model’s state dict to migrate from (default: None)
path (str, optional) – Path to the external checkpoint to migrate from (default: None)
name_map (Mapping[str, str], optional) – Name map to use for the migration (default: {}). Keys are the current parameter names and values are the external parameter names
auto_mapping (bool, optional) – Automatically map the external state dict to the current state dict (default: True)
verbose (bool, optional) – Show model names and migration (default: False)
- Raises
ValueError – If neither or both of state_dict and path parameters have been set
ValueError – If the correct file type cannot be identified from the path parameter
- Returns
True if the migration was successful, False otherwise. Migration is successful if all parameters of the current model are found in the external model
- Return type
bool
Example:
# migrate a rl_games checkpoint with unambiguous state_dict >>> model.migrate(path="./runs/Ant/nn/Ant.pth") True # migrate a rl_games checkpoint with ambiguous state_dict >>> model.migrate(path="./runs/Cartpole/nn/Cartpole.pth", verbose=False) [skrl:WARNING] Ambiguous match for log_std_parameter <- [value_mean_std.running_mean, value_mean_std.running_var, a2c_network.sigma] [skrl:WARNING] Ambiguous match for net.0.bias <- [a2c_network.actor_mlp.0.bias, a2c_network.actor_mlp.2.bias] [skrl:WARNING] Ambiguous match for net.2.bias <- [a2c_network.actor_mlp.0.bias, a2c_network.actor_mlp.2.bias] [skrl:WARNING] Ambiguous match for net.4.weight <- [a2c_network.value.weight, a2c_network.mu.weight] [skrl:WARNING] Ambiguous match for net.4.bias <- [a2c_network.value.bias, a2c_network.mu.bias] [skrl:WARNING] Multiple use of a2c_network.actor_mlp.0.bias -> [net.0.bias, net.2.bias] [skrl:WARNING] Multiple use of a2c_network.actor_mlp.2.bias -> [net.0.bias, net.2.bias] False >>> name_map = {"log_std_parameter": "a2c_network.sigma", ... "net.0.bias": "a2c_network.actor_mlp.0.bias", ... "net.2.bias": "a2c_network.actor_mlp.2.bias", ... "net.4.weight": "a2c_network.mu.weight", ... 
"net.4.bias": "a2c_network.mu.bias"} >>> model.migrate(path="./runs/Cartpole/nn/Cartpole.pth", name_map=name_map, verbose=True) [skrl:INFO] Models [skrl:INFO] |-- current: 7 items [skrl:INFO] | |-- log_std_parameter : torch.Size([1]) [skrl:INFO] | |-- net.0.weight : torch.Size([32, 4]) [skrl:INFO] | |-- net.0.bias : torch.Size([32]) [skrl:INFO] | |-- net.2.weight : torch.Size([32, 32]) [skrl:INFO] | |-- net.2.bias : torch.Size([32]) [skrl:INFO] | |-- net.4.weight : torch.Size([1, 32]) [skrl:INFO] | |-- net.4.bias : torch.Size([1]) [skrl:INFO] |-- source: 15 items [skrl:INFO] | |-- value_mean_std.running_mean : torch.Size([1]) [skrl:INFO] | |-- value_mean_std.running_var : torch.Size([1]) [skrl:INFO] | |-- value_mean_std.count : torch.Size([]) [skrl:INFO] | |-- running_mean_std.running_mean : torch.Size([4]) [skrl:INFO] | |-- running_mean_std.running_var : torch.Size([4]) [skrl:INFO] | |-- running_mean_std.count : torch.Size([]) [skrl:INFO] | |-- a2c_network.sigma : torch.Size([1]) [skrl:INFO] | |-- a2c_network.actor_mlp.0.weight : torch.Size([32, 4]) [skrl:INFO] | |-- a2c_network.actor_mlp.0.bias : torch.Size([32]) [skrl:INFO] | |-- a2c_network.actor_mlp.2.weight : torch.Size([32, 32]) [skrl:INFO] | |-- a2c_network.actor_mlp.2.bias : torch.Size([32]) [skrl:INFO] | |-- a2c_network.value.weight : torch.Size([1, 32]) [skrl:INFO] | |-- a2c_network.value.bias : torch.Size([1]) [skrl:INFO] | |-- a2c_network.mu.weight : torch.Size([1, 32]) [skrl:INFO] | |-- a2c_network.mu.bias : torch.Size([1]) [skrl:INFO] Migration [skrl:INFO] |-- map: log_std_parameter <- a2c_network.sigma [skrl:INFO] |-- auto: net.0.weight <- a2c_network.actor_mlp.0.weight [skrl:INFO] |-- map: net.0.bias <- a2c_network.actor_mlp.0.bias [skrl:INFO] |-- auto: net.2.weight <- a2c_network.actor_mlp.2.weight [skrl:INFO] |-- map: net.2.bias <- a2c_network.actor_mlp.2.bias [skrl:INFO] |-- map: net.4.weight <- a2c_network.mu.weight [skrl:INFO] |-- map: net.4.bias <- a2c_network.mu.bias False # migrate a 
stable-baselines3 checkpoint with unambiguous state_dict >>> model.migrate(path="./ddpg_pendulum.zip") True # migrate from any exported model by loading its state_dict (unambiguous state_dict) >>> state_dict = torch.load("./external_model.pt") >>> model.migrate(state_dict=state_dict) True
- random_act(inputs: Mapping[str, Union[torch.Tensor, Any]], role: str = '') Tuple[torch.Tensor, None, Mapping[str, Union[torch.Tensor, Any]]]
Act randomly according to the action space
- Parameters
inputs (dict where the values are typically torch.Tensor) – Model inputs. The most common keys are:
"states": state of the environment used to make the decision
"taken_actions": actions taken by the policy for the given states
role (str, optional) – Role played by the model (default: "")
- Raises
NotImplementedError – Unsupported action space
- Returns
Model output. The first component is the action to be taken by the agent
- Return type
tuple of torch.Tensor, None, and dictionary
- save(path: str, state_dict: Optional[dict] = None) None
Save the model to the specified path
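- Parameters
path (str) – Path to save the model to
state_dict (dict, optional) – State dictionary to save (default: None). If None, the model’s current state dictionary is saved
Example:
# save the current model to the specified path
>>> model.save("/tmp/model.pt")

# save an older version of the model to the specified path
>>> old_state_dict = copy.deepcopy(model.state_dict())
>>> # ...
>>> model.save("/tmp/model.pt", old_state_dict)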
- set_mode(mode: str) None
Set the model mode (training or evaluation)
- Parameters
mode (str) – Mode: "train" for training or "eval" for evaluation. See torch.nn.Module.train
- Raises
ValueError – If the mode is not "train" or "eval"
- tensor_to_space(tensor: torch.Tensor, space: Union[gym.spaces.space.Space, gymnasium.spaces.space.Space], start: int = 0) Union[torch.Tensor, dict]
Map a flat tensor to a Gym/Gymnasium space
The mapping is done in the following way:
Tensors belonging to Discrete spaces are returned without modification
Tensors belonging to Box spaces are reshaped to the corresponding space shape keeping the first dimension (number of samples) as they are
Tensors belonging to Dict spaces are mapped into a dictionary with the same keys as the original space
- Parameters
tensor (torch.Tensor) – Tensor to map from
space (gym.Space or gymnasium.Space) – Space to map the tensor to
start (int, optional) – Index of the first element of the tensor to map (default:
0
)
- Raises
ValueError – If the space is not supported
- Returns
Mapped tensor or dictionary
- Return type
torch.Tensor or dict
Example:
>>> space = gym.spaces.Dict({'a': gym.spaces.Box(low=-1, high=1, shape=(2, 3)),
...                          'b': gym.spaces.Discrete(4)})
>>> tensor = torch.tensor([[-0.3, -0.2, -0.1, 0.1, 0.2, 0.3, 2]])
>>>
>>> model.tensor_to_space(tensor, space)
{'a': tensor([[[-0.3000, -0.2000, -0.1000],
               [ 0.1000,  0.2000,  0.3000]]]),
 'b': tensor([[2.]])}
- update_parameters(model: torch.nn.modules.module.Module, polyak: float = 1) None
Update internal parameters by hard or soft (polyak averaging) update
Hard update: \(\theta = \theta_{net}\)
Soft (polyak averaging) update: \(\theta = (1 - \rho) \theta + \rho \theta_{net}\)
- Parameters
model (torch.nn.Module (skrl.models.torch.Model)) – Model used to update the internal parameters
polyak (float, optional) – Polyak hyperparameter between 0 and 1 (default: 1). A hard update is performed when its value is 1
Example:
# hard update (from source model)
>>> model.update_parameters(source_model)

# soft update (from source model)
>>> model.update_parameters(source_model, polyak=0.005)