Manual trainer#
Train agents by manually controlling the training/evaluation loop.
Usage#
from skrl.trainers.torch import ManualTrainer

# assuming there is an environment called 'env'
# and an agent or a list of agents called 'agents'

# create a manual trainer
cfg = {"timesteps": 50000, "headless": False}
trainer = ManualTrainer(env=env, agents=agents, cfg=cfg)

# train the agent(s)
for timestep in range(cfg["timesteps"]):
    trainer.train(timestep=timestep)

# evaluate the agent(s)
for timestep in range(cfg["timesteps"]):
    trainer.eval(timestep=timestep)
from skrl.trainers.jax import ManualTrainer

# assuming there is an environment called 'env'
# and an agent or a list of agents called 'agents'

# create a manual trainer
cfg = {"timesteps": 50000, "headless": False}
trainer = ManualTrainer(env=env, agents=agents, cfg=cfg)

# train the agent(s)
for timestep in range(cfg["timesteps"]):
    trainer.train(timestep=timestep)

# evaluate the agent(s)
for timestep in range(cfg["timesteps"]):
    trainer.eval(timestep=timestep)
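The value of manually controlling the loop is that arbitrary user logic can run between iterations. A minimal sketch (PyTorch shown; an environment 'env' and a single agent 'agent' are assumed to exist, and the checkpoint interval and path are illustrative):

from skrl.trainers.torch import ManualTrainer

# create a manual trainer for a single agent
cfg = {"timesteps": 50000, "headless": False}
trainer = ManualTrainer(env=env, agents=agent, cfg=cfg)

save_interval = 5000  # illustrative value

for timestep in range(cfg["timesteps"]):
    # one training iteration: pre-interaction, action computation,
    # environment step, transition recording and post-interaction
    trainer.train(timestep=timestep)

    # any custom logic fits here: logging, curriculum updates, manual checkpointing, ...
    if timestep > 0 and timestep % save_interval == 0:
        agent.save(f"./checkpoints/agent_{timestep}.pt")  # illustrative path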
Configuration#
MANUAL_TRAINER_DEFAULT_CONFIG = {
    "timesteps": 100000,                  # number of timesteps to train for
    "headless": False,                    # whether to use headless mode (no rendering)
    "disable_progressbar": False,         # whether to disable the progressbar. If None, disable on non-TTY
    "close_environment_at_exit": True,    # whether to close the environment on normal program termination
}
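Since the defaults are exposed as a plain Python dictionary, one way to override individual entries (a sketch; 'env' and 'agents' as in the usage example above) is to copy the defaults and update them:

from skrl.trainers.torch import ManualTrainer
from skrl.trainers.torch.manual import MANUAL_TRAINER_DEFAULT_CONFIG

# start from the defaults and override only the entries that differ
cfg = MANUAL_TRAINER_DEFAULT_CONFIG.copy()
cfg.update({"timesteps": 50000, "disable_progressbar": True})

trainer = ManualTrainer(env=env, agents=agents, cfg=cfg)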
API (PyTorch)#
- skrl.trainers.torch.manual.MANUAL_TRAINER_DEFAULT_CONFIG#
alias of {'close_environment_at_exit': True, 'disable_progressbar': False, 'headless': False, 'timesteps': 100000}
- class skrl.trainers.torch.manual.ManualTrainer(env: Wrapper, agents: Agent | List[Agent], agents_scope: List[int] | None = None, cfg: dict | None = None)#
Bases: Trainer
- __init__(env: Wrapper, agents: Agent | List[Agent], agents_scope: List[int] | None = None, cfg: dict | None = None) → None#
Manual trainer
Train agents by manually controlling the training/evaluation loop
- Parameters:
env (skrl.envs.wrappers.torch.Wrapper) – Environment to train on
agents (Agent or list of Agent) – Agents to train
agents_scope (tuple or list of int, optional) – Number of environments for each agent to train on (default: None)
cfg (dict, optional) – Configuration dictionary (default: None). See MANUAL_TRAINER_DEFAULT_CONFIG for default values
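For simultaneous training of several agents sharing one vectorized environment, agents_scope splits the parallel sub-environments among them. A sketch, assuming a wrapped environment with 96 parallel sub-environments and two already-created agents (agent_a and agent_b are placeholder names):

from skrl.trainers.torch import ManualTrainer

# assign the first 64 sub-environments to agent_a and the remaining 32 to agent_b
trainer = ManualTrainer(env=env,
                        agents=[agent_a, agent_b],
                        agents_scope=[64, 32],
                        cfg={"timesteps": 100000, "headless": True})

for timestep in range(100000):
    trainer.train(timestep=timestep)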
- eval(timestep: int | None = None, timesteps: int | None = None) → None#
Evaluate the agents sequentially
This method executes the following steps in a loop:
Compute actions (sequentially if num_simultaneous_agents > 1)
Interact with the environments
Render scene
Reset environments
- multi_agent_eval() → None#
Evaluate multi-agents
This method executes the following steps in a loop:
Compute actions (sequentially)
Interact with the environments
Render scene
Reset environments
- multi_agent_train() → None#
Train multi-agents
This method executes the following steps in a loop:
Pre-interaction
Compute actions
Interact with the environments
Render scene
Record transitions
Post-interaction
Reset environments
- single_agent_eval() → None#
Evaluate agent
This method executes the following steps in a loop:
Compute actions (sequentially)
Interact with the environments
Render scene
Reset environments
- single_agent_train() → None#
Train agent
This method executes the following steps in a loop:
Pre-interaction
Compute actions
Interact with the environments
Render scene
Record transitions
Post-interaction
Reset environments
- train(timestep: int | None = None, timesteps: int | None = None) → None#
Execute a training iteration
This method executes the following steps once:
Pre-interaction (sequentially if num_simultaneous_agents > 1)
Compute actions (sequentially if num_simultaneous_agents > 1)
Interact with the environments
Render scene
Record transitions (sequentially if num_simultaneous_agents > 1)
Post-interaction (sequentially if num_simultaneous_agents > 1)
Reset environments
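Both the timestep and timesteps arguments of train() (and eval()) are optional. The sketch below passes them explicitly so that the length of this particular loop, rather than the configured "timesteps", should be used as the total (the value 1000 is illustrative and trainer is the instance created in the usage example):

# a short run whose length differs from cfg["timesteps"]
total = 1000
for timestep in range(total):
    # pass `timesteps` explicitly so this run's total is used instead of the configured one
    trainer.train(timestep=timestep, timesteps=total)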
API (JAX)#
- skrl.trainers.jax.manual.MANUAL_TRAINER_DEFAULT_CONFIG#
alias of {'close_environment_at_exit': True, 'disable_progressbar': False, 'headless': False, 'timesteps': 100000}
- class skrl.trainers.jax.manual.ManualTrainer(env: Wrapper, agents: Agent | List[Agent], agents_scope: List[int] | None = None, cfg: dict | None = None)#
Bases: Trainer
- __init__(env: Wrapper, agents: Agent | List[Agent], agents_scope: List[int] | None = None, cfg: dict | None = None) → None#
Manual trainer
Train agents by manually controlling the training/evaluation loop
- Parameters:
env (skrl.envs.wrappers.jax.Wrapper) – Environment to train on
agents (Agent or list of Agent) – Agents to train
agents_scope (tuple or list of int, optional) – Number of environments for each agent to train on (default: None)
cfg (dict, optional) – Configuration dictionary (default: None). See MANUAL_TRAINER_DEFAULT_CONFIG for default values
- eval(timestep: int | None = None, timesteps: int | None = None) → None#
Evaluate the agents sequentially
This method executes the following steps in a loop:
Compute actions (sequentially if num_simultaneous_agents > 1)
Interact with the environments
Render scene
Reset environments
- multi_agent_eval() → None#
Evaluate multi-agents
This method executes the following steps in a loop:
Compute actions (sequentially)
Interact with the environments
Render scene
Reset environments
- multi_agent_train() → None#
Train multi-agents
This method executes the following steps in a loop:
Pre-interaction
Compute actions
Interact with the environments
Render scene
Record transitions
Post-interaction
Reset environments
- single_agent_eval() → None#
Evaluate agent
This method executes the following steps in a loop:
Compute actions (sequentially)
Interact with the environments
Render scene
Reset environments
- single_agent_train() → None#
Train agent
This method executes the following steps in a loop:
Pre-interaction
Compute actions
Interact with the environments
Render scene
Record transitions
Post-interaction
Reset environments
- train(timestep: int | None = None, timesteps: int | None = None) → None#
Execute a training iteration
This method executes the following steps once:
Pre-interaction (sequentially if num_simultaneous_agents > 1)
Compute actions (sequentially if num_simultaneous_agents > 1)
Interact with the environments
Render scene
Record transitions (sequentially if num_simultaneous_agents > 1)
Post-interaction (sequentially if num_simultaneous_agents > 1)
Reset environments
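An evaluation-only run follows the same pattern as the usage example above. A minimal sketch with the JAX variant, assuming 'env' and a single trained 'agent' already exist (the checkpoint path and its extension are illustrative):

from skrl.trainers.jax import ManualTrainer

# restore a previously trained agent from a checkpoint (illustrative path)
agent.load("./checkpoints/best_agent.pickle")

# evaluation only: no training calls, just eval() at every timestep
cfg = {"timesteps": 10000, "headless": False}
trainer = ManualTrainer(env=env, agents=agent, cfg=cfg)

for timestep in range(cfg["timesteps"]):
    trainer.eval(timestep=timestep)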