Simulating an environment in pyRDDLGym with a custom policy
This follow-up example illustrates how to define a custom policy to interact with an environment.
First, install the required packages:
pip install --quiet --upgrade pip pyRDDLGym rddlrepository
Note: you may need to restart the kernel to use updated packages.
Import the required packages:
import warnings
warnings.filterwarnings('ignore')
import sys
import numpy as np
import pyRDDLGym
from pyRDDLGym.core.policy import RandomAgent, BaseAgent
We will attempt to first run the Elevators domain with the random policy:
# Build instance '0' of the Elevators domain with action-constraint
# enforcement switched on, and pair it with the built-in random agent.
env = pyRDDLGym.make('Elevators', '0', enforce_action_constraints=True)
agent = RandomAgent(
    action_space=env.action_space,
    num_actions=env.max_allowed_actions,
)

# The random agent is expected to sample an action that breaks the domain's
# action precondition, so the failure is caught and reported on stderr
# instead of aborting the walkthrough.
try:
    return_stats = agent.evaluate(env, episodes=20)
except Exception as err:
    print(err, file=sys.stderr)
Precondition 0 is not satisfied for actions {'move-current-dir': array([ True, False]), 'open-door': array([ True, False]), 'close-door': array([False, False])}.
>> ( forall_{?e: elevator} [ ( ( open-door(?e) + close-door(?e) ) + move-current-dir(?e) ) <= 1 ] )
pyRDDLGym provides some support for parsing bounds on action constraints, but unfortunately it cannot parse arbitrarily complex non-box constraints. This means the built-in random policy will sample actions that do not satisfy the required constraints. In this case, the action-precondition requires that, for each elevator, at most one non-noop action can be chosen.
To solve this, we will create a subclass of BaseAgent that respects the constraint on each elevator during sampling:
class ElevatorsPolicy(BaseAgent):
    """Random policy that picks at most one action per elevator per step.

    For every elevator a single uniform draw selects one of four equally
    likely outcomes: move in the current direction, open the door, close
    the door, or do nothing. Because only one action fluent can be set per
    elevator, the sampled action never sets more than one of the three
    fluents for the same elevator.
    """

    def __init__(self, elevators):
        """Remember the elevator objects to sample actions for.

        Args:
            elevators: iterable of elevator object names; each name is
                appended to the action fluent name after a ``___`` separator.
        """
        self.elevators = elevators

    def sample_action(self, state):
        """Sample a random action dictionary.

        Args:
            state: the current observation; it is not consulted.

        Returns:
            A dict mapping grounded action names to ``True``. Elevators that
            drew the "do nothing" outcome contribute no entry, so the dict
            may be empty.
        """
        chosen = {}
        for elevator in self.elevators:
            draw = np.random.uniform()
            if draw >= 0.75:
                # top quarter of the interval: leave this elevator idle
                continue
            if draw < 0.25:
                # move the elevator in its current direction
                key = f'move-current-dir___{elevator}'
            elif draw < 0.5:
                # open the door
                key = f'open-door___{elevator}'
            else:
                # close the door
                key = f'close-door___{elevator}'
            chosen[key] = True
        return chosen
Next we will create an instance of this policy by passing in the objects of type “elevator”:
# Look up the objects of type 'elevator' in the environment's model.
elevators = env.model.type_to_objects['elevator']
# Rebind `agent` to the custom policy built over those elevator objects.
agent = ElevatorsPolicy(elevators)
We are now ready to simulate! Let’s simulate one trial and check that the actions sampled respect the constraints in the RDDL:
env.horizon = 10 # let's just see the first 10 steps
# Run one episode with verbose=True, which prints the action, state, reward
# and done flag at every step. The call is left as a bare expression so the
# returned summary statistics are displayed as the cell's result.
agent.evaluate(env, episodes=1, verbose=True)
initial state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 0
num-person-waiting___f3 = 0 num-person-waiting___f4 = 0 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = True elevator-dir-up___e1 = True
elevator-closed___e0 = True elevator-closed___e1 = True elevator-at-floor___e0__f0 = True
elevator-at-floor___e0__f1 = False elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = True elevator-at-floor___e1__f1 = False
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
------------------------------------------------------------------------------------------------------------------------
step = 0
action =
close-door___e1 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 0
num-person-waiting___f3 = 0 num-person-waiting___f4 = 2 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = True elevator-dir-up___e1 = True
elevator-closed___e0 = True elevator-closed___e1 = True elevator-at-floor___e0__f0 = True
elevator-at-floor___e0__f1 = False elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = True elevator-at-floor___e1__f1 = False
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = 0.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 1
action =
{}
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 2 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = True elevator-dir-up___e1 = True
elevator-closed___e0 = True elevator-closed___e1 = True elevator-at-floor___e0__f0 = True
elevator-at-floor___e0__f1 = False elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = True elevator-at-floor___e1__f1 = False
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -6.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 2
action =
move-current-dir___e0 = True move-current-dir___e1 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 3 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = True elevator-dir-up___e1 = True
elevator-closed___e0 = True elevator-closed___e1 = True elevator-at-floor___e0__f0 = False
elevator-at-floor___e0__f1 = True elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = False elevator-at-floor___e1__f1 = True
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -9.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 3
action =
open-door___e0 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 3 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = False elevator-dir-up___e1 = True
elevator-closed___e0 = False elevator-closed___e1 = True elevator-at-floor___e0__f0 = False
elevator-at-floor___e0__f1 = True elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = False elevator-at-floor___e1__f1 = True
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -12.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 4
action =
open-door___e1 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 3 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = False elevator-dir-up___e1 = False
elevator-closed___e0 = False elevator-closed___e1 = False elevator-at-floor___e0__f0 = False
elevator-at-floor___e0__f1 = True elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = False elevator-at-floor___e1__f1 = True
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -12.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 5
action =
close-door___e0 = True close-door___e1 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 3 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = False elevator-dir-up___e1 = False
elevator-closed___e0 = True elevator-closed___e1 = True elevator-at-floor___e0__f0 = False
elevator-at-floor___e0__f1 = True elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = False elevator-at-floor___e1__f1 = True
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -12.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 6
action =
open-door___e0 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 3 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = False elevator-dir-up___e1 = False
elevator-closed___e0 = False elevator-closed___e1 = True elevator-at-floor___e0__f0 = False
elevator-at-floor___e0__f1 = True elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = False elevator-at-floor___e1__f1 = True
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -12.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 7
action =
move-current-dir___e0 = True move-current-dir___e1 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 3 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = False elevator-dir-up___e1 = False
elevator-closed___e0 = False elevator-closed___e1 = True elevator-at-floor___e0__f0 = False
elevator-at-floor___e0__f1 = True elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = True elevator-at-floor___e1__f1 = False
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -12.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 8
action =
open-door___e0 = True move-current-dir___e1 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 3 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = False elevator-dir-up___e1 = True
elevator-closed___e0 = False elevator-closed___e1 = True elevator-at-floor___e0__f0 = False
elevator-at-floor___e0__f1 = True elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = True elevator-at-floor___e1__f1 = False
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -12.0
done = False
------------------------------------------------------------------------------------------------------------------------
step = 9
action =
open-door___e0 = True move-current-dir___e1 = True
state =
num-person-waiting___f0 = 0 num-person-waiting___f1 = 0 num-person-waiting___f2 = 1
num-person-waiting___f3 = 0 num-person-waiting___f4 = 4 num-person-in-elevator___e0 = 0
num-person-in-elevator___e1 = 0 elevator-dir-up___e0 = False elevator-dir-up___e1 = True
elevator-closed___e0 = False elevator-closed___e1 = True elevator-at-floor___e0__f0 = False
elevator-at-floor___e0__f1 = True elevator-at-floor___e0__f2 = False elevator-at-floor___e0__f3 = False
elevator-at-floor___e0__f4 = False elevator-at-floor___e1__f0 = False elevator-at-floor___e1__f1 = True
elevator-at-floor___e1__f2 = False elevator-at-floor___e1__f3 = False elevator-at-floor___e1__f4 = False
reward = -12.0
done = True
episode 1 ended with return -99.0
========================================================================================================================
{'mean': -99.0, 'median': -99.0, 'min': -99.0, 'max': -99.0, 'std': 0.0}