Simulating an environment in pyRDDLGym with a custom policy.

Simulating an environment in pyRDDLGym with a custom policy.

This follow-up example illustrates how to define a custom policy to interact with an environment.

First install the required packages:

pip install --quiet --upgrade pip pyRDDLGym rddlrepository
Note: you may need to restart the kernel to use updated packages.

Import the required packages:

import warnings
warnings.filterwarnings('ignore')
import sys
import numpy as np

import pyRDDLGym
from pyRDDLGym.core.policy import RandomAgent, BaseAgent

We will attempt to first run the Elevators domain with the random policy:

# Build instance '0' of the Elevators domain with action-constraint
# enforcement switched on, then try the built-in random agent on it.
env = pyRDDLGym.make('Elevators', '0', enforce_action_constraints=True)
agent = RandomAgent(
    action_space=env.action_space,
    num_actions=env.max_allowed_actions,
)
try:
    return_stats = agent.evaluate(env, episodes=20)
except Exception as err:
    # The failure is the point of this cell: print the error to stderr
    # so the walkthrough can continue instead of aborting.
    print(err, file=sys.stderr)
Precondition 0 is not satisfied for actions {'move-current-dir': array([ True, False]), 'open-door': array([ True, False]), 'close-door': array([False, False])}.
>> ( forall_{?e: elevator} [ ( ( open-door(?e) + close-door(?e) ) + move-current-dir(?e) ) <= 1 ] )

pyRDDLGym provides some support for parsing bounds on action constraints, but unfortunately it cannot parse arbitrarily complex non-box constraints. This means the built-in random policy will sample actions that do not satisfy the required constraints. In this case, the action-precondition requires that, for each elevator, at most one non-noop action can be chosen.

To solve this, we will create a subclass of BaseAgent that respects the constraint on each elevator during sampling:

class ElevatorsPolicy(BaseAgent):
    """Random policy that assigns at most one action to each elevator.

    For every elevator a single uniform number is drawn and mapped to one
    of four equally wide outcomes: move, open the door, close the door, or
    do nothing. Since at most one key is written per elevator, an elevator
    never receives two actions in the same step.
    """

    # Upper bound of each outcome's interval, paired with the action name.
    # Draws at or above the last bound select no action for that elevator.
    _CUTOFFS = (
        (0.25, 'move-current-dir'),
        (0.5, 'open-door'),
        (0.75, 'close-door'),
    )

    def __init__(self, elevators):
        """Store the elevator objects that actions are sampled for."""
        self.elevators = elevators

    def sample_action(self, state):
        """Return a dict of grounded action names set to True.

        `state` is accepted for interface compatibility but is not used.
        Elevators for which nothing is chosen get no entry in the dict.
        """
        sampled = {}
        for elevator in self.elevators:
            draw = np.random.uniform()
            # First interval containing the draw, or None for "do nothing".
            chosen = next(
                (name for bound, name in self._CUTOFFS if draw < bound),
                None,
            )
            if chosen is not None:
                sampled[f'{chosen}___{elevator}'] = True
        return sampled

Next we will create an instance of this policy by passing in the objects of type “elevator”:

# Look up the objects declared with type 'elevator' in the environment's
# model and hand them to the custom policy.
elevators = env.model.type_to_objects['elevator']
agent = ElevatorsPolicy(elevators)

We are now ready to simulate! Let’s simulate one trial and check that the actions sampled respect the constraints in the RDDL:

env.horizon = 10   # let's just see the first 10 steps
# Run a single episode; verbose=True prints the action, state and reward
# of every step. As the cell's last expression, the returned summary
# statistics are echoed after the trace.
agent.evaluate(env, episodes=1, verbose=True)
initial state = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 0     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 0      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = True          elevator-dir-up___e1 = True  
            elevator-closed___e0 = True          elevator-closed___e1 = True    elevator-at-floor___e0__f0 = True  
      elevator-at-floor___e0__f1 = False   elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = True    elevator-at-floor___e1__f1 = False 
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
------------------------------------------------------------------------------------------------------------------------
step   = 0
action = 
     close-door___e1 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 0     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 2      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = True          elevator-dir-up___e1 = True  
            elevator-closed___e0 = True          elevator-closed___e1 = True    elevator-at-floor___e0__f0 = True  
      elevator-at-floor___e0__f1 = False   elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = True    elevator-at-floor___e1__f1 = False 
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = 0.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 1
action = 
{}
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 2      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = True          elevator-dir-up___e1 = True  
            elevator-closed___e0 = True          elevator-closed___e1 = True    elevator-at-floor___e0__f0 = True  
      elevator-at-floor___e0__f1 = False   elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = True    elevator-at-floor___e1__f1 = False 
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -6.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 2
action = 
     move-current-dir___e0 = True  move-current-dir___e1 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 3      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = True          elevator-dir-up___e1 = True  
            elevator-closed___e0 = True          elevator-closed___e1 = True    elevator-at-floor___e0__f0 = False 
      elevator-at-floor___e0__f1 = True    elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = False   elevator-at-floor___e1__f1 = True  
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -9.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 3
action = 
     open-door___e0 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 3      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = False         elevator-dir-up___e1 = True  
            elevator-closed___e0 = False         elevator-closed___e1 = True    elevator-at-floor___e0__f0 = False 
      elevator-at-floor___e0__f1 = True    elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = False   elevator-at-floor___e1__f1 = True  
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -12.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 4
action = 
     open-door___e1 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 3      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = False         elevator-dir-up___e1 = False 
            elevator-closed___e0 = False         elevator-closed___e1 = False   elevator-at-floor___e0__f0 = False 
      elevator-at-floor___e0__f1 = True    elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = False   elevator-at-floor___e1__f1 = True  
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -12.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 5
action = 
     close-door___e0 = True  close-door___e1 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 3      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = False         elevator-dir-up___e1 = False 
            elevator-closed___e0 = True          elevator-closed___e1 = True    elevator-at-floor___e0__f0 = False 
      elevator-at-floor___e0__f1 = True    elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = False   elevator-at-floor___e1__f1 = True  
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -12.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 6
action = 
     open-door___e0 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 3      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = False         elevator-dir-up___e1 = False 
            elevator-closed___e0 = False         elevator-closed___e1 = True    elevator-at-floor___e0__f0 = False 
      elevator-at-floor___e0__f1 = True    elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = False   elevator-at-floor___e1__f1 = True  
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -12.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 7
action = 
     move-current-dir___e0 = True  move-current-dir___e1 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 3      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = False         elevator-dir-up___e1 = False 
            elevator-closed___e0 = False         elevator-closed___e1 = True    elevator-at-floor___e0__f0 = False 
      elevator-at-floor___e0__f1 = True    elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = True    elevator-at-floor___e1__f1 = False 
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -12.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 8
action = 
            open-door___e0 = True  move-current-dir___e1 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 3      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = False         elevator-dir-up___e1 = True  
            elevator-closed___e0 = False         elevator-closed___e1 = True    elevator-at-floor___e0__f0 = False 
      elevator-at-floor___e0__f1 = True    elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = True    elevator-at-floor___e1__f1 = False 
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -12.0
done   = False
------------------------------------------------------------------------------------------------------------------------
step   = 9
action = 
            open-door___e0 = True  move-current-dir___e1 = True 
state  = 
         num-person-waiting___f0 = 0          num-person-waiting___f1 = 0          num-person-waiting___f2 = 1     
         num-person-waiting___f3 = 0          num-person-waiting___f4 = 4      num-person-in-elevator___e0 = 0     
     num-person-in-elevator___e1 = 0             elevator-dir-up___e0 = False         elevator-dir-up___e1 = True  
            elevator-closed___e0 = False         elevator-closed___e1 = True    elevator-at-floor___e0__f0 = False 
      elevator-at-floor___e0__f1 = True    elevator-at-floor___e0__f2 = False   elevator-at-floor___e0__f3 = False 
      elevator-at-floor___e0__f4 = False   elevator-at-floor___e1__f0 = False   elevator-at-floor___e1__f1 = True  
      elevator-at-floor___e1__f2 = False   elevator-at-floor___e1__f3 = False   elevator-at-floor___e1__f4 = False 
    
reward = -12.0
done   = True

episode 1 ended with return -99.0
========================================================================================================================
{'mean': -99.0, 'median': -99.0, 'min': -99.0, 'max': -99.0, 'std': 0.0}