2. Implementing the CoreEnv¶
The complete code for this part of the tutorial can be found here
# file structure
- cutting_2d
- main.py
- env
- core_env.py
- inventory.py
- maze_state.py
- maze_action.py
Page Overview
2.1. CoreEnv¶
The first component we need to implement is the Core Environment which defines the main mechanics and functionality of the environment.
For this example we will call it Cutting2DCoreEnvironment.
As for any other Gym environment we need to implement several methods according to the
CoreEnv
interface.
We will start with the very basic components and add more and more features (complexity) throughout this tutorial:
- step(): Implements the cutting mechanics.
- reset(): Resets the environment as well as the piece inventory.
- seed(): Sets the random state of the environment for reproducibility.
- close(): Can be used for cleanup.
- get_maze_state(): Returns the current MazeState of the environment.
You can find the implementation of the basic version of the Cutting2DCoreEnvironment
below.
from typing import Union, Tuple, Dict, Any
import numpy as np
from maze.core.env.core_env import CoreEnv
from maze.core.env.structured_env import ActorID
from .maze_state import Cutting2DMazeState
from .maze_action import Cutting2DMazeAction
from .inventory import Inventory
class Cutting2DCoreEnvironment(CoreEnv):
    """Environment for cutting 2D pieces based on the customer demand. Works as follows:

    - Keeps inventory of 2D pieces available for cutting and fulfilling the demand.
    - Produces a new demand for one piece in every step (here a static demand).
    - The agent should decide which piece from inventory to cut (and how) to fulfill the given demand.
    - What remains from the cut piece is put back in inventory.
    - All the time, one raw (full-size) piece is available in inventory.
      (If it gets cut, it is replenished in the next step.)
    - Rewards are calculated to motivate the agent to consume as few raw pieces as possible.
    - If inventory gets full, the oldest pieces get discarded.

    :param max_pieces_in_inventory: Size of the inventory.
    :param raw_piece_size: Size of a fresh raw (= full-size) piece.
    :param static_demand: Order to issue in each step.
    """

    def __init__(self, max_pieces_in_inventory: int, raw_piece_size: Tuple[int, int],
                 static_demand: Tuple[int, int]):
        super().__init__()

        self.max_pieces_in_inventory = max_pieces_in_inventory
        # Normalised to a tuple so it compares equal to the tuples held in the inventory
        # (see the raw-piece check in step()).
        self.raw_piece_size = tuple(raw_piece_size)
        self.current_demand = static_demand

        # setup environment
        self._setup_env()

    def _setup_env(self):
        """Create a fresh inventory holding exactly one raw piece."""
        self.inventory = Inventory(self.max_pieces_in_inventory, self.raw_piece_size)
        self.inventory.replenish_piece()

    def step(self, maze_action: Cutting2DMazeAction) \
            -> Tuple[Cutting2DMazeState, float, bool, Dict[Any, Any]]:
        """Summary of the step (simplified, not necessarily respecting the actual order in the code):

        1. Check if the selected piece to cut is valid (i.e. in inventory, large enough etc.)
        2. Attempt the cutting
        3. Replenish a fresh piece if needed and return an appropriate reward

        Rewards: 0 for a valid cut of a non-raw piece, -1 if a raw piece was cut (and therefore
        replenished), -2 for an invalid cut. An out-of-bounds piece id leaves the reward at 0 and
        is reported via ``info['error']``. The done flag is always False.

        :param maze_action: Cutting MazeAction to take.
        :return: maze_state, reward, done, info
        """
        info, reward = {}, 0
        replenishment_needed = False

        # check if valid piece id was selected; negative ids are rejected explicitly because
        # list indexing would otherwise wrap them around (or raise an IndexError)
        if not 0 <= maze_action.piece_id < self.inventory.size():
            info['error'] = 'piece_id_out_of_bounds'
        # perform cutting
        else:
            # remember the piece before cutting, the inventory removes it on success
            piece_to_cut = self.inventory.pieces[maze_action.piece_id]

            # attempt the cut
            if self.inventory.cut(maze_action, self.current_demand):
                info['msg'] = "valid_cut"
                replenishment_needed = piece_to_cut == self.raw_piece_size
            else:
                # assign a negative reward for invalid cutting attempts
                info['error'] = "invalid_cut"
                reward = -2

        # check if replenishment is required
        if replenishment_needed:
            self.inventory.replenish_piece()
            # assign negative reward if a piece has to be replenished
            reward = -1

        # compile env state
        maze_state = self.get_maze_state()

        return maze_state, reward, False, info

    def get_maze_state(self) -> Cutting2DMazeState:
        """Returns the current Cutting2DMazeState of the environment."""
        return Cutting2DMazeState(self.inventory.pieces, self.max_pieces_in_inventory,
                                  self.current_demand, self.raw_piece_size)

    def reset(self) -> Cutting2DMazeState:
        """Resets the environment to initial state.

        :return: The MazeState right after the reset.
        """
        self._setup_env()
        return self.get_maze_state()

    def close(self):
        """No additional cleanup necessary."""

    def seed(self, seed: int) -> None:
        """Seed random state of environment.

        :param seed: The seed (currently unused).
        """
        # No randomness in the env at this point
        pass

    # --- let's ignore everything below this line for now ---
    # The remaining interface methods are empty placeholders that return None.

    def get_renderer(self) -> Any:
        pass

    def get_serializable_components(self) -> Dict[str, Any]:
        pass

    def is_actor_done(self) -> bool:
        pass

    def actor_id(self) -> ActorID:
        pass

    def agent_counts_dict(self) -> Dict[Union[str, int], int]:
        pass
2.2. Environment Components¶
To keep the implementation of the core environment short and clean
we introduce a dedicated Inventory
class providing functionality for:
maintaining the inventory of available cutting pieces
replenishing new raw inventory pieces if required
the cutting logic of the environment
from .maze_action import Cutting2DMazeAction
class Inventory:
    """Holds the inventory of 2D pieces and performs cutting.

    Pieces are kept in ``self.pieces`` in insertion order (oldest first).

    :param max_pieces_in_inventory: Size of the inventory. If full, the oldest pieces get discarded.
    :param raw_piece_size: Size of a fresh raw (= full-size) piece.
    """

    def __init__(self, max_pieces_in_inventory: int, raw_piece_size: (int, int)):
        self.max_pieces_in_inventory = max_pieces_in_inventory
        self.raw_piece_size = raw_piece_size
        self.pieces = []

    # == Inventory management ==

    def is_full(self) -> bool:
        """Checks whether all slots in the inventory are in use."""
        return len(self.pieces) == self.max_pieces_in_inventory

    def store_piece(self, piece: (int, int)) -> None:
        """Store the given piece.

        :param piece: Piece to store.
        """
        # If we would run out of storage space, discard the oldest piece first
        if self.is_full():
            self.pieces.pop(0)

        self.pieces.append(piece)

    def replenish_piece(self) -> None:
        """Add a fresh raw piece to inventory."""
        self.store_piece(self.raw_piece_size)

    # == Cutting ==

    def cut(self, maze_action: "Cutting2DMazeAction", ordered_piece: (int, int)) -> bool:
        """Attempt to perform the cutting. Remains of the cut piece are put back to inventory.

        On success the selected piece is removed from the inventory and up to two residual
        pieces (one per cutting dimension, only if non-empty) are stored instead.
        On failure the inventory is left untouched.

        :param maze_action: the cutting maze_action to perform
        :param ordered_piece: Dimensions of the piece that we should produce
        :return: True if the cutting was successful, False on error.
        """
        if maze_action.rotate:
            ordered_piece = ordered_piece[::-1]

        # Check the piece ID is valid. Negative IDs are rejected as well: list indexing would
        # otherwise silently wrap them around to the end of the inventory or raise IndexError.
        if not 0 <= maze_action.piece_id < len(self.pieces):
            return False

        # Check whether the cut is possible (ordered piece must fit in every dimension)
        selected_piece = self.pieces[maze_action.piece_id]
        if any(ordered_size > available_size
               for ordered_size, available_size in zip(ordered_piece, selected_piece)):
            return False

        # Perform the cut
        cutting_order = [1, 0] if maze_action.reverse_cutting_order else [0, 1]
        piece_to_cut = list(self.pieces.pop(maze_action.piece_id))
        for dim in cutting_order:
            # The residual keeps the current extent in the other dimension and
            # whatever is left over in the dimension being cut.
            residual = piece_to_cut.copy()
            residual[dim] = piece_to_cut[dim] - ordered_piece[dim]
            piece_to_cut[dim] = ordered_piece[dim]
            if residual[dim] > 0:
                self.store_piece(tuple(residual))

        return True

    # == State representation ==

    def size(self) -> int:
        """Current size of the inventory."""
        return len(self.pieces)
2.3. MazeState and MazeAction¶
As motivated and explained in more detail in our tutorial on Customizing Core and Maze Envs, CoreEnvs rely on MazeState and MazeAction objects for interacting with an agent.
For the present case this is a Cutting2DMazeState
class Cutting2DMazeState:
    """MazeState of the 2D cutting environment.

    :param inventory: The pieces currently in inventory; a shallow copy of this list is stored.
    :param max_pieces_in_inventory: Inventory capacity (maximum number of pieces).
    :param current_demand: The piece to be produced in the next step.
    :param raw_piece_size: Dimensions of a raw piece.
    """

    def __init__(self, inventory: [(int, int)], max_pieces_in_inventory: int,
                 current_demand: (int, int), raw_piece_size: (int, int)):
        # Keep our own list so later changes to the caller's list do not leak into this state.
        pieces_snapshot = inventory.copy()
        self.inventory = pieces_snapshot

        # The remaining values are stored exactly as passed in.
        self.max_pieces_in_inventory, self.current_demand, self.raw_piece_size = (
            max_pieces_in_inventory, current_demand, raw_piece_size)
and a Cutting2DMazeAction
defining which inventory piece
to cut in which cutting order and orientation.
class Cutting2DMazeAction:
    """MazeAction describing a single cut in the cutting environment.

    :param piece_id: ID of the piece that should be cut.
    :param rotate: If True, the ordered piece is rotated.
    :param reverse_cutting_order: If True, cut along the Y axis first (instead of the usual X first).
    """

    def __init__(self, piece_id: int, rotate: bool, reverse_cutting_order: bool):
        # All three values are stored unchanged under the same names.
        self.piece_id, self.rotate, self.reverse_cutting_order = (
            piece_id, rotate, reverse_cutting_order)
These two classes are utilized in the CoreEnv code above.
2.4. Test Script¶
The following snippet will instantiate the environment and run it for 15 steps.
""" Test script CoreEnv """
from tutorial_maze_env.part01_core_env.env.core_env import Cutting2DCoreEnvironment
from tutorial_maze_env.part01_core_env.env.maze_action import Cutting2DMazeAction
def main():
    """Instantiate the cutting core environment and run it for 15 steps, printing each result."""
    # init and reset core environment
    env = Cutting2DCoreEnvironment(
        max_pieces_in_inventory=200,
        raw_piece_size=[100, 100],
        static_demand=(30, 15),
    )
    env.reset()

    # run interaction loop; every step cuts piece 0 without rotation or reversed cutting order
    for _ in range(15):
        action = Cutting2DMazeAction(piece_id=0, rotate=False, reverse_cutting_order=False)

        # take actual environment step (the returned state is not needed here)
        _, reward, done, info = env.step(action)
        print(f"reward {reward} | done {done} | info {info}")
if __name__ == "__main__":
    # Script entry point: run the demo only when this file is executed directly.
    main()
When running the script you should get the following command line output:
reward -1 | done False | info {'msg': 'valid_cut'}
reward 0 | done False | info {'msg': 'valid_cut'}
reward 0 | done False | info {'msg': 'valid_cut'}
...