Source code for jobshoplab.env.factories.rewards
from abc import ABC, abstractmethod
from logging import Logger
from jobshoplab.types import Config, InstanceConfig, StateMachineResult
from jobshoplab.types.state_types import NoTime
from jobshoplab.utils import calculate_lower_bound
from jobshoplab.utils.exceptions import InvalidValue
from jobshoplab.utils.logger import get_logger
[docs]
class RewardFactory(ABC):
"""
Abstract base class for reward factories.
Args:
loglevel (int): The log level.
config (Config): The configuration object.
instance (InstanceConfig): The instance configuration object.
Raises:
NotImplementedError: If the method has not been implemented.
"""
[docs]
@abstractmethod
def __init__(
self, loglevel: int | str, config: Config, instance: InstanceConfig, *args, **kwargs
):
"""
Initialize the RewardFactory.
Args:
loglevel (int): The log level.
config (Config): The configuration object.
"""
self.logger: Logger = get_logger(__name__, loglevel)
self.config: Config = config
self.instance: InstanceConfig = instance
[docs]
@abstractmethod
def make(self, state: StateMachineResult, terminated: bool, truncated: bool) -> float:
"""
Create a reward based on the given state.
Args:
state (State): The state to create the reward from.
Returns:
float: The created reward.
"""
@abstractmethod
def __repr__(self) -> str:
"""
Return a string representation of the RewardFactory.
Returns:
str: The string representation of the RewardFactory.
"""
return ""
[docs]
class DummyRewardFactory(RewardFactory):
"""
A dummy reward factory for testing purposes.
"""
[docs]
def __init__(self, loglevel: int, config: Config, instance: InstanceConfig, *args, **kwargs):
"""
Initialize the DummyRewardFactory.
Args:
loglevel (int): The log level.
config (Config): The configuration object.
instance (InstanceConfig): The instance configuration object.
"""
super().__init__(loglevel, config, instance)
self.logger.info("DummyRewardFactory initialized.")
[docs]
def make(self, state: StateMachineResult, done: bool) -> float:
"""
Create a dummy reward.
Args:
state (State): The state to create the reward from.
Returns:
float: The created reward.
"""
self.logger.debug("Creating dummy reward.")
return 0.0
[docs]
class BinaryActionJsspReward(RewardFactory):
[docs]
def __init__(
self,
loglevel: int | str,
config: Config,
instance: InstanceConfig,
sparse_bias: float,
dense_bias: float,
truncation_bias: float,
max_allowed_time: int,
):
self.sparse_bias = sparse_bias
self.dense_bias = dense_bias
self.truncation_bias = truncation_bias
self.max_allowed_time = max_allowed_time
self.lower_bound = calculate_lower_bound(instance)
self.instance = instance
self.no_op_counter = 0
self.total_no_ops = 0
self.total_actions = 0
self.num_operations = len(
[o for job in instance.instance.specification for o in job.operations]
)
super().__init__(loglevel, config, instance)
def _truncation_reward(self) -> float:
return 1
def _sparse_reward(self, state: StateMachineResult, terminated, truncated) -> float:
if truncated:
return (
self.truncation_bias * self._truncation_reward()
) / self.sparse_bias # devided by sparse bias to make sure not to overlay sparse bias (gets multiplied by sparse bias in make method)
if not terminated:
return 0.0
# terminated
if isinstance(state.state.time, NoTime):
raise InvalidValue("time", state.state.time, "NoTime")
time = state.state.time.time
# makespan based
return (self.max_allowed_time - time) / (self.max_allowed_time - self.lower_bound)
def _dense_reward(self, state: StateMachineResult) -> float:
self.total_actions += 1
if len(state.action.transitions) == 0:
self.no_op_counter += 1
self.total_no_ops += 1
else:
self.no_op_counter = 0
return (
-int(self.no_op_counter >= len(self.instance.instance.specification))
/ self.num_operations
)
# ) + (int(len(state.action.transitions) == 0) / self.num_operations)
[docs]
def make(self, state: StateMachineResult, terminated, truncated) -> float:
s_reward = self._sparse_reward(state, terminated, truncated)
d_reward = self._dense_reward(state)
return s_reward * self.sparse_bias + d_reward * self.dense_bias
def __repr__(self) -> str:
return (
f"BinaryActionJsspReward(sparse_bias={self.sparse_bias}, dense_bias={self.dense_bias})"
)