State#
State Configuration#
Before creating State or BatchedStates instances, you need to set up the abstract class StateConfig.
from linguaml.rl.state import StateConfig
StateConfig.lookback = 5
The lookback attribute is the number of most recent hyperparameter configurations (action–reward pairs) the agent looks back at before selecting its next action. With lookback = 5, each state therefore consists of 5 state units, one per past action–reward pair.
State Unit#
from linguaml.rl.action import ActionConfig, Action
from linguaml.tolearn.family import Family
from linguaml.tolearn.hp.bounds import NumericHPBounds
from rich import print
ActionConfig.family = Family.SVC
ActionConfig.numeric_hp_bounds = NumericHPBounds.from_dict({
"C": (0.1, 100),
"gamma": (0.1, 100),
"tol": (1e-5, 1e-3)
})
action = Action({
"C": 0.1,
"gamma": 0.5,
"tol": 0.1,
"kernel": 0,
"decision_function_shape": 1
})
print(action)
{'C': 0.1, 'gamma': 0.5, 'tol': 0.1, 'kernel': 0, 'decision_function_shape': 1}
from linguaml.rl.state import StateUnit
state_unit = StateUnit.from_action_and_reward(action, 0.7)
print(state_unit.data)
[0.1 0.5 0.1 1. 0. 0. 0. 0. 1. 0.7]
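Reading off the output: the first three entries are the numeric hyperparameters C, gamma and tol, the next four entries are the one-hot encoding of kernel, the following two entries are the one-hot encoding of decision_function_shape, and the last entry is the reward (0.7).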
from linguaml.tolearn.performance import PerformanceResult
performance_result = PerformanceResult(
hp_config=action.to_hp_config(),
accuracy=0.7
)
print(performance_result)
PerformanceResult(
    hp_config=SVCConfig(
        C=10.090000000000002,
        kernel='linear',
        gamma=50.050000000000004,
        tol=0.000109,
        decision_function_shape='ovr'
    ),
    accuracy=0.7
)
print(StateUnit.from_performance_result(performance_result).data)
[0.1 0.5 0.1 1. 0. 0. 0. 0. 1. 0.7]
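The resulting vector is identical to the one produced by from_action_and_reward above: converting the action to a PerformanceResult and back does not change the encoded state unit.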
Single State#
from linguaml.rl.action import BatchedActions
import numpy as np
# Generate random actions
actions = BatchedActions.from_dict({
"C": np.random.random(StateConfig.lookback),
"kernel": np.random.randint(0, 4, size=StateConfig.lookback),
"gamma": np.random.random(StateConfig.lookback),
"tol": np.random.random(StateConfig.lookback),
"decision_function_shape": np.random.randint(0, 2, size=StateConfig.lookback)
}).to_actions()
# Generate random rewards
rewards = np.random.random(StateConfig.lookback)
print("actinos:")
print(actions)
print("rewards: ")
print(rewards)
actions:
[
    {'C': 0.6825775208926002, 'kernel': 2, 'gamma': 0.13638174258056224, 'tol': 0.4780416403171879, 'decision_function_shape': 0},
    {'C': 0.23265524742923316, 'kernel': 0, 'gamma': 0.10378297263227121, 'tol': 0.11241113457753771, 'decision_function_shape': 0},
    {'C': 0.4898338475826851, 'kernel': 3, 'gamma': 0.9900253689106882, 'tol': 0.4375667352064053, 'decision_function_shape': 0},
    {'C': 0.4554767391166977, 'kernel': 2, 'gamma': 0.11806533636847161, 'tol': 0.8105868302619496, 'decision_function_shape': 1},
    {'C': 0.11504021318468338, 'kernel': 1, 'gamma': 0.7601909534247265, 'tol': 0.18433109624367716, 'decision_function_shape': 0}
]
rewards:
[0.34778118 0.39122254 0.50390384 0.50052495 0.34659101]
It is recommended to construct a State instance using the from_actions_and_rewards or from_action_and_reward_pairs class methods.
From Actions and Rewards#
Construct a state via from_actions_and_rewards:
from linguaml.rl.state import State
# Construct a state
state = State.from_actions_and_rewards(actions, rewards)
# Check the state's data
state.data
array([[0.68257752, 0.13638174, 0.47804164, 0. , 0. ,
1. , 0. , 1. , 0. , 0.34778118],
[0.23265525, 0.10378297, 0.11241113, 1. , 0. ,
0. , 0. , 1. , 0. , 0.39122254],
[0.48983385, 0.99002537, 0.43756674, 0. , 0. ,
0. , 1. , 1. , 0. , 0.50390384],
[0.45547674, 0.11806534, 0.81058683, 0. , 0. ,
1. , 0. , 0. , 1. , 0.50052495],
[0.11504021, 0.76019095, 0.1843311 , 0. , 1. ,
0. , 0. , 1. , 0. , 0.34659101]])
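Each row of state.data is the state-unit encoding of one action–reward pair, so the matrix has shape (lookback, 10) = (5, 10).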
Construct a state via from_action_and_reward_pairs:
action_reward_pairs = list(zip(actions, rewards))
print(action_reward_pairs)
[
    ({'C': 0.6825775208926002, 'kernel': 2, 'gamma': 0.13638174258056224, 'tol': 0.4780416403171879, 'decision_function_shape': 0}, 0.34778118005581027),
    ({'C': 0.23265524742923316, 'kernel': 0, 'gamma': 0.10378297263227121, 'tol': 0.11241113457753771, 'decision_function_shape': 0}, 0.39122253937936624),
    ({'C': 0.4898338475826851, 'kernel': 3, 'gamma': 0.9900253689106882, 'tol': 0.4375667352064053, 'decision_function_shape': 0}, 0.50390383582081),
    ({'C': 0.4554767391166977, 'kernel': 2, 'gamma': 0.11806533636847161, 'tol': 0.8105868302619496, 'decision_function_shape': 1}, 0.5005249473431244),
    ({'C': 0.11504021318468338, 'kernel': 1, 'gamma': 0.7601909534247265, 'tol': 0.18433109624367716, 'decision_function_shape': 0}, 0.3465910074359839)
]
state = State.from_action_and_reward_pairs(action_reward_pairs)
print(state.data)
[[0.68257752 0.13638174 0.47804164 0. 0. 1. 0. 1. 0. 0.34778118]
 [0.23265525 0.10378297 0.11241113 1. 0. 0. 0. 1. 0. 0.39122254]
 [0.48983385 0.99002537 0.43756674 0. 0. 0. 1. 1. 0. 0.50390384]
 [0.45547674 0.11806534 0.81058683 0. 0. 1. 0. 0. 1. 0.50052495]
 [0.11504021 0.76019095 0.1843311  0. 1. 0. 0. 1. 0. 0.34659101]]
From Performance Results#
Internally, from_performance_results first converts each PerformanceResult back into an action and a reward, and then calls from_actions_and_rewards.
It is therefore recommended to use from_actions_and_rewards directly whenever the actions and rewards are available; otherwise, fall back to from_performance_results.
performance_results = [
PerformanceResult(
hp_config=action.to_hp_config(),
accuracy=reward
)
for action, reward in action_reward_pairs
]
print(performance_results)
[
    PerformanceResult(hp_config=SVCConfig(C=68.28949433717075, kernel='rbf', gamma=13.724536083798167, tol=0.00048326122391401604, decision_function_shape='ovo'), accuracy=0.34778118005581027),
    PerformanceResult(hp_config=SVCConfig(C=23.342259218180395, kernel='linear', gamma=10.467918965963895, tol=0.00012128702323176233, decision_function_shape='ovo'), accuracy=0.39122253937936624),
    PerformanceResult(hp_config=SVCConfig(C=49.03440137351025, kernel='sigmoid', gamma=99.00353435417775, tol=0.0004431910678543413, decision_function_shape='ovo'), accuracy=0.50390383582081),
    PerformanceResult(hp_config=SVCConfig(C=45.60212623775811, kernel='rbf', gamma=11.894727103210315, tol=0.0008124809619593301, decision_function_shape='ovr'), accuracy=0.5005249473431244),
    PerformanceResult(hp_config=SVCConfig(C=11.59251729714987, kernel='poly', gamma=76.04307624713017, tol=0.0001924877852812404, decision_function_shape='ovo'), accuracy=0.3465910074359839)
]
state = State.from_performance_results(performance_results)
print(state.data)
[[0.68257752 0.13638174 0.47804164 0. 0. 1. 0. 1. 0. 0.34778118]
 [0.23265525 0.10378297 0.11241113 1. 0. 0. 0. 1. 0. 0.39122254]
 [0.48983385 0.99002537 0.43756674 0. 0. 0. 1. 1. 0. 0.50390384]
 [0.45547674 0.11806534 0.81058683 0. 0. 1. 0. 0. 1. 0.50052495]
 [0.11504021 0.76019095 0.1843311  0. 1. 0. 0. 1. 0. 0.34659101]]
To PyTorch Tensor#
print(state.to_tensor())
tensor([[0.6826, 0.1364, 0.4780, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.3478],
        [0.2327, 0.1038, 0.1124, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3912],
        [0.4898, 0.9900, 0.4376, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.5039],
        [0.4555, 0.1181, 0.8106, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.5005],
        [0.1150, 0.7602, 0.1843, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3466]])
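The resulting tensor has shape (5, 10), i.e. (lookback, length of a state unit), mirroring state.data.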
Batched States#
from linguaml.rl.state import BatchedStates
batched_states = BatchedStates.from_states([state, state])
print(batched_states.data)
print(f"shape: {batched_states.data.shape}")
[[[0.68257752 0.13638174 0.47804164 0. 0. 1. 0. 1. 0. 0.34778118]
  [0.23265525 0.10378297 0.11241113 1. 0. 0. 0. 1. 0. 0.39122254]
  [0.48983385 0.99002537 0.43756674 0. 0. 0. 1. 1. 0. 0.50390384]
  [0.45547674 0.11806534 0.81058683 0. 0. 1. 0. 0. 1. 0.50052495]
  [0.11504021 0.76019095 0.1843311  0. 1. 0. 0. 1. 0. 0.34659101]]

 [[0.68257752 0.13638174 0.47804164 0. 0. 1. 0. 1. 0. 0.34778118]
  [0.23265525 0.10378297 0.11241113 1. 0. 0. 0. 1. 0. 0.39122254]
  [0.48983385 0.99002537 0.43756674 0. 0. 0. 1. 1. 0. 0.50390384]
  [0.45547674 0.11806534 0.81058683 0. 0. 1. 0. 0. 1. 0.50052495]
  [0.11504021 0.76019095 0.1843311  0. 1. 0. 0. 1. 0. 0.34659101]]]
shape: (2, 5, 10)
print(batched_states.to_tensor())
tensor([[[0.6826, 0.1364, 0.4780, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.3478],
         [0.2327, 0.1038, 0.1124, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3912],
         [0.4898, 0.9900, 0.4376, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.5039],
         [0.4555, 0.1181, 0.8106, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.5005],
         [0.1150, 0.7602, 0.1843, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3466]],

        [[0.6826, 0.1364, 0.4780, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.3478],
         [0.2327, 0.1038, 0.1124, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3912],
         [0.4898, 0.9900, 0.4376, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.5039],
         [0.4555, 0.1181, 0.8106, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.5005],
         [0.1150, 0.7602, 0.1843, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3466]]])
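The batched tensor has shape (batch size, lookback, state-unit length) = (2, 5, 10), so it can be fed directly to a sequence model. The snippet below is only an illustrative sketch and not part of linguaml; the LSTM encoder and its hidden size are assumptions made for the example.
from torch import nn

# Illustrative encoder: input_size must equal the state-unit length (10);
# hidden_size=32 is an arbitrary choice for this sketch.
encoder = nn.LSTM(input_size=10, hidden_size=32, batch_first=True)

states_tensor = batched_states.to_tensor().float()  # shape: (2, 5, 10)
outputs, (h_n, c_n) = encoder(states_tensor)

print(outputs.shape)  # torch.Size([2, 5, 32]): one hidden vector per lookback step
print(h_n.shape)      # torch.Size([1, 2, 32]): final hidden state per batch element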