State#
State Configuration#
Before creating State or BatchedStates instances, you need to set up the abstract class StateConfig.
from linguaml.rl.state import StateConfig
StateConfig.lookback = 5
The lookback attribute is the number of most recent hyperparameter configurations (action–reward pairs) the agent looks back at before selecting its next action. With lookback = 5, each state therefore consists of 5 state units, one per past action–reward pair.
State Unit#
from linguaml.rl.action import ActionConfig, Action
from linguaml.tolearn.family import Family
from linguaml.tolearn.hp.bounds import NumericHPBounds
from rich import print
ActionConfig.family = Family.SVC
ActionConfig.numeric_hp_bounds = NumericHPBounds.from_dict({
"C": (0.1, 100),
"gamma": (0.1, 100),
"tol": (1e-5, 1e-3)
})
action = Action({
"C": 0.1,
"gamma": 0.5,
"tol": 0.1,
"kernel": 0,
"decision_function_shape": 1
})
print(action)
{'C': 0.1, 'gamma': 0.5, 'tol': 0.1, 'kernel': 0, 'decision_function_shape': 1}
from linguaml.rl.state import StateUnit
state_unit = StateUnit.from_action_and_reward(action, 0.7)
print(state_unit.data)
[0.1 0.5 0.1 1. 0. 0. 0. 0. 1. 0.7]
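Reading off the output: the first three entries are the numeric hyperparameters C, gamma and tol, the next four entries are the one-hot encoding of kernel, the following two entries are the one-hot encoding of decision_function_shape, and the last entry is the reward (0.7).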
from linguaml.tolearn.performance import PerformanceResult
performance_result = PerformanceResult(
hp_config=action.to_hp_config(),
accuracy=0.7
)
print(performance_result)
PerformanceResult(
    hp_config=SVCConfig(
        C=10.090000000000002,
        kernel='linear',
        gamma=50.050000000000004,
        tol=0.000109,
        decision_function_shape='ovr'
    ),
    accuracy=0.7
)
print(StateUnit.from_performance_result(performance_result).data)
[0.1 0.5 0.1 1. 0. 0. 0. 0. 1. 0.7]
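The resulting vector is identical to the one produced by from_action_and_reward above: converting the action to a PerformanceResult and back does not change the encoded state unit.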
Single State#
from linguaml.rl.action import BatchedActions
import numpy as np
# Generate random actions
actions = BatchedActions.from_dict({
"C": np.random.random(StateConfig.lookback),
"kernel": np.random.randint(0, 4, size=StateConfig.lookback),
"gamma": np.random.random(StateConfig.lookback),
"tol": np.random.random(StateConfig.lookback),
"decision_function_shape": np.random.randint(0, 2, size=StateConfig.lookback)
}).to_actions()
# Generate random rewards
rewards = np.random.random(StateConfig.lookback)
print("actinos:")
print(actions)
print("rewards: ")
print(rewards)
actions:
[
    {'C': 0.6825775208926002, 'kernel': 2, 'gamma': 0.13638174258056224, 'tol': 0.4780416403171879, 'decision_function_shape': 0},
    {'C': 0.23265524742923316, 'kernel': 0, 'gamma': 0.10378297263227121, 'tol': 0.11241113457753771, 'decision_function_shape': 0},
    {'C': 0.4898338475826851, 'kernel': 3, 'gamma': 0.9900253689106882, 'tol': 0.4375667352064053, 'decision_function_shape': 0},
    {'C': 0.4554767391166977, 'kernel': 2, 'gamma': 0.11806533636847161, 'tol': 0.8105868302619496, 'decision_function_shape': 1},
    {'C': 0.11504021318468338, 'kernel': 1, 'gamma': 0.7601909534247265, 'tol': 0.18433109624367716, 'decision_function_shape': 0}
]
rewards:
[0.34778118 0.39122254 0.50390384 0.50052495 0.34659101]
It is recommended to construct a State instance using the from_actions_and_rewards or from_action_and_reward_pairs class methods.
From Actions and Rewards#
Construct a state via from_actions_and_rewards:
from linguaml.rl.state import State
# Construct a state
state = State.from_actions_and_rewards(actions, rewards)
# Check the state's data
state.data
array([[0.68257752, 0.13638174, 0.47804164, 0. , 0. ,
1. , 0. , 1. , 0. , 0.34778118],
[0.23265525, 0.10378297, 0.11241113, 1. , 0. ,
0. , 0. , 1. , 0. , 0.39122254],
[0.48983385, 0.99002537, 0.43756674, 0. , 0. ,
0. , 1. , 1. , 0. , 0.50390384],
[0.45547674, 0.11806534, 0.81058683, 0. , 0. ,
1. , 0. , 0. , 1. , 0.50052495],
[0.11504021, 0.76019095, 0.1843311 , 0. , 1. ,
0. , 0. , 1. , 0. , 0.34659101]])
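Each row of state.data is the state-unit encoding of one action–reward pair, so the matrix has shape (lookback, 10) = (5, 10).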
Construct a state via from_action_and_reward_pairs:
action_reward_pairs = list(zip(actions, rewards))
print(action_reward_pairs)
[
    ({'C': 0.6825775208926002, 'kernel': 2, 'gamma': 0.13638174258056224, 'tol': 0.4780416403171879, 'decision_function_shape': 0}, 0.34778118005581027),
    ({'C': 0.23265524742923316, 'kernel': 0, 'gamma': 0.10378297263227121, 'tol': 0.11241113457753771, 'decision_function_shape': 0}, 0.39122253937936624),
    ({'C': 0.4898338475826851, 'kernel': 3, 'gamma': 0.9900253689106882, 'tol': 0.4375667352064053, 'decision_function_shape': 0}, 0.50390383582081),
    ({'C': 0.4554767391166977, 'kernel': 2, 'gamma': 0.11806533636847161, 'tol': 0.8105868302619496, 'decision_function_shape': 1}, 0.5005249473431244),
    ({'C': 0.11504021318468338, 'kernel': 1, 'gamma': 0.7601909534247265, 'tol': 0.18433109624367716, 'decision_function_shape': 0}, 0.3465910074359839)
]
state = State.from_action_and_reward_pairs(action_reward_pairs)
print(state.data)
[[0.68257752 0.13638174 0.47804164 0. 0. 1. 0. 1. 0. 0.34778118]
 [0.23265525 0.10378297 0.11241113 1. 0. 0. 0. 1. 0. 0.39122254]
 [0.48983385 0.99002537 0.43756674 0. 0. 0. 1. 1. 0. 0.50390384]
 [0.45547674 0.11806534 0.81058683 0. 0. 1. 0. 0. 1. 0.50052495]
 [0.11504021 0.76019095 0.1843311  0. 1. 0. 0. 1. 0. 0.34659101]]
From Performance Results#
Internally, from_performance_results first converts each PerformanceResult back into an action and a reward, and then calls from_actions_and_rewards.
It is therefore recommended to use from_actions_and_rewards directly whenever the actions and rewards are available; otherwise, fall back to from_performance_results.
performance_results = [
PerformanceResult(
hp_config=action.to_hp_config(),
accuracy=reward
)
for action, reward in action_reward_pairs
]
print(performance_results)
[
    PerformanceResult(hp_config=SVCConfig(C=68.28949433717075, kernel='rbf', gamma=13.724536083798167, tol=0.00048326122391401604, decision_function_shape='ovo'), accuracy=0.34778118005581027),
    PerformanceResult(hp_config=SVCConfig(C=23.342259218180395, kernel='linear', gamma=10.467918965963895, tol=0.00012128702323176233, decision_function_shape='ovo'), accuracy=0.39122253937936624),
    PerformanceResult(hp_config=SVCConfig(C=49.03440137351025, kernel='sigmoid', gamma=99.00353435417775, tol=0.0004431910678543413, decision_function_shape='ovo'), accuracy=0.50390383582081),
    PerformanceResult(hp_config=SVCConfig(C=45.60212623775811, kernel='rbf', gamma=11.894727103210315, tol=0.0008124809619593301, decision_function_shape='ovr'), accuracy=0.5005249473431244),
    PerformanceResult(hp_config=SVCConfig(C=11.59251729714987, kernel='poly', gamma=76.04307624713017, tol=0.0001924877852812404, decision_function_shape='ovo'), accuracy=0.3465910074359839)
]
state = State.from_performance_results(performance_results)
print(state.data)
[[0.68257752 0.13638174 0.47804164 0. 0. 1. 0. 1. 0. 0.34778118]
 [0.23265525 0.10378297 0.11241113 1. 0. 0. 0. 1. 0. 0.39122254]
 [0.48983385 0.99002537 0.43756674 0. 0. 0. 1. 1. 0. 0.50390384]
 [0.45547674 0.11806534 0.81058683 0. 0. 1. 0. 0. 1. 0.50052495]
 [0.11504021 0.76019095 0.1843311  0. 1. 0. 0. 1. 0. 0.34659101]]
To PyTorch Tensor#
print(state.to_tensor())
tensor([[0.6826, 0.1364, 0.4780, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.3478],
        [0.2327, 0.1038, 0.1124, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3912],
        [0.4898, 0.9900, 0.4376, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.5039],
        [0.4555, 0.1181, 0.8106, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.5005],
        [0.1150, 0.7602, 0.1843, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3466]])
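The resulting tensor has shape (5, 10), i.e. (lookback, length of a state unit), mirroring state.data.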
Batched States#
from linguaml.rl.state import BatchedStates
batched_states = BatchedStates.from_states([state, state])
print(batched_states.data)
print(f"shape: {batched_states.data.shape}")
[[[0.68257752 0.13638174 0.47804164 0. 0. 1. 0. 1. 0. 0.34778118]
  [0.23265525 0.10378297 0.11241113 1. 0. 0. 0. 1. 0. 0.39122254]
  [0.48983385 0.99002537 0.43756674 0. 0. 0. 1. 1. 0. 0.50390384]
  [0.45547674 0.11806534 0.81058683 0. 0. 1. 0. 0. 1. 0.50052495]
  [0.11504021 0.76019095 0.1843311  0. 1. 0. 0. 1. 0. 0.34659101]]

 [[0.68257752 0.13638174 0.47804164 0. 0. 1. 0. 1. 0. 0.34778118]
  [0.23265525 0.10378297 0.11241113 1. 0. 0. 0. 1. 0. 0.39122254]
  [0.48983385 0.99002537 0.43756674 0. 0. 0. 1. 1. 0. 0.50390384]
  [0.45547674 0.11806534 0.81058683 0. 0. 1. 0. 0. 1. 0.50052495]
  [0.11504021 0.76019095 0.1843311  0. 1. 0. 0. 1. 0. 0.34659101]]]
shape: (2, 5, 10)
print(batched_states.to_tensor())
tensor([[[0.6826, 0.1364, 0.4780, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.3478],
         [0.2327, 0.1038, 0.1124, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3912],
         [0.4898, 0.9900, 0.4376, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.5039],
         [0.4555, 0.1181, 0.8106, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.5005],
         [0.1150, 0.7602, 0.1843, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3466]],

        [[0.6826, 0.1364, 0.4780, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.3478],
         [0.2327, 0.1038, 0.1124, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3912],
         [0.4898, 0.9900, 0.4376, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.5039],
         [0.4555, 0.1181, 0.8106, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.5005],
         [0.1150, 0.7602, 0.1843, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.3466]]])
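The batched tensor has shape (batch size, lookback, state-unit length) = (2, 5, 10), so it can be fed directly to a sequence model. The snippet below is only an illustrative sketch and not part of linguaml; the LSTM encoder and its hidden size are assumptions made for the example.
from torch import nn

# Illustrative encoder: input_size must equal the state-unit length (10);
# hidden_size=32 is an arbitrary choice for this sketch.
encoder = nn.LSTM(input_size=10, hidden_size=32, batch_first=True)

states_tensor = batched_states.to_tensor().float()  # shape: (2, 5, 10)
outputs, (h_n, c_n) = encoder(states_tensor)

print(outputs.shape)  # torch.Size([2, 5, 32]): one hidden vector per lookback step
print(h_n.shape)      # torch.Size([1, 2, 32]): final hidden state per batch element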