# Agent

In [38]:
from rich import print
import random

from linguaml.tolearn.family import Family
from linguaml.tolearn.hp.bounds import NumericHPBounds
from linguaml.rl.action import ActionConfig, Action
from linguaml.rl.state import StateConfig, State

# Model family whose hyperparameters are explored in this notebook
family = Family.SVC

# One pair of bounds per numeric hyperparameter
# (presumably (lower, upper) — confirm against NumericHPBounds)
numeric_hp_bounds = NumericHPBounds.from_dict(
    dict(
        C=(0.1, 100),
        gamma=(0.1, 100),
        tol=(1e-5, 1e-3),
    )
)

# Register the family first, then its bounds, on the action configuration
ActionConfig.family, ActionConfig.numeric_hp_bounds = family, numeric_hp_bounds

# Look-back period: the helpers below build each state from this many
# (action, reward) pairs
StateConfig.lookback = 5

def generate_random_action():
    """Create an `Action` with a random entry for every hyperparameter of `family`.

    Numeric hyperparameters are filled first, each with a float drawn by
    `random.random()`; categorical hyperparameters follow, each with a
    random level index.

    Returns:
        The populated `Action`.
    """
    random_action = Action()

    # Continuous part: one uniform draw from [0, 1) per numeric hyperparameter
    for numeric_name in family.numeric_hp_names():
        random_action[numeric_name] = random.random()

    # Discrete part: one level index in [0, n_levels - 1] (both ends inclusive)
    # per categorical hyperparameter
    for categorical_name in family.categorical_hp_names():
        level_count = family.n_levels_in_category(categorical_name)
        random_action[categorical_name] = random.randint(0, level_count - 1)

    return random_action

def generate_random_actions(n: int) -> list[Action]:
    """Return a list of `n` random actions.

    Args:
        n: Number of actions to generate.

    Returns:
        A list holding the result of `n` successive calls to
        `generate_random_action`.
    """
    random_actions = []
    for _ in range(n):
        random_actions.append(generate_random_action())
    return random_actions

def generate_random_state() -> State:
    """Build a `State` from random actions and random rewards.

    `StateConfig.lookback` actions and the same number of rewards are
    generated, then combined with `State.from_actions_and_rewards`.

    Returns:
        The resulting `State`.
    """
    lookback = StateConfig.lookback

    # Actions are drawn before rewards so the order of random draws is fixed
    random_actions = generate_random_actions(lookback)
    random_rewards = [random.random() for _ in range(lookback)]

    return State.from_actions_and_rewards(random_actions, random_rewards)

def generate_random_states(n: int) -> list[State]:
    """Return a list of `n` random states.

    Args:
        n: Number of states to generate.

    Returns:
        A list holding the result of `n` successive calls to
        `generate_random_state`.
    """
    return [generate_random_state() for _ in range(n)]


In [34]:
# Generate one random action; as the cell's last expression, its value is
# displayed by the notebook
generate_random_action()

{'C': 0.709065814257011,
 'gamma': 0.908919155394657,
 'tol': 0.8326055084880106,
 'kernel': 3,
 'decision_function_shape': 2}

## Selecting Actions

### Random Actions

In [9]:
from linguaml.rl.agent import Agent, ContinuousDistributionFamily
from linguaml.tolearn.family import Family

# Build an agent for the SVC family
agent = Agent(
    family=Family.SVC,
    numeric_hp_bounds=dict(
        C=(0.1, 100),
        gamma=(0.1, 100),
        tol=(1e-5, 1e-3),
    ),
    hidden_size=128,
    cont_dist_family=ContinuousDistributionFamily.NORMAL,
)

# Show the action returned by the agent's random selection method
print(agent.select_random_action())

### Single Action

Create an agent:

In [45]:
from linguaml.rl.agent import Agent, ContinuousDistributionFamily
from linguaml.tolearn.family import Family

# Build an agent for the SVC family
agent = Agent(
    family=Family.SVC,
    numeric_hp_bounds=dict(
        C=(0.1, 100),
        gamma=(0.1, 100),
        tol=(1e-5, 1e-3),
    ),
    hidden_size=128,
    cont_dist_family=ContinuousDistributionFamily.NORMAL,
)

# Draw a random state, then let the agent choose an action for it
state = generate_random_state()
action = agent.select_action(state)

print(action)

### Batched Actions

In [26]:
from linguaml.rl.agent import Agent, ContinuousDistributionFamily
from linguaml.tolearn.family import Family

# Build the agent used by the batched-selection cell that follows
agent = Agent(
    family=Family.SVC,
    numeric_hp_bounds=dict(
        C=(0.1, 100),
        gamma=(0.1, 100),
        tol=(1e-5, 1e-3),
    ),
    hidden_size=128,
    cont_dist_family=ContinuousDistributionFamily.NORMAL,
)

In [50]:
from linguaml.rl.state import StateConfig, State, BatchedStates
import random

# Set the look-back period (the random-state helper reads it to decide how
# many actions and rewards go into each state)
StateConfig.lookback = 5

# Generate a list of 10 random states
states = generate_random_states(10)

# Pack the individual states into a single BatchedStates object
batched_states = BatchedStates.from_states(states)
print(f"shape of data of batched states: {batched_states.data.shape}")

# Select actions for the whole batch in one call, then show them both as
# returned and after conversion with `to_hp_configs()`
batched_actions = agent.select_action(batched_states)
print(batched_actions)
print(batched_actions.to_hp_configs())

## Log-Probabilities

### Single Data

In [39]:
from linguaml.rl.agent import Agent, ContinuousDistributionFamily
from linguaml.tolearn.family import Family

# Build an agent for the SVC family
agent = Agent(
    family=Family.SVC,
    numeric_hp_bounds=dict(
        C=(0.1, 100),
        gamma=(0.1, 100),
        tol=(1e-5, 1e-3),
    ),
    hidden_size=128,
    cont_dist_family=ContinuousDistributionFamily.NORMAL,
)

# Draw a random state and let the agent choose an action for it;
# both are reused by the log-probability cells below
state = generate_random_state()
action = agent.select_action(state)

If the `state` argument is omitted when calling the agent's `log_prob` method, the log-probability of the action is computed based on the latest state:

In [42]:
# Get the log-probability of `action` without passing a state, so the
# agent bases it on the latest state
log_prob = agent.log_prob(action)

print(log_prob)

Of course, this is equivalent to:

In [43]:
# Get the log-probability of `action` with the state passed explicitly
log_prob = agent.log_prob(action, state)

print(log_prob)

However, when `state` is passed, the agent regenerates the distributions used for selecting actions by calling the `forward` method. Hence, if you want the log-probability based on the latest state, you can omit the `state` argument and save some time.

### Batched Data

In [44]:
from linguaml.rl.agent import Agent, ContinuousDistributionFamily
from linguaml.tolearn.family import Family
from linguaml.rl.state import BatchedStates

# Create an agent
# (ContinuousDistributionFamily is imported in this cell so that it does not
# depend on an earlier cell having brought the name into scope)
agent = Agent(
    family=Family.SVC,
    numeric_hp_bounds={
        "C": (0.1, 100),
        "gamma": (0.1, 100),
        "tol": (1e-5, 1e-3)
    },
    hidden_size=128,
    cont_dist_family=ContinuousDistributionFamily.NORMAL
)

# Create a random batch of states
batched_states = BatchedStates.from_states(generate_random_states(10))

# Select batched actions
batched_actions = agent.select_action(batched_states)

# Compute the log probabilities of the batched actions
# (no states are passed to `log_prob` here)
log_probs = agent.log_prob(batched_actions)

print(log_probs)