Learning and choice functions

These functions define the main learning and choice processes. The learning function rw.fun() implements the Rescorla-Wagner prediction error learning rule, and the choice function softmax.fun() implements the softmax choice rule.

# Rescorla-Wagner prediction error updating function
rw.fun <- function(exp.prior = c(5, 3, 7),      # A vector of prior expectations
                   new.inf = c(NA, 2, NA),        # A vector of new information (NAs except for selected option)
                   alpha = .3) {   # Updating rate
  
  # Save new expectations as prior
  exp.new <- exp.prior
  
  # Determine which option was selected
  selection <- which(is.finite(new.inf))
  
  # Update expectation of selected option
  exp.new[selection] <- exp.prior[selection] + alpha * (new.inf[selection] - exp.prior[selection])
  
  return(exp.new)
  
}
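
For example, calling rw.fun() with its default arguments updates only the expectation of the selected option (option 2), moving it from 3 toward the observed outcome of 2:

# Quick example call with the default arguments
rw.fun(exp.prior = c(5, 3, 7),
       new.inf = c(NA, 2, NA),
       alpha = .3)
# Should return c(5.0, 2.7, 7.0): only option 2 is updated,
#  from 3 to 3 + .3 * (2 - 3) = 2.7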

# Softmax selection function
softmax.fun <- function(exp.current = c(5, 3, 6),
                        theta = .5
) {
  
  output <- exp(exp.current * theta) / sum(exp(exp.current * theta))
  
  return(output)
  
}
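
Similarly, a quick call to softmax.fun() shows how expectations translate into selection probabilities: options with higher expectations get higher probabilities, and theta controls how strong that preference is.

# Quick example call with the default arguments
softmax.fun(exp.current = c(5, 3, 6),
            theta = .5)
# Should return probabilities of roughly .33, .12, and .55 (summing to 1)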

Main simulation function

The main simulation function is rl.sim.fun(). The function returns a dataframe containing the main results of the agent (and plots the agent’s cumulative earnings when plot = TRUE).

# Create main simulation function
rl.sim.fun <- function(n.trials = 100,     # Trials in game
                       option.mean = c(1, -1, 0),   # Mean of each option
                       option.sd = c(2, 10, .01),   # SD of each option
                       prior.exp.start = rep(0, 3), 
                       theta = .5, 
                       alpha = .2, 
                       plot = TRUE) {

# Get some game parameters from inputs
n.options <- length(option.mean)

# Create outcome matrix giving the outcome on each trial for each option
outcome.mtx <- matrix(NA, nrow = n.trials, ncol = n.options)

for(option.i in 1:n.options) {
  
  outcome.mtx[,option.i] <- rnorm(n = n.trials, 
                                  mean = option.mean[option.i], 
                                  sd = option.sd[option.i])
  
}

# Create exp.prior and exp.new matrices
#  These will hold the agent's expectation (either prior or new) 
#   of each option on each trial

exp.prior.mtx <- matrix(NA, nrow = n.trials, ncol = n.options)
exp.prior.mtx[1,] <- prior.exp.start
exp.new.mtx <- matrix(NA, nrow = n.trials, ncol = n.options)

# Now create some matrices to store values

selection.v <- rep(NA, n.trials)      # Actual selections
outcome.v <- rep(NA, n.trials)        # Actual outcomes
selprob.mtx <- matrix(NA,             # Selection probabilities
                      nrow = n.trials, 
                      ncol = n.options)

# RUN SIMULATION!

for(trial.i in 1:n.trials) {
  
  # STEP 0: Get prior expectations for current trial
  
  exp.prior.i <- exp.prior.mtx[trial.i,]
  
  # STEP 1: SELECT AN OPTION
  
  # Selection probabilities
  
  selprob.i <- softmax.fun(exp.current = exp.prior.i, 
                           theta = theta)
  
  # Select an option
  
  selection.i <- sample(1:n.options, size = 1, prob = selprob.i)
  
  # STEP 2: GET OUTCOME FROM SELECTED OPTION
  
  outcome.i <- outcome.mtx[trial.i, selection.i]
  
  # STEP 3: CREATE NEW EXPECTANCIES
  
  # Create a new.inf vector with NAs except for outcome of selected option
  
  new.inf <- rep(NA, n.options)
  new.inf[selection.i] <- outcome.i
  
  # Get new expectancies
  new.exp.i <- rw.fun(exp.prior = exp.prior.i,
                      new.inf = new.inf,
                      alpha = alpha)
  
  # Assign new expectations to exp.new.mtx[trial.i,]
  #  and exp.prior.mtx[trial.i + 1,]
  
  exp.new.mtx[trial.i,] <- new.exp.i
  
  if(trial.i < n.trials) {
  
  exp.prior.mtx[trial.i + 1,] <- new.exp.i
  
  }
  
  # Save some values
  
  selprob.mtx[trial.i,] <- selprob.i  # Selection probabilities
  selection.v[trial.i] <- selection.i # Actual selection
  outcome.v[trial.i] <- outcome.i     # Actual outcome
}

# Put main results in a single dataframe called sim.result.df

sim.result.df <- data.frame("selection" = selection.v, 
                           "outcome" = outcome.v,
                           "outcome.cum" = cumsum(outcome.v),
                           stringsAsFactors = FALSE)

# Should simulation be plotted?

if(plot == TRUE) {
  
  plot(1, 
       xlim = c(1, n.trials), 
       ylim = c(min(option.mean) * n.trials,  # Set limits to worst and best expected performance
                max(option.mean) * n.trials), 
       type = "n",
       ylab = "Cumulative Rewards",
       xlab = "Trial",
       main = paste0("alpha = ", alpha, ", theta = ", theta))
  
  rect(-1e3, -1e3, 1e3, 1e3, col = gray(.96))
  abline(h = seq(-1e3, 1e3, by = 10), 
         v = seq(-1e3, 1e3, by = 10),
         lwd = c(2, 1), col = "white")
  
  points(x = 1:n.trials,
         y = sim.result.df$outcome.cum,
         type = "b")
  
}


# Now return the main simulation dataframe

return(sim.result.df)
}

Testing the simulation

Now let’s test some different values of theta.

Here’s a high exploiter with theta = 1

set.seed(100) # For replicability
exploiter.sim <- rl.sim.fun(theta = 1)

Here’s a high explorer with theta = .1

set.seed(100) # For replicability
explorer.sim <- rl.sim.fun(theta = .1)

Now let’s try a totally random person with theta = 0

set.seed(100) # For replicability
random.sim <- rl.sim.fun(theta = 0)
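
To compare the three agents directly, we can pull out each agent’s final cumulative reward (a quick extra check on the simulation output):

# Final cumulative rewards of the three agents
c("exploiter" = exploiter.sim$outcome.cum[nrow(exploiter.sim)],
  "explorer" = explorer.sim$outcome.cum[nrow(explorer.sim)],
  "random" = random.sim$outcome.cum[nrow(random.sim)])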

Simulating multiple agents

We can easily simulate the performance of multiple agents by creating another function called many.agent.fun().

# simulate many agents with the same theta value

many.agent.fun <- function(n.agent,      # Number of agents
                           theta) {      # Agent's theta
  
  sapply(1:n.agent, FUN = function(x) {
    
    sim.result.i <- rl.sim.fun(theta = theta, 
                               plot = FALSE)
    
    final.reward.i <- sim.result.i$outcome.cum[nrow(sim.result.i)]
    
    return(final.reward.i)
    
  })
}

Now let’s use our many.agent.fun() to simulate the results of 100 agents using either a high exploitation theta = 1, medium exploration theta = .5, or extreme exploration (i.e., random) theta = 0 strategy:

# Number of agents
n.agent <- 100

# Create a vector of exploit, explore, and random agent outcomes
exploit.agents <- many.agent.fun(n.agent, theta = 1)
explore.agents <- many.agent.fun(n.agent, theta = .5)
random.agents <- many.agent.fun(n.agent, theta = 0)

# Put results together
many.agent.df <- data.frame(outcome = c(exploit.agents, explore.agents, random.agents),
                            group = rep(c("exploit", "explore", "random"), each = n.agent),
                            stringsAsFactors = FALSE
                            )

# Plot simulation results!
library(yarrr)
pirateplot(formula = outcome ~ group, 
           data = many.agent.df)
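
As a numerical complement to the plot, we can also summarize the mean final reward in each group using base R’s aggregate():

# Mean final reward by group
aggregate(outcome ~ group, 
          data = many.agent.df, 
          FUN = mean)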