Model

Let’s assume an environment with four options to choose from (i.e. a four armed bandit). You have a given time intervall in which you can choose from the options (i.e. a fixed number of trials). When the time limit is reached you need to have a minimum number of points cumulated from the choices made during the time intervall. You will only survive (i.e. receive a bonus) if your number of accumulated points exceeds or is at least as high as the minimum (goal). The model assumes that you draw a number of samples from your experience (i.e. all the draws you made from one option). These samples will be multiplied by the number of trials left and the proportion of the ones that exceed or are equal to the difference from your current state of points to the goal. The option with the highest proportion of success is then chosen with the highest probability (i.e. the proportions are fed into a softmax). If two proportions are the same you choose the one with the higher mean.

# choice sensitivity function
theta.fun <- function(trial, gamma) log10(trial) * gamma 

#---------------------------------------

# Softmax selection function
softmax.fun <- function(exp.current = c(5, 3, 6),
                        theta = .5) {
  
  output <- exp(exp.current * theta) / sum(exp(exp.current * theta))
  
  return(output)
  
}

#---------------------------------------

sim.fun <- function(n.trials = 50,
                    option.means = c(10, 8, 6),
                    option.sd = c(3, 3, 3),
                    option.tau = c(0, 0, .05),
                    gamma = .2,
                    alpha = .3,
                    sample.cap = 5,
                    goal = 250,
                    prior.exp.start = rep(0.5, 3),
                    plot = FALSE){
  
  n.options <- length(option.means)
  
  # Create outcome matrix giving the outcome on each trial for each option
  outcome.mtx <- matrix(NA, nrow = n.trials, ncol = n.options)
  
  for(option.i in 1:n.options) {
    
    outcome.mtx[,option.i] <- rexgauss(n = n.trials, 
                                    mu = option.means[option.i], 
                                    sigma = option.sd[option.i],
                                    tau = option.tau[option.i],
                                    positive = FALSE)
    
  }
  
  # Create exp.prior and exp.new matrices
  #  These will hold the agent's expectation (either prior or new) 
  #   of each option on each trial
  
  goal.prob.mat <- matrix(NA, nrow = n.trials, ncol = n.options)
  goal.prob.mat[1,] <- prior.exp.start
  sample.mat <- matrix(NA, nrow = n.trials, ncol = n.options)
  sample.mat[1,] <- rep(0, n.options)
  mean.mat <- matrix(NA, nrow = n.trials, ncol = n.options)
  mean.mat[1,] <- rep(0, n.options)
  
  # Now create some matrices to store values
  
  selection.v <- rep(NA, n.trials)      # Actual selections
  outcome.v <- rep(NA, n.trials)        # Actual outcomes
  selprob.mtx <- matrix(NA,             # Selection probabilities
                        nrow = n.trials, 
                        ncol = n.options)
  
  # RUN SIMULATION!
  
  for(trial.i in 1:n.trials) {
    
    # STEP 0: Get prior expectations for current trial
    
    goal.prob.i <- goal.prob.mat[trial.i,]
    mean.mat.i <- mean.mat[trial.i,]
    

    # STEP 1: SELECT AN OPTION

      
      theta <- theta.fun(trial.i, gamma)
      selprob.i <- softmax.fun(exp.current = goal.prob.i, 
                               theta = theta) 
    

    
    # Select an option
    if (length(unique(goal.prob.i)) == n.options){
      
      selection.i <- sample(1:n.options, size = 1, prob = selprob.i)
      
    }
    
    if(length(unique(goal.prob.i)) != n.options){
        
        selection.i <- sample(which(mean.mat.i ==
                                      max(mean.mat.i[which(selprob.i == max(selprob.i))])), 1)
      
    }  
      
    # Get outcome from selected option
    
    outcome.i <- outcome.mtx[trial.i, selection.i]
    sample.mat[trial.i, selection.i] <- outcome.i
    
    # STEP 3: CREATE NEW EXPECTANCIES
    
    
    # Get new expectancies
    goal.dist <- goal - sum(sample.mat, na.rm = T)
    deadline.dist <- n.trials - trial.i
    prop.vec <- NULL
    
    # loop through samples/ distributions
    for (sample.i in 1:n.options){
      
      # create a vector with the samples of one distribution
      temp.sample <- sample.mat[, sample.i]
      if(sum(is.na(temp.sample)) > 0){
      temp.sample <- temp.sample[-which(is.na(temp.sample))]
    }
      # draw a number of samples, according to the memory capacity, from your experienced outcomes
      memory.sample <- sample(temp.sample, sample.cap, replace = TRUE)
      if(trial.i < n.trials){
      mean.mat[trial.i + 1, sample.i] <- mean(memory.sample)
      }
      # multiply the samples by number of trials left till the deadline is reached
      memory.sample <- memory.sample * deadline.dist
      
      # save the proportion of options  that reached the goal
      prop.goal.reached <- mean(memory.sample >= goal.dist)
      
      if(trial.i < n.trials){
        goal.prob.mat[trial.i + 1, sample.i] <- prop.goal.reached
      }
    }
    
    
    
    # Save some values
    
    selprob.mtx[trial.i,] <- selprob.i  # Selection probabilities
    selection.v[trial.i] <- selection.i # Actual selection
    outcome.v[trial.i] <- outcome.i
  }
  
  # Put main results in a single dataframe called sim.result.df
  
  sim.result.df <- data.frame("selection" = selection.v, 
                              "outcome" = outcome.v,
                              "outcome.cum" = cumsum(outcome.v),
                              
                              stringsAsFactors = FALSE)
  
  # Should simulation be plotted?
  
  if(plot == TRUE) {
    
    plot(1, 
         xlim = c(1, n.trials), 
         ylim = c(min(option.means) ,  # Set limits to worst and best exepected performance
                  goal + 3*max(option.means)), 
         type = "n",
         ylab = "Cumulative Rewards",
         xlab = "Trial",
         main = paste0("alpha = ", alpha, ", sample cap = ", sample.cap, ", gamma = ", gamma))
    
    rect(-1e3, -1e3, 1e3, 1e3, col = gray(.96))
    abline(h = seq(-1e3, 1e3, by = 10), 
           v = seq(-1e3, 1e3, by = 10),
           lwd = c(2, 1), col = "white")
    
    abline(h = goal, lty = 2)
    
    points(x = 1:n.trials,
           y = sim.result.df$outcome.cum,
           type = "b")
    text(x = 1:n.trials,
         y = sim.result.df$outcome.cum,
         sim.result.df$selection,
         pos = 3)
    
  }
  
  # Now return the main simulation dataframe
  
  return(sim.result.df)

}


sim1 <- sim.fun(n.trials = 50,
                    option.means = c(8, 8, 6),
                    option.sd = c(3, 3, 3),
                    option.tau = c(0, 0, 0.005),
                    gamma = .8,
                    alpha = .3,
                    sample.cap = 5,
                    goal = 500,
                    prior.exp.start = rep(0.5, 3),
                    plot = TRUE)

#---------------------------------------

# simulate many agents with the same theta value

many.agent.fun <- function(n.agent,      # Number of agents
                           ...) {      # Agent's theta
  
  sapply(1:n.agent, FUN = function(x) {
    
    sim.result.i <- sim.fun(...)
    
    final.reward.i <- sim.result.i$outcome.cum[nrow(sim.result.i)]
    
    return(final.reward.i)
    
  }
  )
}

Running the Simulations

I ran two simulations. In one I varied gamma in the other I varied the sampling capacity. The code to run them is displayed below.

# simulate different SAMPLE CAPACITY values
n.trials <- 50
goal = 450
n.agent <- 1000

# Create a vector of exploit, explore, and random agent outcomes
# different parameter combinations:
alphas <- .3
gammas <- .1
sample.caps <- c(2, 4, 6, 8, 10, 12, 14, 16, 50)
par.combin <- expand.grid(alphas, gammas, sample.caps)

# create matrices to store the outcome values for the different environments
outc.mat <- matrix(NA, nrow = n.agent, ncol = nrow(par.combin))

# create matrices with different distributions (means, sd and tau)


option.means.mat <- matrix(c(10, 10, 9, 7,
                             9, 9, 8, 7,
                             8, 8, 7, 5),
                           ncol = 4, byrow = T)
option.sd.mat <- matrix(c(7, 7, 7, 12, 
                          7, 7, 7, 12,
                          7, 7, 7, 12),
                        ncol = 4, byrow = T)

option.tau.mat <- matrix(c(0, 0, 0, .15, 
                           0, 0, 0, .15, 
                           0, 0, 0, .15),
                         ncol = 4, byrow = T)


# loop through distributions
for (dist in 1:nrow(option.means.mat)){
  
  # prepare values that are used
  option.mean <- option.means.mat[dist,]
  option.sd <- option.sd.mat[dist,]
  option.tau <- option.tau.mat[dist,]
  
  
  # loop through all parameter combinations
  for(pars in 1:nrow(par.combin)){
    
    # for each parameter combination run n.agent times the simulation
    outc.mat[,pars] <- many.agent.fun(n.agent = n.agent, sample.cap = par.combin[pars, 3], gamma = par.combin[pars, 2],
                                      alpha = par.combin[pars, 1], option.mean = option.mean,
                                      option.sd = option.sd, option.tau = option.tau, n.trials = n.trials,
                                      goal = goal, prior.exp.start = rep(.5, 4))
  }
  
  
  save(outc.mat,
       file = paste("data/memSampSC", dist,
                    ".RData", sep = ''))
  
  
}



#---------------------------------------
# simulate different sample capacity values
n.trials <- 50
goal = 450
n.agent <- 1000
#library(yarrr)
# Create a vector of exploit, explore, and random agent outcomes
# different parameter combinations:
alphas <- .3
gammas <- seq(0, 3, .05)
sample.caps <- 4
par.combin <- expand.grid(alphas, gammas, sample.caps)

# create matrices to store the outcome values for the different environments
outc.mat <- matrix(NA, nrow = n.agent, ncol = nrow(par.combin))

# create matrices with different distributions (means and sds)
# create matrices with different distributions (means and sds)

option.means.mat <- matrix(c(10, 10, 9, 7,
                             9, 9, 8, 7,
                             8, 8, 7, 5),
                           ncol = 4, byrow = T)
option.sd.mat <- matrix(c(7, 7, 7, 12, 
                          7, 7, 7, 12,
                          7, 7, 7, 12),
                        ncol = 4, byrow = T)

option.tau.mat <- matrix(c(0, 0, 0, .15, 
                           0, 0, 0, .15, 
                           0, 0, 0, .15),
                         ncol = 4, byrow = T)

# set the number of trials per game

# loop through distributions
for (dist in 1:nrow(option.means.mat)){
  
  # prepare values that are used
  option.mean <- option.means.mat[dist,]
  option.sd <- option.sd.mat[dist,]
  option.tau <- option.tau.mat[dist,]
  
  
  # loop through all parameter combinations
  for(pars in 1:nrow(par.combin)){
    
    # for each parameter combination run n.agent times the simulation
    outc.mat[,pars] <- many.agent.fun(n.agent = n.agent, sample.cap = par.combin[pars, 3], gamma = par.combin[pars, 2],
                                      alpha = par.combin[pars, 1], option.mean = option.mean,
                                      option.sd = option.sd, option.tau = option.tau, n.trials = n.trials,
                                      goal = goal, prior.exp.start = rep(.5, 4))
  }
  
  
  save(outc.mat,
       file = paste("data/memSampGamma", dist,
                    ".RData", sep = ''))
  
  
}

Now plot the values

In the first three plots the sampling capacity is varied, in the last three plots the gamma values are varied.

 n.trials <- 50
ntrials <- n.trials
goal = 450
n.agent <- 1000
library(yarrr)
library(stringr)
# Create a vector of exploit, explore, and random agent outcomes
# different parameter combinations:
alphas <- .3
gammas <- .1
sample.caps <- c(2, 4, 6, 8, 10, 12, 14, 16, 50)
par.combin <- expand.grid(alphas, gammas, sample.caps)

# create matrices to store the outcome values for the different environments
outc.mat <- matrix(NA, nrow = n.agent, ncol = nrow(par.combin))

# create matrices with different distributions (means and sds)
# create matrices with different distributions (means and sds)

option.means.mat <- matrix(c(10, 10, 9, 7,
                             9, 9, 8, 7,
                             8, 8, 7, 5),
                           ncol = 4, byrow = T)
option.sd.mat <- matrix(c(7, 7, 7, 12, 
                          7, 7, 7, 12,
                          7, 7, 7, 12),
                        ncol = 4, byrow = T)

option.tau.mat <- matrix(c(0, 0, 0, .15, 
                           0, 0, 0, .15, 
                           0, 0, 0, .15),
                         ncol = 4, byrow = T)

  for(env in 1:nrow(option.means.mat)){
  # prepare an empty variable to later store the expected outcomes in
  option.means <- option.means.mat[env,]
  option.sd <- option.sd.mat[env,]
  option.tau <- option.tau.mat[env,]
  
  mean.outc <- NULL
  
  # loop 1000 times
  for(xx in 1:1000){
    
    # prepare variable
    outc <- NULL
    
    # loop through number of options/ bandit arms
    for(yy in 1:ntrials){
      
        samp <- sample(1:(length(option.means)), 1)
        # draw from distributions from each arm
        
        outc <- c(outc, rexgauss(n = 1, 
                           mu = option.means[samp], 
                           sigma = option.sd[samp],
                           tau = option.tau[samp],
                           positive = FALSE))
        
      }
    
    # save sum of drawn values
    mean.outc[xx] <- sum(outc)
  }
  
  # compute the value according to that goal
  prop.rand.surv <- mean(mean.outc >= goal)
  random.mean <- mean(mean.outc)
  
  load(paste0('data/memSampSC', env,'.RData'))
  goal = 450
  # compute proportion of survivors (how many reached threshold)
  prop.surv <- colMeans(outc.mat >= goal)
  
  # look up which is the 95% threshold
  mean.outc.value <- colMeans(outc.mat)
  m <- rbind(1, 1, 1, 1, 1, 2, 2)
  layout(m)
  par(mar = c(4, 4, 4.5, 4))
  plot(1, xlim = c(0,50), ylim = c(min(prop.surv)-.05, ifelse(max(prop.surv) <= .95,
                                                              max(prop.surv) + .05, 1)), type = 'n',
       xlab ='sample cap values', ylab = 'prop survived', main = paste0('prop surv. and mean outc for sample capacity'),
       cex.lab = 1.5)
  paste(option.means, ',')
  mtext(paste0('Environment: M: ', toString(option.means), '; sd: ', option.sd[1], '; tau: ', toString(option.tau)))
  grid()
  lines(par.combin[,3], prop.surv, col = transparent('red', .5),
        lwd = 1.5)
  abline(h = prop.rand.surv, col = transparent('red', .5), lty = 2)
  
  par(new = T)
  plot(par.combin[,3], mean.outc.value, col = transparent('blue', .5), lwd = 1.5,
       xlab = NA, ylab = NA, type = 'l', axes = F)
  axis(side = 4)
  mtext(side = 4, line = 3, 'mean outcome')
  abline(h = random.mean, col = transparent('blue', .5), lty = 2)
  par(mar = c(0, 0, 0, 0))
  plot(1, type = 'n', axes = F, xlab = NA, ylab = NA)
  legend('center', c('prop threshold reached', 'mean outcomes', 
                     'prop threshold reached random',
                     'mean outcomes random'),
         col = rep(c(transparent('red', .5), transparent('blue', .5)), 2), lty = c(1, 1, 2, 2), cex = 1.5)

  
  }

#------------------------------------------------------------
n.trials <- 50
ntrials <- n.trials
goal = 450
n.agent <- 1000
library(yarrr)
library(stringr)
# Create a vector of exploit, explore, and random agent outcomes
# different parameter combinations:
alphas <- .3
gammas <- seq(0, 3, .05)
sample.caps <- 4
par.combin <- expand.grid(alphas, gammas, sample.caps)

# create matrices to store the outcome values for the different environments
outc.mat <- matrix(NA, nrow = n.agent, ncol = nrow(par.combin))

# create matrices with different distributions (means and sds)
# create matrices with different distributions (means and sds)

option.means.mat <- matrix(c(10, 10, 9, 7,
                             9, 9, 8, 7,
                             8, 8, 7, 5),
                           ncol = 4, byrow = T)
option.sd.mat <- matrix(c(7, 7, 7, 12, 
                          7, 7, 7, 12,
                          7, 7, 7, 12),
                        ncol = 4, byrow = T)

option.tau.mat <- matrix(c(0, 0, 0, .15, 
                           0, 0, 0, .15, 
                           0, 0, 0, .15),
                         ncol = 4, byrow = T)

for(env in 1:nrow(option.means.mat)){
  # prepare an empty variable to later store the expected outcomes in
  option.means <- option.means.mat[env,]
  option.sd <- option.sd.mat[env,]
  option.tau <- option.tau.mat[env,]
  
  mean.outc <- NULL
  
  # loop 1000 times
  for(xx in 1:1000){
    
    # prepare variable
    outc <- NULL
    
    # loop through number of options/ bandit arms
    for(yy in 1:ntrials){
      
      samp <- sample(1:(length(option.means)), 1)
      # draw from distributions from each arm
      
      outc <- c(outc, rexgauss(n = 1, 
                               mu = option.means[samp], 
                               sigma = option.sd[samp],
                               tau = option.tau[samp],
                               positive = FALSE))
      
    }
    
    # save sum of drawn values
    mean.outc[xx] <- sum(outc)
  }
  
  # compute the value according to that goal
  prop.rand.surv <- mean(mean.outc >= goal)
  random.mean <- mean(mean.outc)
  
  load(paste0('data/memSampGamma', env,'.RData'))
  goal = 450
  # compute proportion of survivors (how many reached threshold)
  prop.surv <- colMeans(outc.mat >= goal)
  
  # look up which is the 95% threshold
  mean.outc.value <- colMeans(outc.mat)
  m <- rbind(1, 1, 1, 1, 1, 2, 2)
  layout(m)
  par(mar = c(4, 4, 4.5, 4))
  plot(1, xlim = c(0,3), ylim = c(min(prop.surv)-.05, ifelse(max(prop.surv) <= .95,
                                                              max(prop.surv) + .05, 1)), type = 'n',
       xlab ='gamma values', ylab = 'prop survived', main = paste0('prop surv. and mean outc for gamma'),
       cex.lab = 1.5)
  paste(option.means, ',')
  mtext(paste0('Environment: M: ', toString(option.means), '; sd: ', option.sd[1], '; tau: ', toString(option.tau)))
  grid()
  lines(par.combin[,2], prop.surv, col = transparent('red', .5),
        lwd = 1.5)
  abline(h = prop.rand.surv, col = transparent('red', .5), lty = 2)
  
  par(new = T)
  plot(par.combin[,2], mean.outc.value, col = transparent('blue', .5), lwd = 1.5,
       xlab = NA, ylab = NA, type = 'l', axes = F)
  axis(side = 4)
  mtext(side = 4, line = 3, 'mean outcome')
  abline(h = random.mean, col = transparent('blue', .5), lty = 2)
  par(mar = c(0, 0, 0, 0))
  plot(1, type = 'n', axes = F, xlab = NA, ylab = NA)
  legend('center', c('prop threshold reached', 'mean outcomes', 
                     'prop threshold reached random',
                     'mean outcomes random'),
         col = rep(c(transparent('red', .5), transparent('blue', .5)), 2), lty = c(1, 1, 2, 2), cex = 1.5)
  
  
}