Let’s assume an environment with four options to choose from (i.e. a four armed bandit). You have a given time intervall in which you can choose from the options (i.e. a fixed number of trials). When the time limit is reached you need to have a minimum number of points cumulated from the choices made during the time intervall. You will only survive (i.e. receive a bonus) if your number of accumulated points exceeds or is at least as high as the minimum (goal). The model assumes that you draw a number of samples from your experience (i.e. all the draws you made from one option). These samples will be multiplied by the number of trials left and the proportion of the ones that exceed or are equal to the difference from your current state of points to the goal. The option with the highest proportion of success is then chosen with the highest probability (i.e. the proportions are fed into a softmax). If two proportions are the same you choose the one with the higher mean.
# choice sensitivity function
theta.fun <- function(trial, gamma) log10(trial) * gamma
#---------------------------------------
# Softmax selection function
softmax.fun <- function(exp.current = c(5, 3, 6),
theta = .5) {
output <- exp(exp.current * theta) / sum(exp(exp.current * theta))
return(output)
}
#---------------------------------------
sim.fun <- function(n.trials = 50,
option.means = c(10, 8, 6),
option.sd = c(3, 3, 3),
option.tau = c(0, 0, .05),
gamma = .2,
alpha = .3,
sample.cap = 5,
goal = 250,
prior.exp.start = rep(0.5, 3),
plot = FALSE){
n.options <- length(option.means)
# Create outcome matrix giving the outcome on each trial for each option
outcome.mtx <- matrix(NA, nrow = n.trials, ncol = n.options)
for(option.i in 1:n.options) {
outcome.mtx[,option.i] <- rexgauss(n = n.trials,
mu = option.means[option.i],
sigma = option.sd[option.i],
tau = option.tau[option.i],
positive = FALSE)
}
# Create exp.prior and exp.new matrices
# These will hold the agent's expectation (either prior or new)
# of each option on each trial
goal.prob.mat <- matrix(NA, nrow = n.trials, ncol = n.options)
goal.prob.mat[1,] <- prior.exp.start
sample.mat <- matrix(NA, nrow = n.trials, ncol = n.options)
sample.mat[1,] <- rep(0, n.options)
mean.mat <- matrix(NA, nrow = n.trials, ncol = n.options)
mean.mat[1,] <- rep(0, n.options)
# Now create some matrices to store values
selection.v <- rep(NA, n.trials) # Actual selections
outcome.v <- rep(NA, n.trials) # Actual outcomes
selprob.mtx <- matrix(NA, # Selection probabilities
nrow = n.trials,
ncol = n.options)
# RUN SIMULATION!
for(trial.i in 1:n.trials) {
# STEP 0: Get prior expectations for current trial
goal.prob.i <- goal.prob.mat[trial.i,]
mean.mat.i <- mean.mat[trial.i,]
# STEP 1: SELECT AN OPTION
theta <- theta.fun(trial.i, gamma)
selprob.i <- softmax.fun(exp.current = goal.prob.i,
theta = theta)
# Select an option
if (length(unique(goal.prob.i)) == n.options){
selection.i <- sample(1:n.options, size = 1, prob = selprob.i)
}
if(length(unique(goal.prob.i)) != n.options){
selection.i <- sample(which(mean.mat.i ==
max(mean.mat.i[which(selprob.i == max(selprob.i))])), 1)
}
# Get outcome from selected option
outcome.i <- outcome.mtx[trial.i, selection.i]
sample.mat[trial.i, selection.i] <- outcome.i
# STEP 3: CREATE NEW EXPECTANCIES
# Get new expectancies
goal.dist <- goal - sum(sample.mat, na.rm = T)
deadline.dist <- n.trials - trial.i
prop.vec <- NULL
# loop through samples/ distributions
for (sample.i in 1:n.options){
# create a vector with the samples of one distribution
temp.sample <- sample.mat[, sample.i]
if(sum(is.na(temp.sample)) > 0){
temp.sample <- temp.sample[-which(is.na(temp.sample))]
}
# draw a number of samples, according to the memory capacity, from your experienced outcomes
memory.sample <- sample(temp.sample, sample.cap, replace = TRUE)
if(trial.i < n.trials){
mean.mat[trial.i + 1, sample.i] <- mean(memory.sample)
}
# multiply the samples by number of trials left till the deadline is reached
memory.sample <- memory.sample * deadline.dist
# save the proportion of options that reached the goal
prop.goal.reached <- mean(memory.sample >= goal.dist)
if(trial.i < n.trials){
goal.prob.mat[trial.i + 1, sample.i] <- prop.goal.reached
}
}
# Save some values
selprob.mtx[trial.i,] <- selprob.i # Selection probabilities
selection.v[trial.i] <- selection.i # Actual selection
outcome.v[trial.i] <- outcome.i
}
# Put main results in a single dataframe called sim.result.df
sim.result.df <- data.frame("selection" = selection.v,
"outcome" = outcome.v,
"outcome.cum" = cumsum(outcome.v),
stringsAsFactors = FALSE)
# Should simulation be plotted?
if(plot == TRUE) {
plot(1,
xlim = c(1, n.trials),
ylim = c(min(option.means) , # Set limits to worst and best exepected performance
goal + 3*max(option.means)),
type = "n",
ylab = "Cumulative Rewards",
xlab = "Trial",
main = paste0("alpha = ", alpha, ", sample cap = ", sample.cap, ", gamma = ", gamma))
rect(-1e3, -1e3, 1e3, 1e3, col = gray(.96))
abline(h = seq(-1e3, 1e3, by = 10),
v = seq(-1e3, 1e3, by = 10),
lwd = c(2, 1), col = "white")
abline(h = goal, lty = 2)
points(x = 1:n.trials,
y = sim.result.df$outcome.cum,
type = "b")
text(x = 1:n.trials,
y = sim.result.df$outcome.cum,
sim.result.df$selection,
pos = 3)
}
# Now return the main simulation dataframe
return(sim.result.df)
}
sim1 <- sim.fun(n.trials = 50,
option.means = c(8, 8, 6),
option.sd = c(3, 3, 3),
option.tau = c(0, 0, 0.005),
gamma = .8,
alpha = .3,
sample.cap = 5,
goal = 500,
prior.exp.start = rep(0.5, 3),
plot = TRUE)
#---------------------------------------
# simulate many agents with the same theta value
many.agent.fun <- function(n.agent, # Number of agents
...) { # Agent's theta
sapply(1:n.agent, FUN = function(x) {
sim.result.i <- sim.fun(...)
final.reward.i <- sim.result.i$outcome.cum[nrow(sim.result.i)]
return(final.reward.i)
}
)
}
I ran two simulations. In one I varied gamma in the other I varied the sampling capacity. The code to run them is displayed below.
# simulate different SAMPLE CAPACITY values
n.trials <- 50
goal = 450
n.agent <- 1000
# Create a vector of exploit, explore, and random agent outcomes
# different parameter combinations:
alphas <- .3
gammas <- .1
sample.caps <- c(2, 4, 6, 8, 10, 12, 14, 16, 50)
par.combin <- expand.grid(alphas, gammas, sample.caps)
# create matrices to store the outcome values for the different environments
outc.mat <- matrix(NA, nrow = n.agent, ncol = nrow(par.combin))
# create matrices with different distributions (means, sd and tau)
option.means.mat <- matrix(c(10, 10, 9, 7,
9, 9, 8, 7,
8, 8, 7, 5),
ncol = 4, byrow = T)
option.sd.mat <- matrix(c(7, 7, 7, 12,
7, 7, 7, 12,
7, 7, 7, 12),
ncol = 4, byrow = T)
option.tau.mat <- matrix(c(0, 0, 0, .15,
0, 0, 0, .15,
0, 0, 0, .15),
ncol = 4, byrow = T)
# loop through distributions
for (dist in 1:nrow(option.means.mat)){
# prepare values that are used
option.mean <- option.means.mat[dist,]
option.sd <- option.sd.mat[dist,]
option.tau <- option.tau.mat[dist,]
# loop through all parameter combinations
for(pars in 1:nrow(par.combin)){
# for each parameter combination run n.agent times the simulation
outc.mat[,pars] <- many.agent.fun(n.agent = n.agent, sample.cap = par.combin[pars, 3], gamma = par.combin[pars, 2],
alpha = par.combin[pars, 1], option.mean = option.mean,
option.sd = option.sd, option.tau = option.tau, n.trials = n.trials,
goal = goal, prior.exp.start = rep(.5, 4))
}
save(outc.mat,
file = paste("data/memSampSC", dist,
".RData", sep = ''))
}
#---------------------------------------
# simulate different sample capacity values
n.trials <- 50
goal = 450
n.agent <- 1000
#library(yarrr)
# Create a vector of exploit, explore, and random agent outcomes
# different parameter combinations:
alphas <- .3
gammas <- seq(0, 3, .05)
sample.caps <- 4
par.combin <- expand.grid(alphas, gammas, sample.caps)
# create matrices to store the outcome values for the different environments
outc.mat <- matrix(NA, nrow = n.agent, ncol = nrow(par.combin))
# create matrices with different distributions (means and sds)
# create matrices with different distributions (means and sds)
option.means.mat <- matrix(c(10, 10, 9, 7,
9, 9, 8, 7,
8, 8, 7, 5),
ncol = 4, byrow = T)
option.sd.mat <- matrix(c(7, 7, 7, 12,
7, 7, 7, 12,
7, 7, 7, 12),
ncol = 4, byrow = T)
option.tau.mat <- matrix(c(0, 0, 0, .15,
0, 0, 0, .15,
0, 0, 0, .15),
ncol = 4, byrow = T)
# set the number of trials per game
# loop through distributions
for (dist in 1:nrow(option.means.mat)){
# prepare values that are used
option.mean <- option.means.mat[dist,]
option.sd <- option.sd.mat[dist,]
option.tau <- option.tau.mat[dist,]
# loop through all parameter combinations
for(pars in 1:nrow(par.combin)){
# for each parameter combination run n.agent times the simulation
outc.mat[,pars] <- many.agent.fun(n.agent = n.agent, sample.cap = par.combin[pars, 3], gamma = par.combin[pars, 2],
alpha = par.combin[pars, 1], option.mean = option.mean,
option.sd = option.sd, option.tau = option.tau, n.trials = n.trials,
goal = goal, prior.exp.start = rep(.5, 4))
}
save(outc.mat,
file = paste("data/memSampGamma", dist,
".RData", sep = ''))
}
In the first three plots the sampling capacity is varied, in the last three plots the gamma values are varied.
n.trials <- 50
ntrials <- n.trials
goal = 450
n.agent <- 1000
library(yarrr)
library(stringr)
# Create a vector of exploit, explore, and random agent outcomes
# different parameter combinations:
alphas <- .3
gammas <- .1
sample.caps <- c(2, 4, 6, 8, 10, 12, 14, 16, 50)
par.combin <- expand.grid(alphas, gammas, sample.caps)
# create matrices to store the outcome values for the different environments
outc.mat <- matrix(NA, nrow = n.agent, ncol = nrow(par.combin))
# create matrices with different distributions (means and sds)
# create matrices with different distributions (means and sds)
option.means.mat <- matrix(c(10, 10, 9, 7,
9, 9, 8, 7,
8, 8, 7, 5),
ncol = 4, byrow = T)
option.sd.mat <- matrix(c(7, 7, 7, 12,
7, 7, 7, 12,
7, 7, 7, 12),
ncol = 4, byrow = T)
option.tau.mat <- matrix(c(0, 0, 0, .15,
0, 0, 0, .15,
0, 0, 0, .15),
ncol = 4, byrow = T)
for(env in 1:nrow(option.means.mat)){
# prepare an empty variable to later store the expected outcomes in
option.means <- option.means.mat[env,]
option.sd <- option.sd.mat[env,]
option.tau <- option.tau.mat[env,]
mean.outc <- NULL
# loop 1000 times
for(xx in 1:1000){
# prepare variable
outc <- NULL
# loop through number of options/ bandit arms
for(yy in 1:ntrials){
samp <- sample(1:(length(option.means)), 1)
# draw from distributions from each arm
outc <- c(outc, rexgauss(n = 1,
mu = option.means[samp],
sigma = option.sd[samp],
tau = option.tau[samp],
positive = FALSE))
}
# save sum of drawn values
mean.outc[xx] <- sum(outc)
}
# compute the value according to that goal
prop.rand.surv <- mean(mean.outc >= goal)
random.mean <- mean(mean.outc)
load(paste0('data/memSampSC', env,'.RData'))
goal = 450
# compute proportion of survivors (how many reached threshold)
prop.surv <- colMeans(outc.mat >= goal)
# look up which is the 95% threshold
mean.outc.value <- colMeans(outc.mat)
m <- rbind(1, 1, 1, 1, 1, 2, 2)
layout(m)
par(mar = c(4, 4, 4.5, 4))
plot(1, xlim = c(0,50), ylim = c(min(prop.surv)-.05, ifelse(max(prop.surv) <= .95,
max(prop.surv) + .05, 1)), type = 'n',
xlab ='sample cap values', ylab = 'prop survived', main = paste0('prop surv. and mean outc for sample capacity'),
cex.lab = 1.5)
paste(option.means, ',')
mtext(paste0('Environment: M: ', toString(option.means), '; sd: ', option.sd[1], '; tau: ', toString(option.tau)))
grid()
lines(par.combin[,3], prop.surv, col = transparent('red', .5),
lwd = 1.5)
abline(h = prop.rand.surv, col = transparent('red', .5), lty = 2)
par(new = T)
plot(par.combin[,3], mean.outc.value, col = transparent('blue', .5), lwd = 1.5,
xlab = NA, ylab = NA, type = 'l', axes = F)
axis(side = 4)
mtext(side = 4, line = 3, 'mean outcome')
abline(h = random.mean, col = transparent('blue', .5), lty = 2)
par(mar = c(0, 0, 0, 0))
plot(1, type = 'n', axes = F, xlab = NA, ylab = NA)
legend('center', c('prop threshold reached', 'mean outcomes',
'prop threshold reached random',
'mean outcomes random'),
col = rep(c(transparent('red', .5), transparent('blue', .5)), 2), lty = c(1, 1, 2, 2), cex = 1.5)
}
#------------------------------------------------------------
n.trials <- 50
ntrials <- n.trials
goal = 450
n.agent <- 1000
library(yarrr)
library(stringr)
# Create a vector of exploit, explore, and random agent outcomes
# different parameter combinations:
alphas <- .3
gammas <- seq(0, 3, .05)
sample.caps <- 4
par.combin <- expand.grid(alphas, gammas, sample.caps)
# create matrices to store the outcome values for the different environments
outc.mat <- matrix(NA, nrow = n.agent, ncol = nrow(par.combin))
# create matrices with different distributions (means and sds)
# create matrices with different distributions (means and sds)
option.means.mat <- matrix(c(10, 10, 9, 7,
9, 9, 8, 7,
8, 8, 7, 5),
ncol = 4, byrow = T)
option.sd.mat <- matrix(c(7, 7, 7, 12,
7, 7, 7, 12,
7, 7, 7, 12),
ncol = 4, byrow = T)
option.tau.mat <- matrix(c(0, 0, 0, .15,
0, 0, 0, .15,
0, 0, 0, .15),
ncol = 4, byrow = T)
for(env in 1:nrow(option.means.mat)){
# prepare an empty variable to later store the expected outcomes in
option.means <- option.means.mat[env,]
option.sd <- option.sd.mat[env,]
option.tau <- option.tau.mat[env,]
mean.outc <- NULL
# loop 1000 times
for(xx in 1:1000){
# prepare variable
outc <- NULL
# loop through number of options/ bandit arms
for(yy in 1:ntrials){
samp <- sample(1:(length(option.means)), 1)
# draw from distributions from each arm
outc <- c(outc, rexgauss(n = 1,
mu = option.means[samp],
sigma = option.sd[samp],
tau = option.tau[samp],
positive = FALSE))
}
# save sum of drawn values
mean.outc[xx] <- sum(outc)
}
# compute the value according to that goal
prop.rand.surv <- mean(mean.outc >= goal)
random.mean <- mean(mean.outc)
load(paste0('data/memSampGamma', env,'.RData'))
goal = 450
# compute proportion of survivors (how many reached threshold)
prop.surv <- colMeans(outc.mat >= goal)
# look up which is the 95% threshold
mean.outc.value <- colMeans(outc.mat)
m <- rbind(1, 1, 1, 1, 1, 2, 2)
layout(m)
par(mar = c(4, 4, 4.5, 4))
plot(1, xlim = c(0,3), ylim = c(min(prop.surv)-.05, ifelse(max(prop.surv) <= .95,
max(prop.surv) + .05, 1)), type = 'n',
xlab ='gamma values', ylab = 'prop survived', main = paste0('prop surv. and mean outc for gamma'),
cex.lab = 1.5)
paste(option.means, ',')
mtext(paste0('Environment: M: ', toString(option.means), '; sd: ', option.sd[1], '; tau: ', toString(option.tau)))
grid()
lines(par.combin[,2], prop.surv, col = transparent('red', .5),
lwd = 1.5)
abline(h = prop.rand.surv, col = transparent('red', .5), lty = 2)
par(new = T)
plot(par.combin[,2], mean.outc.value, col = transparent('blue', .5), lwd = 1.5,
xlab = NA, ylab = NA, type = 'l', axes = F)
axis(side = 4)
mtext(side = 4, line = 3, 'mean outcome')
abline(h = random.mean, col = transparent('blue', .5), lty = 2)
par(mar = c(0, 0, 0, 0))
plot(1, type = 'n', axes = F, xlab = NA, ylab = NA)
legend('center', c('prop threshold reached', 'mean outcomes',
'prop threshold reached random',
'mean outcomes random'),
col = rep(c(transparent('red', .5), transparent('blue', .5)), 2), lty = c(1, 1, 2, 2), cex = 1.5)
}