Main simulation function
The main simulation funciton is rl.sim.fun()
. The function returns a dataframe containing the main results of the agent (and plots the agent’s cumulative earnings when plot = TRUE
)
# Create main simulation function
rl.sim.fun <- function(n.trials = 100, # Trials in game
option.mean = c(1, -1, 0), # Mean of each option
option.sd = c(2, 10, .01), # SD of each option
prior.exp.start = rep(0, 3),
theta = .5,
alpha = .2,
plot = TRUE) {
# Get some game parameters from inputs
n.options <- length(option.mean)
# Create outcome matrix giving the outcome on each trial for each option
outcome.mtx <- matrix(NA, nrow = n.trials, ncol = n.options)
for(option.i in 1:n.options) {
outcome.mtx[,option.i] <- rnorm(n = n.trials,
mean = option.mean[option.i],
sd = option.sd[option.i])
}
# Create exp.prior and exp.new matrices
# These will hold the agent's expectation (either prior or new)
# of each option on each trial
exp.prior.mtx <- matrix(NA, nrow = n.trials, ncol = n.options)
exp.prior.mtx[1,] <- prior.exp.start
exp.new.mtx <- matrix(NA, nrow = n.trials, ncol = n.options)
# Now create some matrices to store values
selection.v <- rep(NA, n.trials) # Actual selections
outcome.v <- rep(NA, n.trials) # Actual outcomes
selprob.mtx <- matrix(NA, # Selection probabilities
nrow = n.trials,
ncol = n.options)
# RUN SIMULATION!
for(trial.i in 1:n.trials) {
# STEP 0: Get prior expectations for current trial
exp.prior.i <- exp.prior.mtx[trial.i,]
# STEP 1: SELECT AN OPTION
# Selection probabilities
selprob.i <- softmax.fun(exp.current = exp.prior.i,
theta = theta)
# Select an option
selection.i <- sample(1:n.options, size = 1, prob = selprob.i)
# Get outcome from selected option
outcome.i <- outcome.mtx[trial.i, selection.i]
# STEP 3: CREATE NEW EXPECTANCIES
# Create a new.inf vector with NAs except for outcome of selected option
new.inf <- rep(NA, n.options)
new.inf[selection.i] <- outcome.i
# Get new expectancies
new.exp.i <- rw.fun(exp.prior = exp.prior.i,
new.inf = new.inf,
alpha = alpha)
# assign new expectatations to exp.new.mtx[trial.i,]
# and prior.expecation.mtx[trial.i + 1,]
exp.new.mtx[trial.i,] <- new.exp.i
if(trial.i < n.trials) {
exp.prior.mtx[trial.i + 1,] <- new.exp.i
}
# Save some values
selprob.mtx[trial.i,] <- selprob.i # Selection probabilities
selection.v[trial.i] <- selection.i # Actual selection
outcome.v[trial.i] <- outcome.i # Actual outcome
}
# Put main results in a single dataframe called sim.result.df
sim.result.df <- data.frame("selection" = selection.v,
"outcome" = outcome.v,
"outcome.cum" = cumsum(outcome.v),
stringsAsFactors = FALSE)
# Should simulation be plotted?
if(plot == TRUE) {
plot(1,
xlim = c(1, n.trials),
ylim = c(min(option.mean) * n.trials, # Set limits to worst and best exepected performance
max(option.mean) * n.trials),
type = "n",
ylab = "Cumulative Rewards",
xlab = "Trial",
main = paste0("alpha = ", alpha, ", theta = ", theta))
rect(-1e3, -1e3, 1e3, 1e3, col = gray(.96))
abline(h = seq(-1e3, 1e3, by = 10),
v = seq(-1e3, 1e3, by = 10),
lwd = c(2, 1), col = "white")
points(x = 1:n.trials,
y = sim.result.df$outcome.cum,
type = "b")
}
# Now return the main simulation dataframe
return(sim.result.df)
}