Forgetting function looks like this:
\[p = a(bt + 1)^{-c}\] p is proportion of items retained averaged across all participants, t is time. a, b, c are parameters that will be estimated using computational model; and the above equation will be used to generate predictions.
#discrepancy for power forgetting function
powdiscrep <- function(parms, rec, ri) {
if (any(parms<0)||any(parms>1)) return (1e6)
pow_pred <- parms['a'] * (parms['b']*ri + 1) ^(-parms['c'])
return (sqrt(sum((pow_pred - rec) ^2)/length(ri)))
}
#Carpenter et al., (2008) Experiment 1
rec <- c(.93, .88, .86, .66, .47, .34)
ri <- c(.0035, 1, 2, 7, 14, 42)
#initialize starting values
sparms <- c(1, .05, .7)
names(sparms) <- c('a', 'b', 'c')
#obtain best-fitting estimates
pout <- optim(sparms, powdiscrep, rec=rec, ri=ri)
pout
## $par
## a b c
## 0.9480230 0.1320698 0.5817508
##
## $value
## [1] 0.02620418
##
## $counts
## function gradient
## 132 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
pow_pred <- pout$par['a'] *
(pout$par['b'] * c(0:max(ri)) + 1)^(-pout$par['c'])
#predicted retention at different intervals. Because ri here is a continuous measure, that's why we are seeing more than 6 discrete values.
pow_pred
## [1] 0.9480230 0.8820193 0.8271791 0.7807172 0.7407281 0.7058586 0.6751189
## [8] 0.6477661 0.6232307 0.6010679 0.5809249 0.5625174 0.5456140 0.5300240
## [15] 0.5155885 0.5021743 0.4896684 0.4779746 0.4670102 0.4567037 0.4469931
## [22] 0.4378241 0.4291491 0.4209261 0.4131178 0.4056912 0.3986168 0.3918680
## [29] 0.3854211 0.3792546 0.3733492 0.3676874 0.3622530 0.3570316 0.3520100
## [36] 0.3471758 0.3425180 0.3380262 0.3336911 0.3295039 0.3254565 0.3215415
## [43] 0.3177520
#plot data and best-fitting predictions
par(cex.axis=1.2, cex.lab=1.4)
par(mar=(c(5,5,3,2) + 0.1), las = 1)
plot(ri, rec,
xlab = 'Retention Inverval (Days)',
ylab = 'Proportion Items Retained',
ylim = c(.3, 1),
xlim = c(0, 43),
xaxt = 'n', #suppress default x-axis
type= 'n')
lines(c(0:max(ri)), pow_pred, lwd = 2, col = 'purple')
points(ri, rec, pch=21, bg = 'red', cex = 2)
#only select the predicted values that correspond to time existed in observed data in order to draw vertical discrepancy lines (green line in graph).
dev <- pow_pred[ri + 1]
for (x in c(1:length(ri))) {
#create vertical lines show discrepancy between pred. and obs.
lines(c(ri[x], ri[x]), c(dev[x], rec[x]), lwd = 1, col = 'green')
}
axis(1, at=c(0:43))
#bootstrapping of parameter confidence level
ns <- 55
nbs <- 1000
bsparms <- matrix(NA, nbs, length(sparms)) #generate 1000 x 3 matrix of NAs
bspow_pred <- pout$par['a']*(pout$par['b']*ri + 1)^(-pout$par['c']) #ri is time
for (i in c(1:nbs)) {
#for each i, 55 participants retention of items at 6 time points data was created, it was then used to evaluate best a,b,c parameters through disrepancy function, and assigned as a row in bsparms.
recsynth <- vapply(bspow_pred,
FUN = function(x) mean(rbinom(ns, 1, x)), numeric(1))
bsparms[i,] <-
unlist(optim(pout$par, powdiscrep, rec=recsynth, ri=ri)$par)
}
histoplot <- function(x, l4x) {
hist(x, xlab=l4x, main='', xlim=c(0,1), cex.lab =1.5, cex.axis=1.5)
lq <- quantile(x, 0.025) #lower 95% CI bound
abline(v=lq, lty='dashed', lwd = 2)
uq <-quantile(x, 0.975) #upper 95% CI bound
abline(v=uq, lty='dashed', lwd = 2)
return(c(lq, uq))
}
par(mfcol=c(1, 3), las = 1)
for (i in c(1:dim(bsparms)[2])) {
print(histoplot(bsparms[,i], names(sparms)[i]))
}
## 2.5% 97.5%
## 0.8923324 1.0000000
## 2.5% 97.5%
## 0.0441258 0.6675566
## 2.5% 97.5%
## 0.2533074 1.0000000
For detailed explanations, refer to Computational Modeling CH 3.