A more rigorous and extensive explanation of what I present here can be found in the book 'Modeling Psychophysical Data in R'.
Let's suppose that the participant needs to decide between two orientation values at each of four intensity levels, and that the participant is tested about 60 times per level (as in the hypothetical data below). It is natural to consider the data as arising from a binomial distribution. I create a data frame with some hypothetical data.
intensity <- c(1, 2, 3, 4)
nCorrect <- c(33, 39, 54, 57)
nIncorrect <- c(29, 21, 6, 3)
dat <- data.frame(intensity, nCorrect, nIncorrect)
dat
##   intensity nCorrect nIncorrect
## 1         1       33         29
## 2         2       39         21
## 3         3       54          6
## 4         4       57          3
I add the proportion of correct responses
dat$p <- dat$nCorrect/(dat$nCorrect + dat$nIncorrect)
I plot the data using ggplot
library("ggplot2")
p1 <- ggplot(data = dat, aes(x = intensity, y = p)) + geom_point()
p1
I fit the data with a cumulative normal curve using a generalized linear model, which in R is fitted with the function glm. The standard link function 'probit' corresponds to a cumulative normal function whose lower asymptote is 0. Because in our case the lower asymptote should be 0.5 (chance level), I use a special probit link function provided by the psyphy package: mafc.probit(2). The argument 2 indicates that we are dealing with a 2-alternative forced-choice (2AFC) paradigm.
library("psyphy")
model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
             family = binomial(mafc.probit(2)), data = dat)
Notice that I used cbind because, when binomial data are given as counts, glm expects the response to be a two-column matrix of successes and failures.
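As a side note (my addition, not part of the original analysis), an equivalent way to specify the same binomial model is to use the proportion correct as the response together with the number of trials as prior weights; this is standard glm behavior and should reproduce the same coefficients.
# Equivalent specification: proportion correct as response,
# total number of trials as prior weights
model.w <- glm(p ~ intensity, weights = nCorrect + nIncorrect,
               family = binomial(mafc.probit(2)), data = dat)
coef(model.w)  # should match coef(model)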
Now I use the model to generate the psychometric function and add it to the plot
xseq <- seq(0, 5, len = 1000)  # for example, 1000 points
yseq <- predict(model, data.frame(intensity = xseq), type = "response")
curve <- data.frame(xseq, yseq)
p1 <- p1 + geom_line(data = curve, aes(x = xseq, y = yseq))
p1
We can obtain the mean and standard deviation of the underlying cumulative normal from the model coefficients. The linear predictor is b0 + b1 * x = (x - m) / sigma, so m = -b0/b1 and sigma = 1/b1.
(m <- -coef(model)[[1]]/coef(model)[[2]])  # the parentheses print the result
## [1] 2.402
(std <- 1/coef(model)[[2]])
## [1] 1.067
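To make the relation between the coefficients and the fitted curve explicit, we can reconstruct the psychometric function by hand. This check is my addition and assumes that mafc.probit(2) implements Psi(x) = 0.5 + 0.5 * Phi((x - m) / sigma), the standard form of the 2AFC probit model; the values should match those returned by predict above.
# Manual reconstruction of the fitted curve
yseq.manual <- 0.5 + 0.5 * pnorm(xseq, m, std)
all.equal(as.numeric(yseq), yseq.manual)  # should be TRUE up to numerical error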
I calculate the intensity that produces 80% correct responses as follows
p <- 0.8
(thre <- qnorm((p - 0.5)/0.5, m, std))  # (p - 0.5)/0.5 because the lower asymptote is 0.5
## [1] 2.673
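As a quick sanity check (my addition), evaluating the fitted model at the threshold should return a proportion close to 0.8.
# Should be close to 0.8 by construction
predict(model, data.frame(intensity = thre), type = "response")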
threshold <- data.frame(p, thre)
p1 <- p1 + geom_linerange(data = threshold, aes(x = thre, ymin = 0.5, ymax = p))
p1
Another way to obtain the threshold is to use the function threshold_slope from the 'modelfree' package. To calculate the threshold (and the slope), it takes the whole psychometric function as input
library("modelfree")
threshold_slope(yseq, xseq, 0.8)
## $x_th
## [1] 2.673
## 
## $slope
##    536 
## 0.1811
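For comparison (my addition), the slope can also be derived analytically: differentiating Psi(x) = 0.5 + 0.5 * Phi((x - m) / sigma) gives Psi'(x) = 0.5 * dnorm(x, m, sigma), which evaluated at the threshold agrees with the value above.
# Analytic slope of the psychometric function at the threshold
0.5 * dnorm(thre, m, std)  # approximately 0.1811, matching $slope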
I calculate the confidence intervals using a parametric bootstrap. First, I create 100 fake data sets by resampling, at each intensity, the number of correct responses from a binomial distribution with the observed proportion correct.
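Because the bootstrap involves random sampling, the exact numbers below will change from run to run; fixing the seed beforehand (my addition; the value is arbitrary) makes the simulation reproducible.
set.seed(1)  # arbitrary seed, only for reproducibility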
library("plyr")
sampling <- function(df) {
    # df is the one-row piece passed by ddply (just the sample number);
    # the resampling itself is based on the original data in dat
    n <- length(dat$intensity)
    size <- dat$nCorrect + dat$nIncorrect
    intensity <- dat$intensity
    nCorrect <- rbinom(n, size, prob = dat$p)  # parametric resampling
    nIncorrect <- size - nCorrect
    data.frame(intensity, nCorrect, nIncorrect)
}
fake.dat <- ddply(data.frame(sample = 1:100), .(sample), sampling)
Then I fit a model to every fake data set and calculate the threshold
calculate.thre <- function(df) {
    # data = df makes the formula refer to the columns of each fake data set
    model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
                 family = binomial(mafc.probit(2)), data = df)
    m <- -coef(model)[[1]]/coef(model)[[2]]
    std <- 1/coef(model)[[2]]
    p <- 0.8
    thre <- qnorm((p - 0.5)/0.5, m, std)
    data.frame(thre)
}
fake.thresholds <- ddply(fake.dat, .(sample), calculate.thre)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
and calculate the 95% confidence intervals
ci <- quantile(fake.thresholds$thre, c(0.025, 0.975))
ci.df <- data.frame(p, rbind(ci))  # the quantile names become columns X2.5. and X97.5.
p1 + geom_errorbarh(data = ci.df, aes(xmax = X97.5., xmin = X2.5.), height = 0.01)
Sometimes the fake data contain very extreme values, so glm cannot fit the data properly and warnings or errors occur. To skip those cases, I use the function tryCatch.
calculate.threshold <- function(df) {
    tryCatch({
        model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
                     family = binomial(mafc.probit(2)), data = df)
        m <- -coef(model)[[1]]/coef(model)[[2]]
        std <- 1/coef(model)[[2]]
        p <- 0.8
        thre <- qnorm((p - 0.5)/0.5, m, std)
        data.frame(thre)
    }, error = function(e) {
        message(paste("Fitting error in sample:", unique(df$sample)))
    }, warning = function(w) {
        message(paste("Fitting warning in sample:", unique(df$sample)))
    })
}
fake.thresholds <- ddply(fake.dat, .(sample), calculate.threshold)
## Fitting warning in sample: 27
## Fitting warning in sample: 80
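Note that the error and warning handlers return NULL, so ddply simply drops the failing samples and fake.thresholds may contain fewer than 100 thresholds. This can be checked as follows (my addition).
# Number of bootstrap samples that produced a valid threshold
length(unique(fake.thresholds$sample))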
If we have several conditions, the approach is very similar to the one I used for calculating the bootstrap confidence intervals.
First, let's create a fake data frame with several conditions and plot the data
conditions <- expand.grid(condition1 = c("a", "b"), condition2 = c(1, 2, 3))
generate.values <- function(df) {
    # df (one combination of conditions) is not used directly: the same
    # underlying probabilities are simulated for every condition
    size <- 60
    intensity <- c(1, 2, 3, 4)
    nCorrect <- rbinom(4, size, prob = c(0.6, 0.7, 0.8, 0.9))
    nIncorrect <- size - nCorrect
    p <- nCorrect/(nCorrect + nIncorrect)
    data.frame(intensity, nCorrect, nIncorrect, p)
}
dat <- ddply(conditions, .(condition1, condition2), generate.values)
p1 <- ggplot(data = dat, aes(x = intensity, y = p, color = condition1)) +
    facet_grid(. ~ condition2) + geom_point()
p1
I fit a model for each condition
fitting <- function(df) {
    # data = df ensures that the formula uses the columns of each
    # condition's subset instead of variables in the global environment
    model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
                 family = binomial(mafc.probit(2)), data = df)
    xseq <- seq(0, 5, len = 1000)
    yseq <- predict(model, data.frame(intensity = xseq), type = "response")
    data.frame(xseq, yseq)
}
curves <- ddply(dat, .(condition1, condition2), fitting)
p1 <- p1 + geom_line(data = curves, aes(x = xseq, y = yseq, color = condition1))
p1
I calculate the thresholds
threshold <- function(df) {
    p <- 0.8  # defined locally (the original relied on the global p)
    thre <- threshold_slope(df$yseq, df$xseq, p)$x_th
    data.frame(p, thre)
}
thresholds <- ddply(curves, .(condition1, condition2), threshold)
p1 <- p1 + geom_linerange(data = thresholds,
                          aes(x = thre, ymin = 0.5, ymax = p, color = condition1))
p1
and the confidence intervals by bootstrapping
sampling <- function(x) {
    # x (the sample number) is ignored; each call resamples every
    # condition of the original data in dat
    funct <- function(df) {
        n <- length(df$intensity)
        size <- df$nCorrect + df$nIncorrect
        intensity <- df$intensity
        nCorrect <- rbinom(n, size, prob = df$p)
        nIncorrect <- size - nCorrect
        data.frame(intensity, nCorrect, nIncorrect)
    }
    ddply(dat, .(condition1, condition2), funct)
}
fake.dat <- ddply(data.frame(sample = 1:100), .(sample), sampling)
calculate.thre <- function(df) {
    tryCatch({
        model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
                     family = binomial(mafc.probit(2)), data = df)
        m <- -coef(model)[[1]]/coef(model)[[2]]
        std <- 1/coef(model)[[2]]
        p <- 0.8
        thre <- qnorm((p - 0.5)/0.5, m, std)
        data.frame(thre)
    }, error = function(e) {
        message(paste("Fitting error in sample:", unique(df$sample)))
    }, warning = function(w) {
        message(paste("Fitting warning in sample:", unique(df$sample)))
    })
}
fake.thresholds <- ddply(fake.dat, .(sample, condition1, condition2), calculate.thre)
## Fitting warning in sample: 16
## Fitting warning in sample: 27
## Fitting warning in sample: 28
## Fitting warning in sample: 53
## Fitting warning in sample: 56
## Fitting warning in sample: 68
## Fitting warning in sample: 81
calculate.ci <- function(df) {
conf.int <- quantile(df$thre, c(0.025, 0.975))
inf <- conf.int[[1]]
sup <- conf.int[[2]]
data.frame(inf, sup)
}
cis <- ddply(fake.thresholds, .(condition1, condition2), calculate.ci)
p2 <- ggplot(data = thresholds, aes(x = condition1, y = thre, fill = factor(condition2))) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_errorbar(data = cis, aes(x = condition1, ymin = inf, ymax = sup,
                                  fill = factor(condition2)),
                  position = "dodge", stat = "identity")
p2