A more rigorous and extensive explanation of what I present here can be found in the book 'Modeling Psychophysical Data in R'.
Let's suppose that the participant needs to decide between two orientation values at each of four intensity levels, and that the participant is tested about 60 times per level (as in the hypothetical data below). It is natural to consider the data as arising from a binomial distribution. I create a data frame with some hypothetical data.
intensity <- c(1, 2, 3, 4)
nCorrect <- c(33, 39, 54, 57)
nIncorrect <- c(29, 21, 6, 3)
dat <- data.frame(intensity, nCorrect, nIncorrect)
dat
##   intensity nCorrect nIncorrect
## 1         1       33         29
## 2         2       39         21
## 3         3       54          6
## 4         4       57          3
I add the proportion of correct responses
dat$p <- dat$nCorrect/(dat$nCorrect + dat$nIncorrect)
I plot the data using ggplot
library("ggplot2")
p1 <- ggplot(data = dat, aes(x = intensity, y = p)) + geom_point()
p1
I fit the data with a cumulative normal curve using a generalized linear model, which in R is fitted with the function glm. The standard link function 'probit' corresponds to a cumulative normal function whose lower asymptote is 0. Because in our case the lower asymptote should be 0.5 (chance level), I use a special probit link function provided by the psyphy package: mafc.probit(2). The argument 2 indicates that we are dealing with a 2-alternative forced-choice (2AFC) paradigm.
library("psyphy")
model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
             family = binomial(mafc.probit(2)), data = dat)
Notice that I used cbind because, when binomial data are given as counts, glm expects the response to be a two-column matrix of successes and failures.
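As a side note (my addition, not part of the original analysis), an equivalent way to specify the same binomial model is to use the proportion correct as the response together with the number of trials as prior weights; this is standard glm behavior and should reproduce the same coefficients.
# Equivalent specification: proportion correct as response,
# total number of trials as prior weights
model.w <- glm(p ~ intensity, weights = nCorrect + nIncorrect,
               family = binomial(mafc.probit(2)), data = dat)
coef(model.w)  # should match coef(model)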
Now I use the model to generate the psychometric function and add it to the plot
xseq <- seq(0, 5, len = 1000)  # for example, 1000 points
yseq <- predict(model, data.frame(intensity = xseq), type = "response")
curve <- data.frame(xseq, yseq)
p1 <- p1 + geom_line(data = curve, aes(x = xseq, y = yseq))
p1
We can obtain the mean and standard deviation of the underlying cumulative normal from the model coefficients. The linear predictor is b0 + b1 * x = (x - m) / sigma, so m = -b0/b1 and sigma = 1/b1.
(m <- -coef(model)[[1]]/coef(model)[[2]])  # the parentheses print the result
## [1] 2.402
(std <- 1/coef(model)[[2]])
## [1] 1.067
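To make the relation between the coefficients and the fitted curve explicit, we can reconstruct the psychometric function by hand. This check is my addition and assumes that mafc.probit(2) implements Psi(x) = 0.5 + 0.5 * Phi((x - m) / sigma), the standard form of the 2AFC probit model; the values should match those returned by predict above.
# Manual reconstruction of the fitted curve
yseq.manual <- 0.5 + 0.5 * pnorm(xseq, m, std)
all.equal(as.numeric(yseq), yseq.manual)  # should be TRUE up to numerical error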
I calculate the intensity that produces 80% correct responses as follows
p <- 0.8
(thre <- qnorm((p - 0.5)/0.5, m, std))  # (p - 0.5)/0.5 because the lower asymptote is 0.5
## [1] 2.673
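As a quick sanity check (my addition), evaluating the fitted model at the threshold should return a proportion close to 0.8.
# Should be close to 0.8 by construction
predict(model, data.frame(intensity = thre), type = "response")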
threshold <- data.frame(p, thre)
p1 <- p1 + geom_linerange(data = threshold, aes(x = thre, ymin = 0.5, ymax = p))
p1
Another way to obtain the threshold is to use the function threshold_slope from the 'modelfree' package. To calculate the threshold (and the slope), it takes the whole psychometric function as input
library("modelfree")
threshold_slope(yseq, xseq, 0.8)
## $x_th
## [1] 2.673
## 
## $slope
##    536 
## 0.1811
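For comparison (my addition), the slope can also be derived analytically: differentiating Psi(x) = 0.5 + 0.5 * Phi((x - m) / sigma) gives Psi'(x) = 0.5 * dnorm(x, m, sigma), which evaluated at the threshold agrees with the value above.
# Analytic slope of the psychometric function at the threshold
0.5 * dnorm(thre, m, std)  # approximately 0.1811, matching $slope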
I calculate the confidence intervals using a parametric bootstrap. First, I create 100 fake data sets by resampling, at each intensity, the number of correct responses from a binomial distribution with the observed proportion correct.
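Because the bootstrap involves random sampling, the exact numbers below will change from run to run; fixing the seed beforehand (my addition; the value is arbitrary) makes the simulation reproducible.
set.seed(1)  # arbitrary seed, only for reproducibility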
library("plyr")
sampling <- function(df) {
    # df is the one-row piece passed by ddply (just the sample number);
    # the resampling itself is based on the original data in dat
    n <- length(dat$intensity)
    size <- dat$nCorrect + dat$nIncorrect
    intensity <- dat$intensity
    nCorrect <- rbinom(n, size, prob = dat$p)  # parametric resampling
    nIncorrect <- size - nCorrect
    data.frame(intensity, nCorrect, nIncorrect)
}
fake.dat <- ddply(data.frame(sample = 1:100), .(sample), sampling)
Then I fit a model to every fake data set and calculate the threshold
calculate.thre <- function(df) {
    # data = df makes the formula refer to the columns of each fake data set
    model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
                 family = binomial(mafc.probit(2)), data = df)
    m <- -coef(model)[[1]]/coef(model)[[2]]
    std <- 1/coef(model)[[2]]
    p <- 0.8
    thre <- qnorm((p - 0.5)/0.5, m, std)
    data.frame(thre)
}
fake.thresholds <- ddply(fake.dat, .(sample), calculate.thre)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: algorithm did not converge
and calculate the 95% confidence intervals
ci <- quantile(fake.thresholds$thre, c(0.025, 0.975))
ci.df <- data.frame(p, rbind(ci))  # the quantile names become columns X2.5. and X97.5.
p1 + geom_errorbarh(data = ci.df, aes(xmax = X97.5., xmin = X2.5.), height = 0.01)
Sometimes the fake data contain very extreme values, so glm cannot fit the data properly and warnings or errors occur. To skip those cases, I use the function tryCatch.
calculate.threshold <- function(df) {
    tryCatch({
        model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
                     family = binomial(mafc.probit(2)), data = df)
        m <- -coef(model)[[1]]/coef(model)[[2]]
        std <- 1/coef(model)[[2]]
        p <- 0.8
        thre <- qnorm((p - 0.5)/0.5, m, std)
        data.frame(thre)
    }, error = function(e) {
        message(paste("Fitting error in sample:", unique(df$sample)))
    }, warning = function(w) {
        message(paste("Fitting warning in sample:", unique(df$sample)))
    })
}
fake.thresholds <- ddply(fake.dat, .(sample), calculate.threshold)
## Fitting warning in sample: 27
## Fitting warning in sample: 80
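Note that the error and warning handlers return NULL, so ddply simply drops the failing samples and fake.thresholds may contain fewer than 100 thresholds. This can be checked as follows (my addition).
# Number of bootstrap samples that produced a valid threshold
length(unique(fake.thresholds$sample))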
If we have several conditions, the approach is very similar to the one I used for calculating the bootstrap confidence intervals.
First, let's create a fake data frame with several conditions and plot the data
conditions <- expand.grid(condition1 = c("a", "b"), condition2 = c(1, 2, 3))
generate.values <- function(df) {
    # df (one combination of conditions) is not used directly: the same
    # underlying probabilities are simulated for every condition
    size <- 60
    intensity <- c(1, 2, 3, 4)
    nCorrect <- rbinom(4, size, prob = c(0.6, 0.7, 0.8, 0.9))
    nIncorrect <- size - nCorrect
    p <- nCorrect/(nCorrect + nIncorrect)
    data.frame(intensity, nCorrect, nIncorrect, p)
}
dat <- ddply(conditions, .(condition1, condition2), generate.values)
p1 <- ggplot(data = dat, aes(x = intensity, y = p, color = condition1)) +
    facet_grid(. ~ condition2) + geom_point()
p1
I fit a model for each condition
fitting <- function(df) {
    # data = df ensures that the formula uses the columns of each
    # condition's subset instead of variables in the global environment
    model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
                 family = binomial(mafc.probit(2)), data = df)
    xseq <- seq(0, 5, len = 1000)
    yseq <- predict(model, data.frame(intensity = xseq), type = "response")
    data.frame(xseq, yseq)
}
curves <- ddply(dat, .(condition1, condition2), fitting)
p1 <- p1 + geom_line(data = curves, aes(x = xseq, y = yseq, color = condition1))
p1
I calculate the thresholds
threshold <- function(df) {
    p <- 0.8  # defined locally (the original relied on the global p)
    thre <- threshold_slope(df$yseq, df$xseq, p)$x_th
    data.frame(p, thre)
}
thresholds <- ddply(curves, .(condition1, condition2), threshold)
p1 <- p1 + geom_linerange(data = thresholds,
                          aes(x = thre, ymin = 0.5, ymax = p, color = condition1))
p1
and the confidence intervals by bootstrapping
sampling <- function(x) {
    # x (the sample number) is ignored; each call resamples every
    # condition of the original data in dat
    funct <- function(df) {
        n <- length(df$intensity)
        size <- df$nCorrect + df$nIncorrect
        intensity <- df$intensity
        nCorrect <- rbinom(n, size, prob = df$p)
        nIncorrect <- size - nCorrect
        data.frame(intensity, nCorrect, nIncorrect)
    }
    ddply(dat, .(condition1, condition2), funct)
}
fake.dat <- ddply(data.frame(sample = 1:100), .(sample), sampling)
calculate.thre <- function(df) {
    tryCatch({
        model <- glm(cbind(nCorrect, nIncorrect) ~ intensity,
                     family = binomial(mafc.probit(2)), data = df)
        m <- -coef(model)[[1]]/coef(model)[[2]]
        std <- 1/coef(model)[[2]]
        p <- 0.8
        thre <- qnorm((p - 0.5)/0.5, m, std)
        data.frame(thre)
    }, error = function(e) {
        message(paste("Fitting error in sample:", unique(df$sample)))
    }, warning = function(w) {
        message(paste("Fitting warning in sample:", unique(df$sample)))
    })
}
fake.thresholds <- ddply(fake.dat, .(sample, condition1, condition2), calculate.thre)
## Fitting warning in sample: 16
## Fitting warning in sample: 27
## Fitting warning in sample: 28
## Fitting warning in sample: 53
## Fitting warning in sample: 56
## Fitting warning in sample: 68
## Fitting warning in sample: 81
calculate.ci <- function(df) {
conf.int <- quantile(df$thre, c(0.025, 0.975))
inf <- conf.int[[1]]
sup <- conf.int[[2]]
data.frame(inf, sup)
}
cis <- ddply(fake.thresholds, .(condition1, condition2), calculate.ci)
p2 <- ggplot(data = thresholds, aes(x = condition1, y = thre, fill = factor(condition2))) +
    geom_bar(position = "dodge", stat = "identity") +
    geom_errorbar(data = cis, aes(x = condition1, ymin = inf, ymax = sup,
                                  fill = factor(condition2)),
                  position = "dodge", stat = "identity")
p2