Data generation for ROC demo

References

Load packages

library(ggplot2)
library(pROC)

Create data

## Create demonstration dataset
set.seed(20140718)
scoreChangeMeans <- c(35,20,5,-10,-20,-40,-55)
groupSizes       <- c(20,30,40,50,40,30,20)
scoreChange <- unlist(mapply(function(m,g) {rnorm(n = g, mean = m, sd = 30)}, scoreChangeMeans, groupSizes))
anchorCategories <- c("very much worse","much worse","minimally worse","no change","minimally improved","much improved","very much improved")
anchorCat <- rep(anchorCategories,groupSizes)
anchorCat <- factor(anchorCat, levels = anchorCategories, ordered = TRUE)
dat <- data.frame(scoreChange = scoreChange, anchorCat = anchorCat)

## 
head(dat)
##   scoreChange       anchorCat
## 1      68.423 very much worse
## 2      51.959 very much worse
## 3       7.674 very much worse
## 4      74.170 very much worse
## 5      -5.462 very much worse
## 6      19.864 very much worse

Plot

ggplot(data = dat,
       mapping = aes(x = anchorCat, y = scoreChange)) +
           layer(geom = "boxplot") +
           theme_bw() +
           theme(legend.key = element_blank())

plot of chunk unnamed-chunk-4

ROC method

## Drop very much worse and much worse
dat2 <- subset(dat, !anchorCat %in% anchorCategories[1:2])

## Consider much improved and very much improved as meaningful improvement
dat2$clinicallyImproved <- (dat2$anchorCat > "much improved")

## Plot distribution
ggplot(data = dat2,
       mapping = aes(x = scoreChange, color = clinicallyImproved)) +
           layer(geom = "density") +
           theme_bw() +
           theme(legend.key = element_blank())

plot of chunk unnamed-chunk-5

## Create an ROC and find the optimal threshold
roc1 <- roc(response = dat2$clinicallyImproved, predictor = dat2$scoreChange)
plot(roc1, print.thres = TRUE)

plot of chunk unnamed-chunk-5

## 
## Call:
## roc.default(response = dat2$clinicallyImproved, predictor = dat2$scoreChange)
## 
## Data: dat2$scoreChange in 160 controls (dat2$clinicallyImproved FALSE) > 20 cases (dat2$clinicallyImproved TRUE).
## Area under the curve: 0.854