library(ggplot2)
library(pROC)
## Create demonstration dataset
set.seed(20140718)
scoreChangeMeans <- c(35,20,5,-10,-20,-40,-55)
groupSizes <- c(20,30,40,50,40,30,20)
scoreChange <- unlist(mapply(function(m,g) {rnorm(n = g, mean = m, sd = 30)}, scoreChangeMeans, groupSizes))
anchorCategories <- c("very much worse","much worse","minimally worse","no change","minimally improved","much improved","very much improved")
anchorCat <- rep(anchorCategories,groupSizes)
anchorCat <- factor(anchorCat, levels = anchorCategories, ordered = TRUE)
dat <- data.frame(scoreChange = scoreChange, anchorCat = anchorCat)
##
head(dat)
## scoreChange anchorCat
## 1 68.423 very much worse
## 2 51.959 very much worse
## 3 7.674 very much worse
## 4 74.170 very much worse
## 5 -5.462 very much worse
## 6 19.864 very much worse
ggplot(data = dat,
mapping = aes(x = anchorCat, y = scoreChange)) +
layer(geom = "boxplot") +
theme_bw() +
theme(legend.key = element_blank())
## Drop very much worse and much worse
dat2 <- subset(dat, !anchorCat %in% anchorCategories[1:2])
## Consider much improved and very much improved as meaningful improvement
dat2$clinicallyImproved <- (dat2$anchorCat > "much improved")
## Plot distribution
ggplot(data = dat2,
mapping = aes(x = scoreChange, color = clinicallyImproved)) +
layer(geom = "density") +
theme_bw() +
theme(legend.key = element_blank())
## Create an ROC and find the optimal threshold
roc1 <- roc(response = dat2$clinicallyImproved, predictor = dat2$scoreChange)
plot(roc1, print.thres = TRUE)
##
## Call:
## roc.default(response = dat2$clinicallyImproved, predictor = dat2$scoreChange)
##
## Data: dat2$scoreChange in 160 controls (dat2$clinicallyImproved FALSE) > 20 cases (dat2$clinicallyImproved TRUE).
## Area under the curve: 0.854