# load library
library(irr) # agreement
library(ggplot2) # plots
library(plyr) # ddply
library(psych) # correlation
# set wd setwd('~/src/r/quality-rating')
# load data
qr <- read.csv("ratings.csv")
qr$note <- as.factor(qr$note)
qr$course <- as.factor(qr$course)
qr$week <- as.factor(qr$week)
# prepare data
var <- na.omit(qr[, c("id1", "id2", "rater1", "rater2")])
var$rater1 <- as.character(var$rater1)
var$rater2 <- as.character(var$rater2)
var.t <- as.matrix(t(var[, c("id1", "id2")]))
# compute alpha
kripp.alpha(var.t, "ordinal")
## Krippendorff's alpha
##
## Subjects = 702
## Raters = 2
## alpha = 0.366
kripp.alpha(var.t, "interval")
## Krippendorff's alpha
##
## Subjects = 702
## Raters = 2
## alpha = 0.393
diff <- abs(var$id1 - var$id2)
# summary of difference
summary(diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 1.00 2.00 1.89 3.00 6.00
# plot it
qplot(diff)
Disagrement within pairs of raters
# merge pairs in different orders, e.g. 'rater 1 - 2' with 'rater 2 - 1'
to.order <- var[var$rater1 > var$rater2, ]
var[var$rater1 > var$rater2, ]$rater1 <- to.order$rater2
var[var$rater1 == var$rater2, ]$rater2 <- to.order$rater1
# summarize difference
diff.pairs <- ddply(var, .(rater1, rater2), summarise, diff = mean(abs(id1 -
id2)), diff.sd = sd(abs(id1 - id2)))
diff.pairs
## rater1 rater2 diff diff.sd
## 1 rater 1 rater 2 2.246 1.638
## 2 rater 1 rater 3 2.173 1.367
## 3 rater 1 rater 4 2.009 1.417
## 4 rater 2 rater 3 1.569 1.280
## 5 rater 2 rater 4 1.800 1.340
## 6 rater 3 rater 4 1.556 1.128
# plot
diff.pairs$id <- paste(diff.pairs$rater1, diff.pairs$rater2, sep = " - ")
ggplot(diff.pairs, aes(x = id, y = diff, fill = id)) + geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = diff - diff.sd, ymax = diff + diff.sd), width = 0.1) +
xlab("Rater pairs") + ylab("Difference") + guides(fill = FALSE)
# prepare data
var <- na.omit(qr[, c("cc1", "cc2", "rater1", "rater2")])
var$rater1 <- as.character(var$rater1)
var$rater2 <- as.character(var$rater2)
var.t <- as.matrix(t(var[, c("cc1", "cc2")]))
# compute alpha
kripp.alpha(var.t, "ordinal")
## Krippendorff's alpha
##
## Subjects = 702
## Raters = 2
## alpha = -0.0117
kripp.alpha(var.t, "interval")
## Krippendorff's alpha
##
## Subjects = 702
## Raters = 2
## alpha = 0.0858
diff <- abs(var$cc1 - var$cc2)
# summary of difference
summary(diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 1.00 1.00 1.82 3.00 9.00
# plot it
qplot(diff)
Disagrement within pairs of raters
# merge pairs in different orders, e.g. 'rater 1 - 2' with 'rater 2 - 1'
to.order <- var[var$rater1 > var$rater2, ]
var[var$rater1 > var$rater2, ]$rater1 <- to.order$rater2
var[var$rater1 == var$rater2, ]$rater2 <- to.order$rater1
# summarize difference
diff.pairs <- ddply(var, .(rater1, rater2), summarise, diff = mean(abs(cc1 -
cc2)), diff.sd = sd(abs(cc1 - cc2)))
diff.pairs
## rater1 rater2 diff diff.sd
## 1 rater 1 rater 2 1.7538 1.685
## 2 rater 1 rater 3 0.7545 1.235
## 3 rater 1 rater 4 1.6068 1.252
## 4 rater 2 rater 3 2.0603 1.899
## 5 rater 2 rater 4 3.2952 2.112
## 6 rater 3 rater 4 1.5484 1.205
# plot
diff.pairs$id <- paste(diff.pairs$rater1, diff.pairs$rater2, sep = " - ")
ggplot(diff.pairs, aes(x = id, y = diff, fill = id)) + geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = diff - diff.sd, ymax = diff + diff.sd), width = 0.1) +
xlab("Rater pairs") + ylab("Difference") + guides(fill = FALSE)
# prepare data
var <- na.omit(qr[, c("asi1", "asi2", "rater1", "rater2")])
var$rater1 <- as.character(var$rater1)
var$rater2 <- as.character(var$rater2)
var.t <- as.matrix(t(var[, c("asi1", "asi2")]))
# compute alpha
kripp.alpha(var.t, "ordinal")
## Krippendorff's alpha
##
## Subjects = 702
## Raters = 2
## alpha = 0.203
kripp.alpha(var.t, "interval")
## Krippendorff's alpha
##
## Subjects = 702
## Raters = 2
## alpha = 0.362
diff <- abs(var$asi1 - var$asi2)
# summary of difference
summary(diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 1.00 1.00 1.82 3.00 9.00
# plot it
qplot(diff)
Disagrement within pairs of raters
# merge pairs in different orders, e.g. 'rater 1 - 2' with 'rater 2 - 1'
to.order <- var[var$rater1 > var$rater2, ]
var[var$rater1 > var$rater2, ]$rater1 <- to.order$rater2
var[var$rater1 == var$rater2, ]$rater2 <- to.order$rater1
# summarize difference
diff.pairs <- ddply(var, .(rater1, rater2), summarise, diff = mean(abs(asi1 -
asi2)), diff.sd = sd(abs(asi1 - asi2)))
diff.pairs
## rater1 rater2 diff diff.sd
## 1 rater 1 rater 2 0.9538 1.187
## 2 rater 1 rater 3 1.1727 1.360
## 3 rater 1 rater 4 1.8547 1.248
## 4 rater 2 rater 3 1.5603 1.470
## 5 rater 2 rater 4 2.4857 1.846
## 6 rater 3 rater 4 2.9274 1.947
# plot
diff.pairs$id <- paste(diff.pairs$rater1, diff.pairs$rater2, sep = " - ")
ggplot(diff.pairs, aes(x = id, y = diff, fill = id)) + geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = diff - diff.sd, ymax = diff + diff.sd), width = 0.1) +
xlab("Rater pairs") + ylab("Difference") + guides(fill = FALSE)
# prepare data
var <- na.omit(qr[, c("pi1", "pi2", "rater1", "rater2")])
var$rater1 <- as.character(var$rater1)
var$rater2 <- as.character(var$rater2)
var.t <- as.matrix(t(var[, c("pi1", "pi2")]))
# compute alpha
kripp.alpha(var.t, "ordinal")
## Krippendorff's alpha
##
## Subjects = 702
## Raters = 2
## alpha = 0.162
kripp.alpha(var.t, "interval")
## Krippendorff's alpha
##
## Subjects = 702
## Raters = 2
## alpha = 0.17
# rating difference
diff <- abs(var$pi1 - var$pi2)
# summary of difference
summary(diff)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 1.00 2.00 2.46 4.00 8.00
# plot it
qplot(diff)
Disagrement within pairs of raters
# merge pairs in different orders, e.g. 'rater 1 - 2' with 'rater 2 - 1'
to.order <- var[var$rater1 > var$rater2, ]
var[var$rater1 > var$rater2, ]$rater1 <- to.order$rater2
var[var$rater1 == var$rater2, ]$rater2 <- to.order$rater1
# summarize difference
diff.pairs <- ddply(var, .(rater1, rater2), summarise, diff = mean(abs(pi1 -
pi2)), diff.sd = sd(abs(pi1 - pi2)))
diff.pairs
## rater1 rater2 diff diff.sd
## 1 rater 1 rater 2 2.769 1.866
## 2 rater 1 rater 3 2.482 1.325
## 3 rater 1 rater 4 1.547 1.256
## 4 rater 2 rater 3 1.655 1.313
## 5 rater 2 rater 4 3.200 1.963
## 6 rater 3 rater 4 3.129 1.803
# plot
diff.pairs$id <- paste(diff.pairs$rater1, diff.pairs$rater2, sep = " - ")
ggplot(diff.pairs, aes(x = id, y = diff, fill = id)) + geom_bar(stat = "identity") +
geom_errorbar(aes(ymin = diff - diff.sd, ymax = diff + diff.sd), width = 0.1) +
xlab("Rater pairs") + ylab("Difference") + guides(fill = FALSE)
It is of interest to check whether quality rating scores are correlated with each other. Because we don't have a good way to deal with low inter-rater agreement at this moment, average scores are used at this moment.
# use average score
qr$id <- (qr$id1 + qr$id2)/2
qr$cc <- (qr$cc1 + qr$cc2)/2
qr$asi <- (qr$asi1 + qr$asi2)/2
qr$pi <- (qr$pi1 + qr$pi2)/2
scores <- data.frame(qr$id, qr$cc, qr$asi, qr$pi)
# compute correlations
corr <- corr.test(na.omit(scores))
corr
## Call:corr.test(x = na.omit(scores))
## Correlation matrix
## qr.id qr.cc qr.asi qr.pi
## qr.id 1.00 0.20 0.51 0.81
## qr.cc 0.20 1.00 0.23 0.35
## qr.asi 0.51 0.23 1.00 0.53
## qr.pi 0.81 0.35 0.53 1.00
## Sample Size
## qr.id qr.cc qr.asi qr.pi
## qr.id 702 702 702 702
## qr.cc 702 702 702 702
## qr.asi 702 702 702 702
## qr.pi 702 702 702 702
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## qr.id qr.cc qr.asi qr.pi
## qr.id 0 0 0 0
## qr.cc 0 0 0 0
## qr.asi 0 0 0 0
## qr.pi 0 0 0 0
Results show all quality rating variables are significantly correlated. In particular, promisingness is significantly correlated with idea development (r = 0.807, p = 0), authoritative source integration (r = 0.5337, p = 0), and constructive criticism (r = 0.3545, p = 0). One correlation that is higher than .5 is between idea development and authoritative source integration (r = 0.5123, p = 0).
The significant correlations could be attributed to fairly large sample size. Let's plot these scores to see how they scatter. Please note I use alpha to the following plots, making more densely packed points darker while other points with less occurrence lighter.
# plot pi vs. id
qplot(qr$pi, qr$id, geom = c("point", "smooth"), alpha = I(1/10), xlab = "Promising ideas",
ylab = "Idea development")
# plot pi vs. asi
qplot(qr$pi, qr$asi, geom = c("point", "smooth"), alpha = I(1/10), xlab = "Promising ideas",
ylab = "Authoritative source")
# plot pi vs. cc
qplot(qr$pi, qr$cc, geom = c("point", "smooth"), alpha = I(1/10), xlab = "Promising ideas",
ylab = "Constructive criticism")
# plot id vs. asi
qplot(qr$id, qr$asi, geom = c("point", "smooth"), alpha = I(1/10), xlab = "Idea development",
ylab = "Authoritative source")
I realized three variables—idea improvement, promisingness, and authoritative sources—had close correlations with each other. So I am going to plot them in 3D.
library(scatterplot3d)
s3d <- scatterplot3d(qr$id, qr$pi, qr$asi, pch = 1, highlight.3d = TRUE, col.grid = "lightblue",
main = "3D Scatterplot", xlab = "Idea development", ylab = "Promising Ideas",
zlab = "Authoritative source")
fit <- lm(asi ~ id + pi, data = qr)
s3d$plane3d(fit, col = "blue")