Quality Rating - Compute Inter-rater Agreement

Load data

# load library
library(irr)  # agreement
library(ggplot2)  # plots
library(plyr)  # ddply
library(psych)  # correlation

# set wd setwd('~/src/r/quality-rating')

# load data
qr <- read.csv("ratings.csv")

qr$note <- as.factor(qr$note)
qr$course <- as.factor(qr$course)
qr$week <- as.factor(qr$week)

Idea Development

Compute agreement

# prepare data
var <- na.omit(qr[, c("id1", "id2", "rater1", "rater2")])
var$rater1 <- as.character(var$rater1)
var$rater2 <- as.character(var$rater2)
var.t <- as.matrix(t(var[, c("id1", "id2")]))

# compute alpha
kripp.alpha(var.t, "ordinal")

##  Krippendorff's alpha
## 
##  Subjects = 702 
##    Raters = 2 
##     alpha = 0.366

kripp.alpha(var.t, "interval")

##  Krippendorff's alpha
## 
##  Subjects = 702 
##    Raters = 2 
##     alpha = 0.393

Check disagreement

diff <- abs(var$id1 - var$id2)
# summary of difference
summary(diff)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    1.00    2.00    1.89    3.00    6.00

# plot it
qplot(diff)

plot of chunk unnamed-chunk-3

Disagrement within pairs of raters

# merge pairs in different orders, e.g. 'rater 1 - 2' with 'rater 2 - 1'
to.order <- var[var$rater1 > var$rater2, ]
var[var$rater1 > var$rater2, ]$rater1 <- to.order$rater2
var[var$rater1 == var$rater2, ]$rater2 <- to.order$rater1

# summarize difference
diff.pairs <- ddply(var, .(rater1, rater2), summarise, diff = mean(abs(id1 - 
    id2)), diff.sd = sd(abs(id1 - id2)))
diff.pairs

##    rater1  rater2  diff diff.sd
## 1 rater 1 rater 2 2.246   1.638
## 2 rater 1 rater 3 2.173   1.367
## 3 rater 1 rater 4 2.009   1.417
## 4 rater 2 rater 3 1.569   1.280
## 5 rater 2 rater 4 1.800   1.340
## 6 rater 3 rater 4 1.556   1.128


# plot
diff.pairs$id <- paste(diff.pairs$rater1, diff.pairs$rater2, sep = " - ")
ggplot(diff.pairs, aes(x = id, y = diff, fill = id)) + geom_bar(stat = "identity") + 
    geom_errorbar(aes(ymin = diff - diff.sd, ymax = diff + diff.sd), width = 0.1) + 
    xlab("Rater pairs") + ylab("Difference") + guides(fill = FALSE)

plot of chunk unnamed-chunk-4

Constructive criticism

Compute agreement

# prepare data
var <- na.omit(qr[, c("cc1", "cc2", "rater1", "rater2")])
var$rater1 <- as.character(var$rater1)
var$rater2 <- as.character(var$rater2)
var.t <- as.matrix(t(var[, c("cc1", "cc2")]))

# compute alpha
kripp.alpha(var.t, "ordinal")

##  Krippendorff's alpha
## 
##  Subjects = 702 
##    Raters = 2 
##     alpha = -0.0117

kripp.alpha(var.t, "interval")

##  Krippendorff's alpha
## 
##  Subjects = 702 
##    Raters = 2 
##     alpha = 0.0858

Check disagreement

diff <- abs(var$cc1 - var$cc2)
# summary of difference
summary(diff)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    1.00    1.00    1.82    3.00    9.00

# plot it
qplot(diff)

plot of chunk unnamed-chunk-6

Disagrement within pairs of raters

# merge pairs in different orders, e.g. 'rater 1 - 2' with 'rater 2 - 1'
to.order <- var[var$rater1 > var$rater2, ]
var[var$rater1 > var$rater2, ]$rater1 <- to.order$rater2
var[var$rater1 == var$rater2, ]$rater2 <- to.order$rater1

# summarize difference
diff.pairs <- ddply(var, .(rater1, rater2), summarise, diff = mean(abs(cc1 - 
    cc2)), diff.sd = sd(abs(cc1 - cc2)))
diff.pairs

##    rater1  rater2   diff diff.sd
## 1 rater 1 rater 2 1.7538   1.685
## 2 rater 1 rater 3 0.7545   1.235
## 3 rater 1 rater 4 1.6068   1.252
## 4 rater 2 rater 3 2.0603   1.899
## 5 rater 2 rater 4 3.2952   2.112
## 6 rater 3 rater 4 1.5484   1.205


# plot
diff.pairs$id <- paste(diff.pairs$rater1, diff.pairs$rater2, sep = " - ")
ggplot(diff.pairs, aes(x = id, y = diff, fill = id)) + geom_bar(stat = "identity") + 
    geom_errorbar(aes(ymin = diff - diff.sd, ymax = diff + diff.sd), width = 0.1) + 
    xlab("Rater pairs") + ylab("Difference") + guides(fill = FALSE)

plot of chunk unnamed-chunk-7

Authoritative source information

Compute agreement

# prepare data
var <- na.omit(qr[, c("asi1", "asi2", "rater1", "rater2")])
var$rater1 <- as.character(var$rater1)
var$rater2 <- as.character(var$rater2)
var.t <- as.matrix(t(var[, c("asi1", "asi2")]))

# compute alpha
kripp.alpha(var.t, "ordinal")

##  Krippendorff's alpha
## 
##  Subjects = 702 
##    Raters = 2 
##     alpha = 0.203

kripp.alpha(var.t, "interval")

##  Krippendorff's alpha
## 
##  Subjects = 702 
##    Raters = 2 
##     alpha = 0.362

Check disagreement

diff <- abs(var$asi1 - var$asi2)
# summary of difference
summary(diff)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    1.00    1.00    1.82    3.00    9.00

# plot it
qplot(diff)

plot of chunk unnamed-chunk-9

Disagrement within pairs of raters

# merge pairs in different orders, e.g. 'rater 1 - 2' with 'rater 2 - 1'
to.order <- var[var$rater1 > var$rater2, ]
var[var$rater1 > var$rater2, ]$rater1 <- to.order$rater2
var[var$rater1 == var$rater2, ]$rater2 <- to.order$rater1

# summarize difference
diff.pairs <- ddply(var, .(rater1, rater2), summarise, diff = mean(abs(asi1 - 
    asi2)), diff.sd = sd(abs(asi1 - asi2)))
diff.pairs

##    rater1  rater2   diff diff.sd
## 1 rater 1 rater 2 0.9538   1.187
## 2 rater 1 rater 3 1.1727   1.360
## 3 rater 1 rater 4 1.8547   1.248
## 4 rater 2 rater 3 1.5603   1.470
## 5 rater 2 rater 4 2.4857   1.846
## 6 rater 3 rater 4 2.9274   1.947


# plot
diff.pairs$id <- paste(diff.pairs$rater1, diff.pairs$rater2, sep = " - ")
ggplot(diff.pairs, aes(x = id, y = diff, fill = id)) + geom_bar(stat = "identity") + 
    geom_errorbar(aes(ymin = diff - diff.sd, ymax = diff + diff.sd), width = 0.1) + 
    xlab("Rater pairs") + ylab("Difference") + guides(fill = FALSE)

plot of chunk unnamed-chunk-10

Promising ideas

Compute agreement

# prepare data
var <- na.omit(qr[, c("pi1", "pi2", "rater1", "rater2")])
var$rater1 <- as.character(var$rater1)
var$rater2 <- as.character(var$rater2)
var.t <- as.matrix(t(var[, c("pi1", "pi2")]))

# compute alpha
kripp.alpha(var.t, "ordinal")

##  Krippendorff's alpha
## 
##  Subjects = 702 
##    Raters = 2 
##     alpha = 0.162

kripp.alpha(var.t, "interval")

##  Krippendorff's alpha
## 
##  Subjects = 702 
##    Raters = 2 
##     alpha = 0.17

Check disagreement

# rating difference
diff <- abs(var$pi1 - var$pi2)
# summary of difference
summary(diff)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    1.00    2.00    2.46    4.00    8.00

# plot it
qplot(diff)

plot of chunk unnamed-chunk-12

Disagrement within pairs of raters

# merge pairs in different orders, e.g. 'rater 1 - 2' with 'rater 2 - 1'
to.order <- var[var$rater1 > var$rater2, ]
var[var$rater1 > var$rater2, ]$rater1 <- to.order$rater2
var[var$rater1 == var$rater2, ]$rater2 <- to.order$rater1

# summarize difference
diff.pairs <- ddply(var, .(rater1, rater2), summarise, diff = mean(abs(pi1 - 
    pi2)), diff.sd = sd(abs(pi1 - pi2)))
diff.pairs

##    rater1  rater2  diff diff.sd
## 1 rater 1 rater 2 2.769   1.866
## 2 rater 1 rater 3 2.482   1.325
## 3 rater 1 rater 4 1.547   1.256
## 4 rater 2 rater 3 1.655   1.313
## 5 rater 2 rater 4 3.200   1.963
## 6 rater 3 rater 4 3.129   1.803


# plot
diff.pairs$id <- paste(diff.pairs$rater1, diff.pairs$rater2, sep = " - ")
ggplot(diff.pairs, aes(x = id, y = diff, fill = id)) + geom_bar(stat = "identity") + 
    geom_errorbar(aes(ymin = diff - diff.sd, ymax = diff + diff.sd), width = 0.1) + 
    xlab("Rater pairs") + ylab("Difference") + guides(fill = FALSE)

plot of chunk unnamed-chunk-13

Correlations

It is of interest to check whether quality rating scores are correlated with each other. Because we don't have a good way to deal with low inter-rater agreement at this moment, average scores are used at this moment.

# use average score
qr$id <- (qr$id1 + qr$id2)/2
qr$cc <- (qr$cc1 + qr$cc2)/2
qr$asi <- (qr$asi1 + qr$asi2)/2
qr$pi <- (qr$pi1 + qr$pi2)/2
scores <- data.frame(qr$id, qr$cc, qr$asi, qr$pi)

# compute correlations
corr <- corr.test(na.omit(scores))
corr

## Call:corr.test(x = na.omit(scores))
## Correlation matrix 
##        qr.id qr.cc qr.asi qr.pi
## qr.id   1.00  0.20   0.51  0.81
## qr.cc   0.20  1.00   0.23  0.35
## qr.asi  0.51  0.23   1.00  0.53
## qr.pi   0.81  0.35   0.53  1.00
## Sample Size 
##        qr.id qr.cc qr.asi qr.pi
## qr.id    702   702    702   702
## qr.cc    702   702    702   702
## qr.asi   702   702    702   702
## qr.pi    702   702    702   702
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##        qr.id qr.cc qr.asi qr.pi
## qr.id      0     0      0     0
## qr.cc      0     0      0     0
## qr.asi     0     0      0     0
## qr.pi      0     0      0     0

Results show all quality rating variables are significantly correlated. In particular, promisingness is significantly correlated with idea development (r = 0.807, p = 0), authoritative source integration (r = 0.5337, p = 0), and constructive criticism (r = 0.3545, p = 0). One correlation that is higher than .5 is between idea development and authoritative source integration (r = 0.5123, p = 0).

The significant correlations could be attributed to fairly large sample size. Let's plot these scores to see how they scatter. Please note I use alpha to the following plots, making more densely packed points darker while other points with less occurrence lighter.

# plot pi vs. id
qplot(qr$pi, qr$id, geom = c("point", "smooth"), alpha = I(1/10), xlab = "Promising ideas", 
    ylab = "Idea development")

plot of chunk corr-plot

# plot pi vs. asi
qplot(qr$pi, qr$asi, geom = c("point", "smooth"), alpha = I(1/10), xlab = "Promising ideas", 
    ylab = "Authoritative source")

plot of chunk corr-plot

# plot pi vs. cc
qplot(qr$pi, qr$cc, geom = c("point", "smooth"), alpha = I(1/10), xlab = "Promising ideas", 
    ylab = "Constructive criticism")

plot of chunk corr-plot

# plot id vs. asi
qplot(qr$id, qr$asi, geom = c("point", "smooth"), alpha = I(1/10), xlab = "Idea development", 
    ylab = "Authoritative source")

plot of chunk corr-plot

I realized three variables—idea improvement, promisingness, and authoritative sources—had close correlations with each other. So I am going to plot them in 3D.

library(scatterplot3d)
s3d <- scatterplot3d(qr$id, qr$pi, qr$asi, pch = 1, highlight.3d = TRUE, col.grid = "lightblue", 
    main = "3D Scatterplot", xlab = "Idea development", ylab = "Promising Ideas", 
    zlab = "Authoritative source")
fit <- lm(asi ~ id + pi, data = qr)
s3d$plane3d(fit, col = "blue")

plot of chunk corr-plot3d