# Load in needed packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(irr)
## Loading required package: lpSolve
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
# Data cleaning
# Load in data
# The export is read twice: once (header + one row) only to obtain the column
# names, and once with the first two lines skipped to obtain the data rows.
export_path <- "Kayla Thesis Data Collection_February 26, 2026_09.55.csv"
headers <- read.csv(export_path, nrows = 1)
raw <- read.csv(export_path, skip = 2, header = FALSE)
names(raw) <- names(headers)
# Keep only relevant columns: the two identifier columns get readable names,
# followed by every remaining column whose name starts with "X".
data <- raw %>%
  rename(rater = X0.0, videoID = X1.0) %>%
  select(rater, videoID, starts_with("X"))
# Split by rater
# Build one rater's table from `df`:
#   1. keep rows whose `rater` value contains `pattern` (case-insensitive),
#   2. keep the first row found for each videoID,
#   3. append `suffix` to every column name except videoID so the two raters'
#      columns remain distinguishable after the join.
rater_subset <- function(df, pattern, suffix) {
  df %>%
    filter(grepl(pattern, rater, ignore.case = TRUE)) %>%
    distinct(videoID, .keep_all = TRUE) %>%
    rename_with(function(nm) paste0(nm, suffix), .cols = -videoID)
}
rater1 <- rater_subset(data, "Kayla", "_r1")
rater2 <- rater_subset(data, "Connor", "_r2")
# Merge by video: the inner join keeps only videos present for both raters
merged <- inner_join(rater1, rater2, by = "videoID")
# Select columns to code
code_cols <- c(
  "X1.1", "X1.2", "X1.3", "X1.4",
  "X2.1", "X2.2", "X2.3", "X2.3b", "X2.4",
  "X3.1", "X3.2", "X3.3", "X3.3b", "X3.4", "X3.5", "X3.6", "X3.7", "X3.8",
  "X3.9", "X3.10", "X3.11", "X3.11b_1", "X3.11b_2", "X3.12", "X3.13",
  "X3.14", "X3.15", "X3.16", "X3.17", "X3.18", "X3.19", "X3.20", "X3.21",
  "X3.22"
)

# Agreement statistics for one coded item.
#
# `col` is the item's base column name; the two raters' codes are read from
# `merged` as `<col>_r1` and `<col>_r2`. Returns a one-row tibble holding the
# item name, Cohen's kappa, its p-value, percent agreement, and the number of
# videos that entered the calculation.
#
# Codes are converted with as.numeric(): an entry that cannot be parsed as a
# number becomes NA (R emits "NAs introduced by coercion"), and any video with
# an NA from either rater is excluded from that item's statistics.
rate_agreement <- function(col) {
  rater_cols <- paste0(col, c("_r1", "_r2"))
  absent <- setdiff(rater_cols, names(merged))
  if (length(absent) > 0) {
    # Fail with the offending names instead of an obscure downstream error
    stop(
      "Column(s) not found in `merged`: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  r1 <- as.numeric(merged[[rater_cols[1]]])
  r2 <- as.numeric(merged[[rater_cols[2]]])
  valid <- complete.cases(r1, r2)
  n_valid <- sum(valid)

  if (n_valid == 0) {
    # Nothing to compare: report the item with missing statistics rather than
    # handing kappa2() an empty table.
    return(tibble(
      variable = col,
      kappa = NA_real_,
      p_value = NA_real_,
      pct_agreement = NA_real_,
      n = n_valid
    ))
  }

  # NOTE(review): kappa2() can return NaN (it does for X3.5 in the rendered
  # results, where agreement is 100%) - presumably because the ratings show no
  # variation; confirm before interpreting such rows.
  k <- kappa2(data.frame(r1 = r1[valid], r2 = r2[valid]))
  pct <- mean(r1[valid] == r2[valid])

  tibble(
    variable = col,
    kappa = round(k$value, 3),
    p_value = round(k$p.value, 4),
    pct_agreement = round(pct * 100, 1),
    n = n_valid
  )
}

# map() + list_rbind() replaces the superseded map_dfr()
results <- code_cols %>%
  map(rate_agreement) %>%
  list_rbind()
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
# Render the agreement statistics as a styled table (3 decimal places)
reliability_table <- kable(
  results,
  digits = 3,
  caption = "Interrater Reliability Results"
)
kable_styling(
  reliability_table,
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Interrater Reliability Results

| variable | kappa | p_value | pct_agreement | n |
|---|---|---|---|---|
| X1.1 | 0.656 | 0.000 | 97.8 | 45 |
| X1.2 | 0.382 | 0.000 | 57.8 | 45 |
| X1.3 | 0.857 | 0.000 | 91.1 | 45 |
| X1.4 | 0.605 | 0.000 | 68.9 | 45 |
| X2.1 | 0.666 | 0.000 | 79.5 | 44 |
| X2.2 | 0.659 | 0.000 | 87.2 | 39 |
| X2.3 | 0.848 | 0.000 | 97.8 | 45 |
| X2.3b | 1.000 | 0.083 | 100.0 | 3 |
| X2.4 | 0.770 | 0.000 | 91.1 | 45 |
| X3.1 | 0.800 | 0.000 | 91.1 | 45 |
| X3.2 | 0.897 | 0.000 | 97.8 | 45 |
| X3.3 | 0.800 | 0.000 | 93.3 | 45 |
| X3.3b | 1.000 | 0.014 | 100.0 | 6 |
| X3.4 | 1.000 | 0.000 | 100.0 | 45 |
| X3.5 | NaN | NaN | 100.0 | 45 |
| X3.6 | 0.760 | 0.000 | 93.3 | 45 |
| X3.7 | 0.732 | 0.000 | 86.7 | 45 |
| X3.8 | 0.808 | 0.000 | 95.6 | 45 |
| X3.9 | 1.000 | 0.000 | 100.0 | 45 |
| X3.10 | 1.000 | 0.000 | 100.0 | 45 |
| X3.11 | 0.945 | 0.000 | 97.8 | 45 |
| X3.11b_1 | 0.438 | 0.067 | 75.0 | 12 |
| X3.11b_2 | 0.676 | 0.013 | 83.3 | 12 |
| X3.12 | 0.909 | 0.000 | 95.6 | 45 |
| X3.13 | 0.646 | 0.000 | 95.6 | 45 |
| X3.14 | 0.877 | 0.000 | 97.8 | 45 |
| X3.15 | 0.773 | 0.000 | 88.9 | 45 |
| X3.16 | -0.031 | 0.827 | 93.3 | 45 |
| X3.17 | 0.897 | 0.000 | 97.8 | 45 |
| X3.18 | 0.607 | 0.000 | 88.6 | 44 |
| X3.19 | 0.500 | 0.000 | 86.7 | 45 |
| X3.20 | 0.137 | 0.354 | 84.4 | 45 |
| X3.21 | 0.672 | 0.000 | 88.9 | 45 |
| X3.22 | 0.910 | 0.000 | 97.8 | 45 |
# Overall summary
# Items whose kappa is missing (NA/NaN) are left out of all three figures.
kappas <- results$kappa[!is.na(results$kappa)]
cat("Mean Kappa:", round(mean(kappas), 3), "\n")
## Mean Kappa: 0.733
cat("Range:", round(min(kappas), 3), "-", round(max(kappas), 3), "\n")
## Range: -0.031 - 1
# Share of the remaining items whose (rounded) kappa is at least .80
cat("% variables with κ ≥ .80:", round(mean(kappas >= .80) * 100, 1), "%\n")
## % variables with κ ≥ .80: 48.5 %