# Load in needed packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(irr)
## Loading required package: lpSolve
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Data cleaning

# Load in data
# The column names come from the first line of the export; the data rows are
# read separately with the first two lines skipped, and the names are then
# attached to them.
# NOTE(review): if this export carries a third metadata row, it is read as a
# data row here and is only dropped later by the rater-name filters - confirm.
headers <- read.csv("Kayla Thesis Data Collection_February 26, 2026_09.55.csv", nrows = 1)
raw <- read.csv("Kayla Thesis Data Collection_February 26, 2026_09.55.csv", skip = 2, header = FALSE)
names(raw) <- names(headers)

# Keep only relevant columns: the rater name, the video ID, and every
# remaining column whose name starts with "X"
data <- rename(raw, rater = X0.0, videoID = X1.0)
data <- select(data, rater, videoID, starts_with("X"))

# Split by rater. Each rater keeps one row per video (the first one found),
# and every column except the join key gets a rater suffix so the two sets of
# ratings stay distinguishable after the merge.
rater1 <- data %>%
  filter(grepl("Kayla", rater, ignore.case = TRUE)) %>%
  distinct(videoID, .keep_all = TRUE) %>%
  rename_with(function(nm) paste0(nm, "_r1"), .cols = -videoID)

rater2 <- data %>%
  filter(grepl("Connor", rater, ignore.case = TRUE)) %>%
  distinct(videoID, .keep_all = TRUE) %>%
  rename_with(function(nm) paste0(nm, "_r2"), .cols = -videoID)

# Merge by video: only videos present for both raters are retained
merged <- inner_join(rater1, rater2, by = "videoID")

# Select columns to code
# Each name must exist in `merged` twice: once with the "_r1" suffix and once
# with the "_r2" suffix.
code_cols <- c(
  "X1.1", "X1.2", "X1.3", "X1.4",
  "X2.1", "X2.2", "X2.3", "X2.3b", "X2.4",
  "X3.1", "X3.2", "X3.3", "X3.3b", "X3.4", "X3.5", "X3.6", "X3.7", "X3.8",
  "X3.9", "X3.10", "X3.11", "X3.11b_1", "X3.11b_2", "X3.12", "X3.13",
  "X3.14", "X3.15", "X3.16", "X3.17", "X3.18", "X3.19", "X3.20", "X3.21",
  "X3.22"
)

# Build one row per coded variable with:
#   variable       the column stem from `code_cols`
#   kappa          kappa2() estimate for the two raters, rounded to 3 decimals
#   p_value        kappa2() p-value, rounded to 4 decimals
#   pct_agreement  percentage of jointly rated videos with identical codes
#   n              number of videos with a usable rating from both raters
# Ratings are coerced to numeric; a video counts only when both raters have a
# non-missing numeric value for the variable.
results <- map_dfr(code_cols, function(col) {
  r1_raw <- merged[[paste0(col, "_r1")]]
  r2_raw <- merged[[paste0(col, "_r2")]]

  # `[[` returns NULL for an absent column, which would otherwise pass through
  # as an empty vector; stop with the offending variable name instead.
  if (is.null(r1_raw) || is.null(r2_raw)) {
    stop(
      "Variable '", col, "' is missing for at least one rater in `merged`.",
      call. = FALSE
    )
  }

  # Coerce quietly here and raise a named warning below, so it is clear which
  # variable lost ratings and how many.
  r1 <- suppressWarnings(as.numeric(r1_raw))
  r2 <- suppressWarnings(as.numeric(r2_raw))

  # Counts entries that were present and non-blank but became NA on coercion
  count_unparsed <- function(raw, num) {
    sum(is.na(num) & !is.na(raw) & nzchar(trimws(as.character(raw))))
  }
  n_unparsed <- count_unparsed(r1_raw, r1) + count_unparsed(r2_raw, r2)
  if (n_unparsed > 0) {
    warning(
      "Variable ", col, ": ", n_unparsed,
      " non-numeric rating(s) were treated as missing.",
      call. = FALSE
    )
  }

  valid <- complete.cases(r1, r2)
  n_valid <- sum(valid)

  # Nothing to compare: report the variable with missing statistics rather
  # than calling kappa2() on an empty table and dividing by zero.
  if (n_valid == 0L) {
    return(tibble(
      variable       = col,
      kappa          = NA_real_,
      p_value        = NA_real_,
      pct_agreement  = NA_real_,
      n              = n_valid
    ))
  }

  # NOTE(review): kappa2() can return NaN even with 100% agreement (seen for
  # X3.5), presumably when both raters used one single code throughout - confirm.
  k <- kappa2(data.frame(r1[valid], r2[valid]))
  pct <- sum(r1[valid] == r2[valid]) / n_valid

  tibble(
    variable       = col,
    kappa          = round(k$value, 3),
    p_value        = round(k$p.value, 4),
    pct_agreement  = round(pct * 100, 1),
    n              = n_valid
  )
})
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
## Warning in .f(.x[[i]], ...): NAs introduced by coercion
# Render the reliability results as a captioned table with striped rows and
# hover highlighting, sized to its content rather than the full page width
kable_styling(
  kable(results, digits = 3, caption = "Interrater Reliability Results"),
  bootstrap_options = c("striped", "hover"),
  full_width = FALSE
)
Interrater Reliability Results
variable kappa p_value pct_agreement n
X1.1 0.656 0.000 97.8 45
X1.2 0.382 0.000 57.8 45
X1.3 0.857 0.000 91.1 45
X1.4 0.605 0.000 68.9 45
X2.1 0.666 0.000 79.5 44
X2.2 0.659 0.000 87.2 39
X2.3 0.848 0.000 97.8 45
X2.3b 1.000 0.083 100.0 3
X2.4 0.770 0.000 91.1 45
X3.1 0.800 0.000 91.1 45
X3.2 0.897 0.000 97.8 45
X3.3 0.800 0.000 93.3 45
X3.3b 1.000 0.014 100.0 6
X3.4 1.000 0.000 100.0 45
X3.5 NaN NaN 100.0 45
X3.6 0.760 0.000 93.3 45
X3.7 0.732 0.000 86.7 45
X3.8 0.808 0.000 95.6 45
X3.9 1.000 0.000 100.0 45
X3.10 1.000 0.000 100.0 45
X3.11 0.945 0.000 97.8 45
X3.11b_1 0.438 0.067 75.0 12
X3.11b_2 0.676 0.013 83.3 12
X3.12 0.909 0.000 95.6 45
X3.13 0.646 0.000 95.6 45
X3.14 0.877 0.000 97.8 45
X3.15 0.773 0.000 88.9 45
X3.16 -0.031 0.827 93.3 45
X3.17 0.897 0.000 97.8 45
X3.18 0.607 0.000 88.6 44
X3.19 0.500 0.000 86.7 45
X3.20 0.137 0.354 84.4 45
X3.21 0.672 0.000 88.9 45
X3.22 0.910 0.000 97.8 45
# Overall summary
# Variables whose kappa is NA/NaN are dropped once up front, so every figure
# below is based only on variables with a defined kappa. The kappa values
# were already rounded to 3 decimals when `results` was built.
defined_kappa <- results$kappa[!is.na(results$kappa)]
cat("Mean Kappa:", round(mean(defined_kappa), 3), "\n")
## Mean Kappa: 0.733
kappa_bounds <- round(range(defined_kappa), 3)
cat("Range:", kappa_bounds[1], "-", kappa_bounds[2], "\n")
## Range: -0.031 - 1
# Share of the remaining variables reaching the .80 threshold, as a percentage
share_high <- mean(defined_kappa >= .80) * 100
cat("% variables with κ ≥ .80:", round(share_high, 1), "%\n")
## % variables with κ ≥ .80: 48.5 %