library(readxl)
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the annotation sheet for this coder pair.
df <- read_excel("통합 문서1.xlsx")

# Build the sorted, de-duplicated set of sample times. For every row whose
# interval is longer than 0.01, take points from `begin` in steps of 0.5 up to
# (end - 0.01); shorter intervals contribute nothing.
#
# SIMPLIFY = FALSE keeps the mapply() result a list. With the default, rows
# that all yield the same number of points are collapsed into a matrix, which
# unlist() returns unchanged and unique() then de-duplicates by row rather
# than by value, leaving repeated time points in the timeline.
#
# NOTE(review): points are anchored at each interval's own `begin`, and the
# timeline is built from every row in the sheet (before any coder filtering),
# so the result is not an evenly spaced grid -- confirm this is intended.
timeline <- sort(unique(unlist(mapply(
  function(start, end) {
    if (end - 0.01 > start) seq(start, end - 0.01, by = 0.5) else NULL
  },
  df$begin, df$end,
  SIMPLIFY = FALSE
))))
# Return, for each time point in `timeline`, the label of the first row of
# `df` whose interval satisfies begin <= t < end, or NA when no row covers it.
#
# Note: this two-argument version is replaced by a three-argument definition
# of the same name later in the script, before any call is made.
label_at_time <- function(df, timeline) {
  pick_label <- function(t) {
    covers_t <- df$begin <= t & df$end > t
    hits <- df[covers_t, ]
    if (nrow(hits) == 0) {
      return(NA)
    }
    hits$label[1]
  }
  sapply(timeline, pick_label)
}
# Tag every row with its coder, taken from the coder name embedded in
# `filename`; rows matching neither name get NA. The first matching name wins.
df <- df %>%
  mutate(
    Coder = case_when(
      grepl("김지원", filename) ~ "김지원",
      grepl("한영은", filename) ~ "한영은",
      TRUE ~ NA_character_
    )
  )

# Keep only the rows that belong to one of the two coders.
df <- df %>%
  filter(!is.na(Coder))
# Look up one coder's label at each sample time.
#
# Args:
#   df:         annotation table with columns `Coder`, `begin`, `end`, `label`.
#   timeline:   numeric vector of time points to sample.
#   coder_name: value of `Coder` whose rows are used.
#
# Returns a vector with one element per entry of `timeline`: the label of the
# first row (in row order) of that coder with begin <= t < end, or NA when no
# row covers t. The result always has the type of the `label` column, also for
# an empty timeline or when nothing matches.
label_at_time <- function(df, timeline, coder_name) {
  # Pull the coder's columns out once instead of re-filtering the table for
  # every time point. which() drops rows where the comparison is NA, matching
  # what dplyr::filter() did.
  rows <- which(df$Coder == coder_name)
  begins <- df$begin[rows]
  ends <- df$end[rows]
  labels <- df$label[rows]

  # Index of the first covering interval per time point (NA if none).
  first_hit <- vapply(timeline, function(t) {
    hit <- which(begins <= t & ends > t)
    if (length(hit) > 0) hit[1] else NA_integer_
  }, integer(1))

  # Indexing with NA yields a typed NA, so uncovered points come back as NA.
  labels[first_hit]
}
# Label of each coder at every sample time (NA where the coder has no
# covering interval).
김지원_labels <- label_at_time(df, timeline, coder_name = "김지원")
한영은_labels <- label_at_time(df, timeline, coder_name = "한영은")

# Positions at which both coders have a label.
valid_idx <- which(!(is.na(김지원_labels) | is.na(한영은_labels)))

# Paired-label table: one row per time point rated by both coders.
label_df <- data.frame(
  김지원 = 김지원_labels[valid_idx],
  한영은 = 한영은_labels[valid_idx]
)

# Compute Cohen's kappa on the paired labels.
library(irr)
## Warning: package 'irr' was built under R version 4.4.2
## Loading required package: lpSolve
## Warning: package 'lpSolve' was built under R version 4.4.2
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
result <- cohen.kappa(label_df)
print(result[["kappa"]])
## [1] 0.65528
# Load the annotation sheet for this coder pair.
df <- read_excel("통합 문서2.xlsx")

# Build the sorted, de-duplicated set of sample times. For every row whose
# interval is longer than 0.01, take points from `begin` in steps of 0.5 up to
# (end - 0.01); shorter intervals contribute nothing.
#
# SIMPLIFY = FALSE keeps the mapply() result a list. With the default, rows
# that all yield the same number of points are collapsed into a matrix, which
# unlist() returns unchanged and unique() then de-duplicates by row rather
# than by value, leaving repeated time points in the timeline.
#
# NOTE(review): points are anchored at each interval's own `begin`, and the
# timeline is built from every row in the sheet (before any coder filtering),
# so the result is not an evenly spaced grid -- confirm this is intended.
timeline <- sort(unique(unlist(mapply(
  function(start, end) {
    if (end - 0.01 > start) seq(start, end - 0.01, by = 0.5) else NULL
  },
  df$begin, df$end,
  SIMPLIFY = FALSE
))))
# Return, for each time point in `timeline`, the label of the first row of
# `df` whose interval satisfies begin <= t < end, or NA when no row covers it.
#
# Note: this two-argument version is replaced by a three-argument definition
# of the same name later in the script, before any call is made.
label_at_time <- function(df, timeline) {
  pick_label <- function(t) {
    covers_t <- df$begin <= t & df$end > t
    hits <- df[covers_t, ]
    if (nrow(hits) == 0) {
      return(NA)
    }
    hits$label[1]
  }
  sapply(timeline, pick_label)
}
# Tag every row with its coder, taken from the coder name embedded in
# `filename`; rows matching neither name get NA. The first matching name wins.
df <- df %>%
  mutate(
    Coder = case_when(
      grepl("박여은", filename) ~ "박여은",
      grepl("한영은", filename) ~ "한영은",
      TRUE ~ NA_character_
    )
  )

# Keep only the rows that belong to one of the two coders.
df <- df %>%
  filter(!is.na(Coder))
# Look up one coder's label at each sample time.
#
# Args:
#   df:         annotation table with columns `Coder`, `begin`, `end`, `label`.
#   timeline:   numeric vector of time points to sample.
#   coder_name: value of `Coder` whose rows are used.
#
# Returns a vector with one element per entry of `timeline`: the label of the
# first row (in row order) of that coder with begin <= t < end, or NA when no
# row covers t. The result always has the type of the `label` column, also for
# an empty timeline or when nothing matches.
label_at_time <- function(df, timeline, coder_name) {
  # Pull the coder's columns out once instead of re-filtering the table for
  # every time point. which() drops rows where the comparison is NA, matching
  # what dplyr::filter() did.
  rows <- which(df$Coder == coder_name)
  begins <- df$begin[rows]
  ends <- df$end[rows]
  labels <- df$label[rows]

  # Index of the first covering interval per time point (NA if none).
  first_hit <- vapply(timeline, function(t) {
    hit <- which(begins <= t & ends > t)
    if (length(hit) > 0) hit[1] else NA_integer_
  }, integer(1))

  # Indexing with NA yields a typed NA, so uncovered points come back as NA.
  labels[first_hit]
}
# Label of each coder at every sample time (NA where the coder has no
# covering interval).
박여은_labels <- label_at_time(df, timeline, coder_name = "박여은")
한영은_labels <- label_at_time(df, timeline, coder_name = "한영은")

# Positions at which both coders have a label.
valid_idx <- which(!(is.na(박여은_labels) | is.na(한영은_labels)))

# Paired-label table: one row per time point rated by both coders.
label_df <- data.frame(
  박여은 = 박여은_labels[valid_idx],
  한영은 = 한영은_labels[valid_idx]
)

# Compute Cohen's kappa on the paired labels.
library(irr)
library(psych)
result <- cohen.kappa(label_df)
print(result[["kappa"]])
## [1] 0.6596518
## Coder pair: "윤현정" and "한영은"
# Load the annotation sheet for this coder pair.
df <- read_excel("통합 문서3.xlsx")

# Build the sorted, de-duplicated set of sample times. For every row whose
# interval is longer than 0.01, take points from `begin` in steps of 0.5 up to
# (end - 0.01); shorter intervals contribute nothing.
#
# SIMPLIFY = FALSE keeps the mapply() result a list. With the default, rows
# that all yield the same number of points are collapsed into a matrix, which
# unlist() returns unchanged and unique() then de-duplicates by row rather
# than by value, leaving repeated time points in the timeline.
#
# NOTE(review): points are anchored at each interval's own `begin`, and the
# timeline is built from every row in the sheet (before any coder filtering),
# so the result is not an evenly spaced grid -- confirm this is intended.
timeline <- sort(unique(unlist(mapply(
  function(start, end) {
    if (end - 0.01 > start) seq(start, end - 0.01, by = 0.5) else NULL
  },
  df$begin, df$end,
  SIMPLIFY = FALSE
))))
# Return, for each time point in `timeline`, the label of the first row of
# `df` whose interval satisfies begin <= t < end, or NA when no row covers it.
#
# Note: this two-argument version is replaced by a three-argument definition
# of the same name later in the script, before any call is made.
label_at_time <- function(df, timeline) {
  pick_label <- function(t) {
    covers_t <- df$begin <= t & df$end > t
    hits <- df[covers_t, ]
    if (nrow(hits) == 0) {
      return(NA)
    }
    hits$label[1]
  }
  sapply(timeline, pick_label)
}
# Tag every row with its coder, taken from the coder name embedded in
# `filename`; rows matching neither name get NA. The first matching name wins.
df <- df %>%
  mutate(
    Coder = case_when(
      grepl("윤현정", filename) ~ "윤현정",
      grepl("한영은", filename) ~ "한영은",
      TRUE ~ NA_character_
    )
  )

# Keep only the rows that belong to one of the two coders.
df <- df %>%
  filter(!is.na(Coder))
# Look up one coder's label at each sample time.
#
# Args:
#   df:         annotation table with columns `Coder`, `begin`, `end`, `label`.
#   timeline:   numeric vector of time points to sample.
#   coder_name: value of `Coder` whose rows are used.
#
# Returns a vector with one element per entry of `timeline`: the label of the
# first row (in row order) of that coder with begin <= t < end, or NA when no
# row covers t. The result always has the type of the `label` column, also for
# an empty timeline or when nothing matches.
label_at_time <- function(df, timeline, coder_name) {
  # Pull the coder's columns out once instead of re-filtering the table for
  # every time point. which() drops rows where the comparison is NA, matching
  # what dplyr::filter() did.
  rows <- which(df$Coder == coder_name)
  begins <- df$begin[rows]
  ends <- df$end[rows]
  labels <- df$label[rows]

  # Index of the first covering interval per time point (NA if none).
  first_hit <- vapply(timeline, function(t) {
    hit <- which(begins <= t & ends > t)
    if (length(hit) > 0) hit[1] else NA_integer_
  }, integer(1))

  # Indexing with NA yields a typed NA, so uncovered points come back as NA.
  labels[first_hit]
}
# Label of each coder at every sample time (NA where the coder has no
# covering interval).
윤현정_labels <- label_at_time(df, timeline, coder_name = "윤현정")
한영은_labels <- label_at_time(df, timeline, coder_name = "한영은")

# Positions at which both coders have a label.
valid_idx <- which(!(is.na(윤현정_labels) | is.na(한영은_labels)))

# Paired-label table: one row per time point rated by both coders.
label_df <- data.frame(
  윤현정 = 윤현정_labels[valid_idx],
  한영은 = 한영은_labels[valid_idx]
)

# Compute Cohen's kappa on the paired labels.
library(irr)
library(psych)
result <- cohen.kappa(label_df)
print(result[["kappa"]])
## [1] 0.6718662