knitr::opts_chunk$set(warning = FALSE)
knitr::opts_knit$set(root.dir = "C:/Users/nasta/Dropbox/____Nordface_POst_doc/FULL_Texts_Scaling/annotations/Annotations_TRILOGUES/Output_file/Round2")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(foreign)
library(ggplot2)
library(stringr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.7 ✔ purrr 0.3.4
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(readr)
library(purrr)
library(tidycomm)
## Warning: package 'tidycomm' was built under R version 4.2.1
library(readxl)
require(lpSolve)
## Loading required package: lpSolve
stas_main=read_excel("C:/Users/nasta/Dropbox/____Nordface_POst_doc/FULL_Texts_Scaling/annotations/Annotations_TRILOGUES/Output_file/Round2/Round2_Stas_completed_10082022.xlsx")
table(stas_main$stas_eu_com)
##
## 0 1 888 999
## 66 125 351 215
table(stas_main$stas_eu_cn)
##
## 0 1 888 999
## 98 138 268 253
table(stas_main$stas_com_cn_change)
##
## 1 2 3 4 888 999
## 8 34 138 8 355 214
#load second coded Stas' sample
stas_checkd=read_excel("C:/Users/nasta/Dropbox/____Nordface_POst_doc/FULL_Texts_Scaling/annotations/Annotations_TRILOGUES/Output_file/Round2/R2_Stas_secondcoded_annotations_10082022.xlsx")
table(stas_checkd$sasha_eu_com)
##
## 0 1 888 999
## 3 36 51 60
table(stas_checkd$sasha_eu_cn)
##
## 0 1 888 999
## 7 43 30 70
table(stas_checkd$sasha_com_cn_change)
##
## 1 2 3 4 888 999
## 2 4 30 2 52 60
#load Sasha's main file
sasha_main=read_excel("C:/Users/nasta/Dropbox/____Nordface_POst_doc/FULL_Texts_Scaling/annotations/Annotations_TRILOGUES/Output_file/Round2/Round2_Sasha_completed_10082022.xlsx")
table(sasha_main$sasha_eu_com)
##
## 0 1 888 999
## 49 105 311 272
table(sasha_main$sasha_eu_cn)
##
## 0 1 888 999
## 68 113 216 340
table(sasha_main$sasha_com_cn_change)
##
## 1 2 3 4 5 888 999
## 8 15 116 2 8 313 275
#load second coded Sasha's file
sasha_checkd= read_excel("C:/Users/nasta/Dropbox/____Nordface_POst_doc/FULL_Texts_Scaling/annotations/Annotations_TRILOGUES/Output_file/Round2/R2_secondcode_Sasha_05082022.xlsx")
## New names:
## • `` -> `...19`
table(sasha_checkd$stas_eu_com)
##
## 0 1 888 999
## 10 26 112 40
table(sasha_checkd$stas_eu_cn)
##
## 0 1 888 999
## 13 31 96 48
table(sasha_checkd$stas_com_cn_change)
##
## 1 2 3 4 888 999
## 1 6 28 1 112 40
sasha_checkd$`...19`=NULL
# merge all stats related files
stas_all=left_join(stas_main, stas_checkd)
## Joining, by = c("index", "cod", "proposal_year", "index2", "identifier",
## "CAP_str", "article", "paragraph", "article_names", "COM", "EP", "Council",
## "Comments")
rm(stas_checkd, stas_main)
stas_all=stas_all %>% mutate(sasha_eu_com=ifelse(identifier=='2011/0299(COD)_93', 999, sasha_eu_com), sasha_eu_cn=ifelse(identifier=='2011/0299(COD)_93', 999, sasha_eu_cn), sasha_com_cn_change=ifelse(identifier=='2011/0299(COD)_93', 999, sasha_com_cn_change))
#merge all sasha's files
sasha_all=left_join(sasha_main, sasha_checkd)
## Joining, by = c("index", "cod", "proposal_year", "index2", "identifier",
## "CAP_str", "COM", "EP", "Council", "Comments", "article", "paragraph",
## "article_names", "STRTHR", "full_cell_deleted")
rm(sasha_checkd, sasha_main)
sasha_all$...19=NULL
sasha_all$STRTHR=NULL
sasha_all$full_cell_deleted=NULL
sasha_all$article=as.character(sasha_all$article)
#cross merge
round_2_doubecoded=bind_rows(stas_all, sasha_all)
#select the double coded
double_coded=round_2_doubecoded %>% filter(!is.na(sasha_eu_com)) %>% filter(!is.na(stas_eu_com)) %>%
mutate(stas_eu_com=ifelse(stas_eu_com=="XXX", 888, stas_eu_com ),
sasha_eu_com= ifelse(sasha_eu_com=="xxx", 888, sasha_eu_com ), sasha_eu_com= ifelse(sasha_eu_com=="XXX", 888, sasha_eu_com ),
stas_eu_cn= ifelse(stas_eu_cn=="XXX", 888, stas_eu_cn ),
stas_com_cn_change= ifelse(stas_com_cn_change=="XXX", 888, stas_com_cn_change),
sasha_eu_cn= ifelse( sasha_eu_cn=="xxx", 888, sasha_eu_cn ),
sasha_eu_cn= ifelse( sasha_eu_cn=="x", 888, sasha_eu_cn ), sasha_eu_cn= ifelse( sasha_eu_cn=="XXX", 888, sasha_eu_cn ),
sasha_com_cn_change= ifelse(sasha_com_cn_change=="xxx", 888, sasha_com_cn_change ), sasha_com_cn_change= ifelse(sasha_com_cn_change=="XXX", 888, sasha_com_cn_change ))
double_coded=double_coded%>%
mutate_at(c( 'stas_com_cn_change', 'sasha_com_cn_change', 'stas_eu_com', 'sasha_eu_com', 'stas_eu_cn', 'sasha_eu_cn'), as.numeric)
write.csv(double_coded, file="C:/Users/nasta/Dropbox/____Nordface_POst_doc/FULL_Texts_Scaling/annotations/Annotations_TRILOGUES/Output_file/Round2/Round2_disagreements.csv", na="", row.names = FALSE)
double_coded=double_coded %>% mutate(com_disagr=ifelse(stas_eu_com==sasha_eu_com, 0, 1),
cn_disagr=ifelse(stas_eu_cn==sasha_eu_cn, 0, 1),
change_disagree=ifelse(stas_com_cn_change==sasha_com_cn_change, 0, 1 ))
#transform into long format as needed
#com
com_long=double_coded %>%select(-c(stas_eu_cn, stas_com_cn_change, sasha_eu_cn, sasha_com_cn_change)) %>%
pivot_longer(cols= ends_with("_eu_com"), names_to = 'coder_id', values_to = 'eu_com' )
#cn
council_long=double_coded %>%select(-c(stas_com_cn_change, sasha_com_cn_change, stas_eu_com, sasha_eu_com)) %>%
pivot_longer(cols= ends_with("_eu_cn"), names_to = 'coder_id', values_to = 'eu_council' )
#change
change_long=double_coded %>%select(-c(stas_eu_cn, sasha_eu_cn, stas_eu_com, sasha_eu_com)) %>%
pivot_longer(cols= ends_with("_change"), names_to = 'coder_id', values_to = 'change' )
#Commission
com_long %>%
test_icr(identifier, coder_id, eu_com, )
## # A tibble: 1 × 8
## Variable n_Units n_Coders n_Categories Level Agreement Holstis_CR
## <chr> <int> <int> <int> <chr> <dbl> <dbl>
## 1 eu_com 288 2 4 nominal 0.844 0.844
## # … with 1 more variable: Krippendorffs_Alpha <dbl>
#Council
council_long %>%
test_icr(identifier, coder_id, eu_council )
## # A tibble: 1 × 8
## Variable n_Units n_Coders n_Categories Level Agreement Holstis_CR
## <chr> <int> <int> <int> <chr> <dbl> <dbl>
## 1 eu_council 288 2 4 nominal 0.792 0.792
## # … with 1 more variable: Krippendorffs_Alpha <dbl>
#change
change_long%>%test_icr(identifier, coder_id, change , levels = c(change = 'interval'))
## # A tibble: 1 × 8
## Variable n_Units n_Coders n_Categories Level Agreement Holstis_CR
## <chr> <int> <int> <int> <chr> <dbl> <dbl>
## 1 change 288 2 6 interval 0.816 0.816
## # … with 1 more variable: Krippendorffs_Alpha <dbl>
The reconciled version of the double coding and reliabiltiy checks
#load the reconciled version
reconciled=read_excel("Round2_disagreements_reconciled_11082022.xlsx")
## New names:
## • `` -> `...23`
## • `` -> `...25`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...32`
recon=c(reconciled$identifier)
double_c=c(double_coded$identifier)
setdiff(recon, double_c)
## character(0)
rm(double_c)
# remove useless columns
colnames(reconciled)
## [1] "index" "cod" "proposal_year"
## [4] "index2" "identifier" "CAP_str"
## [7] "article" "paragraph" "article_names"
## [10] "COM" "EP" "Council"
## [13] "Comments" "stas_eu_com" "stas_eu_cn"
## [16] "stas_com_cn_change" "sasha_eu_com" "sasha_eu_cn"
## [19] "sasha_com_cn_change" "com_disagr" "cn_disagr"
## [22] "change_disagree" "...23" "Relevant"
## [25] "...25" "new code_ea" "...27"
## [28] "...28" "...29" "...30"
## [31] "...31" "...32"
reconciled=reconciled %>% select(-c(20:32)) %>%
mutate_at('article', as.character)
#exclude the observations that are double coded from the main list
round_2_doubecoded=round_2_doubecoded %>%
filter(!identifier %in% recon )
#replace xxx into 888 in the main file
round_2_doubecoded=round_2_doubecoded %>%
mutate(stas_eu_com=ifelse(stas_eu_com=="XXX", 888, stas_eu_com ),
sasha_eu_com= ifelse(sasha_eu_com=="xxx", 888, sasha_eu_com ), sasha_eu_com= ifelse(sasha_eu_com=="XXX", 888, sasha_eu_com ),
stas_eu_cn= ifelse(stas_eu_cn=="XXX", 888, stas_eu_cn ),
stas_com_cn_change= ifelse(stas_com_cn_change=="XXX", 888, stas_com_cn_change),
sasha_eu_cn= ifelse( sasha_eu_cn=="xxx", 888, sasha_eu_cn ),
sasha_eu_cn= ifelse( sasha_eu_cn=="x", 888, sasha_eu_cn ), sasha_eu_cn= ifelse( sasha_eu_cn=="XXX", 888, sasha_eu_cn ),
sasha_com_cn_change= ifelse(sasha_com_cn_change=="xxx", 888, sasha_com_cn_change ), sasha_com_cn_change= ifelse(sasha_com_cn_change=="XXX", 888, sasha_com_cn_change )) %>%
mutate_at(c( 'stas_com_cn_change', 'sasha_com_cn_change', 'stas_eu_com', 'sasha_eu_com', 'stas_eu_cn', 'sasha_eu_cn'), as.numeric)
#add the recolnciled cases in
round_2_final=bind_rows(round_2_doubecoded, reconciled)
#rename the variables
round_2_final=round_2_final %>%
rename("commission1"="stas_eu_com",
"council1"= "stas_eu_cn" ,
" change1"="stas_com_cn_change" ,
"commission2"="sasha_eu_com" ,
"council2"= "sasha_eu_cn" ,
"change2"="sasha_com_cn_change" )
#file to send to Fabian
write.csv(round_2_final, file='C:/Users/nasta/Dropbox/____Nordface_POst_doc/FULL_Texts_Scaling/annotations/Annotations_TRILOGUES/Output_file/Round2/Round2_annotations_complete_11082022.csv', na='', row.names = FALSE)
rm(change_long, com_long, council_long, recon, double_coded, sasha_all, stas_all)
rm(round_2_doubecoded)
#com
com_long=reconciled %>%select(-c(stas_eu_cn, stas_com_cn_change, sasha_eu_cn, sasha_com_cn_change)) %>%
pivot_longer(cols= ends_with("_eu_com"), names_to = 'coder_id', values_to = 'eu_com' )
#cn
council_long=reconciled %>%select(-c(stas_com_cn_change, sasha_com_cn_change, stas_eu_com, sasha_eu_com)) %>%
pivot_longer(cols= ends_with("_eu_cn"), names_to = 'coder_id', values_to = 'eu_council' )
#change
change_long=reconciled %>%select(-c(stas_eu_cn, sasha_eu_cn, stas_eu_com, sasha_eu_com)) %>%
pivot_longer(cols= ends_with("_change"), names_to = 'coder_id', values_to = 'change' )
com_long %>%
test_icr(identifier, coder_id, eu_com, )
## # A tibble: 1 × 8
## Variable n_Units n_Coders n_Categories Level Agreement Holstis_CR
## <chr> <int> <int> <int> <chr> <dbl> <dbl>
## 1 eu_com 288 2 4 nominal 0.906 0.906
## # … with 1 more variable: Krippendorffs_Alpha <dbl>
council_long %>%
test_icr(identifier, coder_id, eu_council )
## # A tibble: 1 × 8
## Variable n_Units n_Coders n_Categories Level Agreement Holstis_CR
## <chr> <int> <int> <int> <chr> <dbl> <dbl>
## 1 eu_council 288 2 4 nominal 0.878 0.878
## # … with 1 more variable: Krippendorffs_Alpha <dbl>
change_long%>%test_icr(identifier, coder_id, change , levels = c(change = 'interval'))
## # A tibble: 1 × 8
## Variable n_Units n_Coders n_Categories Level Agreement Holstis_CR
## <chr> <int> <int> <int> <chr> <dbl> <dbl>
## 1 change 288 2 6 interval 0.896 0.896
## # … with 1 more variable: Krippendorffs_Alpha <dbl>