#install.packages("devtools")
#devtools::install_github("jmgirard/agreement")
library(agreement)
set.seed(2021)

Simulate some data of the proposed type (i.e., four raters assigning 100 objects to 4 categories \(\{\text{NA}, 0, 1, 2\}\)). We will simulate it in wide format first, where each object is a row and each rater is a column.
categories <- c("NA", "0", "1", "2")
wide_data <-
  data.frame(
    r1 = sample(categories, size = 100, replace = TRUE),
    r2 = sample(categories, size = 100, replace = TRUE),
    r3 = sample(categories, size = 100, replace = TRUE),
    r4 = sample(categories, size = 100, replace = TRUE)
  )
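Note that "NA" here is a literal category label (a character string), not a true missing value, so none of the simulated ratings are treated as missing. A quick base-R check, just for illustration:

# "NA" is stored as the string "NA", not as a missing value
anyNA(wide_data)
#> [1] FALSE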
head(wide_data)
#> r1 r2 r3 r4
#> 1 1 0 0 1
#> 2 0 2 NA NA
#> 3 0 0 NA 1
#> 4 0 0 NA NA
#> 5 1 2 0 2
#> 6 2 1 NA 2

We can now reshape the data to long format in order to give the {agreement} package what it expects.
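For reference, a roughly equivalent reshape could be done by hand with dplyr and tidyr. This is only a sketch assuming the Object/Rater/Score layout shown below; wide_to_long() is the package's own helper for this step and its exact output may differ in details.

library(dplyr)
library(tidyr)

# Hand-rolled equivalent: add an object identifier, then pivot raters into rows
wide_data %>%
  mutate(Object = row_number()) %>%
  pivot_longer(cols = r1:r4, names_to = "Rater", values_to = "Score")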
long_data <- agreement::wide_to_long(wide_data)
long_data
#> # A tibble: 400 x 3
#> Object Rater Score
#> <int> <chr> <chr>
#> 1 1 r1 1
#> 2 1 r2 0
#> 3 1 r3 0
#> 4 1 r4 1
#> 5 2 r1 0
#> 6 2 r2 2
#> 7 2 r3 NA
#> 8 2 r4 NA
#> 9 3 r1 0
#> 10 3 r2 0
#> # ... with 390 more rows

We can now create our custom weight matrix with identity weights for the NA category and linear weights for the 0, 1, and 2 categories.
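These linear weights follow the usual formula \(w_{ij} = 1 - |i - j| / (\max - \min)\), which here is \(1 - |i - j| / 2\) for the numeric categories. As a quick check (plain base R, not a package call), the \(3 \times 3\) block for categories 0, 1, and 2 can be generated directly:

# Linear agreement weights for the numeric categories 0, 1, 2
num <- 0:2
1 - abs(outer(num, num, "-")) / 2
#>      [,1] [,2] [,3]
#> [1,]  1.0  0.5  0.0
#> [2,]  0.5  1.0  0.5
#> [3,]  0.0  0.5  1.0

Embedding this block alongside an identity weight for the NA category gives the full matrix: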
w <- matrix(
  data = c(1.0, 0.0, 0.0, 0.0,
           0.0, 1.0, 0.5, 0.0,
           0.0, 0.5, 1.0, 0.5,
           0.0, 0.0, 0.5, 1.0),
  nrow = 4,
  ncol = 4
)
dimnames(w) <- list(categories, categories)
w
#> NA 0 1 2
#> NA 1 0.0 0.0 0.0
#> 0 0 1.0 0.5 0.0
#> 1 0 0.5 1.0 0.5
#> 2 0 0.0 0.5 1.0

Now we can calculate all chance-adjusted indexes of categorical agreement using these custom weights. For now, we can turn off bootstrapping by setting bootstrap = 0.
agreement::cat_adjusted(
  long_data,
  categories = categories,
  weighting = "custom",
  custom_weights = w,
  bootstrap = 0
)
#>
#> Call:
#> agreement::cat_adjusted(.data = long_data, categories = categories,
#> weighting = "custom", bootstrap = 0, custom_weights = w)
#>
#> Chance-Adjusted Categorical Agreement
#>
#> Observed Expected Adjusted
#> custom alpha 0.367 0.374 -0.012
#> custom gamma 0.365 0.375 -0.016
#> custom irsq 0.365 0.374 -0.014
#> custom kappa 0.365 0.371 -0.010
#> custom pi 0.365 0.374 -0.014
#> custom s 0.365 0.375 -0.016

Here, we see that observed agreement was around 0.37 and expected chance agreement was around 0.37 to 0.38. Because observed agreement was slightly lower than expected chance agreement, chance-adjusted agreement was essentially zero (slightly negative, around -0.01) for all indexes. Poor agreement makes sense here because the data were randomly generated above.
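Each of these indexes applies the same general adjustment, \((\text{observed} - \text{expected}) / (1 - \text{expected})\), to a different estimate of chance agreement (which is why the Observed column is the same across rows but the Expected and Adjusted columns differ). As a rough check with plain arithmetic, not a package call, the pi row above can be recovered from its rounded observed and expected values:

# Chance adjustment: improvement over chance, relative to the
# maximum possible improvement over chance
round((0.365 - 0.374) / (1 - 0.374), 3)
#> [1] -0.014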
Compare this with using identity weights (treating the four categories as nominal):
agreement::calc_weights("identity", categories)
#> NA 0 1 2
#> NA 1 0 0 0
#> 0 0 1 0 0
#> 1 0 0 1 0
#> 2 0 0 0 1

agreement::cat_adjusted(
  long_data,
  categories = categories,
  weighting = "identity",
  bootstrap = 0
)
#>
#> Call:
#> agreement::cat_adjusted(.data = long_data, categories = categories,
#> weighting = "identity", bootstrap = 0)
#>
#> Chance-Adjusted Categorical Agreement
#>
#> Observed Expected Adjusted
#> identity alpha 0.249 0.250 -0.003
#> identity gamma 0.247 0.250 -0.004
#> identity irsq 0.247 0.250 -0.005
#> identity kappa 0.247 0.248 -0.001
#> identity pi 0.247 0.250 -0.005
#> identity s 0.247 0.250 -0.004

The observed and expected agreement are both lower because no partial credit is awarded for coming close on the numerical categories. However, observed and expected agreement remain similar in proportion to one another, so the adjusted indexes are similar to those obtained with the custom weights.
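As a rough sanity check on the identity-weighted observed agreement, we could average the proportion of exactly agreeing rater pairs per object. This simple pairwise formulation may differ slightly from the package's exact computation, so treat it as an approximation:

# Average proportion of agreeing rater pairs across the 100 objects
mean(apply(wide_data, 1, function(x) {
  pairs <- combn(x, 2)           # all 6 rater pairs for this object
  mean(pairs[1, ] == pairs[2, ]) # proportion of pairs that match exactly
}))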
sessionInfo()
#> R version 4.0.5 (2021-03-31)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 19042)
#>
#> Matrix products: default
#>
#> locale:
#> [1] LC_COLLATE=English_United States.1252
#> [2] LC_CTYPE=English_United States.1252
#> [3] LC_MONETARY=English_United States.1252
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.1252
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] agreement_0.0.0.9003
#>
#> loaded via a namespace (and not attached):
#> [1] rstudioapi_0.13 knitr_1.33 magrittr_2.0.1 tidyselect_1.1.1
#> [5] R6_2.5.0 rlang_0.4.11 fansi_0.4.2 dplyr_1.0.6
#> [9] stringr_1.4.0 tools_4.0.5 xfun_0.22 utf8_1.2.1
#> [13] cli_2.5.0 DBI_1.1.1 jquerylib_0.1.4 htmltools_0.5.1.1
#> [17] ellipsis_0.3.2 yaml_2.2.1 digest_0.6.27 assertthat_0.2.1
#> [21] tibble_3.1.1 lifecycle_1.0.0 crayon_1.4.1 purrr_0.3.4
#> [25] tidyr_1.1.3 sass_0.3.1 vctrs_0.3.8 glue_1.4.2
#> [29] evaluate_0.14 rmarkdown_2.8 stringi_1.6.1 compiler_4.0.5
#> [33] bslib_0.2.4 pillar_1.6.0 generics_0.1.0 boot_1.3-27
#> [37] jsonlite_1.7.2 pkgconfig_2.0.3