library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: weights
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: assertthat
##
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
##
## has_name
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## Loading required package: psych
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## Loading required package: metafor
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading the 'metafor' package (version 3.0-2). For an
## introduction to the package please type: help(metafor)
## Loading required package: rlang
##
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
##
## set_names
## The following object is masked from 'package:assertthat':
##
## has_name
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
## splice
##
## Attaching package: 'kirkegaard'
## The following object is masked from 'package:rlang':
##
## is_logical
## The following object is masked from 'package:psych':
##
## rescale
## The following object is masked from 'package:assertthat':
##
## are_equal
## The following objects are masked from 'package:purrr':
##
## is_logical, is_numeric
## The following object is masked from 'package:base':
##
## +
load_packages(
readxl,
rvest,
patchwork,
correlation,
rms
)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## Attaching package: 'rms'
## The following object is masked from 'package:metafor':
##
## vif
theme_set(theme_bw())
We need to merge across years of data, and unfortunately, the format is inconsistent. SAD.
d1 = read_excel("data/data.xlsx", sheet = 1)
d2 = read_excel("data/data.xlsx", sheet = 2)
d3 = read_excel("data/data.xlsx", sheet = 3)
d4 = read_excel("data/data.xlsx", sheet = 4)
#merge into long format
d = bind_rows(
d1 %>% select(Journal, ERR, year),
d2 %>% select(-Rank) %>% pivot_longer(cols = -Journal, names_to = "year", values_to = "ERR") %>% mutate(year = as.numeric(year), ERR = as.numeric(ERR)),
d3 %>% select(Journal, `2019`) %>% set_colnames(c("Journal", "ERR")) %>% mutate(year = 2019),
d4 %>% select(Journal, Observed2020) %>% set_colnames(c("Journal", "ERR")) %>% mutate(year = 2020, ERR = str_extract(ERR, "\\d+") %>% as.numeric())
)
#fix some duplicates
d$Journal %>% unique() %>% length()
## [1] 128
d$Journal %<>% str_replace("&", "and")
d$Journal %<>% mapvalues(from = c("Cognitive Behavioral Therapy", "Aggressive Behaviours"), to = c("Cognitive Behaviour Therapy", "Aggressive Behavior"))
d$Journal %>% unique() %>% length()
## [1] 125
#impact factor data
if (F) {
#scrape impact factors from scimajor
d5 = read_excel("data/data.xlsx", sheet = 5) %>%
filter(!duplicated(Journal), !is.na(Journal))
d5_scraped = d5$url %>%
na.omit() %>%
unique() %>%
map_df(function(x) {
#if NA, return nothing
if (is.na(x)) return(tibble())
# browser()
#if not, download page, get the sjr value for 2020
x_source = read_html(x)
#find the data for sjr
#split by newline
x_source %>%
html_text() %>%
str_split(pattern = "\\r\\n", simplify = T) ->
x_lines
#find the sjr values
x_lines[str_detect(x_lines, "var datasjr = ")] %>%
str_match("2020;([\\d\\.]+)") %>%
.[, 2] ->
sjr
#cite per document
x_lines[str_detect(x_lines, "var datacitesperdoc = ")] %>%
str_match("2020;([\\d\\.]+)") %>%
.[, 2] ->
cpd
tibble(
url = x,
SJR = sjr,
CPD = cpd
)
})
#merge bach using URL as key
left_join(d5,
d5_scraped %>% filter(!duplicated(url)),
by = c("url" = "url")
) ->
d5
#numerical
d5$SJR %<>% as.numeric()
d5$CPD %<>% as.numeric()
d5 %>% write_rds("data/impact_factors.rds")
}
d5 = read_rds("data/impact_factors.rds")
#other study
d6 = read_csv("data/data_final.csv")
## Rows: 540 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): JRNL, SUBJ
## dbl (6): YEAR, EST, NPAPR, NPVAL, JIF, OA
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#plot over time
d %>%
ggplot(aes(year, ERR, group = Journal)) +
geom_line() +
geom_smooth(aes(group = NULL)) +
ggtitle("Replicability index of journals 2010-2021")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#ICC, is this a stable metric at all?
d %>%
pivot_wider(names_from = "Journal", values_from = "ERR") %>%
select(-year) %>%
t() %>%
psych::ICC()
## Call: psych::ICC(x = .)
##
## Intraclass correlation coefficients
## type ICC F df1 df2 p lower bound upper bound
## Single_raters_absolute ICC1 0.27 5.3 124 1375 3.4e-57 0.22 0.32
## Single_random_raters ICC2 0.27 5.8 124 1364 7.0e-64 0.22 0.33
## Single_fixed_raters ICC3 0.29 5.8 124 1364 7.0e-64 0.24 0.34
## Average_raters_absolute ICC1k 0.81 5.3 124 1375 3.4e-57 0.77 0.85
## Average_random_raters ICC2k 0.82 5.8 124 1364 7.0e-64 0.77 0.85
## Average_fixed_raters ICC3k 0.83 5.8 124 1364 7.0e-64 0.79 0.86
##
## Number of subjects = 125 Number of Judges = 12
## See the help file for a discussion of the other 4 McGraw and Wong estimates,
#not so bad
#compare other study
d6 %>%
select(YEAR, JRNL, EST) %>%
pivot_wider(names_from = "JRNL", values_from = "EST") %>%
select(-YEAR) %>%
t() %>%
psych::ICC()
## Call: psych::ICC(x = .)
##
## Intraclass correlation coefficients
## type ICC F df1 df2 p lower bound upper bound
## Single_raters_absolute ICC1 0.20 2.2 107 432 6.8e-09 0.13 0.28
## Single_random_raters ICC2 0.20 2.2 107 428 5.4e-09 0.13 0.28
## Single_fixed_raters ICC3 0.20 2.2 107 428 5.4e-09 0.13 0.28
## Average_raters_absolute ICC1k 0.55 2.2 107 432 6.8e-09 0.43 0.66
## Average_random_raters ICC2k 0.55 2.2 107 428 5.4e-09 0.43 0.66
## Average_fixed_raters ICC3k 0.55 2.2 107 428 5.4e-09 0.43 0.66
##
## Number of subjects = 108 Number of Judges = 5
## See the help file for a discussion of the other 4 McGraw and Wong estimates,
#compute the means by journal
journals = describeBy(d$ERR, d$Journal, mat = T) %>% as_tibble()
#plot the ranking
journals %>%
filter(n >= 3) %>%
mutate(
journal = fct_reorder(group1, mean)
) %>%
ggplot(aes(mean, journal)) +
geom_bar(stat = "identity") +
geom_errorbarh(aes(xmin = mean - 2 * se, xmax = mean + 2*se))
#compare with the impact factor from scimajr
left_join(
journals,
d5,
by = c("group1" = "Journal")
) ->
journals
#cors
journals %>%
select(mean, SJR, CPD) %>%
correlation::correlation(p_adjust = "none")
#replicability by journal replicability
GG_scatter(journals, "SJR", "mean", case_names = "group1") +
scale_x_continuous("Schimago journal citation index", limits = c(-1, 10)) +
scale_y_continuous("Replicability index (Schimmack)")
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/SJR.png")
## `geom_smooth()` using formula 'y ~ x'
GG_scatter(journals, "CPD", "mean", case_names = "group1") +
scale_x_continuous("Citations per document (Schimago)", limits = c(-1, 12)) +
scale_y_continuous("Replicability index (Schimmack)")
## `geom_smooth()` using formula 'y ~ x'
GG_save("figs/CPD.png")
## `geom_smooth()` using formula 'y ~ x'
#without journal of consumer research
journals %>%
filter(group1 != "Journal of Consumer Research") %>%
select(mean, SJR, CPD) %>%
correlation::correlation(p_adjust = "none")
#ranks
journals %>%
select(mean, SJR, CPD) %>%
correlation::correlation(method = "spearman", p_adjust = "none")
#robust
journals %>%
select(mean, SJR, CPD) %>%
correlation::correlation(method = "spearman", p_adjust = "none")
write_sessioninfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rms_6.2-0 SparseM_1.81 correlation_0.7.1
## [4] patchwork_1.1.1 rvest_1.0.2 readxl_1.3.1
## [7] kirkegaard_2021-12-14 rlang_0.4.12 metafor_3.0-2
## [10] Matrix_1.4-0 psych_2.1.9 magrittr_2.0.1
## [13] assertthat_0.2.1 weights_1.0.4 Hmisc_4.6-0
## [16] Formula_1.2-4 survival_3.2-13 lattice_0.20-45
## [19] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
## [22] purrr_0.3.4 readr_2.1.1 tidyr_1.1.4
## [25] tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] backports_1.4.1 plyr_1.8.6 splines_4.1.2
## [4] listenv_0.8.0 TH.data_1.1-0 digest_0.6.29
## [7] foreach_1.5.1 htmltools_0.5.2 gdata_2.18.0
## [10] fansi_0.5.0 checkmate_2.0.0 cluster_2.1.2
## [13] tzdb_0.2.0 recipes_0.1.17 globals_0.14.0
## [16] modelr_0.1.8 gower_0.2.2 matrixStats_0.61.0
## [19] vroom_1.5.7 sandwich_3.0-1 jpeg_0.1-9
## [22] colorspace_2.0-2 haven_2.4.3 xfun_0.29
## [25] crayon_1.4.2 jsonlite_1.7.2 lme4_1.1-27.1
## [28] zoo_1.8-9 iterators_1.0.13 glue_1.6.0
## [31] gtable_0.3.0 ipred_0.9-12 MatrixModels_0.5-0
## [34] future.apply_1.8.1 scales_1.1.1 mvtnorm_1.1-3
## [37] DBI_1.1.2 Rcpp_1.0.7 htmlTable_2.4.0
## [40] tmvnsim_1.0-2 bit_4.0.4 foreign_0.8-81
## [43] stats4_4.1.2 lava_1.6.10 prodlim_2019.11.13
## [46] datawizard_0.2.2 htmlwidgets_1.5.4 httr_1.4.2
## [49] RColorBrewer_1.1-2 ellipsis_0.3.2 mice_3.14.0
## [52] farver_2.1.0 pkgconfig_2.0.3 nnet_7.3-16
## [55] sass_0.4.0 dbplyr_2.1.1 utf8_1.2.2
## [58] caret_6.0-90 labeling_0.4.2 tidyselect_1.1.1
## [61] reshape2_1.4.4 multilevel_2.6 munsell_0.5.0
## [64] cellranger_1.1.0 tools_4.1.2 cli_3.1.0
## [67] generics_0.1.1 broom_0.7.11 mathjaxr_1.4-0
## [70] evaluate_0.14 fastmap_1.1.0 yaml_2.2.1
## [73] bit64_4.0.5 ModelMetrics_1.2.2.2 knitr_1.37
## [76] fs_1.5.2 future_1.23.0 nlme_3.1-152
## [79] quantreg_5.86 xml2_1.3.3 psychometric_2.2
## [82] compiler_4.1.2 rstudioapi_0.13 png_0.1-7
## [85] reprex_2.0.1 bslib_0.3.1 stringi_1.7.6
## [88] highr_0.9 nloptr_1.2.2.3 vctrs_0.3.8
## [91] pillar_1.6.4 lifecycle_1.0.1 jquerylib_0.1.4
## [94] data.table_1.14.2 insight_0.14.5 conquer_1.2.1
## [97] R6_2.5.1 latticeExtra_0.6-29 gridExtra_2.3
## [100] parallelly_1.30.0 codetools_0.2-18 polspline_1.1.19
## [103] boot_1.3-28 MASS_7.3-54 gtools_3.9.2
## [106] withr_2.4.3 mnormt_2.0.2 multcomp_1.4-18
## [109] mgcv_1.8-38 bayestestR_0.11.5 parallel_4.1.2
## [112] hms_1.1.1 grid_4.1.2 rpart_4.1-15
## [115] timeDate_3043.102 class_7.3-19 minqa_1.2.4
## [118] rmarkdown_2.11 pROC_1.18.0 lubridate_1.8.0
## [121] base64enc_0.1-3
#data
journals %>% select(group1, n, mean, sd, se, SJR, CPD, url)