Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Loading required package: weights
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: assertthat
## 
## Attaching package: 'assertthat'
## The following object is masked from 'package:tibble':
## 
##     has_name
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## Loading required package: psych
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## Loading required package: metafor
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading the 'metafor' package (version 3.0-2). For an
## introduction to the package please type: help(metafor)
## Loading required package: rlang
## 
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
## 
##     set_names
## The following object is masked from 'package:assertthat':
## 
##     has_name
## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
##     splice
## 
## Attaching package: 'kirkegaard'
## The following object is masked from 'package:rlang':
## 
##     is_logical
## The following object is masked from 'package:psych':
## 
##     rescale
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  readxl,
  rvest,
  patchwork,
  correlation,
  rms
)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## Attaching package: 'rms'
## The following object is masked from 'package:metafor':
## 
##     vif
theme_set(theme_bw())

Data

We need to merge across years of data, and unfortunately, the format is inconsistent. SAD.

d1 = read_excel("data/data.xlsx", sheet = 1)
d2 = read_excel("data/data.xlsx", sheet = 2)
d3 = read_excel("data/data.xlsx", sheet = 3)
d4 = read_excel("data/data.xlsx", sheet = 4)

#merge into long format
d = bind_rows(
  d1 %>% select(Journal, ERR, year),
  d2 %>% select(-Rank) %>% pivot_longer(cols = -Journal, names_to = "year", values_to = "ERR") %>% mutate(year = as.numeric(year), ERR = as.numeric(ERR)),
  d3 %>% select(Journal, `2019`) %>% set_colnames(c("Journal", "ERR")) %>% mutate(year = 2019),
  d4 %>% select(Journal, Observed2020) %>% set_colnames(c("Journal", "ERR")) %>% mutate(year = 2020, ERR = str_extract(ERR, "\\d+") %>% as.numeric())
)

#fix some duplicates
d$Journal %>% unique() %>% length()
## [1] 128
d$Journal %<>% str_replace("&", "and")
d$Journal %<>% mapvalues(from = c("Cognitive Behavioral Therapy", "Aggressive Behaviours"), to = c("Cognitive Behaviour Therapy", "Aggressive Behavior"))
d$Journal %>% unique() %>% length()
## [1] 125
#impact factor data
if (F) {
  #scrape impact factors from scimajor
  d5 = read_excel("data/data.xlsx", sheet = 5) %>% 
  filter(!duplicated(Journal), !is.na(Journal))

  d5_scraped = d5$url %>% 
    na.omit() %>% 
    unique() %>% 
    map_df(function(x) {
    #if NA, return nothing
    if (is.na(x)) return(tibble())
    # browser()
    #if not, download page, get the sjr value for 2020
    x_source = read_html(x)
    
    #find the data for sjr
    #split by newline
    x_source %>% 
      html_text() %>% 
      str_split(pattern = "\\r\\n", simplify = T) -> 
      x_lines
    
    #find the sjr values
    x_lines[str_detect(x_lines, "var datasjr = ")] %>% 
      str_match("2020;([\\d\\.]+)") %>% 
      .[, 2] ->
      sjr
    
    #cite per document
    x_lines[str_detect(x_lines, "var datacitesperdoc = ")] %>% 
      str_match("2020;([\\d\\.]+)") %>% 
      .[, 2] ->
      cpd
    
    tibble(
      url = x,
      SJR = sjr,
      CPD = cpd
    )
  })
  
  #merge bach using URL as key
  left_join(d5,
            d5_scraped %>% filter(!duplicated(url)),
            by = c("url" = "url")
            ) -> 
    d5
  
  #numerical
  d5$SJR %<>% as.numeric()
  d5$CPD %<>% as.numeric()
  
  d5 %>% write_rds("data/impact_factors.rds")
}

d5 = read_rds("data/impact_factors.rds")

#other study
d6 = read_csv("data/data_final.csv")
## Rows: 540 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): JRNL, SUBJ
## dbl (6): YEAR, EST, NPAPR, NPVAL, JIF, OA
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Analysis

#plot over time
d %>% 
  ggplot(aes(year, ERR, group = Journal)) +
  geom_line() +
  geom_smooth(aes(group = NULL)) +
  ggtitle("Replicability index of journals 2010-2021")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#ICC, is this a stable metric at all?
d %>% 
  pivot_wider(names_from = "Journal", values_from = "ERR") %>% 
  select(-year) %>% 
  t() %>% 
  psych::ICC()
## Call: psych::ICC(x = .)
## 
## Intraclass correlation coefficients 
##                          type  ICC   F df1  df2       p lower bound upper bound
## Single_raters_absolute   ICC1 0.27 5.3 124 1375 3.4e-57        0.22        0.32
## Single_random_raters     ICC2 0.27 5.8 124 1364 7.0e-64        0.22        0.33
## Single_fixed_raters      ICC3 0.29 5.8 124 1364 7.0e-64        0.24        0.34
## Average_raters_absolute ICC1k 0.81 5.3 124 1375 3.4e-57        0.77        0.85
## Average_random_raters   ICC2k 0.82 5.8 124 1364 7.0e-64        0.77        0.85
## Average_fixed_raters    ICC3k 0.83 5.8 124 1364 7.0e-64        0.79        0.86
## 
##  Number of subjects = 125     Number of Judges =  12
## See the help file for a discussion of the other 4 McGraw and Wong estimates,
#not so bad

#compare other study
d6 %>% 
  select(YEAR, JRNL, EST) %>% 
  pivot_wider(names_from = "JRNL", values_from = "EST") %>% 
  select(-YEAR) %>% 
  t() %>% 
  psych::ICC()
## Call: psych::ICC(x = .)
## 
## Intraclass correlation coefficients 
##                          type  ICC   F df1 df2       p lower bound upper bound
## Single_raters_absolute   ICC1 0.20 2.2 107 432 6.8e-09        0.13        0.28
## Single_random_raters     ICC2 0.20 2.2 107 428 5.4e-09        0.13        0.28
## Single_fixed_raters      ICC3 0.20 2.2 107 428 5.4e-09        0.13        0.28
## Average_raters_absolute ICC1k 0.55 2.2 107 432 6.8e-09        0.43        0.66
## Average_random_raters   ICC2k 0.55 2.2 107 428 5.4e-09        0.43        0.66
## Average_fixed_raters    ICC3k 0.55 2.2 107 428 5.4e-09        0.43        0.66
## 
##  Number of subjects = 108     Number of Judges =  5
## See the help file for a discussion of the other 4 McGraw and Wong estimates,
#compute the means by journal
journals = describeBy(d$ERR, d$Journal, mat = T) %>% as_tibble()

#plot the ranking
journals %>% 
  filter(n >= 3) %>% 
  mutate(
    journal = fct_reorder(group1, mean)
  ) %>% 
  ggplot(aes(mean, journal)) +
  geom_bar(stat = "identity") +
  geom_errorbarh(aes(xmin = mean - 2 * se, xmax = mean + 2*se))

#compare with the impact factor from scimajr
left_join(
  journals,
  d5,
  by = c("group1" = "Journal")
) ->
  journals

#cors
journals %>% 
  select(mean, SJR, CPD) %>% 
  correlation::correlation(p_adjust = "none")
#replicability by journal replicability
GG_scatter(journals, "SJR", "mean", case_names = "group1") +
  scale_x_continuous("Schimago journal citation index", limits = c(-1, 10)) +
  scale_y_continuous("Replicability index (Schimmack)")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/SJR.png")
## `geom_smooth()` using formula 'y ~ x'
GG_scatter(journals, "CPD", "mean", case_names = "group1") +
  scale_x_continuous("Citations per document (Schimago)", limits = c(-1, 12)) +
  scale_y_continuous("Replicability index (Schimmack)")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/CPD.png")
## `geom_smooth()` using formula 'y ~ x'
#without journal of consumer research
journals %>% 
  filter(group1 != "Journal of Consumer Research") %>% 
  select(mean, SJR, CPD) %>% 
  correlation::correlation(p_adjust = "none")
#ranks
journals %>% 
  select(mean, SJR, CPD) %>% 
  correlation::correlation(method = "spearman", p_adjust = "none")
#robust
journals %>% 
  select(mean, SJR, CPD) %>% 
  correlation::correlation(method = "spearman", p_adjust = "none")

Meta

write_sessioninfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rms_6.2-0             SparseM_1.81          correlation_0.7.1    
##  [4] patchwork_1.1.1       rvest_1.0.2           readxl_1.3.1         
##  [7] kirkegaard_2021-12-14 rlang_0.4.12          metafor_3.0-2        
## [10] Matrix_1.4-0          psych_2.1.9           magrittr_2.0.1       
## [13] assertthat_0.2.1      weights_1.0.4         Hmisc_4.6-0          
## [16] Formula_1.2-4         survival_3.2-13       lattice_0.20-45      
## [19] forcats_0.5.1         stringr_1.4.0         dplyr_1.0.7          
## [22] purrr_0.3.4           readr_2.1.1           tidyr_1.1.4          
## [25] tibble_3.1.6          ggplot2_3.3.5         tidyverse_1.3.1      
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1      plyr_1.8.6           splines_4.1.2       
##   [4] listenv_0.8.0        TH.data_1.1-0        digest_0.6.29       
##   [7] foreach_1.5.1        htmltools_0.5.2      gdata_2.18.0        
##  [10] fansi_0.5.0          checkmate_2.0.0      cluster_2.1.2       
##  [13] tzdb_0.2.0           recipes_0.1.17       globals_0.14.0      
##  [16] modelr_0.1.8         gower_0.2.2          matrixStats_0.61.0  
##  [19] vroom_1.5.7          sandwich_3.0-1       jpeg_0.1-9          
##  [22] colorspace_2.0-2     haven_2.4.3          xfun_0.29           
##  [25] crayon_1.4.2         jsonlite_1.7.2       lme4_1.1-27.1       
##  [28] zoo_1.8-9            iterators_1.0.13     glue_1.6.0          
##  [31] gtable_0.3.0         ipred_0.9-12         MatrixModels_0.5-0  
##  [34] future.apply_1.8.1   scales_1.1.1         mvtnorm_1.1-3       
##  [37] DBI_1.1.2            Rcpp_1.0.7           htmlTable_2.4.0     
##  [40] tmvnsim_1.0-2        bit_4.0.4            foreign_0.8-81      
##  [43] stats4_4.1.2         lava_1.6.10          prodlim_2019.11.13  
##  [46] datawizard_0.2.2     htmlwidgets_1.5.4    httr_1.4.2          
##  [49] RColorBrewer_1.1-2   ellipsis_0.3.2       mice_3.14.0         
##  [52] farver_2.1.0         pkgconfig_2.0.3      nnet_7.3-16         
##  [55] sass_0.4.0           dbplyr_2.1.1         utf8_1.2.2          
##  [58] caret_6.0-90         labeling_0.4.2       tidyselect_1.1.1    
##  [61] reshape2_1.4.4       multilevel_2.6       munsell_0.5.0       
##  [64] cellranger_1.1.0     tools_4.1.2          cli_3.1.0           
##  [67] generics_0.1.1       broom_0.7.11         mathjaxr_1.4-0      
##  [70] evaluate_0.14        fastmap_1.1.0        yaml_2.2.1          
##  [73] bit64_4.0.5          ModelMetrics_1.2.2.2 knitr_1.37          
##  [76] fs_1.5.2             future_1.23.0        nlme_3.1-152        
##  [79] quantreg_5.86        xml2_1.3.3           psychometric_2.2    
##  [82] compiler_4.1.2       rstudioapi_0.13      png_0.1-7           
##  [85] reprex_2.0.1         bslib_0.3.1          stringi_1.7.6       
##  [88] highr_0.9            nloptr_1.2.2.3       vctrs_0.3.8         
##  [91] pillar_1.6.4         lifecycle_1.0.1      jquerylib_0.1.4     
##  [94] data.table_1.14.2    insight_0.14.5       conquer_1.2.1       
##  [97] R6_2.5.1             latticeExtra_0.6-29  gridExtra_2.3       
## [100] parallelly_1.30.0    codetools_0.2-18     polspline_1.1.19    
## [103] boot_1.3-28          MASS_7.3-54          gtools_3.9.2        
## [106] withr_2.4.3          mnormt_2.0.2         multcomp_1.4-18     
## [109] mgcv_1.8-38          bayestestR_0.11.5    parallel_4.1.2      
## [112] hms_1.1.1            grid_4.1.2           rpart_4.1-15        
## [115] timeDate_3043.102    class_7.3-19         minqa_1.2.4         
## [118] rmarkdown_2.11       pROC_1.18.0          lubridate_1.8.0     
## [121] base64enc_0.1-3
#data
journals %>% select(group1, n, mean, sd, se, SJR, CPD, url)