# based on CHJ Hartgerink's script
#
# Downloads the RPP data file from the OSF, keeps its first 167 rows as MASTER,
# and then either loads "osfdata_with_dois.rdata" (if that file exists) or
# queries Crossref for a DOI and a citation count for every row of MASTER.
options(stringsAsFactors = TRUE)
library(httr)
library(dplyr)
library(ggplot2)
library(ggbeeswarm)

# Read in Tilburg data
# Download the data file from the OSF to rpp_data.csv, replacing any old copy.
info <- GET(
  "https://osf.io/fgjvw/?action=download",
  write_disk("rpp_data.csv", overwrite = TRUE)
)
# Fail loudly on an HTTP error instead of parsing an error page as CSV.
stop_for_status(info)
MASTER <- read.csv("rpp_data.csv")[1:167, ]
# Change first column name to ID to be able to load .csv file
colnames(MASTER)[1] <- "ID"

# The `if` header used to be fused onto the end of the comment above, which
# commented out the cache check and left `} else {` without a matching opener.
if (file.exists("osfdata_with_dois.rdata")) {
  # presumably restores a MASTER that already carries the DOI and
  # citation_count_2018 columns -- confirm against whatever wrote this file
  load("osfdata_with_dois.rdata")
} else {
  # NOTE(review): nothing visible in this chunk writes osfdata_with_dois.rdata;
  # confirm the cache is saved elsewhere, otherwise this branch runs every time.
  library(rcrossref)

  # Look up one DOI per study: best Crossref match on title + author,
  # restricted to the 2007-2009 publication window.
  MASTER$DOI <- NA_character_
  for (i in seq_len(nrow(MASTER))) {
    tryCatch({
      MASTER$DOI[i] <- rcrossref::cr_works(
        flq = c(
          # as.character() guards against factor columns (see the
          # stringsAsFactors option above) being combined as integer codes.
          query.title = as.character(MASTER$Study.Title..O.[i]),
          query.author = as.character(MASTER$Authors..O.[i])
        ),
        filter = c(from_pub_date = 2007, until_pub_date = 2009),
        sort = "relevance",
        limit = 1
      )$data$DOI
    }, error = function(e) warning(e)) # a failed lookup leaves NA and warns
  }

  # Fetch the Crossref citation count for each DOI found above.
  # NOTE(review): newer rcrossref releases may return a data.frame from
  # cr_citation_count() rather than a bare number -- confirm for the installed
  # version.
  MASTER$citation_count_2018 <- NA_real_
  for (i in seq_len(nrow(MASTER))) {
    tryCatch({
      MASTER$citation_count_2018[i] <- rcrossref::cr_citation_count(MASTER$DOI[i])
    }, error = function(e) warning(e)) # a failed lookup leaves NA and warns
  }
}
```
No, not for the citation count recorded in the RPP.
##
## Call:
## glm(formula = Citation.count..paper..O. ~ T_pval_USE..R. < 0.05,
## family = quasipoisson(), data = .)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.43 -6.24 -3.23 4.00 20.52
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4709 0.1035 43.19 <2e-16 ***
## T_pval_USE..R. < 0.05TRUE -0.0853 0.1790 -0.48 0.63
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 60)
##
## Null deviance: 5247.2 on 98 degrees of freedom
## Residual deviance: 5233.5 on 97 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
The citation count in the RPP probably includes more sources, but these aren’t systematically different.
##
## Pearson's product-moment correlation
##
## data: MASTER$citation_count_2018 and MASTER$Citation.count..paper..O.
## t = 50, df = 200, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.96 0.98
## sample estimates:
## cor
## 0.97
##
## Call:
## lm(formula = MASTER$citation_count_2018 ~ MASTER$Citation.count..paper..O.)
##
## Coefficients:
## (Intercept) MASTER$Citation.count..paper..O.
## -6.921 0.824
## [1] 67
## [1] 90
Again, no association.
##
## Call:
## glm(formula = citation_count_2018 ~ T_pval_USE..R. < 0.05, family = quasipoisson(),
## data = .)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.29 -6.37 -3.79 3.65 18.04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1545 0.1177 35.30 <2e-16 ***
## T_pval_USE..R. < 0.05TRUE -0.0687 0.2024 -0.34 0.73
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 56)
##
## Null deviance: 4897.0 on 98 degrees of freedom
## Residual deviance: 4890.4 on 97 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
This is pretty dirty now, because I’m subtracting citation counts from one source from those of another, so most papers appear to be cited less in 2018 than in 2015. But I haven’t found a quick way to get citation counts in 2015 from rcrossref.
Again, no association. So, assuming the dirtiness of the analysis doesn’t matter (strong rank order correspondence in the citation counts), the literature hasn’t reacted at all to the presumably important bit of information that a study doesn’t replicate.
##
## Call:
## glm(formula = 90 + citation_count_2018 - Citation.count..paper..O. *
## 0.8 ~ T_pval_USE..R. < 0.05, family = "quasipoisson", data = .)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.954 -0.840 0.199 0.705 5.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4281 0.0249 177.91 <2e-16 ***
## T_pval_USE..R. < 0.05TRUE 0.0176 0.0416 0.42 0.67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 3.3)
##
## Null deviance: 385.73 on 98 degrees of freedom
## Residual deviance: 385.13 on 97 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
A slightly different way of looking at it.