require(rio)
require(summarytools)
require(likert)
require(xtable)
require(knitr)
require(grid)
require(gridExtra)
require(sjPlot)
require(multcomp)
require(tidyverse)
require(viridis)
require(hrbrthemes)
require(forcats)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] hrbrthemes_0.8.0 viridis_0.5.1 viridisLite_0.3.0 forcats_0.5.1
## [5] stringr_1.4.0 dplyr_1.0.4 purrr_0.3.4 readr_1.4.0
## [9] tidyr_1.1.2 tibble_3.0.6 tidyverse_1.3.0 multcomp_1.4-16
## [13] TH.data_1.0-10 MASS_7.3-53 survival_3.2-7 mvtnorm_1.1-1
## [17] sjPlot_2.8.7 gridExtra_2.3 knitr_1.31 likert_1.3.5
## [21] xtable_1.8-4 ggplot2_3.3.3 summarytools_0.9.8 rio_0.5.16
##
## loaded via a namespace (and not attached):
## [1] minqa_1.2.4 colorspace_2.0-0 pryr_0.1.4 ellipsis_0.3.1
## [5] sjlabelled_1.1.7 estimability_1.3 parameters_0.12.0 base64enc_0.1-3
## [9] fs_1.5.0 rstudioapi_0.13 lubridate_1.7.9.2 xml2_1.3.2
## [13] codetools_0.2-18 splines_4.0.4 extrafont_0.17 mnormt_2.0.2
## [17] sjmisc_2.8.6 jsonlite_1.7.2 nloptr_1.2.2.2 ggeffects_1.0.1
## [21] Rttf2pt1_1.3.8 broom_0.7.5 dbplyr_2.1.0 effectsize_0.4.3
## [25] compiler_4.0.4 httr_1.4.2 sjstats_0.18.1 emmeans_1.5.4
## [29] backports_1.2.1 assertthat_0.2.1 Matrix_1.3-2 cli_2.3.0
## [33] htmltools_0.5.1.1 tools_4.0.4 coda_0.19-4 gtable_0.3.0
## [37] glue_1.4.2 reshape2_1.4.4 Rcpp_1.0.6 cellranger_1.1.0
## [41] jquerylib_0.1.3 vctrs_0.3.6 nlme_3.1-152 extrafontdb_1.0
## [45] psych_2.0.12 insight_0.13.0 xfun_0.21 openxlsx_4.2.3
## [49] lme4_1.1-26 rvest_0.3.6 lifecycle_1.0.0 statmod_1.4.35
## [53] zoo_1.8-8 scales_1.1.1 hms_1.0.0 parallel_4.0.4
## [57] sandwich_3.0-0 yaml_2.2.1 curl_4.3 pander_0.6.3
## [61] gdtools_0.2.3 sass_0.3.1 stringi_1.5.3 bayestestR_0.8.2
## [65] checkmate_2.0.0 boot_1.3-27 zip_2.1.1 systemfonts_1.0.1
## [69] rlang_0.4.10 pkgconfig_2.0.3 matrixStats_0.58.0 evaluate_0.14
## [73] lattice_0.20-41 rapportools_1.0 tidyselect_1.1.0 plyr_1.8.6
## [77] magrittr_2.0.1 R6_2.5.0 magick_2.6.0 generics_0.1.0
## [81] DBI_1.1.1 pillar_1.4.7 haven_2.3.1 foreign_0.8-81
## [85] withr_2.4.1 performance_0.7.0 modelr_0.1.8 crayon_1.4.1
## [89] tmvnsim_1.0-2 rmarkdown_2.7 readxl_1.3.1 data.table_1.14.0
## [93] reprex_1.0.0 digest_0.6.27 munsell_0.5.0 bslib_0.2.4
## [97] tcltk_4.0.4
data <- rio::import(file = "../Data/cleaned.csv")
dict <- rio::import(file = "../Data/Dictionary.xlsx")
data$id <- as.character(data$id)
#data type demographics
data <- data %>%
mutate(age = factor(age, levels = c(1:7),
labels = c("18-22", "23-29", "30-39", "40-49", "50-59", "60-69", "70+")),
continent = factor(continent, levels = c(1:6),
labels = c("Africa", "Asia", "Autralia/Oceania", "Europe", "North America", "South America")),
group = factor(group, levels = c(1,2), labels = c("Flat rate", "Raffle")),
gender_f = factor(gender_f, levels = c(1,2), labels = c("Female", "Male")),
race_f = factor(race_f, levels = c(1:6),
labels = c("Asian", "Black", "Hispanic", "Native American", "Non-Hispanic White", "Mixed")),
career = factor(career, levels = c(1:7),
labels = c("Undergraduate student", "Graduate student", "Post-doctoral fellow",
"Assistant professor/lecturer", "Associate/full professor", "Research scientist", "Other")))
#data type likert
likert1 <- dict %>%
filter(value_label == "1 = It should never be used, 2 = It should only be used rarely, 3 = It should be used often, 4 = It should be used almost always") %>%
pull(variable)
factor1 <- function(x){
factor(x, levels = c(1:4),
labels = c("It should never be used", "It should only be used rarely",
"It should be used often", "It should be used almost always"))
}
likert2 <- dict %>%
filter(value_label == "1 = Never, 2 = Once or Twice, 3 = Often") %>%
pull(variable)
factor2 <- function(x){
factor(x, levels = c(1:3),
labels = c("Never", "Once or Twice", "Often"))
}
likert3 <- dict %>%
filter(value_label == "1 = Never, 2 = Once, 3 = Occasionally, 4 = Frequently, 5 = Almost always") %>%
pull(variable)
factor3 <- function(x){
factor(x, levels = c(1:5),
labels = c("Never", "Once", "Occasionally", "Frequently", "Almost always"))
}
likert4 <- dict %>%
filter(value_label == "0 = No, 1 = Yes, 2 = I have not published an empirical paper") %>%
pull(variable)
factor4 <- function(x){
factor(x, levels = c(0:1),
labels = c("No", "Yes"))
}
likert5 <- dict %>%
filter(value_label == "0 = No, 1 = Yes") %>%
pull(variable)
factor5 <- function(x){
factor(x, levels = c(0:1),
labels = c("No", "Yes"))
}
data <- data %>%
mutate_at(likert1, factor1) %>%
mutate_at(likert2, factor2) %>%
mutate_at(likert3, factor3) %>%
mutate_at(likert4, factor4) %>%
mutate_at(likert5, factor5)
rm(likert1, likert2, likert3, likert4, likert5,
factor1, factor2, factor3, factor4, factor5)
#list of ID with complete responses
full <- data %>%
filter(Finished == 1) %>%
pull(id)
#list of ID with correct attention checks
att <- data %>%
filter(attcheck == 1 & experience_19 == 1) %>%
pull(id)
#list of ID with complete AND correct attention checks
fullatt <- data %>%
filter(id %in% att & id %in% full) %>%
pull(id)
There are 352 participants in total, 265 of whom have completed responses and 159 with completed responses who also passed the two attention checks.
freq(data$age)
Frequencies
data$age
Type: Factor
| Freq | % Valid | % Valid Cum. | % Total | % Total Cum. | |
|---|---|---|---|---|---|
| 18-22 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
| 23-29 | 24 | 9.16 | 9.16 | 6.82 | 6.82 |
| 30-39 | 107 | 40.84 | 50.00 | 30.40 | 37.22 |
| 40-49 | 70 | 26.72 | 76.72 | 19.89 | 57.10 |
| 50-59 | 37 | 14.12 | 90.84 | 10.51 | 67.61 |
| 60-69 | 19 | 7.25 | 98.09 | 5.40 | 73.01 |
| 70+ | 5 | 1.91 | 100.00 | 1.42 | 74.43 |
| 90 | 25.57 | 100.00 | |||
| Total | 352 | 100.00 | 100.00 | 100.00 | 100.00 |
freq(data$gender_f)
Frequencies
data$gender_f
Type: Factor
| Freq | % Valid | % Valid Cum. | % Total | % Total Cum. | |
|---|---|---|---|---|---|
| Female | 173 | 68.65 | 68.65 | 49.15 | 49.15 |
| Male | 79 | 31.35 | 100.00 | 22.44 | 71.59 |
| 100 | 28.41 | 100.00 | |||
| Total | 352 | 100.00 | 100.00 | 100.00 | 100.00 |
freq(data$race_f)
Frequencies
data$race_f
Type: Factor
| Freq | % Valid | % Valid Cum. | % Total | % Total Cum. | |
|---|---|---|---|---|---|
| Asian | 48 | 19.28 | 19.28 | 13.64 | 13.64 |
| Black | 34 | 13.65 | 32.93 | 9.66 | 23.30 |
| Hispanic | 37 | 14.86 | 47.79 | 10.51 | 33.81 |
| Native American | 2 | 0.80 | 48.59 | 0.57 | 34.38 |
| Non-Hispanic White | 110 | 44.18 | 92.77 | 31.25 | 65.62 |
| Mixed | 18 | 7.23 | 100.00 | 5.11 | 70.74 |
| 103 | 29.26 | 100.00 | |||
| Total | 352 | 100.00 | 100.00 | 100.00 | 100.00 |
freq(data$continent)
Frequencies
data$continent
Type: Factor
| Freq | % Valid | % Valid Cum. | % Total | % Total Cum. | |
|---|---|---|---|---|---|
| Africa | 1 | 0.38 | 0.38 | 0.28 | 0.28 |
| Asia | 4 | 1.53 | 1.91 | 1.14 | 1.42 |
| Autralia/Oceania | 4 | 1.53 | 3.44 | 1.14 | 2.56 |
| Europe | 7 | 2.67 | 6.11 | 1.99 | 4.55 |
| North America | 245 | 93.51 | 99.62 | 69.60 | 74.15 |
| South America | 1 | 0.38 | 100.00 | 0.28 | 74.43 |
| 90 | 25.57 | 100.00 | |||
| Total | 352 | 100.00 | 100.00 | 100.00 | 100.00 |
freq(data$career)
Frequencies
data$career
Type: Factor
| Freq | % Valid | % Valid Cum. | % Total | % Total Cum. | |
|---|---|---|---|---|---|
| Undergraduate student | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
| Graduate student | 26 | 9.96 | 9.96 | 7.39 | 7.39 |
| Post-doctoral fellow | 20 | 7.66 | 17.62 | 5.68 | 13.07 |
| Assistant professor/lecturer | 76 | 29.12 | 46.74 | 21.59 | 34.66 |
| Associate/full professor | 120 | 45.98 | 92.72 | 34.09 | 68.75 |
| Research scientist | 7 | 2.68 | 95.40 | 1.99 | 70.74 |
| Other | 12 | 4.60 | 100.00 | 3.41 | 74.15 |
| 91 | 25.85 | 100.00 | |||
| Total | 352 | 100.00 | 100.00 | 100.00 | 100.00 |
freq(data$culture)
Frequencies
data$culture
Type: Factor
| Freq | % Valid | % Valid Cum. | % Total | % Total Cum. | |
|---|---|---|---|---|---|
| No | 34 | 12.93 | 12.93 | 9.66 | 9.66 |
| Yes | 229 | 87.07 | 100.00 | 65.06 | 74.72 |
| 89 | 25.28 | 100.00 | |||
| Total | 352 | 100.00 | 100.00 | 100.00 | 100.00 |
descr <- data %>% select(politics, c19stress, agree, consci, extra, neuro, open)
descr(descr, stats = "common", order = "p")
Descriptive Statistics
descr
N: 352
| politics | c19stress | agree | consci | extra | neuro | open | |
|---|---|---|---|---|---|---|---|
| Mean | 2.16 | 6.14 | 3.88 | 4.19 | 3.07 | 2.39 | 3.80 |
| Std.Dev | 1.05 | 2.16 | 0.79 | 0.82 | 1.09 | 0.85 | 0.74 |
| Min | 1.00 | 1.00 | 1.50 | 1.50 | 1.00 | 1.00 | 1.00 |
| Median | 2.00 | 6.00 | 4.00 | 4.50 | 3.00 | 2.50 | 4.00 |
| Max | 7.00 | 10.00 | 5.00 | 5.00 | 5.00 | 4.50 | 5.00 |
| N.Valid | 260.00 | 263.00 | 262.00 | 263.00 | 263.00 | 263.00 | 263.00 |
| Pct.Valid | 73.86 | 74.72 | 74.43 | 74.72 | 74.72 | 74.72 | 74.72 |
rm(descr)
Have you engaged in these practices?
qrpself <- data.frame(var = dict %>% filter(scale == "Self QRP") %>% pull(variable),
labels = dict %>% filter(scale == "Self QRP") %>% pull(label))
likert <- data %>% select(all_of(qrpself$var))
colnames(likert) <- qrpself$labels
likert(likert)
plot(likert(likert[,1:10]))
## Item Never Once Occasionally Frequently
## 1 Not reporting nonsignificance 33 8.3 46.4 10.8
## 2 Not reporting nonsignificance (covariate) 56 6.4 32.7 4.6
## 3 HARKing 57 11.2 25.9 5.6
## 4 Not reporting alternative models 43 7.5 38.1 9.3
## 5 Rounding p-value 63 7.8 19.5 6.0
## 6 Excluding data 74 6.6 17.8 1.0
## 7 Increasing sample 80 8.0 10.1 2.1
## 8 Changing analysis 42 10.7 41.3 6.0
## 9 Not reporting problems 81 9.5 8.5 0.0
## 10 Imputing data 93 3.2 3.5 0.0
## Almost always
## 1 1.44
## 2 0.71
## 3 0.70
## 4 2.49
## 5 3.19
## 6 0.00
## 7 0.00
## 8 0.00
## 9 0.70
## 10 0.00
likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:10], grouping = likertf$gender_f))
data <- data %>%
mutate(gender = ifelse(gender_f == "Female", 0, ifelse(gender_f == "Male", 1, NA)))
tab_model(lm(selfQRP ~ gender, data))
| selfQRP | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.78 | 1.71 – 1.85 | <0.001 |
| gender | -0.06 | -0.19 – 0.06 | 0.324 |
| Observations | 242 | ||
| R2 / R2 adjusted | 0.004 / -0.000 | ||
Overall, there does not seem to be a strong gender difference in engagement in questionable research practices. From the graphs of individual practices, we can see that female researchers reported more rounding of p-values whereas male researchers more often did not report problems of the studies.
likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:10], grouping = likerta$age))
tab_model(lm(selfQRP ~ as.numeric(age), data))
| selfQRP | |||
|---|---|---|---|
| Predictors | Estimates | CI | p |
| (Intercept) | 1.72 | 1.52 – 1.92 | <0.001 |
| age | 0.01 | -0.04 – 0.06 | 0.684 |
| Observations | 250 | ||
| R2 / R2 adjusted | 0.001 / -0.003 | ||
Overall, there is no age difference in engagement in questionable research practices. Although it seems that the 70+ group has slightly different patterns compared to the rest, there are only 5 individuals in this age group.
likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:10], grouping = likertc$career))
data$careernum <- as.factor(as.numeric(data$career))
plot(glht(aov(selfQRP ~ careernum, data), linfct = mcp(careernum = "Tukey"),
alternative = "two.sided"),
main = "Career stage")
Career stages:
Overall, there doesn’t seem to be strong differences between different career stages in engagement in questionable research practices. Although it appears that research scientists and “other” career groups show a different pattern, it’s important to note that there were only 19 participants (7.3%) that identified with these two groups. Graduate students reported less QRP engagement than assistant professors/lecturers and associate/full professors. Nonetheless, there were only 26 graduate students and many more professors/lecturers (196)
likert <- likert %>%
mutate(lead_1 = data$lead_1,
lead_2 = data$lead_2,
lead_3 = data$lead_3)
likertl4 <- likert %>% filter(is.na(lead_1) == F &
is.na(lead_2) == F &
is.na(lead_3) == F)
likertl4 <- likertl4 %>%
mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
0, 1))
likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
"Some leadership positions"))
plot(likert(likertl4[,1:10], grouping = likertl4$lead_any))
likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:10], grouping = likertl1$lead_1))
likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:10], grouping = likertl2$lead_2))
likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:10], grouping = likertl3$lead_3))
Leadership positions:
Overall there is no clear difference between researchers with and without recent leadership positions in their QRP engagement.
likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:10], grouping = likertce$culture))
Percentage of researchers in ethnic minority/cultural psychology who you believe have engaged in these practices?
qrppeer <- data.frame(var = dict %>% filter(scale == "Peer QRP") %>% pull(variable),
labels = dict %>% filter(scale == "Peer QRP") %>% pull(label))
descr <- data %>% select(all_of(qrppeer$var))
colnames(descr) <- qrppeer$labels
descr(descr, stats = "common", order = "p")
Descriptive Statistics
descr
N: 352
| Not reporting nonsignificance | Not reporting nonsignificance (covariate) | |
|---|---|---|
| Mean | 58.08 | 48.52 |
| Std.Dev | 28.56 | 27.43 |
| Min | 0.00 | 0.00 |
| Median | 64.00 | 50.00 |
| Max | 100.00 | 100.00 |
| N.Valid | 274.00 | 275.00 |
| Pct.Valid | 77.84 | 78.12 |
| HARKing | Not reporting alternative models | Rounding p-value | Excluding data | |
|---|---|---|---|---|
| Mean | 45.64 | 52.29 | 45.56 | 35.53 |
| Std.Dev | 28.17 | 28.51 | 30.25 | 25.35 |
| Min | 0.00 | 0.00 | 0.00 | 0.00 |
| Median | 50.00 | 54.00 | 41.50 | 30.00 |
| Max | 100.00 | 100.00 | 100.00 | 100.00 |
| N.Valid | 279.00 | 275.00 | 270.00 | 280.00 |
| Pct.Valid | 79.26 | 78.12 | 76.70 | 79.55 |
| Increasing sample | Changing analysis | Not reporting problems | Imputing data | |
|---|---|---|---|---|
| Mean | 35.11 | 49.73 | 33.55 | 20.48 |
| Std.Dev | 25.36 | 27.92 | 24.85 | 20.49 |
| Min | 0.00 | 0.00 | 0.00 | 0.00 |
| Median | 30.00 | 51.00 | 29.00 | 11.00 |
| Max | 100.00 | 100.00 | 100.00 | 100.00 |
| N.Valid | 278.00 | 278.00 | 275.00 | 280.00 |
| Pct.Valid | 78.98 | 78.98 | 78.12 | 79.55 |
# plot
descr$id <- as.character(1:nrow(descr))
descr <- gather(descr, key = "qrp", value = "percent",
"Not reporting nonsignificance":"Imputing data")
p <- descr %>%
ggplot( aes(x=qrp, y=percent, fill=qrp, color=qrp)) +
geom_violin(width=1.25, size=0.2) +
geom_boxplot(width = .15, fill = "white", color = "black") +
stat_summary(fun.y=mean, geom="label", aes(label=round(..y.., 2), color = "black", size = 13, fill = "white")) +
scale_fill_viridis(discrete=TRUE) +
scale_color_viridis(discrete=TRUE) +
theme_minimal() +
theme(
legend.position="none",
axis.text.y = element_text(size = 12)
) +
coord_flip() + #horizontal version
xlab("") +
ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")
print(p)
#lollipop plot comparing with self engagement
descr <- data.frame(self = c(100-33, 100-56, 100-57, 100-43, 100-63, 100-74, 100-80, 100-42, 100-81, 100-93),
other = c(58.08, 48.52, 45.64, 52.29, 45.56, 35.53, 35.11, 49.73, 33.55, 20.48),
qrp = c("Not reporting nonsignificance", "Not reporting nonsignificance (covariate)",
"HARKing", "Not reporting alternative models", "Rounding p-value",
"Excluding data", "Increasing sample", "Changing analysis",
"Not reporting problems", "Imputing data"))
descr <- descr %>% arrange(self) %>%
mutate(qrp = factor(qrp, qrp)) #factor is needed to preserve order
p <- ggplot(descr) +
geom_segment(
aes(x=qrp, xend=qrp, y=self, yend=other),
color= ifelse(descr$self < descr$other, "grey", "orange"),
size= ifelse(descr$self < descr$other, 1, 2)) +
geom_point(
aes(x=qrp, y=self),
color="green",
size=4) +
geom_point(
aes(x=qrp, y=other),
color="red",
size=4,
shape=17) +
theme_minimal() +
coord_flip() +
theme(
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12)
) +
ylim(0, 80) +
xlab("") +
ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")
p <- p +
annotate("text", x=grep("Changing analysis", descr$qrp), 20,
label="Underestimation of \n others' engagement",
color="orange", size=5 , angle=0, fontface="bold", hjust=0) +
annotate("text", x=grep("Excluding data", descr$qrp), 50,
label="Overestimation of \n others' engagement",
color="gray", size=5 , angle=0, fontface="bold", hjust=0)
print(p)
What is your opinion of these practices?
qrpopi <- data.frame(var = dict %>% filter(scale == "Opinion QRP") %>% pull(variable),
labels = dict %>% filter(scale == "Opinion QRP") %>% pull(label))
likert <- data %>% select(all_of(qrpopi$var))
colnames(likert) <- qrpopi$labels
likert(likert)
## Item It should never be used
## 1 Not reporting nonsignificance 29
## 2 Not reporting nonsignificance (covariate) 40
## 3 HARKing 59
## 4 Not reporting alternative models 30
## 5 Rounding p-value 62
## 6 Excluding data 64
## 7 Increasing sample 44
## 8 Changing analysis 32
## 9 Not reporting problems 83
## 10 Imputing data 86
## It should only be used rarely It should be used often
## 1 56 14.7
## 2 49 9.7
## 3 28 9.9
## 4 47 21.1
## 5 22 13.8
## 6 33 3.5
## 7 44 10.7
## 8 54 14.7
## 9 15 1.8
## 10 12 1.1
## It should be used almost always
## 1 0.00
## 2 1.44
## 3 3.17
## 4 2.15
## 5 2.18
## 6 0.35
## 7 1.42
## 8 0.00
## 9 0.71
## 10 0.00
likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:10], grouping = likertf$gender_f))
likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:10], grouping = likerta$age))
likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:10], grouping = likertc$career))
likert <- likert %>%
mutate(lead_1 = data$lead_1,
lead_2 = data$lead_2,
lead_3 = data$lead_3)
likertl4 <- likert %>% filter(is.na(lead_1) == F &
is.na(lead_2) == F &
is.na(lead_3) == F)
likertl4 <- likertl4 %>%
mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
0, 1))
likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
"Some leadership positions"))
plot(likert(likertl4[,1:10], grouping = likertl4$lead_any))
likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:10], grouping = likertl1$lead_1))
likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:10], grouping = likertl2$lead_2))
likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:10], grouping = likertl3$lead_3))
likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:10], grouping = likertce$culture))
Have you ever heard of these practices?
prrpaware <- data.frame(var = dict %>% filter(scale == "Aware PRRP") %>% pull(variable),
labels = dict %>% filter(scale == "Aware PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpaware$var))
colnames(likert) <- prrpaware$labels
likert(likert)
plot(likert(likert[,1:3]))
## Item No Yes
## 1 Posting data 14 86
## 2 Posting instruments 16 84
## 3 Preregistration 23 77
likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))
likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))
likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))
likert <- likert %>%
mutate(lead_1 = data$lead_1,
lead_2 = data$lead_2,
lead_3 = data$lead_3)
likertl4 <- likert %>% filter(is.na(lead_1) == F &
is.na(lead_2) == F &
is.na(lead_3) == F)
likertl4 <- likertl4 %>%
mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
0, 1))
likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
"Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))
likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))
likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))
likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))
likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))
Have you engaged in these practices?
prrpself <- data.frame(var = dict %>% filter(scale == "Self PRRP") %>% pull(variable),
labels = dict %>% filter(scale == "Self PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpself$var))
colnames(likert) <- prrpself$labels
likert(likert)
plot(likert(likert[,1:3]))
## Item Never Once Occasionally Frequently Almost always
## 1 Posting data 63 9.8 20 4.2 2.4
## 2 Posting instruments 45 7.0 34 9.5 4.2
## 3 Preregistration 61 10.0 18 7.6 3.5
likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))
likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))
likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))
likert <- likert %>%
mutate(lead_1 = data$lead_1,
lead_2 = data$lead_2,
lead_3 = data$lead_3)
likertl4 <- likert %>% filter(is.na(lead_1) == F &
is.na(lead_2) == F &
is.na(lead_3) == F)
likertl4 <- likertl4 %>%
mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
0, 1))
likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
"Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))
likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))
likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))
likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))
likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))
Percentage of researchers in ethnic minority/cultural psychology who you believe have engaged in these practices?
prrppeer <- data.frame(var = dict %>% filter(scale == "Peer PRRP") %>% pull(variable),
labels = dict %>% filter(scale == "Peer PRRP") %>% pull(label))
descr <- data %>% select(all_of(prrppeer$var))
colnames(descr) <- prrppeer$labels
descr(descr, stats = "common", order = "p")
Descriptive Statistics
descr
N: 352
| Posting data | Posting instruments | Preregistration | |
|---|---|---|---|
| Mean | 25.32 | 37.84 | 28.14 |
| Std.Dev | 18.29 | 23.01 | 21.85 |
| Min | 0.00 | 0.00 | 0.00 |
| Median | 20.00 | 30.00 | 25.00 |
| Max | 100.00 | 100.00 | 100.00 |
| N.Valid | 278.00 | 273.00 | 281.00 |
| Pct.Valid | 78.98 | 77.56 | 79.83 |
# plot
descr$id <- as.character(1:nrow(descr))
descr <- gather(descr, key = "prrp", value = "percent",
"Posting data":"Preregistration")
p <- descr %>%
ggplot( aes(x=prrp, y=percent, fill=prrp, color=prrp)) +
geom_violin(width=1.25, size=0.2) +
geom_boxplot(width = .15, fill = "white", color = "black") +
stat_summary(fun.y=mean, geom="label", aes(label=round(..y.., 2), color = "black", size = 13, fill = "white")) +
scale_fill_viridis(discrete=TRUE) +
scale_color_viridis(discrete=TRUE) +
theme_minimal() +
theme(
legend.position="none",
axis.text.y = element_text(size = 12)
) +
coord_flip() + #horizontal version
xlab("") +
ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")
print(p)
#lollipop plot comparing with self engagement
descr <- data.frame(self = c(100-52, 100-71, 100-73),
other = c(37.84, 28.14, 25.32),
prrp = c("Posting instruments", "Preregistration","Posting data"))
descr <- descr %>% arrange(self) %>%
mutate(prrp = factor(prrp, prrp)) #factor is needed to preserve order
p <- ggplot(descr) +
geom_segment(
aes(x=prrp, xend=prrp, y=self, yend=other),
color= ifelse(descr$self < descr$other, "grey", "orange"),
size= ifelse(descr$self < descr$other, 1, 2)) +
geom_point(
aes(x=prrp, y=self),
color="green",
size=4) +
geom_point(
aes(x=prrp, y=other),
color="red",
size=4,
shape=17) +
theme_minimal() +
coord_flip() +
theme(
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12)
) +
ylim(15, 60) +
xlab("") +
ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")
p <- p +
annotate("text", x=grep("Preregistration", descr$prrp), 48,
label="Underestimation of \n others' engagement",
color="orange", size=5 , angle=0, fontface="bold", hjust=0)
print(p)
What is your opinion of these practices?
prrpopi <- data.frame(var = dict %>% filter(scale == "Opinion PRRP") %>% pull(variable),
labels = dict %>% filter(scale == "Opinion PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpopi$var))
colnames(likert) <- prrpopi$labels
likert(likert)
plot(likert(likert))
## Item It should never be used It should only be used rarely
## 1 Posting data 2.5 27
## 2 Posting instruments 1.1 12
## 3 Preregistration 2.8 25
## It should be used often It should be used almost always
## 1 58 13
## 2 63 24
## 3 58 15
likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))
likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))
likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))
likert <- likert %>%
mutate(lead_1 = data$lead_1,
lead_2 = data$lead_2,
lead_3 = data$lead_3)
likertl4 <- likert %>% filter(is.na(lead_1) == F &
is.na(lead_2) == F &
is.na(lead_3) == F)
likertl4 <- likertl4 %>%
mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
0, 1))
likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
"Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))
likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))
likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))
likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))
likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))
prrprecent <- data.frame(var = dict %>% filter(scale == "Recent PRRP") %>% pull(variable),
labels = dict %>% filter(scale == "Recent PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrprecent$var))
colnames(likert) <- prrprecent$labels
likert(likert)
Item No Yes
1 Posting data 85 15 2 Posting instruments 73 27 3 Preregistration 83 17
likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))
likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))
likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))
likert <- likert %>%
mutate(lead_1 = data$lead_1,
lead_2 = data$lead_2,
lead_3 = data$lead_3)
likertl4 <- likert %>% filter(is.na(lead_1) == F &
is.na(lead_2) == F &
is.na(lead_3) == F)
likertl4 <- likertl4 %>%
mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
0, 1))
likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
"Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))
likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))
likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))
likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))
likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))
whydata <- grep("^prrp1_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp1_4 == "No") %>% select(all_of(whydata))
colnames(descr) <- c("Proprietary",
"Time-consuming",
"Scooping",
"Deidentification",
"Field not in favor",
"Already public",
"Mistake",
"Unfamiliar",
"Planning to",
"Others")
descr(descr, stats = "common", order = "p") %>% kable()
| Proprietary | Time-consuming | Scooping | Deidentification | Field not in favor | Already public | Mistake | Unfamiliar | Planning to | Others | |
|---|---|---|---|---|---|---|---|---|---|---|
| Mean | 0.32 | 0.21 | 0.1 | 0.17 | 0.05 | 0.06 | 0.08 | 0.21 | 0.1 | 0.18 |
| Std.Dev | 0.47 | 0.41 | 0.3 | 0.37 | 0.23 | 0.24 | 0.26 | 0.41 | 0.3 | 0.39 |
| Min | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 |
| Median | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 |
| Max | 1.00 | 1.00 | 1.0 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.0 | 1.00 |
| N.Valid | 238.00 | 238.00 | 238.0 | 238.00 | 238.00 | 238.00 | 238.00 | 238.00 | 238.0 | 238.00 |
| Pct.Valid | 100.00 | 100.00 | 100.0 | 100.00 | 100.00 | 100.00 | 100.00 | 100.00 | 100.0 | 100.00 |
# plot
descr <- data.frame(
reasons = c("Proprietary", "Time-consuming", "Scooping", "Deidentification", "Field not in favor",
"Already public", "Mistake", "Unfamiliar", "Planning to", "Others"),
percent = c(32, 21, 10, 17, 5, 6, 8, 21, 10, 18)
)
descr %>%
mutate(reasons = fct_reorder(reasons, percent)) %>%
ggplot( aes(x=reasons, y=percent)) +
geom_bar(stat="identity", fill="maroon", alpha=.6, width=.4) +
coord_flip() +
ylim(0,45) +
xlab("") +
ggtitle("Reasons for not posting data") +
theme_bw()
whyinstr <- grep("^prrp2_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp2_4 == "No") %>% select(all_of(whyinstr))
colnames(descr) <- c("Proprietary",
"Time-consuming",
"Scooping",
"Deidentification",
"Field not in favor",
"Already public",
"Unfamiliar",
"Planning to",
"Others")
descr(descr, stats = "common", order = "p") %>% kable()
| Proprietary | Time-consuming | Scooping | Deidentification | Field not in favor | Already public | Unfamiliar | Planning to | Others | |
|---|---|---|---|---|---|---|---|---|---|
| Mean | 0.32 | 0.17 | 0.05 | 0.04 | 0.08 | 0.37 | 0.2 | 0.07 | 0.16 |
| Std.Dev | 0.47 | 0.38 | 0.23 | 0.19 | 0.27 | 0.48 | 0.4 | 0.25 | 0.37 |
| Min | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 |
| Median | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 |
| Max | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.0 | 1.00 | 1.00 |
| N.Valid | 206.00 | 206.00 | 206.00 | 206.00 | 206.00 | 206.00 | 206.0 | 206.00 | 206.00 |
| Pct.Valid | 100.00 | 100.00 | 100.00 | 100.00 | 100.00 | 100.00 | 100.0 | 100.00 | 100.00 |
# plot
descr <- data.frame(
reasons = c("Proprietary", "Time-consuming", "Scooping", "Deidentification", "Field not in favor",
"Already public", "Unfamiliar", "Planning to", "Others"),
percent = c(32, 17, 5, 4, 8, 37, 20, 7, 16)
)
descr %>%
mutate(reasons = fct_reorder(reasons, percent)) %>%
ggplot( aes(x=reasons, y=percent)) +
geom_bar(stat="identity", fill="red", alpha=.6, width=.4) +
coord_flip() +
ylim(0,45) +
xlab("") +
ggtitle("Reasons for not posting instruments") +
theme_bw()
whyprereg <- grep("^prrp3_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp3_4 == "No") %>% select(all_of(whyprereg))
colnames(descr) <- c("Time-consuming",
"Scooping",
"Field not in favor",
"Already public",
"Unfamiliar",
"Others")
descr(descr, stats = "common", order = "p") %>% kable()
| Time-consuming | Scooping | Field not in favor | Already public | Unfamiliar | Others | |
|---|---|---|---|---|---|---|
| Mean | 0.21 | 0.09 | 0.13 | 0.05 | 0.44 | 0.28 |
| Std.Dev | 0.41 | 0.29 | 0.34 | 0.21 | 0.50 | 0.45 |
| Min | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| Median | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| Max | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 |
| N.Valid | 236.00 | 236.00 | 236.00 | 236.00 | 236.00 | 236.00 |
| Pct.Valid | 100.00 | 100.00 | 100.00 | 100.00 | 100.00 | 100.00 |
# plot
descr <- data.frame(
reasons = c("Time-consuming", "Scooping","Field not in favor",
"Already public", "Unfamiliar", "Others"),
percent = c(21, 9, 13, 5, 44, 28)
)
descr %>%
mutate(reasons = fct_reorder(reasons, percent)) %>%
ggplot( aes(x=reasons, y=percent)) +
geom_bar(stat="identity", fill="blue", alpha=.6, width=.4) +
coord_flip() +
ylim(0,45) +
xlab("") +
ggtitle("Reasons for not preregistering") +
theme_bw()