I. Meta-data

require(rio)
require(summarytools)
require(likert)
require(xtable)
require(knitr)
require(grid)
require(gridExtra)
require(sjPlot)
require(multcomp)
require(tidyverse)
require(viridis)
require(hrbrthemes)
require(forcats)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] hrbrthemes_0.8.0   viridis_0.5.1      viridisLite_0.3.0  forcats_0.5.1     
##  [5] stringr_1.4.0      dplyr_1.0.4        purrr_0.3.4        readr_1.4.0       
##  [9] tidyr_1.1.2        tibble_3.0.6       tidyverse_1.3.0    multcomp_1.4-16   
## [13] TH.data_1.0-10     MASS_7.3-53        survival_3.2-7     mvtnorm_1.1-1     
## [17] sjPlot_2.8.7       gridExtra_2.3      knitr_1.31         likert_1.3.5      
## [21] xtable_1.8-4       ggplot2_3.3.3      summarytools_0.9.8 rio_0.5.16        
## 
## loaded via a namespace (and not attached):
##  [1] minqa_1.2.4        colorspace_2.0-0   pryr_0.1.4         ellipsis_0.3.1    
##  [5] sjlabelled_1.1.7   estimability_1.3   parameters_0.12.0  base64enc_0.1-3   
##  [9] fs_1.5.0           rstudioapi_0.13    lubridate_1.7.9.2  xml2_1.3.2        
## [13] codetools_0.2-18   splines_4.0.4      extrafont_0.17     mnormt_2.0.2      
## [17] sjmisc_2.8.6       jsonlite_1.7.2     nloptr_1.2.2.2     ggeffects_1.0.1   
## [21] Rttf2pt1_1.3.8     broom_0.7.5        dbplyr_2.1.0       effectsize_0.4.3  
## [25] compiler_4.0.4     httr_1.4.2         sjstats_0.18.1     emmeans_1.5.4     
## [29] backports_1.2.1    assertthat_0.2.1   Matrix_1.3-2       cli_2.3.0         
## [33] htmltools_0.5.1.1  tools_4.0.4        coda_0.19-4        gtable_0.3.0      
## [37] glue_1.4.2         reshape2_1.4.4     Rcpp_1.0.6         cellranger_1.1.0  
## [41] jquerylib_0.1.3    vctrs_0.3.6        nlme_3.1-152       extrafontdb_1.0   
## [45] psych_2.0.12       insight_0.13.0     xfun_0.21          openxlsx_4.2.3    
## [49] lme4_1.1-26        rvest_0.3.6        lifecycle_1.0.0    statmod_1.4.35    
## [53] zoo_1.8-8          scales_1.1.1       hms_1.0.0          parallel_4.0.4    
## [57] sandwich_3.0-0     yaml_2.2.1         curl_4.3           pander_0.6.3      
## [61] gdtools_0.2.3      sass_0.3.1         stringi_1.5.3      bayestestR_0.8.2  
## [65] checkmate_2.0.0    boot_1.3-27        zip_2.1.1          systemfonts_1.0.1 
## [69] rlang_0.4.10       pkgconfig_2.0.3    matrixStats_0.58.0 evaluate_0.14     
## [73] lattice_0.20-41    rapportools_1.0    tidyselect_1.1.0   plyr_1.8.6        
## [77] magrittr_2.0.1     R6_2.5.0           magick_2.6.0       generics_0.1.0    
## [81] DBI_1.1.1          pillar_1.4.7       haven_2.3.1        foreign_0.8-81    
## [85] withr_2.4.1        performance_0.7.0  modelr_0.1.8       crayon_1.4.1      
## [89] tmvnsim_1.0-2      rmarkdown_2.7      readxl_1.3.1       data.table_1.14.0 
## [93] reprex_1.0.0       digest_0.6.27      munsell_0.5.0      bslib_0.2.4       
## [97] tcltk_4.0.4

II. Preprocessing

Data

data <- rio::import(file = "../Data/cleaned.csv")
dict <- rio::import(file = "../Data/Dictionary.xlsx")
data$id <- as.character(data$id)

#data type demographics
data <- data %>% 
  mutate(age = factor(age, levels = c(1:7),
                         labels = c("18-22", "23-29", "30-39", "40-49", "50-59", "60-69", "70+")),
         continent = factor(continent, levels = c(1:6),
                            labels = c("Africa", "Asia", "Autralia/Oceania", "Europe", "North America", "South America")),
         group = factor(group, levels = c(1,2), labels = c("Flat rate", "Raffle")),
         gender_f = factor(gender_f, levels = c(1,2), labels = c("Female", "Male")),
         race_f = factor(race_f, levels = c(1:6),
                         labels = c("Asian", "Black", "Hispanic", "Native American", "Non-Hispanic White", "Mixed")),
         career = factor(career, levels = c(1:7),
                         labels = c("Undergraduate student", "Graduate student", "Post-doctoral fellow", 
                                    "Assistant professor/lecturer", "Associate/full professor", "Research scientist", "Other")))

#data type likert 
likert1 <- dict %>% 
  filter(value_label == "1 = It should never be used, 2 = It should only be used rarely, 3 = It should be used often, 4 = It should be used almost always") %>%
  pull(variable)
factor1 <- function(x){
  factor(x, levels = c(1:4), 
         labels = c("It should never be used", "It should only be used rarely", 
                    "It should be used often", "It should be used almost always"))
}

likert2 <- dict %>% 
  filter(value_label == "1 = Never, 2 = Once or Twice, 3 = Often") %>%
  pull(variable)
factor2 <- function(x){
  factor(x, levels = c(1:3), 
         labels = c("Never", "Once or Twice", "Often"))
}

likert3 <- dict %>% 
  filter(value_label == "1 = Never, 2 = Once, 3 = Occasionally, 4 = Frequently, 5 = Almost always") %>%
  pull(variable)
factor3 <- function(x){
  factor(x, levels = c(1:5), 
         labels = c("Never", "Once", "Occasionally", "Frequently", "Almost always"))
}

likert4 <- dict %>% 
  filter(value_label == "0 = No, 1 = Yes, 2 = I have not published an empirical paper") %>%
  pull(variable)
factor4 <- function(x){
  factor(x, levels = c(0:1), 
         labels = c("No", "Yes"))
}

likert5 <- dict %>% 
  filter(value_label == "0 = No, 1 = Yes") %>%
  pull(variable)
factor5 <- function(x){
  factor(x, levels = c(0:1), 
         labels = c("No", "Yes"))
}

data <- data %>% 
  mutate_at(likert1, factor1) %>% 
  mutate_at(likert2, factor2) %>% 
  mutate_at(likert3, factor3) %>% 
  mutate_at(likert4, factor4) %>% 
  mutate_at(likert5, factor5)

rm(likert1, likert2, likert3, likert4, likert5, 
   factor1, factor2, factor3, factor4, factor5)

#list of ID with complete responses
full <- data %>% 
  filter(Finished == 1) %>% 
  pull(id)

#list of ID with correct attention checks
att <- data %>% 
  filter(attcheck == 1 & experience_19 == 1) %>% 
  pull(id)

#list of ID with complete AND correct attention checks
fullatt <- data %>% 
  filter(id %in% att & id %in% full) %>% 
  pull(id)

There are 352 participants in total, 265 of whom have completed responses and 159 with completed responses who also passed the two attention checks.

III. Demographics

freq(data$age)

Frequencies
data$age
Type: Factor

Freq % Valid % Valid Cum. % Total % Total Cum.
18-22 0 0.00 0.00 0.00 0.00
23-29 24 9.16 9.16 6.82 6.82
30-39 107 40.84 50.00 30.40 37.22
40-49 70 26.72 76.72 19.89 57.10
50-59 37 14.12 90.84 10.51 67.61
60-69 19 7.25 98.09 5.40 73.01
70+ 5 1.91 100.00 1.42 74.43
90 25.57 100.00
Total 352 100.00 100.00 100.00 100.00
freq(data$gender_f)

Frequencies
data$gender_f
Type: Factor

Freq % Valid % Valid Cum. % Total % Total Cum.
Female 173 68.65 68.65 49.15 49.15
Male 79 31.35 100.00 22.44 71.59
100 28.41 100.00
Total 352 100.00 100.00 100.00 100.00
freq(data$race_f)

Frequencies
data$race_f
Type: Factor

Freq % Valid % Valid Cum. % Total % Total Cum.
Asian 48 19.28 19.28 13.64 13.64
Black 34 13.65 32.93 9.66 23.30
Hispanic 37 14.86 47.79 10.51 33.81
Native American 2 0.80 48.59 0.57 34.38
Non-Hispanic White 110 44.18 92.77 31.25 65.62
Mixed 18 7.23 100.00 5.11 70.74
103 29.26 100.00
Total 352 100.00 100.00 100.00 100.00
freq(data$continent)

Frequencies
data$continent
Type: Factor

Freq % Valid % Valid Cum. % Total % Total Cum.
Africa 1 0.38 0.38 0.28 0.28
Asia 4 1.53 1.91 1.14 1.42
Autralia/Oceania 4 1.53 3.44 1.14 2.56
Europe 7 2.67 6.11 1.99 4.55
North America 245 93.51 99.62 69.60 74.15
South America 1 0.38 100.00 0.28 74.43
90 25.57 100.00
Total 352 100.00 100.00 100.00 100.00
freq(data$career)

Frequencies
data$career
Type: Factor

Freq % Valid % Valid Cum. % Total % Total Cum.
Undergraduate student 0 0.00 0.00 0.00 0.00
Graduate student 26 9.96 9.96 7.39 7.39
Post-doctoral fellow 20 7.66 17.62 5.68 13.07
Assistant professor/lecturer 76 29.12 46.74 21.59 34.66
Associate/full professor 120 45.98 92.72 34.09 68.75
Research scientist 7 2.68 95.40 1.99 70.74
Other 12 4.60 100.00 3.41 74.15
91 25.85 100.00
Total 352 100.00 100.00 100.00 100.00
freq(data$culture)

Frequencies
data$culture
Type: Factor

Freq % Valid % Valid Cum. % Total % Total Cum.
No 34 12.93 12.93 9.66 9.66
Yes 229 87.07 100.00 65.06 74.72
89 25.28 100.00
Total 352 100.00 100.00 100.00 100.00
descr <- data %>% select(politics, c19stress, agree, consci, extra, neuro, open)
descr(descr, stats = "common", order = "p")

Descriptive Statistics
descr
N: 352

politics c19stress agree consci extra neuro open
Mean 2.16 6.14 3.88 4.19 3.07 2.39 3.80
Std.Dev 1.05 2.16 0.79 0.82 1.09 0.85 0.74
Min 1.00 1.00 1.50 1.50 1.00 1.00 1.00
Median 2.00 6.00 4.00 4.50 3.00 2.50 4.00
Max 7.00 10.00 5.00 5.00 5.00 4.50 5.00
N.Valid 260.00 263.00 262.00 263.00 263.00 263.00 263.00
Pct.Valid 73.86 74.72 74.43 74.72 74.72 74.72 74.72
rm(descr)

IV. Exploratory analyses

1. Questionable research practices

1a. Self

Have you engaged in these practices?

qrpself <- data.frame(var = dict %>% filter(scale == "Self QRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Self QRP") %>% pull(label))
likert <- data %>% select(all_of(qrpself$var))
colnames(likert) <- qrpself$labels
likert(likert)
plot(likert(likert[,1:10]))

##                                         Item Never Once Occasionally Frequently
## 1              Not reporting nonsignificance    33  8.3         46.4       10.8
## 2  Not reporting nonsignificance (covariate)    56  6.4         32.7        4.6
## 3                                    HARKing    57 11.2         25.9        5.6
## 4           Not reporting alternative models    43  7.5         38.1        9.3
## 5                           Rounding p-value    63  7.8         19.5        6.0
## 6                             Excluding data    74  6.6         17.8        1.0
## 7                          Increasing sample    80  8.0         10.1        2.1
## 8                          Changing analysis    42 10.7         41.3        6.0
## 9                     Not reporting problems    81  9.5          8.5        0.0
## 10                             Imputing data    93  3.2          3.5        0.0
##    Almost always
## 1           1.44
## 2           0.71
## 3           0.70
## 4           2.49
## 5           3.19
## 6           0.00
## 7           0.00
## 8           0.00
## 9           0.70
## 10          0.00

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)

plot(likert(likertf[,1:10], grouping = likertf$gender_f))

data <- data %>% 
  mutate(gender = ifelse(gender_f == "Female", 0, ifelse(gender_f == "Male", 1, NA)))
tab_model(lm(selfQRP ~ gender, data))
  selfQRP
Predictors Estimates CI p
(Intercept) 1.78 1.71 – 1.85 <0.001
gender -0.06 -0.19 – 0.06 0.324
Observations 242
R2 / R2 adjusted 0.004 / -0.000

Overall, there does not seem to be a strong gender difference in engagement in questionable research practices. From the graphs of individual practices, we can see that female researchers reported more rounding of p-values whereas male researchers more often did not report problems of the studies.

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:10], grouping = likerta$age))

tab_model(lm(selfQRP ~ as.numeric(age), data))
  selfQRP
Predictors Estimates CI p
(Intercept) 1.72 1.52 – 1.92 <0.001
age 0.01 -0.04 – 0.06 0.684
Observations 250
R2 / R2 adjusted 0.001 / -0.003

Overall, there is no age difference in engagement in questionable research practices. Although it seems that the 70+ group has slightly different patterns compared to the rest, there are only 5 individuals in this age group.

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:10], grouping = likertc$career))

data$careernum <- as.factor(as.numeric(data$career))
plot(glht(aov(selfQRP ~ careernum, data), linfct = mcp(careernum = "Tukey"), 
          alternative = "two.sided"),
     main = "Career stage")

Career stages:

  • 2: Graduate student (N = 26)
  • 3: Post-doctoral fellow (N = 20)
  • 4: Assistant professor/lecturer (N = 76)
  • 5: Associate/full professor (N = 120)
  • 6: Research scientist (N = 7)
  • 7: Other (N = 12)

Overall, there doesn’t seem to be strong differences between different career stages in engagement in questionable research practices. Although it appears that research scientists and “other” career groups show a different pattern, it’s important to note that there were only 19 participants (7.3%) that identified with these two groups. Graduate students reported less QRP engagement than assistant professors/lecturers and associate/full professors. Nonetheless, there were only 26 graduate students and many more professors/lecturers (196)

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:10], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:10], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:10], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:10], grouping = likertl3$lead_3))

Leadership positions:

  • Some leadership positions (N = 147) vs. No leadership position (N = 205)
  • Leader at own institution (N = 75) vs. Not leader at own institution (N = 277)
  • Leader at journal editorialship (N = 50) vs. Not leader at journal editorialship (N = 302)
  • Leader at professional societies (N = 91) vs. Not leader at professional societies (N = 261)

Overall there is no clear difference between researchers with and without recent leadership positions in their QRP engagement.

Grouped by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:10], grouping = likertce$culture))

1b. Other

Percentage of researchers in ethnic minority/cultural psychology who you believe have engaged in these practices?

qrppeer <- data.frame(var = dict %>% filter(scale == "Peer QRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Peer QRP") %>% pull(label))
descr <- data %>% select(all_of(qrppeer$var))
colnames(descr) <- qrppeer$labels
descr(descr, stats = "common", order = "p")

Descriptive Statistics
descr
N: 352

Table continues below
Not reporting nonsignificance Not reporting nonsignificance (covariate)
Mean 58.08 48.52
Std.Dev 28.56 27.43
Min 0.00 0.00
Median 64.00 50.00
Max 100.00 100.00
N.Valid 274.00 275.00
Pct.Valid 77.84 78.12
Table continues below
HARKing Not reporting alternative models Rounding p-value Excluding data
Mean 45.64 52.29 45.56 35.53
Std.Dev 28.17 28.51 30.25 25.35
Min 0.00 0.00 0.00 0.00
Median 50.00 54.00 41.50 30.00
Max 100.00 100.00 100.00 100.00
N.Valid 279.00 275.00 270.00 280.00
Pct.Valid 79.26 78.12 76.70 79.55
Increasing sample Changing analysis Not reporting problems Imputing data
Mean 35.11 49.73 33.55 20.48
Std.Dev 25.36 27.92 24.85 20.49
Min 0.00 0.00 0.00 0.00
Median 30.00 51.00 29.00 11.00
Max 100.00 100.00 100.00 100.00
N.Valid 278.00 278.00 275.00 280.00
Pct.Valid 78.98 78.98 78.12 79.55
# plot
descr$id <- as.character(1:nrow(descr))
descr <- gather(descr, key = "qrp", value = "percent",
                "Not reporting nonsignificance":"Imputing data")
p <- descr %>%
  ggplot( aes(x=qrp, y=percent, fill=qrp, color=qrp)) +
    geom_violin(width=1.25, size=0.2) +
    geom_boxplot(width = .15, fill = "white", color = "black") +
    stat_summary(fun.y=mean, geom="label", aes(label=round(..y.., 2), color = "black", size = 13, fill = "white")) +
    scale_fill_viridis(discrete=TRUE) +
    scale_color_viridis(discrete=TRUE) +
    theme_minimal() +
    theme(
      legend.position="none",
      axis.text.y = element_text(size = 12)
    ) +
    coord_flip() + #horizontal version
    xlab("") +
    ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")
print(p)

#lollipop plot comparing with self engagement
descr <- data.frame(self = c(100-33, 100-56, 100-57, 100-43, 100-63, 100-74, 100-80, 100-42, 100-81, 100-93),
                    other = c(58.08, 48.52, 45.64, 52.29, 45.56, 35.53, 35.11, 49.73, 33.55, 20.48),
                    qrp = c("Not reporting nonsignificance", "Not reporting nonsignificance (covariate)",
                            "HARKing", "Not reporting alternative models", "Rounding p-value", 
                            "Excluding data", "Increasing sample", "Changing analysis",
                            "Not reporting problems", "Imputing data"))

descr <- descr %>% arrange(self) %>% 
  mutate(qrp = factor(qrp, qrp)) #factor is needed to preserve order

p <- ggplot(descr) +
  geom_segment(
    aes(x=qrp, xend=qrp, y=self, yend=other), 
    color= ifelse(descr$self < descr$other, "grey", "orange"), 
    size= ifelse(descr$self < descr$other, 1, 2)) +
  geom_point(
    aes(x=qrp, y=self),
    color="green", 
    size=4) +
  geom_point(
    aes(x=qrp, y=other),
    color="red", 
    size=4,
    shape=17) +
  theme_minimal() +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12)
    ) +
  ylim(0, 80) +
  xlab("") +
  ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")

p <- p + 
  annotate("text", x=grep("Changing analysis", descr$qrp), 20, 
           label="Underestimation of \n others' engagement", 
           color="orange", size=5 , angle=0, fontface="bold", hjust=0) +
  annotate("text", x=grep("Excluding data", descr$qrp), 50, 
           label="Overestimation of \n others' engagement", 
           color="gray", size=5 , angle=0, fontface="bold", hjust=0)

print(p)

1c. Opinion

What is your opinion of these practices?

qrpopi <- data.frame(var = dict %>% filter(scale == "Opinion QRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Opinion QRP") %>% pull(label))
likert <- data %>% select(all_of(qrpopi$var))
colnames(likert) <- qrpopi$labels
likert(likert)
##                                         Item It should never be used
## 1              Not reporting nonsignificance                      29
## 2  Not reporting nonsignificance (covariate)                      40
## 3                                    HARKing                      59
## 4           Not reporting alternative models                      30
## 5                           Rounding p-value                      62
## 6                             Excluding data                      64
## 7                          Increasing sample                      44
## 8                          Changing analysis                      32
## 9                     Not reporting problems                      83
## 10                             Imputing data                      86
##    It should only be used rarely It should be used often
## 1                             56                    14.7
## 2                             49                     9.7
## 3                             28                     9.9
## 4                             47                    21.1
## 5                             22                    13.8
## 6                             33                     3.5
## 7                             44                    10.7
## 8                             54                    14.7
## 9                             15                     1.8
## 10                            12                     1.1
##    It should be used almost always
## 1                             0.00
## 2                             1.44
## 3                             3.17
## 4                             2.15
## 5                             2.18
## 6                             0.35
## 7                             1.42
## 8                             0.00
## 9                             0.71
## 10                            0.00

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:10], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:10], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:10], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:10], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:10], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:10], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:10], grouping = likertl3$lead_3))

Group by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:10], grouping = likertce$culture))

2. Proposed Reforms to research practices

2a. Awareness

Have you ever heard of these practices?

prrpaware <- data.frame(var = dict %>% filter(scale == "Aware PRRP") %>% pull(variable),
                        labels = dict %>% filter(scale == "Aware PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpaware$var))
colnames(likert) <- prrpaware$labels
likert(likert)
plot(likert(likert[,1:3]))

##                  Item No Yes
## 1        Posting data 14  86
## 2 Posting instruments 16  84
## 3     Preregistration 23  77

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))

Grouped by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))

2b. Self

Have you engaged in these practices?

prrpself <- data.frame(var = dict %>% filter(scale == "Self PRRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Self PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpself$var))
colnames(likert) <- prrpself$labels
likert(likert)
plot(likert(likert[,1:3]))

##                  Item Never Once Occasionally Frequently Almost always
## 1        Posting data    63  9.8           20        4.2           2.4
## 2 Posting instruments    45  7.0           34        9.5           4.2
## 3     Preregistration    61 10.0           18        7.6           3.5

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))

Grouped by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))

2c. Other

Percentage of researchers in ethnic minority/cultural psychology who you believe have engaged in these practices?

prrppeer <- data.frame(var = dict %>% filter(scale == "Peer PRRP") %>% pull(variable),
                       labels = dict %>% filter(scale == "Peer PRRP") %>% pull(label))
descr <- data %>% select(all_of(prrppeer$var))
colnames(descr) <- prrppeer$labels
descr(descr, stats = "common", order = "p")

Descriptive Statistics
descr
N: 352

Posting data Posting instruments Preregistration
Mean 25.32 37.84 28.14
Std.Dev 18.29 23.01 21.85
Min 0.00 0.00 0.00
Median 20.00 30.00 25.00
Max 100.00 100.00 100.00
N.Valid 278.00 273.00 281.00
Pct.Valid 78.98 77.56 79.83
# plot
descr$id <- as.character(1:nrow(descr))
descr <- gather(descr, key = "prrp", value = "percent",
                "Posting data":"Preregistration")
p <- descr %>%
  ggplot( aes(x=prrp, y=percent, fill=prrp, color=prrp)) +
    geom_violin(width=1.25, size=0.2) +
    geom_boxplot(width = .15, fill = "white", color = "black") +
    stat_summary(fun.y=mean, geom="label", aes(label=round(..y.., 2), color = "black", size = 13, fill = "white")) +
    scale_fill_viridis(discrete=TRUE) +
    scale_color_viridis(discrete=TRUE) +
    theme_minimal() +
    theme(
      legend.position="none",
      axis.text.y = element_text(size = 12)
    ) +
    coord_flip() + #horizontal version
    xlab("") +
    ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")
print(p)

#lollipop plot comparing with self engagement
descr <- data.frame(self = c(100-52, 100-71, 100-73),
                    other = c(37.84, 28.14, 25.32),
                    prrp = c("Posting instruments", "Preregistration","Posting data"))

descr <- descr %>% arrange(self) %>% 
  mutate(prrp = factor(prrp, prrp)) #factor is needed to preserve order

p <- ggplot(descr) +
  geom_segment(
    aes(x=prrp, xend=prrp, y=self, yend=other), 
    color= ifelse(descr$self < descr$other, "grey", "orange"), 
    size= ifelse(descr$self < descr$other, 1, 2)) +
  geom_point(
    aes(x=prrp, y=self),
    color="green", 
    size=4) +
  geom_point(
    aes(x=prrp, y=other),
    color="red", 
    size=4,
    shape=17) +
  theme_minimal() +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12)
    ) +
  ylim(15, 60) +
  xlab("") +
  ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")

p <- p + 
  annotate("text", x=grep("Preregistration", descr$prrp), 48, 
           label="Underestimation of \n others' engagement", 
           color="orange", size=5 , angle=0, fontface="bold", hjust=0)
print(p)

2d. Opinion

What is your opinion of these practices?

prrpopi <- data.frame(var = dict %>% filter(scale == "Opinion PRRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Opinion PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpopi$var))
colnames(likert) <- prrpopi$labels
likert(likert)
plot(likert(likert))

##                  Item It should never be used It should only be used rarely
## 1        Posting data                     2.5                            27
## 2 Posting instruments                     1.1                            12
## 3     Preregistration                     2.8                            25
##   It should be used often It should be used almost always
## 1                      58                              13
## 2                      63                              24
## 3                      58                              15

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))

Group by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))

2e. Recent - Did you engage in these practices for your last empirical paper?

prrprecent <- data.frame(var = dict %>% filter(scale == "Recent PRRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Recent PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrprecent$var))
colnames(likert) <- prrprecent$labels
likert(likert)
             Item No Yes

1 Posting data 85 15 2 Posting instruments 73 27 3 Preregistration 83 17

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))

Group by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))

2f. Reasons

For not posting data

whydata <- grep("^prrp1_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp1_4 == "No") %>% select(all_of(whydata))
colnames(descr) <-  c("Proprietary",
                      "Time-consuming",
                      "Scooping",
                      "Deidentification",
                      "Field not in favor",
                      "Already public",
                      "Mistake",
                      "Unfamiliar",
                      "Planning to",
                      "Others")
descr(descr, stats = "common", order = "p") %>% kable()
Proprietary Time-consuming Scooping Deidentification Field not in favor Already public Mistake Unfamiliar Planning to Others
Mean 0.32 0.21 0.1 0.17 0.05 0.06 0.08 0.21 0.1 0.18
Std.Dev 0.47 0.41 0.3 0.37 0.23 0.24 0.26 0.41 0.3 0.39
Min 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.0 0.00
Median 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00 0.0 0.00
Max 1.00 1.00 1.0 1.00 1.00 1.00 1.00 1.00 1.0 1.00
N.Valid 238.00 238.00 238.0 238.00 238.00 238.00 238.00 238.00 238.0 238.00
Pct.Valid 100.00 100.00 100.0 100.00 100.00 100.00 100.00 100.00 100.0 100.00
# plot
descr <- data.frame(
  reasons = c("Proprietary", "Time-consuming", "Scooping", "Deidentification", "Field not in favor",
              "Already public", "Mistake", "Unfamiliar", "Planning to", "Others"),
  percent = c(32, 21, 10, 17, 5, 6, 8, 21, 10, 18)
)

descr %>%
  mutate(reasons = fct_reorder(reasons, percent)) %>%
  ggplot( aes(x=reasons, y=percent)) +
    geom_bar(stat="identity", fill="maroon", alpha=.6, width=.4) +
    coord_flip() +
    ylim(0,45) +
    xlab("") + 
    ggtitle("Reasons for not posting data") +
    theme_bw()

For not posting instruments

whyinstr <- grep("^prrp2_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp2_4 == "No") %>% select(all_of(whyinstr))
colnames(descr) <-  c("Proprietary",
                      "Time-consuming",
                      "Scooping",
                      "Deidentification",
                      "Field not in favor",
                      "Already public",
                      "Unfamiliar",
                      "Planning to",
                      "Others")
descr(descr, stats = "common", order = "p") %>% kable()
Proprietary Time-consuming Scooping Deidentification Field not in favor Already public Unfamiliar Planning to Others
Mean 0.32 0.17 0.05 0.04 0.08 0.37 0.2 0.07 0.16
Std.Dev 0.47 0.38 0.23 0.19 0.27 0.48 0.4 0.25 0.37
Min 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.00 0.00
Median 0.00 0.00 0.00 0.00 0.00 0.00 0.0 0.00 0.00
Max 1.00 1.00 1.00 1.00 1.00 1.00 1.0 1.00 1.00
N.Valid 206.00 206.00 206.00 206.00 206.00 206.00 206.0 206.00 206.00
Pct.Valid 100.00 100.00 100.00 100.00 100.00 100.00 100.0 100.00 100.00
# plot
descr <- data.frame(
  reasons = c("Proprietary", "Time-consuming", "Scooping", "Deidentification", "Field not in favor",
              "Already public", "Unfamiliar", "Planning to", "Others"),
  percent = c(32, 17, 5, 4, 8, 37, 20, 7, 16)
)

descr %>%
  mutate(reasons = fct_reorder(reasons, percent)) %>%
  ggplot( aes(x=reasons, y=percent)) +
    geom_bar(stat="identity", fill="red", alpha=.6, width=.4) +
    coord_flip() +
    ylim(0,45) +
    xlab("") + 
    ggtitle("Reasons for not posting instruments") +
    theme_bw()

For not preregistering

whyprereg <- grep("^prrp3_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp3_4 == "No") %>% select(all_of(whyprereg))
colnames(descr) <-  c("Time-consuming",
                      "Scooping",
                      "Field not in favor",
                      "Already public",
                      "Unfamiliar",
                      "Others")
descr(descr, stats = "common", order = "p") %>% kable()
Time-consuming Scooping Field not in favor Already public Unfamiliar Others
Mean 0.21 0.09 0.13 0.05 0.44 0.28
Std.Dev 0.41 0.29 0.34 0.21 0.50 0.45
Min 0.00 0.00 0.00 0.00 0.00 0.00
Median 0.00 0.00 0.00 0.00 0.00 0.00
Max 1.00 1.00 1.00 1.00 1.00 1.00
N.Valid 236.00 236.00 236.00 236.00 236.00 236.00
Pct.Valid 100.00 100.00 100.00 100.00 100.00 100.00
# plot
descr <- data.frame(
  reasons = c("Time-consuming", "Scooping","Field not in favor",
              "Already public", "Unfamiliar", "Others"),
  percent = c(21, 9, 13, 5, 44, 28)
)

descr %>%
  mutate(reasons = fct_reorder(reasons, percent)) %>%
  ggplot( aes(x=reasons, y=percent)) +
    geom_bar(stat="identity", fill="blue", alpha=.6, width=.4) +
    coord_flip() +
    ylim(0,45) +
    xlab("") + 
    ggtitle("Reasons for not preregistering") +
    theme_bw()