I. Meta-data

require(rio)
require(summarytools)
require(likert)
require(xtable)
require(knitr)
require(grid)
require(gridExtra)
require(sjPlot)
require(multcomp)
require(tidyverse)
require(viridis)
require(hrbrthemes)
require(forcats)
sessionInfo()

## R version 4.0.4 (2021-02-15)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] hrbrthemes_0.8.0   viridis_0.5.1      viridisLite_0.3.0  forcats_0.5.1     
##  [5] stringr_1.4.0      dplyr_1.0.4        purrr_0.3.4        readr_1.4.0       
##  [9] tidyr_1.1.2        tibble_3.0.6       tidyverse_1.3.0    multcomp_1.4-16   
## [13] TH.data_1.0-10     MASS_7.3-53        survival_3.2-7     mvtnorm_1.1-1     
## [17] sjPlot_2.8.7       gridExtra_2.3      knitr_1.31         likert_1.3.5      
## [21] xtable_1.8-4       ggplot2_3.3.3      summarytools_0.9.8 rio_0.5.16        
## 
## loaded via a namespace (and not attached):
##  [1] minqa_1.2.4        colorspace_2.0-0   pryr_0.1.4         ellipsis_0.3.1    
##  [5] sjlabelled_1.1.7   estimability_1.3   parameters_0.12.0  base64enc_0.1-3   
##  [9] fs_1.5.0           rstudioapi_0.13    lubridate_1.7.9.2  xml2_1.3.2        
## [13] codetools_0.2-18   splines_4.0.4      extrafont_0.17     mnormt_2.0.2      
## [17] sjmisc_2.8.6       jsonlite_1.7.2     nloptr_1.2.2.2     ggeffects_1.0.1   
## [21] Rttf2pt1_1.3.8     broom_0.7.5        dbplyr_2.1.0       effectsize_0.4.3  
## [25] compiler_4.0.4     httr_1.4.2         sjstats_0.18.1     emmeans_1.5.4     
## [29] backports_1.2.1    assertthat_0.2.1   Matrix_1.3-2       cli_2.3.0         
## [33] htmltools_0.5.1.1  tools_4.0.4        coda_0.19-4        gtable_0.3.0      
## [37] glue_1.4.2         reshape2_1.4.4     Rcpp_1.0.6         cellranger_1.1.0  
## [41] jquerylib_0.1.3    vctrs_0.3.6        nlme_3.1-152       extrafontdb_1.0   
## [45] psych_2.0.12       insight_0.13.0     xfun_0.21          openxlsx_4.2.3    
## [49] lme4_1.1-26        rvest_0.3.6        lifecycle_1.0.0    statmod_1.4.35    
## [53] zoo_1.8-8          scales_1.1.1       hms_1.0.0          parallel_4.0.4    
## [57] sandwich_3.0-0     yaml_2.2.1         curl_4.3           pander_0.6.3      
## [61] gdtools_0.2.3      sass_0.3.1         stringi_1.5.3      bayestestR_0.8.2  
## [65] checkmate_2.0.0    boot_1.3-27        zip_2.1.1          systemfonts_1.0.1 
## [69] rlang_0.4.10       pkgconfig_2.0.3    matrixStats_0.58.0 evaluate_0.14     
## [73] lattice_0.20-41    rapportools_1.0    tidyselect_1.1.0   plyr_1.8.6        
## [77] magrittr_2.0.1     R6_2.5.0           magick_2.6.0       generics_0.1.0    
## [81] DBI_1.1.1          pillar_1.4.7       haven_2.3.1        foreign_0.8-81    
## [85] withr_2.4.1        performance_0.7.0  modelr_0.1.8       crayon_1.4.1      
## [89] tmvnsim_1.0-2      rmarkdown_2.7      readxl_1.3.1       data.table_1.14.0 
## [93] reprex_1.0.0       digest_0.6.27      munsell_0.5.0      bslib_0.2.4       
## [97] tcltk_4.0.4

II. Preprocessing

Data

data <- rio::import(file = "../Data/cleaned.csv")
dict <- rio::import(file = "../Data/Dictionary.xlsx")
data$id <- as.character(data$id)

#data type demographics
data <- data %>% 
  mutate(age = factor(age, levels = c(1:7),
                         labels = c("18-22", "23-29", "30-39", "40-49", "50-59", "60-69", "70+")),
         continent = factor(continent, levels = c(1:6),
                            labels = c("Africa", "Asia", "Autralia/Oceania", "Europe", "North America", "South America")),
         group = factor(group, levels = c(1,2), labels = c("Flat rate", "Raffle")),
         gender_f = factor(gender_f, levels = c(1,2), labels = c("Female", "Male")),
         race_f = factor(race_f, levels = c(1:6),
                         labels = c("Asian", "Black", "Hispanic", "Native American", "Non-Hispanic White", "Mixed")),
         career = factor(career, levels = c(1:7),
                         labels = c("Undergraduate student", "Graduate student", "Post-doctoral fellow", 
                                    "Assistant professor/lecturer", "Associate/full professor", "Research scientist", "Other")))

#data type likert 
likert1 <- dict %>% 
  filter(value_label == "1 = It should never be used, 2 = It should only be used rarely, 3 = It should be used often, 4 = It should be used almost always") %>%
  pull(variable)
factor1 <- function(x){
  factor(x, levels = c(1:4), 
         labels = c("It should never be used", "It should only be used rarely", 
                    "It should be used often", "It should be used almost always"))
}

likert2 <- dict %>% 
  filter(value_label == "1 = Never, 2 = Once or Twice, 3 = Often") %>%
  pull(variable)
factor2 <- function(x){
  factor(x, levels = c(1:3), 
         labels = c("Never", "Once or Twice", "Often"))
}

likert3 <- dict %>% 
  filter(value_label == "1 = Never, 2 = Once, 3 = Occasionally, 4 = Frequently, 5 = Almost always") %>%
  pull(variable)
factor3 <- function(x){
  factor(x, levels = c(1:5), 
         labels = c("Never", "Once", "Occasionally", "Frequently", "Almost always"))
}

likert4 <- dict %>% 
  filter(value_label == "0 = No, 1 = Yes, 2 = I have not published an empirical paper") %>%
  pull(variable)
factor4 <- function(x){
  factor(x, levels = c(0:1), 
         labels = c("No", "Yes"))
}

likert5 <- dict %>% 
  filter(value_label == "0 = No, 1 = Yes") %>%
  pull(variable)
factor5 <- function(x){
  factor(x, levels = c(0:1), 
         labels = c("No", "Yes"))
}

data <- data %>% 
  mutate_at(likert1, factor1) %>% 
  mutate_at(likert2, factor2) %>% 
  mutate_at(likert3, factor3) %>% 
  mutate_at(likert4, factor4) %>% 
  mutate_at(likert5, factor5)

rm(likert1, likert2, likert3, likert4, likert5, 
   factor1, factor2, factor3, factor4, factor5)

#list of ID with complete responses
full <- data %>% 
  filter(Finished == 1) %>% 
  pull(id)

#list of ID with correct attention checks
att <- data %>% 
  filter(attcheck == 1 & experience_19 == 1) %>% 
  pull(id)

#list of ID with complete AND correct attention checks
fullatt <- data %>% 
  filter(id %in% att & id %in% full) %>% 
  pull(id)

There are 352 participants in total, 265 of whom have completed responses and 159 with completed responses who also passed the two attention checks.

III. Demographics

freq(data$age)

Frequencies
data$age
Type: Factor

	Freq	% Valid	% Valid Cum.	% Total	% Total Cum.
18-22	0	0.00	0.00	0.00	0.00
23-29	24	9.16	9.16	6.82	6.82
30-39	107	40.84	50.00	30.40	37.22
40-49	70	26.72	76.72	19.89	57.10
50-59	37	14.12	90.84	10.51	67.61
60-69	19	7.25	98.09	5.40	73.01
70+	5	1.91	100.00	1.42	74.43
	90			25.57	100.00
Total	352	100.00	100.00	100.00	100.00

freq(data$gender_f)

Frequencies
data$gender_f
Type: Factor

	Freq	% Valid	% Valid Cum.	% Total	% Total Cum.
Female	173	68.65	68.65	49.15	49.15
Male	79	31.35	100.00	22.44	71.59
	100			28.41	100.00
Total	352	100.00	100.00	100.00	100.00

freq(data$race_f)

Frequencies
data$race_f
Type: Factor

	Freq	% Valid	% Valid Cum.	% Total	% Total Cum.
Asian	48	19.28	19.28	13.64	13.64
Black	34	13.65	32.93	9.66	23.30
Hispanic	37	14.86	47.79	10.51	33.81
Native American	2	0.80	48.59	0.57	34.38
Non-Hispanic White	110	44.18	92.77	31.25	65.62
Mixed	18	7.23	100.00	5.11	70.74
	103			29.26	100.00
Total	352	100.00	100.00	100.00	100.00

freq(data$continent)

Frequencies
data$continent
Type: Factor

	Freq	% Valid	% Valid Cum.	% Total	% Total Cum.
Africa	1	0.38	0.38	0.28	0.28
Asia	4	1.53	1.91	1.14	1.42
Autralia/Oceania	4	1.53	3.44	1.14	2.56
Europe	7	2.67	6.11	1.99	4.55
North America	245	93.51	99.62	69.60	74.15
South America	1	0.38	100.00	0.28	74.43
	90			25.57	100.00
Total	352	100.00	100.00	100.00	100.00

freq(data$career)

Frequencies
data$career
Type: Factor

	Freq	% Valid	% Valid Cum.	% Total	% Total Cum.
Undergraduate student	0	0.00	0.00	0.00	0.00
Graduate student	26	9.96	9.96	7.39	7.39
Post-doctoral fellow	20	7.66	17.62	5.68	13.07
Assistant professor/lecturer	76	29.12	46.74	21.59	34.66
Associate/full professor	120	45.98	92.72	34.09	68.75
Research scientist	7	2.68	95.40	1.99	70.74
Other	12	4.60	100.00	3.41	74.15
	91			25.85	100.00
Total	352	100.00	100.00	100.00	100.00

freq(data$culture)

Frequencies
data$culture
Type: Factor

	Freq	% Valid	% Valid Cum.	% Total	% Total Cum.
No	34	12.93	12.93	9.66	9.66
Yes	229	87.07	100.00	65.06	74.72
	89			25.28	100.00
Total	352	100.00	100.00	100.00	100.00

descr <- data %>% select(politics, c19stress, agree, consci, extra, neuro, open)
descr(descr, stats = "common", order = "p")

Descriptive Statistics
descr
N: 352

	politics	c19stress	agree	consci	extra	neuro	open
Mean	2.16	6.14	3.88	4.19	3.07	2.39	3.80
Std.Dev	1.05	2.16	0.79	0.82	1.09	0.85	0.74
Min	1.00	1.00	1.50	1.50	1.00	1.00	1.00
Median	2.00	6.00	4.00	4.50	3.00	2.50	4.00
Max	7.00	10.00	5.00	5.00	5.00	4.50	5.00
N.Valid	260.00	263.00	262.00	263.00	263.00	263.00	263.00
Pct.Valid	73.86	74.72	74.43	74.72	74.72	74.72	74.72

rm(descr)

IV. Exploratory analyses

1. Questionable research practices

1a. Self

Have you engaged in these practices?

qrpself <- data.frame(var = dict %>% filter(scale == "Self QRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Self QRP") %>% pull(label))
likert <- data %>% select(all_of(qrpself$var))
colnames(likert) <- qrpself$labels
likert(likert)
plot(likert(likert[,1:10]))

##                                         Item Never Once Occasionally Frequently
## 1              Not reporting nonsignificance    33  8.3         46.4       10.8
## 2  Not reporting nonsignificance (covariate)    56  6.4         32.7        4.6
## 3                                    HARKing    57 11.2         25.9        5.6
## 4           Not reporting alternative models    43  7.5         38.1        9.3
## 5                           Rounding p-value    63  7.8         19.5        6.0
## 6                             Excluding data    74  6.6         17.8        1.0
## 7                          Increasing sample    80  8.0         10.1        2.1
## 8                          Changing analysis    42 10.7         41.3        6.0
## 9                     Not reporting problems    81  9.5          8.5        0.0
## 10                             Imputing data    93  3.2          3.5        0.0
##    Almost always
## 1           1.44
## 2           0.71
## 3           0.70
## 4           2.49
## 5           3.19
## 6           0.00
## 7           0.00
## 8           0.00
## 9           0.70
## 10          0.00

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)

plot(likert(likertf[,1:10], grouping = likertf$gender_f))

data <- data %>% 
  mutate(gender = ifelse(gender_f == "Female", 0, ifelse(gender_f == "Male", 1, NA)))
tab_model(lm(selfQRP ~ gender, data))

	selfQRP
Predictors	Estimates	CI	p
(Intercept)	1.78	1.71 – 1.85	<0.001
gender	-0.06	-0.19 – 0.06	0.324
Observations	242
R² / R² adjusted	0.004 / -0.000

Overall, there does not seem to be a strong gender difference in engagement in questionable research practices. From the graphs of individual practices, we can see that female researchers reported more rounding of p-values whereas male researchers more often did not report problems of the studies.

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:10], grouping = likerta$age))

tab_model(lm(selfQRP ~ as.numeric(age), data))

	selfQRP
Predictors	Estimates	CI	p
(Intercept)	1.72	1.52 – 1.92	<0.001
age	0.01	-0.04 – 0.06	0.684
Observations	250
R² / R² adjusted	0.001 / -0.003

Overall, there is no age difference in engagement in questionable research practices. Although it seems that the 70+ group has slightly different patterns compared to the rest, there are only 5 individuals in this age group.

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:10], grouping = likertc$career))

data$careernum <- as.factor(as.numeric(data$career))
plot(glht(aov(selfQRP ~ careernum, data), linfct = mcp(careernum = "Tukey"), 
          alternative = "two.sided"),
     main = "Career stage")

Career stages:

2: Graduate student (N = 26)
3: Post-doctoral fellow (N = 20)
4: Assistant professor/lecturer (N = 76)
5: Associate/full professor (N = 120)
6: Research scientist (N = 7)
7: Other (N = 12)

Overall, there doesn’t seem to be strong differences between different career stages in engagement in questionable research practices. Although it appears that research scientists and “other” career groups show a different pattern, it’s important to note that there were only 19 participants (7.3%) that identified with these two groups. Graduate students reported less QRP engagement than assistant professors/lecturers and associate/full professors. Nonetheless, there were only 26 graduate students and many more professors/lecturers (196)

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:10], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:10], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:10], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:10], grouping = likertl3$lead_3))

Leadership positions:

Some leadership positions (N = 147) vs. No leadership position (N = 205)
Leader at own institution (N = 75) vs. Not leader at own institution (N = 277)
Leader at journal editorialship (N = 50) vs. Not leader at journal editorialship (N = 302)
Leader at professional societies (N = 91) vs. Not leader at professional societies (N = 261)

Overall there is no clear difference between researchers with and without recent leadership positions in their QRP engagement.

Grouped by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:10], grouping = likertce$culture))

1b. Other

Percentage of researchers in ethnic minority/cultural psychology who you believe have engaged in these practices?

qrppeer <- data.frame(var = dict %>% filter(scale == "Peer QRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Peer QRP") %>% pull(label))
descr <- data %>% select(all_of(qrppeer$var))
colnames(descr) <- qrppeer$labels
descr(descr, stats = "common", order = "p")

Descriptive Statistics
descr
N: 352

Table continues below
	Not reporting nonsignificance	Not reporting nonsignificance (covariate)
Mean	58.08	48.52
Std.Dev	28.56	27.43
Min	0.00	0.00
Median	64.00	50.00
Max	100.00	100.00
N.Valid	274.00	275.00
Pct.Valid	77.84	78.12

Table continues below
	HARKing	Not reporting alternative models	Rounding p-value	Excluding data
Mean	45.64	52.29	45.56	35.53
Std.Dev	28.17	28.51	30.25	25.35
Min	0.00	0.00	0.00	0.00
Median	50.00	54.00	41.50	30.00
Max	100.00	100.00	100.00	100.00
N.Valid	279.00	275.00	270.00	280.00
Pct.Valid	79.26	78.12	76.70	79.55

	Increasing sample	Changing analysis	Not reporting problems	Imputing data
Mean	35.11	49.73	33.55	20.48
Std.Dev	25.36	27.92	24.85	20.49
Min	0.00	0.00	0.00	0.00
Median	30.00	51.00	29.00	11.00
Max	100.00	100.00	100.00	100.00
N.Valid	278.00	278.00	275.00	280.00
Pct.Valid	78.98	78.98	78.12	79.55

# plot
descr$id <- as.character(1:nrow(descr))
descr <- gather(descr, key = "qrp", value = "percent",
                "Not reporting nonsignificance":"Imputing data")
p <- descr %>%
  ggplot( aes(x=qrp, y=percent, fill=qrp, color=qrp)) +
    geom_violin(width=1.25, size=0.2) +
    geom_boxplot(width = .15, fill = "white", color = "black") +
    stat_summary(fun.y=mean, geom="label", aes(label=round(..y.., 2), color = "black", size = 13, fill = "white")) +
    scale_fill_viridis(discrete=TRUE) +
    scale_color_viridis(discrete=TRUE) +
    theme_minimal() +
    theme(
      legend.position="none",
      axis.text.y = element_text(size = 12)
    ) +
    coord_flip() + #horizontal version
    xlab("") +
    ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")
print(p)

#lollipop plot comparing with self engagement
descr <- data.frame(self = c(100-33, 100-56, 100-57, 100-43, 100-63, 100-74, 100-80, 100-42, 100-81, 100-93),
                    other = c(58.08, 48.52, 45.64, 52.29, 45.56, 35.53, 35.11, 49.73, 33.55, 20.48),
                    qrp = c("Not reporting nonsignificance", "Not reporting nonsignificance (covariate)",
                            "HARKing", "Not reporting alternative models", "Rounding p-value", 
                            "Excluding data", "Increasing sample", "Changing analysis",
                            "Not reporting problems", "Imputing data"))

descr <- descr %>% arrange(self) %>% 
  mutate(qrp = factor(qrp, qrp)) #factor is needed to preserve order

p <- ggplot(descr) +
  geom_segment(
    aes(x=qrp, xend=qrp, y=self, yend=other), 
    color= ifelse(descr$self < descr$other, "grey", "orange"), 
    size= ifelse(descr$self < descr$other, 1, 2)) +
  geom_point(
    aes(x=qrp, y=self),
    color="green", 
    size=4) +
  geom_point(
    aes(x=qrp, y=other),
    color="red", 
    size=4,
    shape=17) +
  theme_minimal() +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12)
    ) +
  ylim(0, 80) +
  xlab("") +
  ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")

p <- p + 
  annotate("text", x=grep("Changing analysis", descr$qrp), 20, 
           label="Underestimation of \n others' engagement", 
           color="orange", size=5 , angle=0, fontface="bold", hjust=0) +
  annotate("text", x=grep("Excluding data", descr$qrp), 50, 
           label="Overestimation of \n others' engagement", 
           color="gray", size=5 , angle=0, fontface="bold", hjust=0)

print(p)

1c. Opinion

What is your opinion of these practices?

qrpopi <- data.frame(var = dict %>% filter(scale == "Opinion QRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Opinion QRP") %>% pull(label))
likert <- data %>% select(all_of(qrpopi$var))
colnames(likert) <- qrpopi$labels
likert(likert)

##                                         Item It should never be used
## 1              Not reporting nonsignificance                      29
## 2  Not reporting nonsignificance (covariate)                      40
## 3                                    HARKing                      59
## 4           Not reporting alternative models                      30
## 5                           Rounding p-value                      62
## 6                             Excluding data                      64
## 7                          Increasing sample                      44
## 8                          Changing analysis                      32
## 9                     Not reporting problems                      83
## 10                             Imputing data                      86
##    It should only be used rarely It should be used often
## 1                             56                    14.7
## 2                             49                     9.7
## 3                             28                     9.9
## 4                             47                    21.1
## 5                             22                    13.8
## 6                             33                     3.5
## 7                             44                    10.7
## 8                             54                    14.7
## 9                             15                     1.8
## 10                            12                     1.1
##    It should be used almost always
## 1                             0.00
## 2                             1.44
## 3                             3.17
## 4                             2.15
## 5                             2.18
## 6                             0.35
## 7                             1.42
## 8                             0.00
## 9                             0.71
## 10                            0.00

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:10], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:10], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:10], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:10], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:10], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:10], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:10], grouping = likertl3$lead_3))

Group by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:10], grouping = likertce$culture))

2. Proposed Reforms to research practices

2a. Awareness

Have you ever heard of these practices?

prrpaware <- data.frame(var = dict %>% filter(scale == "Aware PRRP") %>% pull(variable),
                        labels = dict %>% filter(scale == "Aware PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpaware$var))
colnames(likert) <- prrpaware$labels
likert(likert)
plot(likert(likert[,1:3]))

##                  Item No Yes
## 1        Posting data 14  86
## 2 Posting instruments 16  84
## 3     Preregistration 23  77

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))

Grouped by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))

2b. Self

Have you engaged in these practices?

prrpself <- data.frame(var = dict %>% filter(scale == "Self PRRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Self PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpself$var))
colnames(likert) <- prrpself$labels
likert(likert)
plot(likert(likert[,1:3]))

##                  Item Never Once Occasionally Frequently Almost always
## 1        Posting data    63  9.8           20        4.2           2.4
## 2 Posting instruments    45  7.0           34        9.5           4.2
## 3     Preregistration    61 10.0           18        7.6           3.5

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))

Grouped by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))

2c. Other

Percentage of researchers in ethnic minority/cultural psychology who you believe have engaged in these practices?

prrppeer <- data.frame(var = dict %>% filter(scale == "Peer PRRP") %>% pull(variable),
                       labels = dict %>% filter(scale == "Peer PRRP") %>% pull(label))
descr <- data %>% select(all_of(prrppeer$var))
colnames(descr) <- prrppeer$labels
descr(descr, stats = "common", order = "p")

Descriptive Statistics
descr
N: 352

	Posting data	Posting instruments	Preregistration
Mean	25.32	37.84	28.14
Std.Dev	18.29	23.01	21.85
Min	0.00	0.00	0.00
Median	20.00	30.00	25.00
Max	100.00	100.00	100.00
N.Valid	278.00	273.00	281.00
Pct.Valid	78.98	77.56	79.83

# plot
descr$id <- as.character(1:nrow(descr))
descr <- gather(descr, key = "prrp", value = "percent",
                "Posting data":"Preregistration")
p <- descr %>%
  ggplot( aes(x=prrp, y=percent, fill=prrp, color=prrp)) +
    geom_violin(width=1.25, size=0.2) +
    geom_boxplot(width = .15, fill = "white", color = "black") +
    stat_summary(fun.y=mean, geom="label", aes(label=round(..y.., 2), color = "black", size = 13, fill = "white")) +
    scale_fill_viridis(discrete=TRUE) +
    scale_color_viridis(discrete=TRUE) +
    theme_minimal() +
    theme(
      legend.position="none",
      axis.text.y = element_text(size = 12)
    ) +
    coord_flip() + #horizontal version
    xlab("") +
    ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")
print(p)

#lollipop plot comparing with self engagement
descr <- data.frame(self = c(100-52, 100-71, 100-73),
                    other = c(37.84, 28.14, 25.32),
                    prrp = c("Posting instruments", "Preregistration","Posting data"))

descr <- descr %>% arrange(self) %>% 
  mutate(prrp = factor(prrp, prrp)) #factor is needed to preserve order

p <- ggplot(descr) +
  geom_segment(
    aes(x=prrp, xend=prrp, y=self, yend=other), 
    color= ifelse(descr$self < descr$other, "grey", "orange"), 
    size= ifelse(descr$self < descr$other, 1, 2)) +
  geom_point(
    aes(x=prrp, y=self),
    color="green", 
    size=4) +
  geom_point(
    aes(x=prrp, y=other),
    color="red", 
    size=4,
    shape=17) +
  theme_minimal() +
  coord_flip() +
  theme(
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12)
    ) +
  ylim(15, 60) +
  xlab("") +
  ylab("Percentage of researchers in ethnic minority/cultural psychology you believe have engaged in this")

p <- p + 
  annotate("text", x=grep("Preregistration", descr$prrp), 48, 
           label="Underestimation of \n others' engagement", 
           color="orange", size=5 , angle=0, fontface="bold", hjust=0)
print(p)

2d. Opinion

What is your opinion of these practices?

prrpopi <- data.frame(var = dict %>% filter(scale == "Opinion PRRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Opinion PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrpopi$var))
colnames(likert) <- prrpopi$labels
likert(likert)
plot(likert(likert))

##                  Item It should never be used It should only be used rarely
## 1        Posting data                     2.5                            27
## 2 Posting instruments                     1.1                            12
## 3     Preregistration                     2.8                            25
##   It should be used often It should be used almost always
## 1                      58                              13
## 2                      63                              24
## 3                      58                              15

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))

Group by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))

2e. Recent - Did you engage in these practices for your last empirical paper?

prrprecent <- data.frame(var = dict %>% filter(scale == "Recent PRRP") %>% pull(variable),
                      labels = dict %>% filter(scale == "Recent PRRP") %>% pull(label))
likert <- data %>% select(all_of(prrprecent$var))
colnames(likert) <- prrprecent$labels
likert(likert)

             Item No Yes

1 Posting data 85 15 2 Posting instruments 73 27 3 Preregistration 83 17

Grouped by gender

likert$gender_f <- data$gender_f
likertf <- likert %>% filter(is.na(gender_f) == F)
plot(likert(likertf[,1:3], grouping = likertf$gender_f))

Grouped by age

likert$age <- data$age
likerta <- likert %>% filter(is.na(age) == F)
plot(likert(likerta[,1:3], grouping = likerta$age))

Grouped by career stage

likert$career <- data$career
likertc <- likert %>% filter(is.na(career) == F)
plot(likert(likertc[,1:3], grouping = likertc$career))

Grouped by leadership position

likert <- likert %>% 
  mutate(lead_1 = data$lead_1,
         lead_2 = data$lead_2,
         lead_3 = data$lead_3)

likertl4 <- likert %>% filter(is.na(lead_1) == F &
                                is.na(lead_2) == F &
                                is.na(lead_3) == F)

likertl4 <- likertl4 %>%
  mutate(lead_any = ifelse(lead_1 == 0 & lead_2 == 0 & lead_3 == 0,
                           0, 1))

likertl4$lead_any <- factor(likertl4$lead_any, levels = c(0,1), labels = c("No leadership position",
                                                                           "Some leadership positions"))
plot(likert(likertl4[,1:3], grouping = likertl4$lead_any))

likert$lead_1 <- factor(data$lead_1, levels = c(0,1), labels = c("Not leader at own institution", "Leader at own institution"))
likertl1 <- likert %>% filter(is.na(lead_1) == F)
plot(likert(likertl1[,1:3], grouping = likertl1$lead_1))

likert$lead_2 <- factor(data$lead_2, levels = c(0,1), labels = c("Not leader at journal editorialship", "Leader at journal editorialship"))
likertl2 <- likert %>% filter(is.na(lead_2) == F)
plot(likert(likertl2[,1:3], grouping = likertl2$lead_2))

likert$lead_3 <- factor(data$lead_3, levels = c(0,1), labels = c("Not leader at professional societies", "Leader at professional societies"))
likertl3 <- likert %>% filter(is.na(lead_3) == F)
plot(likert(likertl3[,1:3], grouping = likertl3$lead_3))

Group by identification with subfield of cultural/ethnic minority psychology

likert$culture <- data$culture
likertce <- likert %>% filter(is.na(culture) == F)
plot(likert(likertce[,1:3], grouping = likertce$culture))

2f. Reasons

For not posting data

whydata <- grep("^prrp1_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp1_4 == "No") %>% select(all_of(whydata))
colnames(descr) <-  c("Proprietary",
                      "Time-consuming",
                      "Scooping",
                      "Deidentification",
                      "Field not in favor",
                      "Already public",
                      "Mistake",
                      "Unfamiliar",
                      "Planning to",
                      "Others")
descr(descr, stats = "common", order = "p") %>% kable()

	Proprietary	Time-consuming	Scooping	Deidentification	Field not in favor	Already public	Mistake	Unfamiliar	Planning to	Others
Mean	0.32	0.21	0.1	0.17	0.05	0.06	0.08	0.21	0.1	0.18
Std.Dev	0.47	0.41	0.3	0.37	0.23	0.24	0.26	0.41	0.3	0.39
Min	0.00	0.00	0.0	0.00	0.00	0.00	0.00	0.00	0.0	0.00
Median	0.00	0.00	0.0	0.00	0.00	0.00	0.00	0.00	0.0	0.00
Max	1.00	1.00	1.0	1.00	1.00	1.00	1.00	1.00	1.0	1.00
N.Valid	238.00	238.00	238.0	238.00	238.00	238.00	238.00	238.00	238.0	238.00
Pct.Valid	100.00	100.00	100.0	100.00	100.00	100.00	100.00	100.00	100.0	100.00

# plot
descr <- data.frame(
  reasons = c("Proprietary", "Time-consuming", "Scooping", "Deidentification", "Field not in favor",
              "Already public", "Mistake", "Unfamiliar", "Planning to", "Others"),
  percent = c(32, 21, 10, 17, 5, 6, 8, 21, 10, 18)
)

descr %>%
  mutate(reasons = fct_reorder(reasons, percent)) %>%
  ggplot( aes(x=reasons, y=percent)) +
    geom_bar(stat="identity", fill="maroon", alpha=.6, width=.4) +
    coord_flip() +
    ylim(0,45) +
    xlab("") + 
    ggtitle("Reasons for not posting data") +
    theme_bw()

For not posting instruments

whyinstr <- grep("^prrp2_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp2_4 == "No") %>% select(all_of(whyinstr))
colnames(descr) <-  c("Proprietary",
                      "Time-consuming",
                      "Scooping",
                      "Deidentification",
                      "Field not in favor",
                      "Already public",
                      "Unfamiliar",
                      "Planning to",
                      "Others")
descr(descr, stats = "common", order = "p") %>% kable()

	Proprietary	Time-consuming	Scooping	Deidentification	Field not in favor	Already public	Unfamiliar	Planning to	Others
Mean	0.32	0.17	0.05	0.04	0.08	0.37	0.2	0.07	0.16
Std.Dev	0.47	0.38	0.23	0.19	0.27	0.48	0.4	0.25	0.37
Min	0.00	0.00	0.00	0.00	0.00	0.00	0.0	0.00	0.00
Median	0.00	0.00	0.00	0.00	0.00	0.00	0.0	0.00	0.00
Max	1.00	1.00	1.00	1.00	1.00	1.00	1.0	1.00	1.00
N.Valid	206.00	206.00	206.00	206.00	206.00	206.00	206.0	206.00	206.00
Pct.Valid	100.00	100.00	100.00	100.00	100.00	100.00	100.0	100.00	100.00

# plot
descr <- data.frame(
  reasons = c("Proprietary", "Time-consuming", "Scooping", "Deidentification", "Field not in favor",
              "Already public", "Unfamiliar", "Planning to", "Others"),
  percent = c(32, 17, 5, 4, 8, 37, 20, 7, 16)
)

descr %>%
  mutate(reasons = fct_reorder(reasons, percent)) %>%
  ggplot( aes(x=reasons, y=percent)) +
    geom_bar(stat="identity", fill="red", alpha=.6, width=.4) +
    coord_flip() +
    ylim(0,45) +
    xlab("") + 
    ggtitle("Reasons for not posting instruments") +
    theme_bw()

For not preregistering

whyprereg <- grep("^prrp3_5_", names(data), value = T)[-1]
descr <- data %>% filter (prrp3_4 == "No") %>% select(all_of(whyprereg))
colnames(descr) <-  c("Time-consuming",
                      "Scooping",
                      "Field not in favor",
                      "Already public",
                      "Unfamiliar",
                      "Others")
descr(descr, stats = "common", order = "p") %>% kable()

	Time-consuming	Scooping	Field not in favor	Already public	Unfamiliar	Others
Mean	0.21	0.09	0.13	0.05	0.44	0.28
Std.Dev	0.41	0.29	0.34	0.21	0.50	0.45
Min	0.00	0.00	0.00	0.00	0.00	0.00
Median	0.00	0.00	0.00	0.00	0.00	0.00
Max	1.00	1.00	1.00	1.00	1.00	1.00
N.Valid	236.00	236.00	236.00	236.00	236.00	236.00
Pct.Valid	100.00	100.00	100.00	100.00	100.00	100.00

# plot
descr <- data.frame(
  reasons = c("Time-consuming", "Scooping","Field not in favor",
              "Already public", "Unfamiliar", "Others"),
  percent = c(21, 9, 13, 5, 44, 28)
)

descr %>%
  mutate(reasons = fct_reorder(reasons, percent)) %>%
  ggplot( aes(x=reasons, y=percent)) +
    geom_bar(stat="identity", fill="blue", alpha=.6, width=.4) +
    coord_flip() +
    ylim(0,45) +
    xlab("") + 
    ggtitle("Reasons for not preregistering") +
    theme_bw()

OS Survey - Quant Results

Linh Nguyen

2021-06-21

I. Meta-data

II. Preprocessing

Data

III. Demographics

IV. Exploratory analyses

1. Questionable research practices

1a. Self

Grouped by gender

Grouped by age

Grouped by career stage

Grouped by leadership position

Grouped by identification with subfield of cultural/ethnic minority psychology

1b. Other

1c. Opinion

Grouped by gender

Grouped by age

Grouped by career stage

Grouped by leadership position

Group by identification with subfield of cultural/ethnic minority psychology

2. Proposed Reforms to research practices

2a. Awareness

Grouped by gender

Grouped by age

Grouped by career stage

Grouped by leadership position

Grouped by identification with subfield of cultural/ethnic minority psychology

2b. Self

Grouped by gender

Grouped by age

Grouped by career stage

Grouped by leadership position

Grouped by identification with subfield of cultural/ethnic minority psychology

2c. Other

2d. Opinion

Grouped by gender

Grouped by age

Grouped by career stage

Grouped by leadership position

Group by identification with subfield of cultural/ethnic minority psychology

2e. Recent - Did you engage in these practices for your last empirical paper?

Grouped by gender

Grouped by age

Grouped by career stage

Grouped by leadership position

Group by identification with subfield of cultural/ethnic minority psychology

2f. Reasons

For not posting data

For not posting instruments

For not preregistering