# Install the packages this analysis relies on, but only those that are not
# already available. Skipping installed packages avoids re-downloading
# everything on each run, and the CRAN mirror is addressed over HTTPS.
required_pkgs <- c("tidyverse", "MASS", "car", "ggplot2")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(
    required_pkgs[!is_installed],
    repos = c(CRAN = "https://cran.rstudio.com/")
  )
}

## Installing packages into '/Users/perso204/Library/R/x86_64/4.4/library'
## (as 'lib' is unspecified)

## 
## The downloaded binary packages are in
##  /var/folders/96/xz1fg2r56mb2kymyn8r37rqc0000gq/T//Rtmp7Iu7iv/downloaded_packages

# Read the coded study data from the CSV file into `DAV`
library(readr)
DAV <- read_csv(file = "DAV_data.csv")

## Rows: 386 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): Author, Journal, Study_Number, Sample_Region, Study_Method, Demogr...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Under the coding scheme a blank cell means "No", so missing values in each
# of these reporting columns are filled in with "No".
blank_means_no <- c(
  "Construct_def", "name_instrument", "Response_options",
  "Quant_reported", "Mod_reported", "Justification"
)
DAV[blank_means_no] <- lapply(
  DAV[blank_means_no],
  function(column) replace(column, is.na(column), "No")
)

Journal

table(DAV[["Journal"]]) # frequencies

## 
## JPSP PSPB SPPS 
##   31   55   42

proportions(table(DAV[["Journal"]])) # proportions

## 
##      JPSP      PSPB      SPPS 
## 0.2421875 0.4296875 0.3281250

Study Number

table(DAV[["Study_Number"]]) # frequencies

## 
##  Only study     Study 1    Study 1a Study 1a/1b     Study 2 
##          22          89          10           1           6

proportions(table(DAV[["Study_Number"]])) # proportions

## 
##  Only study     Study 1    Study 1a Study 1a/1b     Study 2 
##   0.1718750   0.6953125   0.0781250   0.0078125   0.0468750

Sample Region

table(DAV[["Sample_Region"]]) # frequencies

## 
##                                     Australia 
##                                             5 
##                                 British Isles 
##                                             1 
##                                         China 
##                                             6 
##                                 China/Credemo 
##                                             1 
##                                clickworker.de 
##                                             1 
##                                 CloudResearch 
##                                             1 
##                                       Denmark 
##                                             1 
##                        England/United Kingdom 
##                                             2 
##                                Estonia, World 
##                                             1 
##                                        France 
##                                             1 
##                             Gallup World Poll 
##                                             1 
##                                       Germany 
##                                             8 
##                            Germany and Israel 
##                                             1 
##                                        Israel 
##                                             1 
##                                         Japan 
##                                             1 
##                                       Lebanon 
##                                             1 
##                           Mixed/international 
##                                             4 
##                Mixed/international; China/USA 
##                                             2 
##                                         mTurk 
##                                             5 
##                                         MTurk 
##                                             2 
##                                    mTurk; USA 
##                                             2 
##                           mTurk/CloudResearch 
##                                             1 
##                                   Netherlands 
##                                             5 
##                                   New Zealand 
##                                             2 
##                      Newspaper ads/online ads 
##                                             1 
##                                        Norway 
##                                             1 
##                                        Online 
##                                             2 
##                                        Poland 
##                                             1 
##                              Project Implicit 
##                                             1 
##                                      Prolific 
##                                             9 
##                                 Prolific; USA 
##                                             1 
##                         Prolific; USA (mixed) 
##                                             1 
##                                      Scotland 
##                                             1 
##                                Silicon Valley 
##                                             1 
##                                     Singapore 
##                                             1 
##                                      Slovakia 
##                                             1 
## Smartphone app on Google; international/mixed 
##                                             1 
##                                         Spain 
##                                             1 
##              Students, social network (mixed) 
##                                             1 
##                                   Switzerland 
##                                             1 
##                                       Twitter 
##                                             1 
##                                           U.S 
##                                             1 
##                        Undergraduate students 
##                                             3 
##                                United Kingdom 
##                                             2 
##                                       Unknown 
##                                             1 
##                                           USA 
##                                            36 
##  USA and Canada/Prolific; mixed/international 
##                                             1 
##                                USA and Turkey 
##                                             1 
##                                    USA and UK 
##                                             1

proportions(table(DAV[["Sample_Region"]])) # proportions

## 
##                                     Australia 
##                                     0.0390625 
##                                 British Isles 
##                                     0.0078125 
##                                         China 
##                                     0.0468750 
##                                 China/Credemo 
##                                     0.0078125 
##                                clickworker.de 
##                                     0.0078125 
##                                 CloudResearch 
##                                     0.0078125 
##                                       Denmark 
##                                     0.0078125 
##                        England/United Kingdom 
##                                     0.0156250 
##                                Estonia, World 
##                                     0.0078125 
##                                        France 
##                                     0.0078125 
##                             Gallup World Poll 
##                                     0.0078125 
##                                       Germany 
##                                     0.0625000 
##                            Germany and Israel 
##                                     0.0078125 
##                                        Israel 
##                                     0.0078125 
##                                         Japan 
##                                     0.0078125 
##                                       Lebanon 
##                                     0.0078125 
##                           Mixed/international 
##                                     0.0312500 
##                Mixed/international; China/USA 
##                                     0.0156250 
##                                         mTurk 
##                                     0.0390625 
##                                         MTurk 
##                                     0.0156250 
##                                    mTurk; USA 
##                                     0.0156250 
##                           mTurk/CloudResearch 
##                                     0.0078125 
##                                   Netherlands 
##                                     0.0390625 
##                                   New Zealand 
##                                     0.0156250 
##                      Newspaper ads/online ads 
##                                     0.0078125 
##                                        Norway 
##                                     0.0078125 
##                                        Online 
##                                     0.0156250 
##                                        Poland 
##                                     0.0078125 
##                              Project Implicit 
##                                     0.0078125 
##                                      Prolific 
##                                     0.0703125 
##                                 Prolific; USA 
##                                     0.0078125 
##                         Prolific; USA (mixed) 
##                                     0.0078125 
##                                      Scotland 
##                                     0.0078125 
##                                Silicon Valley 
##                                     0.0078125 
##                                     Singapore 
##                                     0.0078125 
##                                      Slovakia 
##                                     0.0078125 
## Smartphone app on Google; international/mixed 
##                                     0.0078125 
##                                         Spain 
##                                     0.0078125 
##              Students, social network (mixed) 
##                                     0.0078125 
##                                   Switzerland 
##                                     0.0078125 
##                                       Twitter 
##                                     0.0078125 
##                                           U.S 
##                                     0.0078125 
##                        Undergraduate students 
##                                     0.0234375 
##                                United Kingdom 
##                                     0.0156250 
##                                       Unknown 
##                                     0.0078125 
##                                           USA 
##                                     0.2812500 
##  USA and Canada/Prolific; mixed/international 
##                                     0.0078125 
##                                USA and Turkey 
##                                     0.0078125 
##                                    USA and UK 
##                                     0.0078125

Study Method

table(DAV[["Study_Method"]]) # frequencies

## 
## Mixed-methods   Qualitative  Quantitative 
##             2             2           124

proportions(table(DAV[["Study_Method"]])) # proportions

## 
## Mixed-methods   Qualitative  Quantitative 
##      0.015625      0.015625      0.968750

Recoding values

# Creating a new variable that combines 2 focus questions using OR 
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Combine the two focus items into one indicator:
#   1  if either Focus_parDem or Focus_demCat is "Yes",
#   0  if both are "No",
#   NA otherwise (no case_when() condition matches).
# Columns are referenced by bare name so that mutate() evaluates them against
# the data being piped in rather than against the global `DAV` object.
DAV <- DAV %>%
  mutate(Focus.tot = case_when(
    Focus_parDem == "Yes" | Focus_demCat == "Yes" ~ 1,
    Focus_parDem == "No" & Focus_demCat == "No" ~ 0
  ))

# Recode each reporting item to a numeric indicator: "No" becomes 0 and any
# other non-missing value becomes 1 (missing values stay NA).
DAV <- DAV %>%
  mutate(across(
    all_of(c(
      "measure_var", "Construct_def", "name_instrument", "Quant_reported",
      "Mod_reported", "Justification", "Response_options"
    )),
    \(item) as.numeric(ifelse(item == "No", 0, 1))
  ))

# "Best Practice" score: the sum of the six 0/1 reporting indicators
DAV$Bestprac <- with(
  DAV,
  Construct_def + name_instrument + Response_options +
    Quant_reported + Mod_reported + Justification
)

# Keep only the rows where the demographic variable was measured, so that the
# denominator in the later summaries is the studies that measured it
DAV.measured <- dplyr::filter(DAV, measure_var == 1)

Was the demographic a focus of the research question?

########## Frequencies for all studies
# Focus indicator (rows) by demographic (columns), using every row of DAV
focus.tab <- table(DAV[["Focus.tot"]], DAV[["Demographic"]])
print(focus.tab)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0            108        109                         125
##   1             22         19                           3

# Focus indicator by demographic, restricted to rows where the demographic
# variable was measured
focus.tab.measured <- table(
  DAV.measured[["Focus.tot"]],
  DAV.measured[["Demographic"]]
)
print(focus.tab.measured)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             37        101                           8
##   1             20         19                           2

Did they measure it?

# Did they measure the variable?
# Cross-tabulate the measured indicator (rows) against demographic (columns).

library(ggplot2)
measure.tab <- table(DAV[["measure_var"]], DAV[["Demographic"]])
print(measure.tab)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             73          8                         118
##   1             57        120                          10

# Long-format counts for plotting; each demographic label gets its total N
# appended so the denominator is visible on the x axis.
measure.tab.df <- as.data.frame(measure.tab) %>%
  dplyr::mutate(Var2 = dplyr::recode(
    as.character(Var2),
    "Race/ethnicity" = "Race/ethnicity (N = 130)",
    "Sex/gender" = "Sex/gender (N = 128)",
    "Sexual orientation/identity" = "Sexual orientation/identity (N = 128)"
  ))

# Dodged bars of No/Yes counts per demographic; the text annotations sit above
# the "No" bars and carry hard-coded counts and percentages.
ggplot(measure.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey(labels = c("No", "Yes")) +
  annotate("text", x = 0.8, y = 78, size = 4, label = "73(56%)") +
  annotate("text", x = 1.8, y = 13, size = 4, label = "8(6%)") +
  annotate("text", x = 2.8, y = 122, size = 4, label = "118(92%)") +
  labs(
    x = "Demographic",
    y = "Frequency (n)",
    fill = "",
    title = "Did they measure the construct?"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

Did they define the construct?

# Did they define the construct?
# Construct-definition indicator by demographic, among measured rows only.
construct.tab <- table(
  DAV.measured[["Construct_def"]],
  DAV.measured[["Demographic"]]
)
print(construct.tab)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             47        111                          10
##   1             10          9                           0

# Long-format counts for plotting, with the per-demographic n in each label
construct.tab.df <- as.data.frame(construct.tab) %>%
  dplyr::mutate(Var2 = dplyr::recode(
    as.character(Var2),
    "Race/ethnicity" = "Race/ethnicity (n = 57)",
    "Sex/gender" = "Sex/gender (n = 120)",
    "Sexual orientation/identity" = "Sexual orientation/identity (n = 10)"
  ))

# Dodged bars of No/Yes counts; annotations carry the hard-coded "No" counts
# and percentages.
ggplot(construct.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey(labels = c("No", "Yes")) +
  annotate("text", x = 0.8, y = 50, size = 4, label = "47(82%)") +
  annotate("text", x = 1.8, y = 115, size = 4, label = "111(93%)") +
  annotate("text", x = 2.8, y = 15, size = 4, label = "10(100%)") +
  labs(
    x = "Demographic",
    y = "Frequency (n)",
    fill = "",
    title = "Did they define the construct?"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

Did they name their instrument?

# Instrument-named indicator by demographic, among measured rows only
instrument.tab <- table(
  DAV.measured[["name_instrument"]],
  DAV.measured[["Demographic"]]
)
print(instrument.tab)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             56        120                          10
##   1              1          0                           0

# Long-format counts for plotting, with the per-demographic n in each label
instrument.tab.df <- as.data.frame(instrument.tab) %>%
  dplyr::mutate(Var2 = dplyr::recode(
    as.character(Var2),
    "Race/ethnicity" = "Race/ethnicity (n = 57)",
    "Sex/gender" = "Sex/gender (n = 120)",
    "Sexual orientation/identity" = "Sexual orientation/identity (n = 10)"
  ))

# Dodged bars of No/Yes counts; annotations carry the hard-coded "No" counts
# and percentages.
ggplot(instrument.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey(labels = c("No", "Yes")) +
  annotate("text", x = 0.8, y = 60, size = 4, label = "56(98%)") +
  annotate("text", x = 1.8, y = 125, size = 4, label = "120(100%)") +
  annotate("text", x = 2.8, y = 15, size = 4, label = "10(100%)") +
  labs(
    x = "Demographic",
    y = "Frequency (n)",
    fill = "",
    title = "Did they name their instrument?"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

Did they report the response options?

# Response-options indicator by demographic, among measured rows only,
# plus its long-format version for plotting
response.tab <- table(
  DAV.measured[["Response_options"]],
  DAV.measured[["Demographic"]]
)
response.tab.df <- as.data.frame(response.tab)
print(response.tab)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             52        115                          10
##   1              5          5                           0

# Append the per-demographic n to each demographic label
response.tab.df <- response.tab.df %>%
  dplyr::mutate(Var2 = dplyr::recode(
    as.character(Var2),
    "Race/ethnicity" = "Race/ethnicity (n = 57)",
    "Sex/gender" = "Sex/gender (n = 120)",
    "Sexual orientation/identity" = "Sexual orientation/identity (n = 10)"
  ))

# Dodged bars of No/Yes counts; annotations carry the hard-coded "No" counts
# and percentages.
ggplot(response.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey(labels = c("No", "Yes")) +
  annotate("text", x = 0.8, y = 57, size = 4, label = "52(91%)") +
  annotate("text", x = 1.8, y = 120, size = 4, label = "115(96%)") +
  annotate("text", x = 2.8, y = 15, size = 4, label = "10(100%)") +
  labs(
    x = "Demographic",
    y = "Frequency (n)",
    fill = "",
    title = "Did they report their response options?"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

Did they report a quantification?

# Quantification-reported indicator by demographic, among measured rows only
quant.tab <- table(
  DAV.measured[["Quant_reported"]],
  DAV.measured[["Demographic"]]
)
print(quant.tab)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             52        104                          10
##   1              5         16                           0

# Long-format counts for plotting, with the per-demographic n in each label
quant.tab.df <- as.data.frame(quant.tab) %>%
  dplyr::mutate(Var2 = dplyr::recode(
    as.character(Var2),
    "Race/ethnicity" = "Race/ethnicity (n = 57)",
    "Sex/gender" = "Sex/gender (n = 120)",
    "Sexual orientation/identity" = "Sexual orientation/identity (n = 10)"
  ))

# Dodged bars of No/Yes counts; annotations carry the hard-coded "No" counts
# and percentages.
ggplot(quant.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey(labels = c("No", "Yes")) +
  annotate("text", x = 0.8, y = 57, size = 4, label = "52(91%)") +
  annotate("text", x = 1.8, y = 108, size = 4, label = "104(87%)") +
  annotate("text", x = 2.8, y = 15, size = 4, label = "10(100%)") +
  labs(
    x = "Demographic",
    y = "Frequency (n)",
    fill = "",
    title = "Did they report a quantification?"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

Did they report a modification?

# Modification-reported indicator by demographic, among measured rows only,
# plus its long-format version for plotting
mod.tab <- table(
  DAV.measured[["Mod_reported"]],
  DAV.measured[["Demographic"]]
)
mod.tab.df <- as.data.frame(mod.tab)
print(mod.tab)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             57        120                          10

# Append the per-demographic n to each demographic label
mod.tab.df$Var2 <- as.character(mod.tab.df$Var2)
mod.tab.df$Var2[mod.tab.df$Var2 == "Race/ethnicity"] <- "Race/ethnicity (n = 57)"
mod.tab.df$Var2[mod.tab.df$Var2 == "Sex/gender"] <- "Sex/gender (n = 120)"
mod.tab.df$Var2[mod.tab.df$Var2 == "Sexual orientation/identity"] <- "Sexual orientation/identity (n = 10)"

# Bar chart of modification reporting per demographic. The cross-tabulation
# only contains the level 0 (no study reported a modification), so the legend
# labels are given as a named vector keyed by the fill value; each label is
# then matched to its level instead of relying on positional order and on
# both levels being present. Annotations are centred on the single bar of
# each demographic and carry hard-coded counts and percentages.
ggplot(mod.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Did they report a modification?") +
  theme_classic() +
  scale_fill_grey(labels = c("0" = "No", "1" = "Yes")) +
  annotate("text", x = 1, y = 60, size = 4, label = "57(100%)") +
  annotate("text", x = 2, y = 125, size = 4, label = "120(100%)") +
  annotate("text", x = 3, y = 15, size = 4, label = "10(100%)") +
  theme(plot.title = element_text(hjust = 0.5))

Did they justify their measure?

# Justification indicator by demographic, among measured rows only,
# plus its long-format version for plotting
just.tab <- table(
  DAV.measured[["Justification"]],
  DAV.measured[["Demographic"]]
)
just.tab.df <- as.data.frame(just.tab)
print(just.tab)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             55        119                          10
##   1              2          1                           0

# Append the per-demographic n to each demographic label
just.tab.df <- just.tab.df %>%
  dplyr::mutate(Var2 = dplyr::recode(
    as.character(Var2),
    "Race/ethnicity" = "Race/ethnicity (n = 57)",
    "Sex/gender" = "Sex/gender (n = 120)",
    "Sexual orientation/identity" = "Sexual orientation/identity (n = 10)"
  ))

# Dodged bars of No/Yes counts; annotations carry the hard-coded "No" counts
# and percentages.
ggplot(just.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey(labels = c("No", "Yes")) +
  annotate("text", x = 0.8, y = 59, size = 4, label = "55(96%)") +
  annotate("text", x = 1.8, y = 125, size = 4, label = "119(99%)") +
  annotate("text", x = 2.8, y = 15, size = 4, label = "10(100%)") +
  labs(
    x = "Demographic",
    y = "Frequency (n)",
    fill = "",
    title = "Did they justify their measure?"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

Best Practice Score

# Descriptive statistics of the Best Practice score for each demographic,
# computed over the rows where the demographic was measured
DAV.measured %>%
  group_by(Demographic) %>%
  summarise(
    count = n(),
    mean = mean(Bestprac, na.rm = TRUE),
    sd = sd(Bestprac, na.rm = TRUE),
    median = median(Bestprac, na.rm = TRUE),
    min = min(Bestprac, na.rm = TRUE),
    max = max(Bestprac, na.rm = TRUE)
  )

## # A tibble: 3 × 7
##   Demographic                 count  mean    sd median   min   max
##   <chr>                       <int> <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 Race/ethnicity                 57 0.404 0.942      0     0     5
## 2 Sex/gender                    120 0.258 0.587      0     0     3
## 3 Sexual orientation/identity    10 0     0          0     0     0

# Boxplot of the Best Practice score per demographic, with the y axis fixed
# to 0-6 and the panel grid lines removed
ggplot(DAV.measured, aes(x = Demographic, y = Bestprac)) +
  geom_boxplot(fill = "lightpink", color = "hotpink") +
  labs(x = "Demographic", y = "Best Practice Score") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  ylim(0, 6)

# Best Practice score (rows) by demographic (columns), measured rows only
bestprac.tab.2 <- table(
  DAV.measured[["Bestprac"]],
  DAV.measured[["Demographic"]]
)
print(bestprac.tab.2)

##    
##     Race/ethnicity Sex/gender Sexual orientation/identity
##   0             43         97                          10
##   1             10         16                           0
##   2              2          6                           0
##   3              0          1                           0
##   4              1          0                           0
##   5              1          0                           0

# Long-format counts for plotting, with the per-demographic n in each label
bestprac.tab.df.2 <- as.data.frame(bestprac.tab.2) %>%
  dplyr::mutate(Var2 = dplyr::recode(
    as.character(Var2),
    "Race/ethnicity" = "Race/ethnicity (n = 57)",
    "Sex/gender" = "Sex/gender (n = 120)",
    "Sexual orientation/identity" = "Sexual orientation/identity (n = 10)"
  ))

# Dodged bars with one bar per score value within each demographic; the
# annotations carry the hard-coded counts and percentages for a score of 0.
ggplot(bestprac.tab.df.2, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey(labels = c(0, 1, 2, 3, 4, 5)) +
  annotate("text", x = 0.7, y = 47, size = 4, label = "43(75%)") +
  annotate("text", x = 1.7, y = 100, size = 4, label = "97(81%)") +
  annotate("text", x = 2.7, y = 15, size = 4, label = "10(100%)") +
  labs(
    x = "Demographic",
    y = "Frequency (n)",
    fill = "",
    title = "Best Practice Score"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

DAV Analyses

Journal

Study Number

Sample Region

Study Method

Recoding values

Was the demographic a focus of the research question?

Did they measure it?

Did they define the construct?

Did they name their instrument?

Did they report the response options?

Did they report a quantification?

Did they report a modification?

Did they justify their measure?

Best Practice Score