# Install the packages this analysis needs, but only those not already in the
# library, so re-running the script does not re-download everything each time.
required_pkgs <- c("tidyverse", "MASS", "car", "ggplot2")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  # `repos` is a named character vector; use the https mirror
  install.packages(missing_pkgs, repos = c(CRAN = "https://cran.rstudio.com/"))
}
## Installing packages into '/Users/perso204/Library/R/x86_64/4.4/library'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## /var/folders/96/xz1fg2r56mb2kymyn8r37rqc0000gq/T//Rtmp7Iu7iv/downloaded_packages
# Import data: read the coded spreadsheet into `DAV`.
# (The recorded run below reports 386 rows and 15 character columns.)
library(readr)
DAV <- read_csv(file = "DAV_data.csv")
## Rows: 386 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): Author, Journal, Study_Number, Sample_Region, Study_Method, Demogr...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Under the coding scheme a blank cell means "No", so missing values in the
# reporting-practice columns are filled with "No".
blank.means.no <- c(
  "Construct_def", "name_instrument", "Response_options",
  "Quant_reported", "Mod_reported", "Justification"
)
for (practice.col in blank.means.no) {
  DAV[[practice.col]][is.na(DAV[[practice.col]])] <- "No"
}
Journal
# Tabulate journals once and reuse the table for counts and proportions.
# table() leaves out NA by default; the recorded counts sum to 128 although
# the file has 386 rows -- presumably Journal is filled once per study
# (NOTE(review): confirm).
journal.tab <- table(DAV$Journal)
journal.tab #frequencies
##
## JPSP PSPB SPPS
## 31 55 42
prop.table(journal.tab) #proportions
##
## JPSP PSPB SPPS
## 0.2421875 0.4296875 0.3281250
Study Number
# Tabulate study numbers once and reuse the table for counts and proportions
study.number.tab <- table(DAV$Study_Number)
study.number.tab #frequencies
##
## Only study Study 1 Study 1a Study 1a/1b Study 2
## 22 89 10 1 6
prop.table(study.number.tab) #proportions
##
## Only study Study 1 Study 1a Study 1a/1b Study 2
## 0.1718750 0.6953125 0.0781250 0.0078125 0.0468750
Sample Region
# Counts per sample region, using the labels exactly as typed in the data:
# spelling variants (e.g. "mTurk" / "MTurk", "U.S" / "USA") are counted as
# separate categories in the output below.
table(DAV[["Sample_Region"]]) #frequencies
##
## Australia
## 5
## British Isles
## 1
## China
## 6
## China/Credemo
## 1
## clickworker.de
## 1
## CloudResearch
## 1
## Denmark
## 1
## England/United Kingdom
## 2
## Estonia, World
## 1
## France
## 1
## Gallup World Poll
## 1
## Germany
## 8
## Germany and Israel
## 1
## Israel
## 1
## Japan
## 1
## Lebanon
## 1
## Mixed/international
## 4
## Mixed/international; China/USA
## 2
## mTurk
## 5
## MTurk
## 2
## mTurk; USA
## 2
## mTurk/CloudResearch
## 1
## Netherlands
## 5
## New Zealand
## 2
## Newspaper ads/online ads
## 1
## Norway
## 1
## Online
## 2
## Poland
## 1
## Project Implicit
## 1
## Prolific
## 9
## Prolific; USA
## 1
## Prolific; USA (mixed)
## 1
## Scotland
## 1
## Silicon Valley
## 1
## Singapore
## 1
## Slovakia
## 1
## Smartphone app on Google; international/mixed
## 1
## Spain
## 1
## Students, social network (mixed)
## 1
## Switzerland
## 1
## Twitter
## 1
## U.S
## 1
## Undergraduate students
## 3
## United Kingdom
## 2
## Unknown
## 1
## USA
## 36
## USA and Canada/Prolific; mixed/international
## 1
## USA and Turkey
## 1
## USA and UK
## 1
# Share of non-missing rows per sample region (labels as typed in the data)
prop.table(table(DAV[["Sample_Region"]])) #proportions
##
## Australia
## 0.0390625
## British Isles
## 0.0078125
## China
## 0.0468750
## China/Credemo
## 0.0078125
## clickworker.de
## 0.0078125
## CloudResearch
## 0.0078125
## Denmark
## 0.0078125
## England/United Kingdom
## 0.0156250
## Estonia, World
## 0.0078125
## France
## 0.0078125
## Gallup World Poll
## 0.0078125
## Germany
## 0.0625000
## Germany and Israel
## 0.0078125
## Israel
## 0.0078125
## Japan
## 0.0078125
## Lebanon
## 0.0078125
## Mixed/international
## 0.0312500
## Mixed/international; China/USA
## 0.0156250
## mTurk
## 0.0390625
## MTurk
## 0.0156250
## mTurk; USA
## 0.0156250
## mTurk/CloudResearch
## 0.0078125
## Netherlands
## 0.0390625
## New Zealand
## 0.0156250
## Newspaper ads/online ads
## 0.0078125
## Norway
## 0.0078125
## Online
## 0.0156250
## Poland
## 0.0078125
## Project Implicit
## 0.0078125
## Prolific
## 0.0703125
## Prolific; USA
## 0.0078125
## Prolific; USA (mixed)
## 0.0078125
## Scotland
## 0.0078125
## Silicon Valley
## 0.0078125
## Singapore
## 0.0078125
## Slovakia
## 0.0078125
## Smartphone app on Google; international/mixed
## 0.0078125
## Spain
## 0.0078125
## Students, social network (mixed)
## 0.0078125
## Switzerland
## 0.0078125
## Twitter
## 0.0078125
## U.S
## 0.0078125
## Undergraduate students
## 0.0234375
## United Kingdom
## 0.0156250
## Unknown
## 0.0078125
## USA
## 0.2812500
## USA and Canada/Prolific; mixed/international
## 0.0078125
## USA and Turkey
## 0.0078125
## USA and UK
## 0.0078125
Study Method
# Tabulate study methods once and reuse the table for counts and proportions
method.tab <- table(DAV$Study_Method)
method.tab #frequencies
##
## Mixed-methods Qualitative Quantitative
## 2 2 124
prop.table(method.tab) #proportions
##
## Mixed-methods Qualitative Quantitative
## 0.015625 0.015625 0.968750
Recoding values
# Creating a new variable that combines 2 focus questions using OR
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Focus.tot combines the two focus questions with OR:
#   1 if Focus_parDem or Focus_demCat is "Yes",
#   0 if both are "No",
#   NA otherwise (no rule matches, e.g. a missing answer and no "Yes").
# Columns are referenced by bare name so mutate() evaluates them on the data
# being piped in, not on a separate lookup of the global `DAV` (which would
# silently misalign if the piped data were ever filtered or grouped).
DAV <- DAV %>%
  mutate(
    Focus.tot = case_when(
      Focus_parDem == "Yes" | Focus_demCat == "Yes" ~ 1,
      Focus_parDem == "No" & Focus_demCat == "No" ~ 0
    )
  )
# Recode the reporting indicators to numeric 0/1: "No" becomes 0, any other
# non-missing value becomes 1, and missing values stay NA.
indicator.cols <- c(
  "measure_var", "Construct_def", "name_instrument", "Quant_reported",
  "Mod_reported", "Justification", "Response_options"
)
for (indicator in indicator.cols) {
  is.no <- DAV[[indicator]] == "No"
  DAV[[indicator]] <- as.numeric(ifelse(is.no, 0, 1))
}
# "Best Practice" Score
# Score = sum of the six 0/1 reporting indicators (NA if any indicator is NA)
DAV$Bestprac <- with(
  DAV,
  Construct_def + name_instrument + Response_options +
    Quant_reported + Mod_reported + Justification
)
# Keep only rows where the given demographic variable was measured, so that
# the denominator in later tables is the studies that measured it
DAV.measured <- dplyr::filter(DAV, measure_var == 1)
Was the demographic a focus of the research question?
########## Frequencies for all studies
# Focus (rows: 0 = no, 1 = yes) by demographic, over every coded row
focus.tab <- table(DAV[["Focus.tot"]], DAV[["Demographic"]])
focus.tab
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 108 109 125
## 1 22 19 3
# Same cross-tabulation, restricted to rows where the demographic was measured
focus.tab.measured <- table(
  DAV.measured[["Focus.tot"]],
  DAV.measured[["Demographic"]]
)
focus.tab.measured
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 37 101 8
## 1 20 19 2
Did they measure it?
# Did they measure the variable? Cross-tabulate measure_var (0 = no, 1 = yes)
# by demographic over all coded rows, then plot the counts as dodged bars.
library(ggplot2)
measure.tab <- table(DAV$measure_var, DAV$Demographic)
measure.tab
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 73 8 118
## 1 57 120 10
measure.tab.df <- as.data.frame(measure.tab)

# Group sizes, "No" counts and percentage labels are derived from the table,
# so axis labels and annotations cannot drift from the data.
measure.n <- colSums(measure.tab)   # rows coded per demographic
measure.no <- measure.tab["0", ]    # rows where it was not measured
measure.tab.df$Var2 <- paste0(
  measure.tab.df$Var2, " (N = ",
  measure.n[as.character(measure.tab.df$Var2)], ")"
)
# floor(x + 0.5) rounds halves up; round() would round e.g. 92.5 down to 92
measure.lab <- sprintf(
  "%d(%d%%)",
  as.integer(measure.no),
  as.integer(floor(100 * measure.no / measure.n + 0.5))
)

ggplot(measure.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Did they measure the construct?") +
  theme_classic() +
  # named labels stay correct even if only one of the two levels is present
  scale_fill_grey(labels = c("0" = "No", "1" = "Yes")) +
  # one label above each "No" bar (the left bar of each dodged pair);
  # assumes the x axis keeps the table's column order
  annotate(
    "text",
    x = seq_along(measure.no) - 0.2,
    y = as.vector(measure.no) + 5,
    size = 4, label = measure.lab
  ) +
  theme(plot.title = element_text(hjust = .5))

Did they define the construct?
# Did they define the construct? Cross-tabulate Construct_def (0 = no,
# 1 = yes) by demographic among rows that measured the demographic, then plot.
construct.tab <- table(DAV.measured$Construct_def, DAV.measured$Demographic)
construct.tab
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 47 111 10
## 1 10 9 0
construct.tab.df <- as.data.frame(construct.tab)

# Group sizes, "No" counts and percentage labels are derived from the table,
# so axis labels and annotations cannot drift from the data.
construct.n <- colSums(construct.tab)
construct.no <- construct.tab["0", ]
construct.tab.df$Var2 <- paste0(
  construct.tab.df$Var2, " (n = ",
  construct.n[as.character(construct.tab.df$Var2)], ")"
)
# floor(x + 0.5) rounds halves up; round() would round 92.5 down to 92
construct.lab <- sprintf(
  "%d(%d%%)",
  as.integer(construct.no),
  as.integer(floor(100 * construct.no / construct.n + 0.5))
)

ggplot(construct.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Did they define the construct?") +
  theme_classic() +
  # named labels stay correct even if only one of the two levels is present
  scale_fill_grey(labels = c("0" = "No", "1" = "Yes")) +
  # one label above each "No" bar (the left bar of each dodged pair);
  # assumes the x axis keeps the table's column order
  annotate(
    "text",
    x = seq_along(construct.no) - 0.2,
    y = as.vector(construct.no) + 5,
    size = 4, label = construct.lab
  ) +
  theme(plot.title = element_text(hjust = .5))

Did they name their instrument?
# Did they name their instrument? Cross-tabulate name_instrument (0 = no,
# 1 = yes) by demographic among rows that measured the demographic, then plot.
instrument.tab <- table(DAV.measured$name_instrument, DAV.measured$Demographic)
instrument.tab
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 56 120 10
## 1 1 0 0
instrument.tab.df <- as.data.frame(instrument.tab)

# Group sizes, "No" counts and percentage labels are derived from the table,
# so axis labels and annotations cannot drift from the data.
instrument.n <- colSums(instrument.tab)
instrument.no <- instrument.tab["0", ]
instrument.tab.df$Var2 <- paste0(
  instrument.tab.df$Var2, " (n = ",
  instrument.n[as.character(instrument.tab.df$Var2)], ")"
)
# floor(x + 0.5) rounds halves up, unlike round()
instrument.lab <- sprintf(
  "%d(%d%%)",
  as.integer(instrument.no),
  as.integer(floor(100 * instrument.no / instrument.n + 0.5))
)

ggplot(instrument.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Did they name their instrument?") +
  theme_classic() +
  # named labels stay correct even if only one of the two levels is present
  scale_fill_grey(labels = c("0" = "No", "1" = "Yes")) +
  # one label above each "No" bar (the left bar of each dodged pair);
  # assumes the x axis keeps the table's column order
  annotate(
    "text",
    x = seq_along(instrument.no) - 0.2,
    y = as.vector(instrument.no) + 5,
    size = 4, label = instrument.lab
  ) +
  theme(plot.title = element_text(hjust = .5))

Did they report the response options?
# Did they report the response options? Cross-tabulate Response_options
# (0 = no, 1 = yes) by demographic among rows that measured the demographic,
# then plot.
response.tab <- table(DAV.measured$Response_options, DAV.measured$Demographic)
response.tab.df <- as.data.frame(response.tab)
response.tab
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 52 115 10
## 1 5 5 0

# Group sizes, "No" counts and percentage labels are derived from the table,
# so axis labels and annotations cannot drift from the data.
response.n <- colSums(response.tab)
response.no <- response.tab["0", ]
response.tab.df$Var2 <- paste0(
  response.tab.df$Var2, " (n = ",
  response.n[as.character(response.tab.df$Var2)], ")"
)
# floor(x + 0.5) rounds halves up, unlike round()
response.lab <- sprintf(
  "%d(%d%%)",
  as.integer(response.no),
  as.integer(floor(100 * response.no / response.n + 0.5))
)

ggplot(response.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Did they report their response options?") +
  theme_classic() +
  # named labels stay correct even if only one of the two levels is present
  scale_fill_grey(labels = c("0" = "No", "1" = "Yes")) +
  # one label above each "No" bar (the left bar of each dodged pair);
  # assumes the x axis keeps the table's column order
  annotate(
    "text",
    x = seq_along(response.no) - 0.2,
    y = as.vector(response.no) + 5,
    size = 4, label = response.lab
  ) +
  theme(plot.title = element_text(hjust = .5))

Did they report a quantification?
# Did they report a quantification? Cross-tabulate Quant_reported (0 = no,
# 1 = yes) by demographic among rows that measured the demographic, then plot.
quant.tab <- table(DAV.measured$Quant_reported, DAV.measured$Demographic)
quant.tab
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 52 104 10
## 1 5 16 0
quant.tab.df <- as.data.frame(quant.tab)

# Group sizes, "No" counts and percentage labels are derived from the table,
# so axis labels and annotations cannot drift from the data.
quant.n <- colSums(quant.tab)
quant.no <- quant.tab["0", ]
quant.tab.df$Var2 <- paste0(
  quant.tab.df$Var2, " (n = ",
  quant.n[as.character(quant.tab.df$Var2)], ")"
)
# floor(x + 0.5) rounds halves up, unlike round()
quant.lab <- sprintf(
  "%d(%d%%)",
  as.integer(quant.no),
  as.integer(floor(100 * quant.no / quant.n + 0.5))
)

ggplot(quant.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Did they report a quantification?") +
  theme_classic() +
  # named labels stay correct even if only one of the two levels is present
  scale_fill_grey(labels = c("0" = "No", "1" = "Yes")) +
  # one label above each "No" bar (the left bar of each dodged pair);
  # assumes the x axis keeps the table's column order
  annotate(
    "text",
    x = seq_along(quant.no) - 0.2,
    y = as.vector(quant.no) + 5,
    size = 4, label = quant.lab
  ) +
  theme(plot.title = element_text(hjust = .5))

Did they report a modification?
# Did they report a modification? Cross-tabulate Mod_reported (0 = no,
# 1 = yes) by demographic among rows that measured the demographic, then plot.
# In the recorded run every row is 0, so the table has a single "0" row.
mod.tab <- table(DAV.measured$Mod_reported, DAV.measured$Demographic)
mod.tab.df <- as.data.frame(mod.tab)
mod.tab
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 57 120 10

# Group sizes, "No" counts and percentage labels are derived from the table,
# so axis labels and annotations cannot drift from the data.
mod.n <- colSums(mod.tab)
mod.no <- mod.tab["0", ]
mod.tab.df$Var2 <- paste0(
  mod.tab.df$Var2, " (n = ",
  mod.n[as.character(mod.tab.df$Var2)], ")"
)
# floor(x + 0.5) rounds halves up, unlike round()
mod.lab <- sprintf(
  "%d(%d%%)",
  as.integer(mod.no),
  as.integer(floor(100 * mod.no / mod.n + 0.5))
)
# With a single level the bar is centred on its tick; with both levels the
# "No" bar is the left one of the dodged pair.
mod.shift <- if (nrow(mod.tab) > 1) 0.2 else 0

ggplot(mod.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Did they report a modification?") +
  theme_classic() +
  # named labels: the lone "0" level is labelled "No" by name, not by position
  scale_fill_grey(labels = c("0" = "No", "1" = "Yes")) +
  # assumes the x axis keeps the table's column order
  annotate(
    "text",
    x = seq_along(mod.no) - mod.shift,
    y = as.vector(mod.no) + 5,
    size = 4, label = mod.lab
  ) +
  theme(plot.title = element_text(hjust = .5))

Did they justify their measure?
# Did they justify their measure? Cross-tabulate Justification (0 = no,
# 1 = yes) by demographic among rows that measured the demographic, then plot.
just.tab <- table(DAV.measured$Justification, DAV.measured$Demographic)
just.tab.df <- as.data.frame(just.tab)
just.tab
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 55 119 10
## 1 2 1 0

# Group sizes, "No" counts and percentage labels are derived from the table,
# so axis labels and annotations cannot drift from the data.
just.n <- colSums(just.tab)
just.no <- just.tab["0", ]
just.tab.df$Var2 <- paste0(
  just.tab.df$Var2, " (n = ",
  just.n[as.character(just.tab.df$Var2)], ")"
)
# floor(x + 0.5) rounds halves up, unlike round()
just.lab <- sprintf(
  "%d(%d%%)",
  as.integer(just.no),
  as.integer(floor(100 * just.no / just.n + 0.5))
)

ggplot(just.tab.df, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Did they justify their measure?") +
  theme_classic() +
  # named labels stay correct even if only one of the two levels is present
  scale_fill_grey(labels = c("0" = "No", "1" = "Yes")) +
  # one label above each "No" bar (the left bar of each dodged pair);
  # assumes the x axis keeps the table's column order
  annotate(
    "text",
    x = seq_along(just.no) - 0.2,
    y = as.vector(just.no) + 5,
    size = 4, label = just.lab
  ) +
  theme(plot.title = element_text(hjust = .5))

Best Practice Score
# Descriptive statistics of the Best Practice score for each demographic,
# among rows where the demographic was measured
DAV.measured %>%
  group_by(Demographic) %>%
  summarise(
    count = n(),
    mean = mean(Bestprac, na.rm = TRUE),
    sd = sd(Bestprac, na.rm = TRUE),
    median = median(Bestprac, na.rm = TRUE),
    min = min(Bestprac, na.rm = TRUE),
    max = max(Bestprac, na.rm = TRUE)
  )
## # A tibble: 3 × 7
## Demographic count mean sd median min max
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Race/ethnicity 57 0.404 0.942 0 0 5
## 2 Sex/gender 120 0.258 0.587 0 0 3
## 3 Sexual orientation/identity 10 0 0 0 0 0
# Boxplot of the Best Practice score by demographic; the y axis is fixed to
# the score's 0-6 range and the panel grid lines are removed
ggplot(DAV.measured, aes(x = Demographic, y = Bestprac)) +
  geom_boxplot(fill = "lightpink", colour = "hotpink") +
  xlab("Demographic") +
  ylab("Best Practice Score") +
  theme_bw() +
  ylim(0, 6) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Distribution of the Best Practice score by demographic among rows that
# measured the demographic, plotted as dodged bars (one bar per score value).
bestprac.tab.2 <- table(DAV.measured$Bestprac, DAV.measured$Demographic)
bestprac.tab.2
##
## Race/ethnicity Sex/gender Sexual orientation/identity
## 0 43 97 10
## 1 10 16 0
## 2 2 6 0
## 3 0 1 0
## 4 1 0 0
## 5 1 0 0
bestprac.tab.df.2 <- as.data.frame(bestprac.tab.2)

# Group sizes, score-0 counts and percentage labels are derived from the
# table, so axis labels and annotations cannot drift from the data.
bestprac.n <- colSums(bestprac.tab.2)
bestprac.zero <- bestprac.tab.2["0", ]
bestprac.tab.df.2$Var2 <- paste0(
  bestprac.tab.df.2$Var2, " (n = ",
  bestprac.n[as.character(bestprac.tab.df.2$Var2)], ")"
)
# floor(x + 0.5) rounds halves up, unlike round()
bestprac.lab <- sprintf(
  "%d(%d%%)",
  as.integer(bestprac.zero),
  as.integer(floor(100 * bestprac.zero / bestprac.n + 0.5))
)

ggplot(bestprac.tab.df.2, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(position = "dodge") +
  labs(x = "Demographic", y = "Frequency (n)", fill = "") +
  ggtitle("Best Practice Score") +
  theme_classic() +
  # legend labels default to the score values actually present, so they
  # cannot shift onto the wrong score if one value is absent
  scale_fill_grey() +
  # one label above each score-0 bar (the leftmost bar of each group);
  # assumes the x axis keeps the table's column order
  annotate(
    "text",
    x = seq_along(bestprac.zero) - 0.3,
    y = as.vector(bestprac.zero) + 5,
    size = 4, label = bestprac.lab
  ) +
  theme(plot.title = element_text(hjust = .5))
