library (tidyverse)
library (ggeasy) #allows for easy formatting of plots e.g. remove legends
library (ggbeeswarm) #allows for Categorical Scatter (Violin Point) Plots using gerom_beeswarm
library (patchwork) #allows for combining all plots into one image
# Read the raw Study 8 export and give the two key columns readable names
expone <- read_csv("Study 8 data.csv")
expone <- rename(
  expone,
  recall_score = SC0, # recall score
  condition = FL_10_DO # experimental condition
)
# The first two rows of the export do not contain participant data; drop them
expone <- slice(expone, -(1:2))
# Print every repeat occurrence of a Prolific ID (59 duplicates found)
expone %>%
  filter(duplicated(Prolific_PID)) %>%
  pull(Prolific_PID)
## [1] "5cd1836da6f34300017e240c" "5c76f2d92819ab0015b94b4c"
## [3] "5c72de7b96a9600001870966" "5c3d48955fd1050001a99364"
## [5] "5eb18525b95d6127da6815ee" "5eaac092d8a78e0172680a0e"
## [7] "5edff6e7c53e0f33aed588ce" "5d6a55cb78fce300014e078e"
## [9] "5e8b765e88065404c4d50fe2" "5ed22fc8bc5d7c01191e3f78"
## [11] "5ea4397b9b06662614d6e4f6" "5d371c527a04ed00018ac1c8"
## [13] "5e9c29bb77f86e13d6435f3d" "5ea3ef637e4bd537fe45cf6c"
## [15] "5ebb0f6aff8da30d1f29dc29" "5cd59eea8618af0001bdab4b"
## [17] "5eac3189c4a262061aed16d5" "5e3842b92fdfd2000fc286a4"
## [19] "5ec504eab4fbc3423f29cd97" "5e332f72c43078000cda48dd"
## [21] "5ea5e76a147a4909b3ff0482" "5c28b31a0091e40001ca5030"
## [23] "5eb089604b5931137dd66a19" "5ed0d82c81c0bd1bccc8ceea"
## [25] "5ed94fdd9a2ae04cf21bfa48" "5be1cc598c6a19000137a503"
## [27] "5cd813aca9a8c4001963a0aa" "5e925105a981b55934a34813"
## [29] "5eb30ae9bb223e0e176edecd" "5ec637f5e0bca2000ae6f211"
## [31] "5eb179d6d6fcb726f9275f97" "5dcbd8542bdeaa8740d52630"
## [33] "5b570f4cc146600001b82d8d" "5ed793104268812282fdc90d"
## [35] "5edf50e72ef80a1fe0267aeb" "5d7f598628843a00181eb444"
## [37] "5ec2ec9fdaef0d11e3109f74" "5d1290103b20b0000102e8e3"
## [39] "57eee744e62704000199d5ec" "5e6a839e2fd3b003a0f7f248"
## [41] "5e7365f19674532b961c2bb6" "5eb94e5731298c01178531c0"
## [43] "5e9db2995b38950c6f669b55" "584bb2b8bd873800015531da"
## [45] "5eac351c63858608351866b4" "5ec0260375bf15077a00e645"
## [47] "5cc3289b9e21e200015f0bb3" "5eb16a0602af57258fd3f8e1"
## [49] "5ed7578ef05e671db283844e" "5ebc1ae1ea22c801479541ab"
## [51] "5edfd94cb54d22309545cf06" "559ab96cfdf99b219a612bcf"
## [53] "5eac35df3043c62536d14f14" "5e9ff2a0cf50621a9b17c94f"
## [55] "5e5d7f349238db09c60bcbab" "5ea611dda778214a5e89fbf2"
## [57] "5c62d8e2a34174000187a003" "5ed6a937eb466b1029493c39"
## [59] "5ebfabc7676c2502837188cf"
# Remove second attempts for the 59 duplicate IDs (keep each ID's first row)
expone <- expone[!duplicated(expone$Prolific_PID), ]
# We end up with n = 312, which corresponds to paper's total n
# Exclusion criteria ----
# Apply the pre-registered exclusion criteria: exclude participants who did
# not complete the task, declared they did not respond seriously, or failed
# the attention check by recalling fewer than 4 headlines.
exponefinal <- expone %>%
  filter(
    Consent == "1", # consented
    Finished == "1", # finished the task
    Serious_check == "1", # declared they responded seriously
    # Compare numerically: `recall_score >= "4"` is a string comparison, under
    # which a score such as "10" sorts below "4" and would be wrongly excluded.
    as.numeric(recall_score) >= 4
  )
# Keep only the columns needed for the descriptives and plots
exponefinaldata <- exponefinal %>%
  select(
    Finished, `Duration (in seconds)`, Gender, Age, Serious_check,
    recall_score, condition, contradiction_1:advancement
  )
# Count final participant n in Exp 1
exponefinaldata %>%
  count(Serious_check)
## # A tibble: 1 x 2
## Serious_check n
## <chr> <int>
## 1 1 294
First, I made the data easier to read by using mutate() to recode the Gender variable, relabelling “1” as male and “2” as female. Then, count() gives us the total n for males and females.
# Recode Gender from its numeric codes to readable labels.
# Only the two documented codes are mapped (1 = male, 2 = female); any other
# code becomes NA so it shows up in the count below instead of being silently
# counted as "female".
exponefinaldata <- exponefinaldata %>%
  mutate(
    Gender = case_when(
      Gender == 1 ~ "male",
      Gender == 2 ~ "female",
      TRUE ~ NA_character_
    )
  )
# Count males and females
exponefinaldata %>%
  count(Gender)
## # A tibble: 2 x 2
## Gender n
## <chr> <int>
## 1 female 168
## 2 male 126
Then, we use as.numeric() to convert the Age variable from character to numeric so that we can calculate its descriptive statistics. This is where I implemented the suggestion by Jenny, using summarise() to get the mean, SD, min and max in a single call. I realised that the result was returned as a tibble and did not show the same number of decimal places as the paper, so I printed it as a data frame so that we could see the decimal places and match them against the paper’s figures.
# Other descriptive statistics
# Convert Age from character to numeric in place. The conversion reads Age
# from exponefinaldata itself (not from exponefinal), so it stays correct even
# if the two data frames ever differ in rows or row order.
exponefinaldata <- exponefinaldata %>%
  mutate(Age = as.numeric(Age))
exponefinaldata %>%
  summarise(
    Mean = mean(Age),
    SD = sd(Age),
    Min = min(Age),
    Max = max(Age)
  ) %>%
  as.data.frame() # print as a data frame so the decimal places are shown (a tibble rounds the display)
## Mean SD Min Max
## 1 34.29252 12.96633 18 69
The authors have the means, 95% CI crossbars, and a swarm plot layered onto their violin plots. So we attempt to add these with the help of ggplot2’s geom_violin() and stat_summary(), and ggbeeswarm’s geom_beeswarm().
# Contradiction plot ----
# Convert the six contradiction items from character to numeric, and the
# condition label from character to factor so it can be used for grouping.
exponefinaldata <- exponefinaldata %>%
  mutate(
    across(num_range("contradiction_", 1:6), as.numeric),
    condition = as.factor(condition)
  )
# Sum the six items into a new variable 'contradiction'.
# A plain vectorised sum is used instead of rowwise() so that the data frame
# is not left in a row-wise state for every later step of the script.
exponefinaldata <- exponefinaldata %>%
  mutate(
    contradiction = contradiction_1 + contradiction_2 + contradiction_3 +
      contradiction_4 + contradiction_5 + contradiction_6
  )
# Copy the condition column and split the copy into its 4 components
exponefinaldata <- exponefinaldata %>%
  mutate(condition2 = condition) %>%
  separate(
    col = condition2,
    into = c("block", "number", "format", "conflict")
  ) %>%
  mutate(
    # Convert format and conflict to factors
    format = as.factor(format),
    # Rename Conflict/Consistent to Conf./Non-Conf. (level order is kept)
    conflict = fct_recode(
      as.factor(conflict),
      "Conf." = "Conflict",
      "Non-Conf." = "Consistent"
    )
  )
# Plot contradiction: violin + mean/95% CI crossbar + every data point
contradiction <- ggplot(
  data = exponefinaldata,
  aes(
    x = conflict,
    y = contradiction,
    fill = conflict
  )
) +
  geom_violin() +
  ggtitle(label = "Contradiction") +
  theme(
    plot.title = element_text(hjust = 0.5) # center the plot title
  ) +
  scale_x_discrete(name = NULL) +
  scale_y_continuous(
    name = "Perceived Contradiction",
    limits = c(0, 30)
  ) +
  facet_wrap(
    vars(format), # one panel per headline format
    strip.position = "bottom"
  ) +
  stat_summary( # crossbars showing the mean and 95% confidence interval
    fun.data = "mean_cl_normal", # needs the Hmisc package to be installed
    geom = "crossbar",
    fill = "white", # crossbar fill colour
    alpha = .7 # 70% opacity for the fill
  ) +
  easy_remove_legend() +
  geom_beeswarm(
    cex = 0.2 # bee swarm (one-dimensional scatter) of all data points; cex sets the spacing
  ) +
  scale_fill_manual(
    values = c("slategray2", "lightpink1") # violin fill colours
  )
plot(contradiction)
# Confusion plot ----
# Confusion ratings are stored as text; make them numeric before plotting
exponefinaldata[["confusion"]] <- as.numeric(exponefinaldata[["confusion"]])
confusion <- ggplot(exponefinaldata, aes(x = conflict, y = confusion, fill = conflict)) +
  # Layers, drawn bottom to top: violin, mean/95% CI crossbar, raw data points
  geom_violin() +
  stat_summary(
    fun.data = "mean_cl_normal", # mean and 95% CI; relies on the Hmisc package
    geom = "crossbar",
    fill = "white",
    alpha = .7 # crossbar fill at 70% opacity
  ) +
  geom_beeswarm(cex = 0.2) + # every data point as a one-dimensional scatter
  # One panel per headline format, with the strip label underneath
  facet_wrap(vars(format), strip.position = "bottom") +
  # Titles and axis labels (no x-axis title)
  labs(title = "Confusion", x = NULL, y = "Perceived Confusion") +
  scale_fill_manual(values = c("slategray2", "lightpink1")) + # violin colours
  theme(plot.title = element_text(hjust = 0.5)) + # centred title
  easy_remove_legend()
plot(confusion)
# Advancement plot ----
# Advancement ratings are stored as text; make them numeric before plotting
exponefinaldata[["advancement"]] <- as.numeric(exponefinaldata[["advancement"]])
advancement <- ggplot(exponefinaldata, aes(x = conflict, y = advancement, fill = conflict)) +
  # Layers, drawn bottom to top: violin, mean/95% CI crossbar, raw data points
  geom_violin() +
  stat_summary(
    fun.data = "mean_cl_normal", # mean and 95% CI; relies on the Hmisc package
    geom = "crossbar",
    fill = "white",
    alpha = .7 # crossbar fill at 70% opacity
  ) +
  geom_beeswarm(cex = 0.2) + # every data point as a one-dimensional scatter
  # One panel per headline format (Generic/Qualified), strip label underneath
  facet_wrap(vars(format), strip.position = "bottom") +
  # Titles and axis labels (no x-axis title)
  labs(
    title = "Advancement",
    x = NULL,
    y = "Perceived Scientific Advancement"
  ) +
  scale_fill_manual(values = c("slategray2", "lightpink1")) + # violin colours
  theme(plot.title = element_text(hjust = 0.5)) + # centred title
  easy_remove_legend()
plot(advancement)
Here we use the patchwork package’s plot_layout() function to put all 3 plots together!
# Combine the three plots with patchwork and arrange them in two columns
combinedplot1 <- contradiction + advancement + confusion
combinedplot1 <- combinedplot1 + plot_layout(ncol = 2)
plot(combinedplot1)
The next figure is a histogram displaying the number of participants in each condition who felt that the body of research reported in the headlines resulted in us knowing more than before, less than before, or the same as before.
This follows on from the Advancement plot, in which participants gave a rating of -1 (less), 0 (same), or 1 (more). As a first step, we use the function ordered() to specify the levels of advancement (-1, 0, 1) and label them accordingly (less, same, more). I named this new object ‘ordered_advancement’.
Next, we plot the histogram using ggplot2’s geom_bar. Firstly, the data is specified as ‘exponefinaldata’ and, within the aes argument, we specify the x-axis variable as ‘ordered_advancement’ and group/fill by the combination of the conflict and format conditions, written as ‘conflict:format’. Geom_bar is used to produce the histogram, with the argument position=“dodge” placing the histogram bars side by side (the default is “stack”).
Finally, we format the histogram. We use the function scale_fill_grey() to specify the colour scheme, change the title of the legend to “Condition”, and replace the abbreviated labels with their long forms using the labels argument. The x-axis is titled “Advancement” and its labels are capitalised (Less, Same, More); the y-axis is titled “Number of Participants”.
# Advancement ratings as an ordered factor: -1 = less, 0 = same, 1 = more
ordered_advancement <- ordered(
  exponefinaldata[["advancement"]],
  levels = c(-1, 0, 1),
  labels = c("less", "same", "more")
)
# Bar chart of participant counts per advancement rating, with one bar per
# conflict x format condition
advancement_histogram <- ggplot(
  exponefinaldata,
  aes(x = ordered_advancement, group = conflict:format, fill = conflict:format)
) +
  geom_bar(position = "dodge") + # bars side by side rather than stacked
  # Greyscale fill, with the legend title and long-form condition labels
  scale_fill_grey(
    name = "Condition",
    labels = c(
      "Conflicting/Generic",
      "Conflicting/Qualified",
      "Non-conflicting/Generic",
      "Non-conflicting/Qualified"
    )
  ) +
  # Capitalised tick labels for the three rating categories
  scale_x_discrete(name = "Advancement", labels = c("Less", "Same", "More")) +
  ylab("Number of Participants")
plot(advancement_histogram)