# install.packages("tidyverse")
# install.packages("here")
# install.packages("grid")
# install.packages("gridExtra")
# install.packages("effectsize")
# install.packages("corrplot")
I use four packages that I also used in my verification:
tidyverse
Collection of useful R packages particularly:
here
grid & gridExtra
I also use two new packages for my exploratory section:
effectsize
corrplot
library(tidyverse)
library(here)
library(grid)
library(gridExtra)
library(effectsize)
library(corrplot)
here::i_am("Exploratory.Rmd")
Using read_csv() from tidyverse readr, I load in my final datasets of exp 1 & 2 from my verification (write_csv() used in my verification). This saves me from showing and running my code again from the previous section.
data_study_1 <- read_csv(file = here("data_study_1_Tidied.csv"))
## New names:
## Rows: 294 Columns: 20
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): block, Format, Conflict dbl (17): ...1, Finished, Duration (in seconds),
## Gender, Age, Serious_check,...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
glimpse(data_study_1)
## Rows: 294
## Columns: 20
## $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
## $ Finished <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ `Duration (in seconds)` <dbl> 428, 364, 384, 632, 430, 543, 363, 354, 377, 4…
## $ Gender <dbl> 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2…
## $ Age <dbl> 50, 23, 28, 24, 26, 27, 19, 21, 19, 57, 38, 47…
## $ Serious_check <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ recall_score <dbl> 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 6, 7, 7, 7…
## $ block <chr> "Block", "Block", "Block", "Block", "Block", "…
## $ number <dbl> 3, 3, 2, 2, 1, 4, 2, 1, 3, 1, 4, 2, 4, 4, 2, 3…
## $ Format <chr> "Qualified", "Qualified", "Generic", "Generic"…
## $ Conflict <chr> "Conf.", "Conf.", "Non-Conf.", "Non-Conf.", "C…
## $ contradiction_1 <dbl> 5, 5, 1, 3, 4, 1, 1, 4, 4, 5, 1, 2, 2, 1, 1, 5…
## $ contradiction_2 <dbl> 5, 5, 2, 4, 4, 3, 1, 4, 5, 4, 2, 3, 2, 4, 1, 5…
## $ contradiction_3 <dbl> 4, 5, 1, 2, 4, 1, 5, 2, 2, 5, 1, 2, 2, 2, 1, 5…
## $ contradiction_4 <dbl> 4, 5, 1, 2, 4, 4, 1, 4, 3, 3, 4, 3, 2, 3, 4, 4…
## $ contradiction_5 <dbl> 4, 5, 4, 2, 4, 1, 1, 3, 2, 5, 1, 2, 2, 1, 3, 5…
## $ contradiction_6 <dbl> 5, 5, 3, 1, 4, 1, 1, 4, 5, 3, 1, 2, 4, 2, 1, 5…
## $ confusion <dbl> 4, 5, 1, 4, 4, 2, 5, 4, 5, 5, 1, 3, 5, 2, 2, 5…
## $ advancement <dbl> -1, 0, 1, -1, 0, 0, -1, 0, -1, -1, 0, 0, -1, 0…
## $ contradiction <dbl> 27, 30, 12, 14, 24, 11, 10, 21, 21, 25, 10, 14…
data_study_2 <- read_csv(file = here("data_study_2_Tidied.csv"))
## New names:
## Rows: 400 Columns: 44
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): Gender, block, Format, Conflict dbl (40): ...1, Finished, Duration (in
## seconds), Age, Serious_check, recall_...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
glimpse(data_study_2)
## Rows: 400
## Columns: 44
## $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,…
## $ Finished <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ `Duration (in seconds)` <dbl> 430, 454, 513, 486, 745, 584, 476, 674, 445, 6…
## $ Gender <chr> "Female", "Male", "Female", "Female", "Male", …
## $ Age <dbl> 51, 19, 31, 32, 49, 20, 20, 29, 29, 18, 31, 21…
## $ Serious_check <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ recall_score <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 7, 7…
## $ block <chr> "Block", "Block", "Block", "Block", "Block", "…
## $ number <dbl> 3, 3, 4, 1, 1, 2, 4, 2, 2, 1, 2, 1, 2, 4, 3, 4…
## $ Format <chr> "Qualified", "Qualified", "Qualified", "Generi…
## $ Conflict <chr> "Conf.", "Conf.", "Non-Conf.", "Conf.", "Conf.…
## $ NC_1 <dbl> 2, 2, 2, 5, 4, 4, 4, 1, 4, 4, 3, 4, 4, 2, 4, 1…
## $ NC_2 <dbl> 2, 2, 1, 4, 4, 4, 4, 1, 4, 4, 3, 4, 3, 2, 4, 1…
## $ NC_3 <dbl> 2, 2, 1, 3, 4, 3, 2, 4, 2, 2, 3, 2, 2, 1, 2, 1…
## $ NC_4 <dbl> 2, 4, 2, 4, 4, 4, 5, 1, 4, 4, 3, 3, 2, 4, 4, 3…
## $ NC_5 <dbl> 2, 3, 2, 2, 4, 2, 4, 4, 2, 2, 3, 3, 3, 4, 4, 3…
## $ NC_6 <dbl> 1, 3, 2, 2, 4, 2, 2, 4, 2, 2, 3, 2, 2, 2, 4, 2…
## $ NBS_1 <dbl> 2, 5, 1, 4, 4, 4, 5, 3, 4, 5, 4, 4, 3, 3, 3, 3…
## $ NBS_2 <dbl> 2, 3, 2, 4, 2, 4, 4, 4, 2, 2, 4, 2, 2, 2, 2, 1…
## $ NBS_3 <dbl> 2, 2, 2, 4, 2, 5, 4, 4, 2, 2, 3, 4, 1, 1, 3, 1…
## $ NBS_4 <dbl> 1, 2, 2, 3, 2, 1, 1, 2, 2, 1, 3, 2, 2, 2, 2, 1…
## $ NBS_5 <dbl> 1, 2, 2, 3, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 2, 1…
## $ NBS_6 <dbl> 1, 4, 2, 4, 2, 2, 5, 2, 4, 4, 3, 4, 2, 2, 4, 3…
## $ Mistrust_expertise_1 <dbl> 2, 2, 1, 3, 2, 2, 3, 1, 3, 2, 3, 3, 3, 1, 3, 2…
## $ Mistrust_expertise_2 <dbl> 2, 1, 1, 2, 1, 1, 2, 1, 3, 3, 3, 3, 1, 1, 3, 1…
## $ Mistrust_expertise_3 <dbl> 2, 2, 5, 3, 2, 1, 4, 2, 2, 2, 3, 2, 2, 2, 2, 1…
## $ confidence <dbl> 1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2…
## $ Certainty_sci_know_1 <dbl> 4, 5, 5, 3, 4, 4, 5, 2, 4, 5, 3, 4, 1, 5, 5, 4…
## $ Certainty_sci_know_2 <dbl> 2, 3, 4, 2, 2, 3, 5, 1, 4, 2, 3, 2, 5, 4, 5, 3…
## $ Certainty_sci_know_3 <dbl> 4, 5, 5, 4, 5, 5, 5, 5, 3, 4, 3, 4, 5, 5, 5, 5…
## $ Certainty_sci_know_4 <dbl> 4, 5, 3, 3, 2, 3, 5, 4, 4, 2, 3, 4, 2, 2, 5, 4…
## $ Certainty_sci_know_5 <dbl> 4, 5, 5, 5, 5, 4, 5, 3, 4, 4, 3, 4, 3, 4, 5, 5…
## $ Certainty_sci_know_6 <dbl> 4, 5, 5, 5, 4, 4, 4, 5, 3, 2, 3, 4, 5, 5, 5, 5…
## $ Development_sci_know_1 <dbl> 4, 5, 5, 5, 4, 5, 5, 5, 4, 5, 3, 4, 5, 5, 5, 5…
## $ Development_sci_know_2 <dbl> 4, 4, 5, 4, 5, 5, 5, 5, 4, 5, 3, 4, 4, 5, 5, 5…
## $ Development_sci_know_3 <dbl> 4, 5, 5, 4, 4, 5, 4, 5, 5, 5, 2, 4, 5, 5, 5, 5…
## $ Development_sci_know_4 <dbl> 4, 4, 5, 4, 5, 5, 5, 5, 5, 5, 2, 4, 5, 5, 5, 5…
## $ Development_sci_know_5 <dbl> 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 2, 4, 5, 5, 5, 5…
## $ Development_sci_know_6 <dbl> 5, 4, 5, 4, 4, 5, 4, 5, 4, 4, 3, 2, 3, 5, 5, 5…
## $ confusion <dbl> 1.833333, 2.666667, 1.666667, 3.333333, 4.0000…
## $ backlash <dbl> 1.500000, 3.000000, 1.833333, 3.666667, 2.3333…
## $ mistrust <dbl> 2.000000, 1.666667, 2.333333, 2.666667, 1.6666…
## $ certainty <dbl> 3.666667, 4.666667, 4.500000, 3.666667, 3.6666…
## $ development <dbl> 4.333333, 4.500000, 5.000000, 4.166667, 4.5000…
I will also use my custom function from the verification report in this section so I will leave my definition of it here as reference:
pirate.fun(): Create a pirate plot
Description
Creates a pirate plot with customisable labels, limits, breaks, violin width, etc.
Input:
Output:
pirate plot graph with:
Example:
See exploratory question 2 or my verification report
Packages:
pirate.fun <- function(data, y_var, plot_title, y_title, lim1, lim2, break_size,
add.minor.break, no.minor.breaks = 0, violin_width) {
# Boolean for minor breaks
minor_break <- if (add.minor.break) {
element_line(
linewidth = 0.3,
color = "gray"
)
} else {
element_blank() # Otherwise no lines
}
# Plot
ggplot(
data = data,
mapping = aes(
x = number,
y = {{y_var}}, # {{ }} indicates variable in dataset
fill = Conflict
)
) +
geom_violin(
width = violin_width, # Violin width varies so it is a parameter
linewidth = 0.9
) +
geom_jitter(
stroke = 0,
size = 1.5,
alpha = 0.3,
position = position_jitter(
height = 0,
width = 0.04
)
) +
stat_summary(
fun.data = "mean_cl_normal",
geom = "crossbar",
fill = "white",
alpha = .7,
width = 0.5,
linewidth = 1.1
) +
annotate(
geom = "text",
x = 0.5,
y = (lim1 - 1.55*((lim2-lim1)/30)), # location of labels are standardised
size = 3,
label = "Conflict"
) +
annotate(
geom = "text",
x = 0.5,
y = (lim1 - 2.55*((lim2-lim1)/30)),
size = 3, label = "Format"
) +
coord_cartesian(
ylim=c(lim1, lim2),
clip="off"
) +
labs(
title = plot_title,
x = "Generic Qualified",
y = y_title
) +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
axis.text.y = element_text(angle = 90, hjust=0.5),
panel.background = element_rect(fill = NA),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(
linewidth = 0.3,
color="gray"
),
panel.border = element_rect(
fill = NA,
colour = "black",
linewidth = 0.5,
),
legend.position = "none",
panel.grid.minor.y = minor_break
) +
scale_x_discrete(labels = c("Conf.", "Non-Conf.", "Conf.", "Non-Conf.")) +
scale_y_continuous(breaks = seq(lim1, lim2, by = break_size),
minor_breaks = seq(lim1, lim2, by = (1/no.minor.breaks)),
# So that y axis/divided desired amount of minor breaks
expand = expansion(mult = c(0,0),
add = c( # standardises expansion
(lim2-lim1)/30,
(lim2-lim1)/30
))) +
scale_fill_manual(values = c(
"slategray2",
"lightpink1"
))
}
When considering what research questions to explore I needed to keep mind that some of the manipulations in the two experiments did not work. This means it wouldn’t make sense to factor in these manipulations when analysing the measures that they did not (significantly) affect.
Namely, only the conflict manipulation worked in exp 1. Because of this, I opted not to analyse format’s influence further for exp 1. However, I do choose to look at how the effect of conflicting headlines on the advancement measure might have sex-based differences. I also choose to look at how the time spent on the survey might affect the three measures.
While in exp 2, both manipulations did not result in significant differences. This is why I avoid looking at experiment 2’s manipulations entirely (they were too weak/didn’t work) and instead I look at the correlations between the six measures, something that should be unaffected by the strength of the manipulations.
When first reading the article, I was really quite fascinated in the conflict condition’s effect on the advancement measure. The effect being that conflicting headlines were viewed as a setback in scientific knowledge while consistent headlines an advancement (seen in plot above; +1 denotes advancement). As undergraduate science students, I was surprised to see this effect particularly the effect of participants viewing conflicting headlines as a setback in scientific knowledge. To look further into this effect, I wanted to see if this effect would vary between sexes. I would think that maybe women would, on average, show a more sizeable effect because more men skew towards STEM/science education. It could also be because of something biological (male/female brains are wired differently). Splitting the data by the gender demographic, I predict that males will be more conservative when it comes to this effect and women will show a stronger effect.
To do this, I first filter out all the “Qualified” Format rows because as mentioned before I am only considering the conflict manipulation and do this by only using the “Generic” format participants. I then mutate() the Gender column to M and F from their equivalent number (easier to look at).
data_exploratory_1 <- data_study_1 %>%
filter(Format == "Generic") %>%
mutate(Gender = case_when(
Gender == "1" ~ "M",
Gender == "2" ~ "F"
))
Before any statistical tests, I use aggregate() from base R to see the marginal means and cell means (sex x conflict). This will also give me the directionality of any effects before any plotting.
For my post-hoc analysis I want to perform a statistical test that will check if gender’s effect on advancement as well as interaction with conflict. This fits the description of a two way ANOVA. I use aov() from base R which will let me perform a two way ANOVA (alpha = .05) to analyse Gender and Conflict’s effect on the advancement measure.
Because I am unsure of what a large effect for the advancement measure is, I use cohens_f() from the “effectsize” package to obviously calculate cohen’s f value which obviously allows me to estimate the effect size. (the names of these functions are very intuitive)
N.B. I was originally going to conduct a scheffe test after the ANOVA (using ScheffeTest() from DescTools) but realised it was redundant for our analysis (didn’t need to look at more contrasts). I also was originally going to calculate cohen’s d. They provide very similar results but, as I understand it, cohen’s f is more appropriate in the context of ANOVAs.
I’ve shown how I was going to do the scheffe test and calculate Cohen’s d below (without running):
# 95 CI Scheffe
# install.packages("DescTools")
library(DescTools) # For Scheffe Tests
ScheffeTest(advancement_gender_aov) # From DescTools pacakge
# Calculate effect size with Cohen's D
# install.packages("rstatix")
library(rstatix) # For cohen's d test
data_exploratory_1 %>%
cohens_d(advancement ~ Gender)
# Small effect size (female is 0.445 deviations lower)
This is the actual statistical analysis:
# Cell means
plot_exploration_data <- aggregate(advancement ~ Gender + Conflict, data = data_exploratory_1, mean)
print(plot_exploration_data)
## Gender Conflict advancement
## 1 F Conf. -0.28205128
## 2 M Conf. -0.06250000
## 3 F Non-Conf. -0.07894737
## 4 M Non-Conf. 0.28571429
# Marginal means
aggregate(advancement ~ Gender, data = data_exploratory_1, mean)
## Gender advancement
## 1 F -0.1818182
## 2 M 0.1194030
aggregate(advancement ~ Conflict, data = data_exploratory_1, mean)
## Conflict advancement
## 1 Conf. -0.18309859
## 2 Non-Conf. 0.09589041
# ANOVA F Overall Test
advancement_gender_aov <- aov(advancement ~ Gender * Conflict, data = data_exploratory_1)
summary(advancement_gender_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 3.25 3.251 7.379 0.00743 **
## Conflict 1 2.63 2.632 5.975 0.01575 *
## Gender:Conflict 1 0.19 0.188 0.428 0.51424
## Residuals 140 61.68 0.441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Gender and Conflict main effect is sig. but not interaction.
# To tell direction we check means but i will save interpretation for later
# Estimate effect size using Cohen's f
cohens_f(advancement_gender_aov, partial = TRUE, ci = 0.95, squared = FALSE, model2 = NULL)
## # Effect Size for ANOVA (Type I)
##
## Parameter | Cohen's f (partial) | 95% CI
## ---------------------------------------------------
## Gender | 0.23 | [0.09, Inf]
## Conflict | 0.21 | [0.07, Inf]
## Gender:Conflict | 0.06 | [0.00, Inf]
##
## - One-sided CIs: upper bound fixed at [Inf].
# Gender - small effect size (0.23)
P value is <.05 for conflict but more importantly gender which has a cohen’s f of 0.23. If we look at the means, he we can see that the difference comes from females having a lower advancement score compared to males.
Now, I use ggplot2 from tidyverse to create a simple interaction plot that maps “Conflict” on the x axis, “advancement” on the y axis. I use a point and line geom which are split into two separate sets of lines/pints by grouping and colouring through the “Gender” variable. I then adjust the labels of the axes, title, legend, the theme/formatting of the plot and finally the limits (and their expansion value).
# Plot
plot_exploration_1 <- ggplot(
data = plot_exploration_data,
mapping = aes(
x = Conflict,
y = advancement,
color = Gender,
group = Gender)
) +
geom_point() +
geom_line() +
labs(
title = "Advancement",
x = "Level of Conflict",
y = "Perceived Scientific Advancement"
) +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
axis.text.y = element_text(angle = 90, hjust=0.5),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = NA),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(
linewidth = 0.3,
color="gray"
),
panel.grid.minor.y = element_line(
linewidth = 0.3,
color="gray"
),
panel.border = element_rect(
fill = NA,
colour = "black",
linewidth = 0.5,
)
) +
scale_x_discrete(labels = c("Conflicting", "Consistent")) +
scale_y_continuous(limits = c(-1, 1),
breaks = seq(-1, 1, by = 1),
minor_breaks = seq(-1, 1, by = 0.2), # Adjust minor breaks
expand = expansion(add = c(0.1,0.1))
) +
scale_color_manual(
values = c("lightpink1", "slategray2"), # Set colours of lines/points
labels = c("Female", "Male")
)
plot(plot_exploration_1)
Based on the participants that are looking at consistent headlines, this plot as well as the post-hoc ANOVA shows that women are more likely to view these scientific headlines as a setback in scientific advancement compared to men who view them as progress (small effect size, cohen’s f = 0.23). Conflicting headlines are viewed as setbacks in knowledge more than consistent headlines which are seen as advancements in knowledge (we already know this). These are the main effects and they are both significant. There was no significant interaction effect.
In general, this effect demonstrates a flawed way of thinking that contradicts scientific view that “new findings cannot generally reduce our knowledge” (from the article). Essentially, the advancement value the participants respond with really shouldn’t be <0 because conflicting findings don’t set back our scientific knowledge. When we compare sexes, this becomes even more interesting because we now see that this effect (conflicting headlines = setback in knowledge) is even stronger in women than in men. This aligns with what I predicted. I still can’t be too sure about why this effect occurs (social/education or biological) just that there is a sex-based difference.
While doing the verification report, I was surprised to see the “Duration (in seconds)” variable that qualtrics produces. I was curious about it and thought maybe those who spent longer in the survey (presumably) studying the headlines would maybe see greater scores across the measures. The variable ‘Duration (in seconds)’ is not perfect because we don’t really know what each participant is spending their time on but it’s the best way I have to model this.
N.B. I only do experiment 1 here because neither manipulation worked in experiment 2.
First, I filter out all the rows with “Qualified” for the same reason as before. I then calculate three quantiles which i will use to separate the participants into a fast, middle and slow class based on the duration variable.
# Filter out "qualified" participants
data_exploratory_2 <- data_study_1 %>%
filter(Format == "Generic")
# Calculate three quantiles
quantile(
data_exploratory_2$`Duration (in seconds)`,
probs = c(0,1/3,2/3,1))
## 0% 33.33333% 66.66667% 100%
## 316.0000 414.6667 511.3333 1131.0000
# Assign duration classes
data_exploratory_2 <- data_study_1 %>%
mutate(duration_class = case_when(
`Duration (in seconds)` < (414 + 2/3) ~ "Fast",
`Duration (in seconds)` >= (414 + 2/3) & `Duration (in seconds)` <= (511 + 1/3) ~ "Middle",
`Duration (in seconds)` > (511 + 1/3) ~ "Slow"
))
I follow a similar procedure as the previous exploratory question. I first use aggregate() from base R to see the marginal means and cell means (duration class x conflict). Then, I perform a two way ANOVA (alpha = .05) but since I don’t find any significant differences for the duration classes, I don’t calculate an effect size like I did before.
# Means
aggregate(contradiction ~ duration_class + Conflict, data = data_exploratory_2, mean)
## duration_class Conflict contradiction
## 1 Fast Conf. 25.35417
## 2 Middle Conf. 25.47059
## 3 Slow Conf. 24.93750
## 4 Fast Non-Conf. 12.91304
## 5 Middle Non-Conf. 13.45833
## 6 Slow Non-Conf. 13.90566
aggregate(advancement ~ duration_class + Conflict, data = data_exploratory_2, mean)
## duration_class Conflict advancement
## 1 Fast Conf. -0.12500000
## 2 Middle Conf. -0.33333333
## 3 Slow Conf. -0.27083333
## 4 Fast Non-Conf. 0.10869565
## 5 Middle Non-Conf. 0.02083333
## 6 Slow Non-Conf. -0.09433962
aggregate(confusion ~ duration_class + Conflict, data = data_exploratory_2, mean)
## duration_class Conflict confusion
## 1 Fast Conf. 4.458333
## 2 Middle Conf. 4.686275
## 3 Slow Conf. 4.416667
## 4 Fast Non-Conf. 3.608696
## 5 Middle Non-Conf. 3.520833
## 6 Slow Non-Conf. 3.792453
# ANOVA F Overall Tests
contradiction_duration_aov <- aov(contradiction ~ duration_class*Conflict, data = data_exploratory_2)
summary(contradiction_duration_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## duration_class 2 13 7 0.469 0.626
## Conflict 1 10244 10244 713.525 <2e-16 ***
## duration_class:Conflict 2 26 13 0.892 0.411
## Residuals 288 4135 14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
advancement_duration_aov <- aov(advancement ~ duration_class*Conflict, data = data_exploratory_2)
summary(advancement_duration_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## duration_class 2 1.64 0.819 1.901 0.151
## Conflict 1 4.76 4.760 11.053 0.001 ***
## duration_class:Conflict 2 0.41 0.205 0.475 0.622
## Residuals 288 124.03 0.431
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confusion_duration_aov <- aov(confusion ~ duration_class*Conflict, data = data_exploratory_2)
summary(confusion_duration_aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## duration_class 2 0.30 0.15 0.192 0.8258
## Conflict 1 56.67 56.67 72.151 1.08e-15 ***
## duration_class:Conflict 2 3.68 1.84 2.345 0.0977 .
## Residuals 288 226.22 0.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
None of the duration classes show any significant difference across all three measures of exp 1.
For this, I wanted to reuse the pirate plot function I made in the verification section. I try to do in the same method but quickly realise that it doesn’t really work because there are a lot of hard coded arguments in the original custom function. There are also some things that I don’t want/no longer need such as the secondary labels, a different x variable, wrapping by conflict, other different labels/mappings. So I create a new custom function that slightly tweaks these things.
Here I try to use pirate.fun() that was written in the verification report:
# Call Function
# Contradiction
pirate_exploratory_1_test <- pirate.fun(
data = data_exploratory_2,
y_var = contradiction,
plot_title = "Contradiction",
y_title = "Perceived Contradiction",
lim1 = 0,
lim2 = 30,
break_size = 5,
add.minor.break = FALSE,
violin_width = 0.4
)
# Advancement
pirate_exploratory_2_test <- pirate.fun(
data = data_exploratory_2,
y_var = advancement,
plot_title = "Advancement",
y_title = "Perceived Scientific Advancement",
lim1 = -1,
lim2 = 1,
break_size = 1,
add.minor.break = TRUE,
no.minor.breaks = 5,
violin_width = 1
)
# Confusion
pirate_exploratory_3_test <- pirate.fun(
data = data_exploratory_2,
y_var = confusion,
plot_title = "Confusion",
y_title = "Perceived Confusion",
lim1 = 1,
lim2 = 5,
break_size = 1,
add.minor.break = TRUE,
no.minor.breaks = 2,
violin_width = 1
)
# Create white graphical object for empty quadrant of figure
whitegrob <- rectGrob(gp = gpar( # Graphical Parameters
fill= "white",
col = "white" # col is the border colour
)
)
# Combine
pirate_exploratory_test <- grid.arrange(
pirate_exploratory_1_test, pirate_exploratory_2_test,
pirate_exploratory_3_test, whitegrob,
ncol = 2, nrow = 2
)
## Warning: `position_dodge()` requires non-overlapping x intervals.
## `position_dodge()` requires non-overlapping x intervals.
## `position_dodge()` requires non-overlapping x intervals.
png(filename="pirate_exploratory_test.png", width=1500, height=1000)
plot(pirate_exploratory_test) # Final plot is the png saved after this
dev.off()
## quartz_off_screen
## 2
Definitely not what I want. The violins are way off. I also need to get rid of the secondary labels, change the x variable to duration class and the format label is no longer needed. I want three violins for the three duration classes next to each other and I wanted this to be wrapped across the Conflict variable so that there will be 6 violins for each measure. To do this I make a new function that tweaks the old custom function. I remove everything I no longer need (secondary labels, format labels) and change the existing labels (conflict -> duration class). All the parameters are preserved and their input values shouldn’t change either. The only real thing that I need to do is to add facet_wrap(vars(Conflict)) to the end of the function.
Here I adjust the old function so that it fits better with our needs here:
pirate.fun.exploratory <- function(data, y_var, plot_title, y_title, lim1, lim2,
break_size, add.minor.break, no.minor.breaks = 0, violin_width) {
# Boolean for minor breaks
minor_break <- if (add.minor.break) {
element_line(
linewidth = 0.3,
color = "gray"
)
} else {
element_blank() # Otherwise no lines
}
# Plot
ggplot(
data = data,
mapping = aes(
x = factor(duration_class, levels = c('Slow', 'Middle', 'Fast')), # New variable
y = {{y_var}}, # {{ }} indicates variable in dataset
fill = duration_class
)
) +
geom_violin(
width = violin_width,
linewidth = 0.9
) +
geom_jitter(
stroke = 0,
size = 1.5,
alpha = 0.3,
position = position_jitter(
height = 0,
width = 0.04
)
) +
stat_summary(
fun.data = "mean_cl_normal",
geom = "crossbar",
fill = "white",
alpha = .7,
width = 0.5,
linewidth = 1.1
) +
labs(
title = plot_title,
x = "Survey Duration", # New label
y = y_title
) +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
axis.text.y = element_text(angle = 90, hjust=0.5),
panel.background = element_rect(fill = NA),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(
linewidth = 0.3,
color="gray"
),
panel.border = element_rect(
fill = NA,
colour = "black",
linewidth = 0.5,
),
legend.position = "none",
panel.grid.minor.y = minor_break
) +
scale_y_continuous(limits = c(lim1, lim2),
breaks = seq(lim1, lim2, by = break_size),
minor_breaks = seq(lim1, lim2, by = (1/no.minor.breaks)),
# So that y axis/divided desired amount of minor breaks
expand = expansion(mult = c(0,0),
add = c( # standardises expansion
(lim2-lim1)/30,
(lim2-lim1)/30
))) +
facet_wrap(vars(Conflict)) # Wrap by conflict
}
# Call Function
# Contradiction
pirate_exploratory_1 <- pirate.fun.exploratory(
data = data_exploratory_2,
y_var = contradiction,
plot_title = "Contradiction",
y_title = "Perceived Contradiction",
lim1 = 0,
lim2 = 30,
break_size = 5,
add.minor.break = FALSE,
violin_width = 0.4
)
# Advancement
pirate_exploratory_2 <- pirate.fun.exploratory(
data = data_exploratory_2,
y_var = advancement,
plot_title = "Advancement",
y_title = "Perceived Scientific Advancement",
lim1 = -1,
lim2 = 1,
break_size = 1,
add.minor.break = TRUE,
no.minor.breaks = 5,
violin_width = 1
)
# Confusion
pirate_exploratory_3 <- pirate.fun.exploratory(
data = data_exploratory_2,
y_var = confusion,
plot_title = "Confusion",
y_title = "Perceived Confusion",
lim1 = 1,
lim2 = 5,
break_size = 1,
add.minor.break = TRUE,
no.minor.breaks = 2,
violin_width = 1
)
# Combine
pirate_exploratory <- grid.arrange(
pirate_exploratory_1, pirate_exploratory_2,
pirate_exploratory_3, whitegrob,
ncol = 2, nrow = 2
)
png(filename="pirate_exploratory.png", width=1500, height=1000)
plot(pirate_exploratory) # Final plot is the png saved after this
dev.off()
## quartz_off_screen
## 2
After tweaking the old custom function, I got what I wanted. Bonus that the colours look good as well even though I didn’t intentionally do that. The plot, besides looking cool, visualises the lack of differences between duration classes (they are not significantly different). Hence, the time taken to finish the survey didn’t seem to affect the three measures in experiment 1.
I’ve only done exploratory analysis for exp 1 but I still want to do some exp 2. Since neither manipulation in experiment 2 worked, I avoid looking too much into that. Instead, I remembered that while reading the paper that two of the measures in exp 2 seemed quite similar in terms of what they were measuring (E.g. “Mistrust of expertise” contrasts “Confidence in scientific community”). Consequently, I’ve decided to look into the correlation between the different measures. This is perfect because the strength of manipulations (or a lack thereof) should not affect the correlations between the measures. In other words, one measure will still be highly correlated with another measure regardless of whether the manipulation worked.
I use cor() from the “corrplot” package to calculate Pearson’s correlation coefficient across the six selected measures in exp 2. These are displayed using head() rounded to two dp.
# Create correlation data for 6 measures of exp 2
data_correlation <- data_study_2 %>%
select(
confusion,
backlash,
mistrust,
confidence,
certainty,
development
) %>%
cor()
head(round(data_correlation,2))
## confusion backlash mistrust confidence certainty development
## confusion 1.00 0.47 0.26 0.41 -0.09 -0.09
## backlash 0.47 1.00 0.46 0.33 -0.05 -0.10
## mistrust 0.26 0.46 1.00 0.51 -0.29 -0.27
## confidence 0.41 0.33 0.51 1.00 -0.12 -0.12
## certainty -0.09 -0.05 -0.29 -0.12 1.00 0.43
## development -0.09 -0.10 -0.27 -0.12 0.43 1.00
Useful resource:
I then use the package’s eponymous corrplot() function to visualise all the correlations above. In the arguments, I specify “pie” in the method argument so that correlations are shown as pie charts and “upper” in the type argument so that the correlations aren’t diagonally mirrored.
# Correlogram plot
corrplot(data_correlation, method = "pie", type="upper")
Note that the first four measures all have weak (0.2-0.39) to moderate (0.4-0.59) correlation coefficients. The last two measures (Epistemic beliefs about Certainty and Development of knowledge) are also moderately correlated with each other. This makes a lot of sense because the first four measures relate to the headlines in the experiment while the last two relate to general beliefs about science. I am also pleasantly surprised to see that out of all the correlations, the Mistrust and Confidence measure are most strongest correlated (0.51).
I create a scatter plot with a linear regression line to visualise mistrust vs. confidence scores, the two most correlated measures in exp 2. I also reduce the alpha and slightly jitter the raw data points to make it easier to see the distribution of scores.
ggplot(
data = data_study_2,
mapping = aes(
x = mistrust,
y = confidence)
) +
geom_jitter(size = 1.5,
alpha = .5,
position = position_jitter(
height = 0.1,
width = 0.1)) +
geom_smooth(method = "lm", se = FALSE) +
labs(
title = "Confidence vs. Mistrust",
caption = "r = 0.51",
x = "Mistrust of Expertise",
y = "Confidence in the Scientific Community"
)
## `geom_smooth()` using formula = 'y ~ x'
We can see the correlation of these scores in greater detail using the raw score points and a regression line. Obviously, a moderate correlation between the scores is not the strongest but it is still worth noting.