#Loading packages/Data
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.4 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
# Read the study data from Study_1_data.csv into a tibble
mydata <- read_csv(file = "Study_1_data.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# The first two rows of the imported tibble (referred to as rows 2 and 3 of the data set) are filled
# with text rather than relevant data, so drop them (Jia found some lovely code to do this)
mydata <- slice(mydata, -(1:2))
# Rename the variables "SC0" to "Recall_score" and "FL_10_DO" to "Condition"
mydata <- rename(mydata, Recall_score = SC0, Condition = FL_10_DO)
# Some participants completed the study twice, so there are duplicated IDs (Prolific_PID variable) that must be filtered out
count(mydata, duplicated(Prolific_PID)) # count duplicates
## # A tibble: 2 x 2
## `duplicated(Prolific_PID)` n
## <lgl> <int>
## 1 FALSE 312
## 2 TRUE 59
# Flag, row by row, whether each response repeats an earlier Prolific_PID.
# transmute() is used because this yields one value per row; summarise() is meant to return a
# single row per group, and returning several rows from it is deprecated in newer dplyr releases.
mydata %>% transmute(duplicated(Prolific_PID)) #identify duplicates
## # A tibble: 371 x 1
## `duplicated(Prolific_PID)`
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## # … with 361 more rows
# List the distinct IDs (312) - these should remain after removing duplicates
distinct(mydata, Prolific_PID)
## # A tibble: 312 x 1
## Prolific_PID
## <chr>
## 1 5ea9f5919b12c2176f64927c
## 2 5e70f7033953db048bc49caf
## 3 5e2a086c2927f40710bdc706
## 4 5ec6d7347ff48e101440546b
## 5 5a6bb6b79d65ec00017e825c
## 6 5dd7abaaaf16d3746667182c
## 7 5cd1836da6f34300017e240c
## 8 5e7112783f1bd301e39e60a2
## 9 5c8a6e46926c0b0011b9606a
## 10 5d62b2186f363200168bbb85
## # … with 302 more rows
# Keep the first response for each Prolific_PID; the second (duplicate) response is removed
mydata <- mydata %>% filter(!duplicated(Prolific_PID))
summarise(mydata, n()) # No. of obs. (312)
## # A tibble: 1 x 1
## `n()`
## <int>
## 1 312
# Applying exclusion criteria: keep participants who finished the study (Finished == 1), declared
# that they answered seriously (Serious_check == 1) AND scored 4 or above on recall.
# ***Filtering for 'Finished' excludes those who didn't consent automatically
# Every column was imported as character, so Recall_score is converted to numeric before the
# threshold test; comparing text with >= orders values as strings (e.g. "10" sorts before "4").
# THEN creating a subset of relevant variables
mydata <- mydata %>%
  filter(
    Finished == 1,
    Serious_check == 1,
    as.numeric(Recall_score) >= 4
  ) %>%
  select(
    Finished,
    `Duration (in seconds)`,
    Gender,
    Age,
    Serious_check,
    Recall_score,
    Condition,
    contradiction_1:advancement
  )
# Final sample size = 294
summarise(mydata, n())
## # A tibble: 1 x 1
## `n()`
## <int>
## 1 294
# Export the tidied data to a .csv file
write_csv(x = mydata, "MyDataSubset.csv")
## For the contradiction plot, create a NEW variable: the sum of the six contradiction ratings
# FIRST change the six contradiction variables from character to numeric (they were imported as
# character, and summing the ratings does not work on text)
library("dplyr") #Lauren
mydata <- mydata %>%
  mutate(across(contradiction_1:contradiction_6, as.numeric)) %>%
  # rowSums() adds the six ratings within each row without leaving the data in a rowwise() state
  mutate(contradiction = rowSums(across(contradiction_1:contradiction_6)))
## For all plots
# Separate the data in Condition into 4 columns to separate levels of each IV ("Block_1_Generic_Conflict"...)
mydata <- separate(mydata, Condition, c("block", "number", "Format", "Conflict"))
# Set the new IV columns as factors, then relabel the Conflict levels to mimic the labels in the plots.
# The relabelling has to happen on the factor: a character column has no levels, so assigning to
# levels() before the conversion leaves the labels unchanged.
mydata <- mydata %>%
  mutate(
    Format = as.factor(Format),
    Conflict = fct_recode(
      as.factor(Conflict),
      "Conf." = "Conflict",
      "Non-Conf." = "Consistent"
    )
  )
sapply(mydata, class) #to check
## Finished Duration (in seconds) Gender
## "character" "character" "character"
## Age Serious_check Recall_score
## "character" "character" "character"
## block number Format
## "character" "character" "factor"
## Conflict contradiction_1 contradiction_2
## "factor" "numeric" "numeric"
## contradiction_3 contradiction_4 contradiction_5
## "numeric" "numeric" "numeric"
## contradiction_6 confusion advancement
## "numeric" "character" "character"
## contradiction
## "numeric"
# Export the data with the separated condition columns and contradiction score to a .csv file
write_csv(x = mydata, "MyDataTidiedSubset.csv")
# Age
# Age was imported as character, so convert it before summarising: mean() returns NA (with a
# warning) for character input and range() would compare the values as text. min() and max()
# replace range() so the summary is a single row.
# Values reported in the original note: M = 34.29, SD = 12.67, Range = 18-69
mydata %>%
  mutate(Age = as.numeric(Age)) %>%
  summarise(mean(Age), sd(Age), min(Age), max(Age))
## Warning in mean.default(Age): argument is not numeric or logical: returning NA
## # A tibble: 2 x 3
## `mean(Age)` `sd(Age)` `range(Age)`
## <dbl> <dbl> <chr>
## 1 NA 13.0 18
## 2 NA 13.0 69
# GENDER
# Number of males (value '1'), females ('2'), Other ('3') and Prefer not to say ('4'):
Males <- count(mydata, Gender == 1) # n=126
Males # Print
## # A tibble: 2 x 2
## `Gender == 1` n
## <lgl> <int>
## 1 FALSE 168
## 2 TRUE 126
Females <- count(mydata, Gender == 2) # n=168
Females # Print
## # A tibble: 2 x 2
## `Gender == 2` n
## <lgl> <int>
## 1 FALSE 126
## 2 TRUE 168
count(mydata, Gender == 3) # n=0
## # A tibble: 1 x 2
## `Gender == 3` n
## <lgl> <int>
## 1 FALSE 294
count(mydata, Gender == 4) # n=0
## # A tibble: 1 x 2
## `Gender == 4` n
## <lgl> <int>
## 1 FALSE 294
CHALLENGE: Producing violin plots that are close to the ones in the paper is proving to be extremely difficult. My initial attempts produced ‘violins’ that were stacked on top of each other rather than side by side, and changing the axes to reflect the appropriate IVs and DVs was tricky. Within our team, we shared our ideas and helped each other create plots more similar to those in the paper; however, we still have a long way to go in terms of plot aesthetics and adding more descriptive features (means and confidence intervals).
CHALLENGE: The scale on the contradiction plot in the paper starts from 0, not 5 (which is what I produced). I tried changing the y-axis scale (0–30) and label (“Perceived Contradiction”) simultaneously but had no luck until my group helped me rearrange my code (thanks guys):
#Contradiction plot - My faulty code:
# NOTE: kept on purpose as an example of the problem. scale_y_continuous() and ylim() both
# define the y scale, so ggplot2 reports that a scale for 'y' is already present and replaces
# the first one with the second (see the message printed after this chunk).
ggplot(mydata) +
geom_violin(aes(x = Conflict, y = contradiction)) +
facet_wrap(vars(Format), strip.position = "bottom")+
ggtitle(label = "Contradiction")+
scale_y_continuous(name = "Perceived Contradiction")+
# ylim() adds a second y scale here, which triggers the replacement message
ylim(0,30)+
scale_x_discrete(name = NULL)+
theme(plot.title = element_text(hjust = 0.5))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
# Issue resolved: the y-axis name and limits are set together in one scale_y_continuous() call
ggplot(mydata, aes(x = Conflict, y = contradiction)) +
  geom_violin() +
  facet_wrap(~Format, strip.position = "bottom") +
  ggtitle("Contradiction") +
  scale_y_continuous("Perceived Contradiction", limits = c(0, 30)) +
  scale_x_discrete(name = NULL) +
  theme(plot.title = element_text(hjust = 0.5))
# Advancement plot
# advancement is stored as character, so change it to numeric before plotting
mydata <- mutate(mydata, advancement = as.numeric(advancement))
ggplot(mydata, aes(x = Conflict, y = advancement)) +
  geom_violin() +
  facet_wrap(~Format, strip.position = "bottom") +
  ggtitle("Advancement") +
  scale_y_continuous("Perceived Scientific Advancement") +
  scale_x_discrete(name = NULL) +
  theme(plot.title = element_text(hjust = 0.5))
# Confusion plot
# confusion is stored as character, so change it to numeric before plotting
mydata[["confusion"]] <- as.numeric(mydata[["confusion"]])
ggplot(mydata, aes(x = Conflict, y = confusion)) +
  geom_violin() +
  facet_wrap(~Format, strip.position = "bottom") +
  ggtitle("Confusion") +
  scale_y_continuous("Perceived Confusion") +
  scale_x_discrete(name = NULL) +
  theme(plot.title = element_text(hjust = 0.5))