# load packages
library(tidyverse) # used to clean, manipulate and visualise data
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw study CSV. The path is absolute, so the file must exist at
# exactly this location for the script to run.
data <- read_csv("/cloud/project/Study 8 data.csv")
## Rows: 373 Columns: 340
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (340): StartDate, EndDate, Status, IPAddress, Progress, Duration (in sec...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop the first two rows of the imported table before any filtering
# (presumably extra header/metadata rows from the survey export -- confirm)
data <- data[-(1:2), ]
# Apply exclusion criteria
# Participants who responded more than once: keep only their first response,
# using the Prolific_PID variable to identify them.
# IDs that occur in more than one row (stored for inspection; not referenced
# again in this part of the script)
duplicates <- data %>%
  count(Prolific_PID) %>%
  filter(n >= 2) %>%
  pull(Prolific_PID)
# Retain only the first row within each Prolific_PID group
data <- data %>%
  group_by(Prolific_PID) %>%
  slice_head(n = 1) %>%
  ungroup()
# Keep only participants who consented, confirmed they responded seriously,
# and completed the survey.
# filter() already drops rows where a condition evaluates to FALSE or NA, so
# no NA handling is needed here. The previous `na.rm = TRUE` is not a filter()
# argument and had no effect on which rows were kept.
data <- data %>%
  filter(
    Consent == 1,       # remove participants who did not consent
    Serious_check == 1, # remove participants who were not serious
    Finished == 1       # remove participants who did not complete
  )
# Remove participants who failed the attention check (SC0 score below 4).
# Every column was imported as character, so SC0 must be converted to a number
# before comparing: a character-vs-number comparison is done on strings, under
# which a score such as "10" sorts before "4" and would be wrongly excluded.
# Values that cannot be parsed as numbers become NA and are dropped by filter().
data <- data %>%
  filter(as.numeric(SC0) >= 4)
# Rename the condition variable from FL_10_DO to condition
names(data)[names(data) == "FL_10_DO"] <- "condition"
# Data frame for the FIGURE 2 histogram: number of participants (column n) for
# every combination of condition and advancement.
# count() returns an ungrouped result, so later steps do not silently operate
# within leftover groups (group_by() + summarise() kept the data grouped by
# condition).
figure2 <- data %>%
  count(condition, advancement)
# Replace the raw codes with readable labels for the plot.
# Values not listed in a recode() call are left unchanged by recode().
figure2 <- figure2 %>%
  mutate(
    # Condition titles (the "Non-conflicting/Generic" label was previously
    # misspelled as "Non-conflicing/Generic" and showed that way in the legend)
    condition = recode(
      condition,
      "Block_1_Generic_Conflict" = "Conflicting/Generic",
      "Block_3_Qualified_Conflict" = "Conflicting/Qualified",
      "Block_2_Generic_Consistent" = "Non-conflicting/Generic",
      "Block_4_Qualified_Consistent" = "Non-conflicting/Qualified"
    ),
    # Advancement titles, stored as a factor with explicit levels so the
    # categories are ordered Less < Same < More rather than alphabetically
    advancement = factor(
      recode(
        advancement,
        "-1" = "Less",
        "0" = "Same",
        "1" = "More"
      ),
      levels = c("Less", "Same", "More")
    )
  )
# Draw Figure 2: side-by-side bars of participant counts per advancement
# category, one bar per condition, filled with a four-step greyscale palette
plot <- figure2 %>%
  ggplot(aes(advancement, n, fill = condition)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(fill = "Condition", y = "Number of Participants", x = "Advancement") +
  scale_fill_manual(values = c("#333333", "#818181", "#ababab", "#cccccc")) +
  theme(
    legend.text = element_text(size = 6),  # legend entry labels
    legend.title = element_text(size = 7), # legend heading
    axis.text = element_text(size = 6),    # tick labels
    axis.title = element_text(size = 7)    # axis headings
  )
# Render the chart
print(plot)
