# Load packages
library(tidyverse) # used to clean, manipulate and visualise data
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the raw survey export; readr reports every column as character
data <- read_csv("/cloud/project/Study 8 data.csv")
## Rows: 373 Columns: 340
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (340): StartDate, EndDate, Status, IPAddress, Progress, Duration (in sec...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop rows one and two for clarity
# NOTE(review): presumably extra header rows from the survey export - confirm
data <- data %>%
  slice(-c(1, 2))
# Number of participants in the raw data (371 in the original sample)
count_participants <- nrow(data)
count_participants
## [1] 371
# Apply exclusion criteria
# Participants who responded more than once keep only their first response;
# repeats are detected through the Prolific_PID variable.
# Prolific_PID values that occur more than once in the raw data
duplicates <- data %>%
  count(Prolific_PID, name = "n_responses") %>%
  filter(n_responses > 1) %>%
  pull(Prolific_PID)
# Retain only the first row found for each Prolific_PID
# NOTE(review): "first" means first in file order - confirm rows are
# chronological
data_filtered <- data %>%
  group_by(Prolific_PID) %>%
  slice_head(n = 1) %>%
  ungroup()
# 312 participants remain; 59 were removed
count_participants <- nrow(data_filtered)
count_participants
## [1] 312
# Apply the remaining exclusion criteria in a single pass.
# filter() keeps only rows where every condition is TRUE, so rows with a
# missing value in any of these columns are dropped as well. `na.rm` is not a
# filter() argument (it was being treated as an extra always-TRUE condition),
# so it has been removed.
data_filtered <- data_filtered %>%
  filter(
    Consent == 1, # remove participants who did not consent
    Serious_check == 1, # remove participants who were not serious
    Finished == 1, # remove participants who did not complete
    # Remove participants who failed the attention check. SC0 was read as
    # text, so convert it first: compared as text, "10" >= 4 would be FALSE.
    as.numeric(SC0) >= 4
  )
# we now have 294 participants, with exclusion criteria applied (18 removed)
count_participants <- nrow(data_filtered)
count_participants
## [1] 294
# Check demographics
# Tally participants by gender code
gender_counts <- data_filtered %>%
  count(Gender, name = "count")
# Result: 126 males (code "1") and 168 females (code "2")
gender_counts
## # A tibble: 2 × 2
## Gender count
## <chr> <int>
## 1 1 126
## 2 2 168
# Age
# Convert Age to numeric before computing descriptives
data_filtered <- data_filtered %>%
  mutate(Age = Age %>% as.character() %>% as.numeric())
# Range, mean and standard deviation (missing ages are ignored)
# Youngest participant: 18
data_filtered %>% pull(Age) %>% min(na.rm = TRUE)
## [1] 18
# Oldest participant: 69
data_filtered %>% pull(Age) %>% max(na.rm = TRUE)
## [1] 69
# Mean age was 34.29
data_filtered %>% pull(Age) %>% mean(na.rm = TRUE)
## [1] 34.29252
# Standard deviation was 12.97
data_filtered %>% pull(Age) %>% sd(na.rm = TRUE)
## [1] 12.96633