For this homework assignment, we will continue to work with the fake dataset used in workshop 5.
Make sure you start a new RProject from scratch (call it homework_5 or something), as many of the steps will be similar but not identical to those we performed in workshop 5.
library(tidyverse)
library(here)
library(janitor)
# Read the workshop 5 data from its project-relative location, then
# standardize the column names with janitor::clean_names().
d <- here("Week 5 Homework/data", "workshop_5_data.csv") |>
  read_delim() |>
  clean_names()
Researchers noticed that the software they used to measure reaction time had a systematic error. Specifically, all reaction times larger than 800 ms were actually recorded as 200 ms larger than they should be. For example, if a particular trial was recorded as 1000 ms, it should be corrected to 800 ms.
# Reshape to long format: the trial_1 ... trial_10 columns become rows, with
# the column label stored in `trial_num` and the recorded time in `rt`.
# Then undo the software error (recorded times above 800 are 200 too large)
# and flag which rows were changed by the correction.
d_tidy <- d |>
  pivot_longer(
    cols = trial_1:trial_10,
    names_to = "trial_num",
    values_to = "rt"
  ) |>
  mutate(
    rt_corrected = ifelse(rt > 800, rt - 200, rt),
    # "yes" when the corrected value differs from the recorded one.
    fixed = ifelse(rt_corrected != rt, "yes", "no")
  )

# How many rows were / were not corrected.
tally <- d_tidy |>
  count(fixed)
After correcting for this error, the researchers next want to exclude all reaction time values that are more than 1.5 standard deviations below or above the global mean (i.e., the mean regardless of condition).
# Global mean and standard deviation of the corrected reaction times
# (computed over all rows, regardless of condition), plus the exclusion
# bounds at mean -/+ 1.5 SD.
# ungroup() makes the "global" intent explicit; the original empty
# group_by() call had the same effect.
# na.rm = TRUE keeps a single missing reaction time from turning the mean,
# the SD, and therefore both bounds into NA.
bounds <- d_tidy |>
  ungroup() |>
  summarize(
    mean_rt_corrected = mean(rt_corrected, na.rm = TRUE),
    sd_rt_corrected = sd(rt_corrected, na.rm = TRUE),
    min_rt = mean_rt_corrected - 1.5 * sd_rt_corrected,
    max_rt = mean_rt_corrected + 1.5 * sd_rt_corrected
  )
# Label every row relative to the global bounds. When a comparison is NA
# (missing reaction time or undefined bounds) the label is left as NA.
d_cleaned <- d_tidy |>
  mutate(
    outlier_type = case_when(
      is.na(rt_corrected > bounds$max_rt) |
        is.na(rt_corrected < bounds$min_rt) ~ NA_character_,
      rt_corrected > bounds$max_rt ~ "too_big",
      rt_corrected < bounds$min_rt ~ "too_small",
      TRUE ~ "not_outlier"
    )
  )

# Number of rows in each outlier category, taken before anything is removed.
counts <- d_cleaned |>
  count(outlier_type)

# Keep only non-outlier rows that also have an enjoyment rating.
d_cleaned <- d_cleaned |>
  filter(
    outlier_type == "not_outlier",
    !is.na(enjoyment_level)
  )
Now, answer the following research questions, providing justification for how you drew your conclusions:
# Mean corrected reaction time for each level of `sex`.
by_sex <- d_cleaned |>
  group_by(sex) |>
  summarize(
    mean_rt = mean(rt_corrected)
  )
by_sex
## Male participants have slower reaction time on average
# Mean corrected reaction time for each level of `condition`.
by_condition <- d_cleaned |>
  group_by(condition) |>
  summarize(
    mean_rt = mean(rt_corrected)
  )
by_condition
## TV condition had faster reaction time
# Mean corrected reaction time for every condition x sex combination.
# .groups = "drop" returns an ungrouped tibble; by default summarize() would
# leave the result grouped by `condition` and print a message about it.
# NOTE(review): this object shares its name with base R's interaction();
# the name is kept so any later references to it still work.
interaction <- d_cleaned |>
  group_by(condition, sex) |>
  summarize(mean_rt = mean(rt_corrected), .groups = "drop")
interaction
## No, there is only a main effect of sex and condition
# We start by recoding enjoyment_level into three categories:
# below 4 -> "not_enjoyed", above 4 -> "enjoyed", exactly 4 -> "neutral".
# The numeric conversion is done once in a temporary column that is removed
# again, so enjoyment_level itself is left unchanged here.
d_cleaned <- d_cleaned |>
  mutate(
    enjoyment_score = as.numeric(enjoyment_level),
    enjoyment_recoded = case_when(
      enjoyment_score < 4 ~ "not_enjoyed",
      enjoyment_score > 4 ~ "enjoyed",
      enjoyment_score == 4 ~ "neutral"
    )
  ) |>
  select(-enjoyment_score)

# Mean corrected reaction time within each enjoyment category.
relationship <- d_cleaned |>
  group_by(enjoyment_recoded) |>
  summarize(
    mean_rt = mean(rt_corrected)
  )
relationship
## People who enjoyed the task had the slowest reaction time
## Bonus: We can plot this to visualize
# Store enjoyment_level as a number so it can be used as a numeric x-axis.
d_cleaned <- d_cleaned |>
  mutate(enjoyment_level = as.numeric(enjoyment_level))

# Mean corrected reaction time for each subject_number / enjoyment_level
# combination. .groups = "drop" returns an ungrouped tibble; by default
# summarize() would leave the result grouped by `subject_number`.
plot_data <- d_cleaned |>
  group_by(subject_number, enjoyment_level) |>
  summarize(mean = mean(rt_corrected), .groups = "drop")

# Scatter plot of the means with a fitted linear trend line.
ggplot(
  data = plot_data,
  aes(x = enjoyment_level, y = mean)
) +
  geom_point() +
  geom_smooth(method = "lm")