# Read every file in RAW_DATA_DIR with read_csv(), row-bind the results, keep
# only the columns used later on, and restrict the rows to stimulus
# presentations plus the four demographic questionnaire pages.
raw_df <- list.files(RAW_DATA_DIR, full.names = TRUE) %>%
  map_df(read_csv) %>%
  select(
    rt, trial_type, trial_index, subject, trial_stimulus, trial_stimulus_type,
    task_type, task_target_stimulus, task_background_stimulus,
    task_deviant_stimuli, task_order_number, block_order_number,
    trial_looking_time, trial_pressed_space_bar, trial_space_bar_rt,
    responses, question_order
  ) %>%
  filter(
    trial_type %in% c(
      "stimulus-presentation",
      "demog-age",
      "demog-gender-and-education",
      "demog-ethnic-US",
      "demog-disorder-history"
    )
  ) %>%
  mutate(
    # Label each row by whether its stimulus string mentions "complex" or
    # "simple"; rows matching neither pattern get NA.
    stimuli_type = case_when(
      grepl("complex", trial_stimulus) ~ "complex",
      grepl("simple", trial_stimulus) ~ "simple"
    )
  )

# Save the aggregated data and show it as an interactive table.
write_csv(raw_df, AGGREGATED_DATA_PATH)
raw_df %>% datatable()
# Build one row of demographic answers per subject from the JSON strings
# stored in `responses` on the demographic questionnaire rows.
demog_df <- raw_df %>%
  filter(grepl("demog", trial_type)) %>%
  select(subject, trial_type, responses) %>%
  # NOTE(review): the JSON round-trip is kept from the original; it looks like
  # it is there to turn the tibble into a plain data frame -- confirm.
  toJSON() %>%
  fromJSON() %>%
  mutate(
    # Parse each row's JSON answer string into a one-row data frame.
    demog_question = map(responses, ~ fromJSON(.) %>% as.data.frame())
  ) %>%
  unnest(demog_question) %>%
  group_by(subject) %>%
  # Each questionnaire page fills only its own columns, so within a subject
  # overwrite every column with its first non-missing value; the rows of a
  # subject then become identical and collapse under distinct().
  mutate(across(everything(), ~ .x[!is.na(.x)][1])) %>%
  ungroup() %>%
  distinct() %>%
  # Drop both helper columns. The original `select(-trial_type, responses)`
  # kept `responses`, which then collided with raw_df's own `responses`
  # column in the join (responses.x / responses.y).
  select(-trial_type, -responses)
demog_df %>% datatable()
Joining the demographic answers back onto the trial-level data:
# Attach each subject's demographic answers to all of that subject's rows.
data_with_demog <- raw_df %>%
  left_join(demog_df, by = "subject")
datatable(data_with_demog)
# Keep only rows from participants who answered something other than "Yes" to
# all four disorder-history questions.
# The element-wise `&` is required here: the scalar `&&` only looks at the
# first element of each column (and is an error for longer vectors since
# R 4.3), so it cannot act as a per-row condition.
# Rows where any of the four answers is NA are dropped by filter().
demog_filtered <- data_with_demog %>%
  filter(
    current_neuro != "Yes" & past_neuro != "Yes" &
      current_ld != "Yes" & past_ld != "Yes"
  )
demog_filtered
## # A tibble: 0 x 27
## # … with 27 variables: rt <chr>, trial_type <chr>, trial_index <dbl>,
## # subject <chr>, trial_stimulus <chr>, trial_stimulus_type <chr>,
## # task_type <chr>, task_target_stimulus <chr>,
## # task_background_stimulus <chr>, task_deviant_stimuli <chr>,
## # task_order_number <chr>, block_order_number <chr>,
## # trial_looking_time <dbl>, trial_pressed_space_bar <chr>,
## # trial_space_bar_rt <dbl>, responses.x <chr>, question_order <chr>,
## # stimuli_type <chr>, responses.y <chr>, age <fct>, ethnicity <fct>,
## # gender <fct>, education <fct>, current_neuro <fct>, past_neuro <fct>,
## # current_ld <fct>, past_ld <fct>
## proportion of target trials in which the space bar was not pressed
# Per subject: proportion of target trials with no space bar press.
# A missing `trial_pressed_space_bar` is counted as "no".
# The proportion is computed directly rather than through pivot_wider(), so
# it still works when no subject has any "yes" (or any "no") target trial;
# the pivoted version fails in that case because the column does not exist.
prop_press_space_summary <- data_with_demog %>%
  filter(trial_stimulus_type == "target") %>%
  mutate(
    pressed = if_else(
      is.na(trial_pressed_space_bar), "no", trial_pressed_space_bar
    )
  ) %>%
  group_by(subject) %>%
  summarize(
    # Denominator counts "no" + "yes" trials only, as in the original.
    no_press_prop = sum(pressed == "no") / sum(pressed %in% c("no", "yes")),
    .groups = "drop"
  )
## `summarise()` regrouping output by 'subject' (override with `.groups` argument)
## proportion of non-target trials in which the space bar was pressed
# Per subject: proportion of non-target trials with a space bar press.
# A missing `trial_pressed_space_bar` is counted as "no".
# The proportion is computed directly rather than through pivot_wider(), so
# it still works when no subject has any "yes" (or any "no") non-target
# trial; the pivoted version fails in that case because the column does not
# exist.
prop_wrong_press_summary <- data_with_demog %>%
  filter(trial_stimulus_type != "target") %>%
  mutate(
    pressed = if_else(
      is.na(trial_pressed_space_bar), "no", trial_pressed_space_bar
    )
  ) %>%
  group_by(subject) %>%
  summarize(
    # Denominator counts "no" + "yes" trials only, as in the original.
    wrong_press_prop = sum(pressed == "yes") / sum(pressed %in% c("no", "yes")),
    .groups = "drop"
  )
## `summarise()` regrouping output by 'subject' (override with `.groups` argument)
# Show each subject's proportion of non-target trials with a space bar press.
prop_wrong_press_summary
## # A tibble: 2 x 2
## # Groups: subject [2]
## subject wrong_press_prop
## <chr> <dbl>
## 1 SS1602727692346 0.00481
## 2 SS1602730104581 0.0481
# Add both per-subject space bar proportions to the trial-level data.
data_with_demog_spacebar <- data_with_demog %>%
  left_join(prop_press_space_summary, by = "subject") %>%
  left_join(prop_wrong_press_summary, by = "subject")

# Rows of subjects whose proportion of unpressed target trials or of pressed
# non-target trials is above 0.25.
filtered_spacebar <- filter(
  data_with_demog_spacebar,
  no_press_prop > 0.25 | wrong_press_prop > 0.25
)
# Mean and SD of looking time over all stimulus-presentation trials, plus the
# mean +/- 3 SD bounds used as trial-level cutoffs.
summary_lt <- data_with_demog %>%
  filter(trial_type == "stimulus-presentation") %>%
  # Convert once so that mean and SD are computed on the same numeric values
  # (the original converted for the mean but not for the SD).
  mutate(trial_looking_time = as.numeric(trial_looking_time)) %>%
  summarize(
    mean_lt = mean(trial_looking_time, na.rm = TRUE),
    sd_lt = sd(trial_looking_time, na.rm = TRUE),
    upper_lt = mean_lt + 3 * sd_lt,
    lower_lt = mean_lt - 3 * sd_lt
  )

# Scalar cutoffs reused by the later trial-exclusion and plotting code.
UPPER_LT <- pull(summary_lt, upper_lt)
LOWER_LT <- pull(summary_lt, lower_lt)
summary_lt %>% kable()
mean_lt | sd_lt | upper_lt | lower_lt |
---|---|---|---|
1077.792 | 628.1518 | 2962.248 | -806.6632 |
# Stimulus-presentation trials whose looking time falls outside the
# [LOWER_LT, UPPER_LT] range.
trial_lt_to_cut <- data_with_demog %>%
  filter(
    trial_type == "stimulus-presentation",
    trial_looking_time < LOWER_LT | trial_looking_time > UPPER_LT
  )
# Per-subject trial counts, used to check whether a participant would lose
# 25% of their trials under the trial-based exclusion criteria.

# All stimulus-presentation trials per subject.
num_total_trials <- data_with_demog %>%
  filter(trial_type == "stimulus-presentation") %>%
  group_by(subject) %>%
  summarize(n = n(), .groups = "keep")

# Stimulus-presentation trials flagged by the looking-time cutoff, per subject.
num_to_cut_trials <- trial_lt_to_cut %>%
  filter(trial_type == "stimulus-presentation") %>%
  group_by(subject) %>%
  summarize(n = n(), .groups = "keep")

kable(num_total_trials)
subject | n |
---|---|
SS1602727692346 | 244 |
SS1602730104581 | 244 |
# Table of the number of flagged trials per subject.
kable(num_to_cut_trials)
subject | n |
---|---|
SS1602727692346 | 9 |
SS1602730104581 | 1 |
Is a transformation of the looking times needed?
# Histogram of looking times with a vertical line at the upper cutoff.
ggplot(data_with_demog, aes(x = as.numeric(trial_looking_time))) +
  geom_histogram() +
  geom_vline(xintercept = UPPER_LT)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8 rows containing non-finite values (stat_bin).
# Mean looking time with a 95% t-based confidence interval for every
# combination of stimulus type and task type, one panel per task type.
data_with_demog %>%
  filter(trial_type == "stimulus-presentation") %>%
  mutate(lt = as.numeric(trial_looking_time)) %>%
  group_by(trial_stimulus_type, task_type) %>%
  summarize(
    mean = mean(lt, na.rm = TRUE),
    sd = sd(lt, na.rm = TRUE),
    # Count only the observations that enter mean and sd; n() would also
    # count rows with a missing looking time and make the interval too narrow.
    n = sum(!is.na(lt)),
    ci_range_95 = qt(1 - (0.05 / 2), n - 1) * (sd / sqrt(n)),
    ci_lower = mean - ci_range_95,
    ci_upper = mean + ci_range_95
  ) %>%
  ggplot(aes(x = trial_stimulus_type, y = mean)) +
  geom_point() +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  facet_wrap(~task_type)
## `summarise()` regrouping output by 'trial_stimulus_type' (override with `.groups` argument)
Original plot?
# Display order of the task types on the x axis.
task_type_order <- c("all_simple", "all_complex", "mixed_simple_deviant", "mixed_complex_deviant")

# Mean rt with a 95% t-based confidence interval per task type for the
# non-target trials, one panel per stimulus type.
data_with_demog %>%
  filter(trial_type == "stimulus-presentation") %>%
  filter(trial_stimulus_type != "target") %>%
  mutate(
    rt_num = as.numeric(rt),
    # Apply the order defined above (the original defined it but never used
    # it). Task types not listed there are appended after the listed ones
    # instead of being turned into NA.
    task_type = factor(
      task_type,
      levels = union(task_type_order, unique(as.character(task_type)))
    )
  ) %>%
  group_by(trial_stimulus_type, task_type) %>%
  summarize(
    mean = mean(rt_num, na.rm = TRUE),
    sd = sd(rt_num, na.rm = TRUE),
    # Count only the observations that enter mean and sd; n() would also
    # count rows whose rt is missing or not numeric.
    n = sum(!is.na(rt_num)),
    ci_range_95 = qt(1 - (0.05 / 2), n - 1) * (sd / sqrt(n)),
    ci_lower = mean - ci_range_95,
    ci_upper = mean + ci_range_95
  ) %>%
  ggplot(aes(x = task_type, y = mean)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  facet_wrap(~trial_stimulus_type) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
## `summarise()` regrouping output by 'trial_stimulus_type' (override with `.groups` argument)
(This plot is misleading right now because no participants have been excluded yet, but the final plot should look something similar to this.)
# Final trial-level table: stimulus-presentation rows only, with the stimulus
# reduced to its image path, a missing space bar response recorded as "no",
# and the demographic and space bar summary columns given descriptive names.
clean_data <- data_with_demog_spacebar %>%
  filter(trial_type == "stimulus-presentation") %>%
  mutate(
    # Strip the surrounding <img ...> markup so only the image path is left.
    trial_stimulus_path = gsub(
      "' width ='500' height = '500' style='border:5px solid black'>",
      "",
      gsub("<img src='", "", trial_stimulus)
    ),
    trial_pressed_space_bar = if_else(
      is.na(trial_pressed_space_bar), "no", trial_pressed_space_bar
    )
  ) %>%
  select(
    subject,
    task_type,
    task_order_number,
    trial_stimulus_path,
    trial_stimulus_type,
    trial_stimulus_complexity = stimuli_type,
    trial_looking_time,
    trial_pressed_space_bar,
    trial_space_bar_rt,
    target_no_press_percent = no_press_prop,
    non_target_press_percent = wrong_press_prop,
    demog_age = age,
    demog_ethnicity = ethnicity,
    demog_gender = gender,
    demog_education = education
  )
datatable(clean_data)