library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/looking_time/preschooler-analysis
# Run the project's cleaning helpers so their definitions are available below
# (presumably this is where tidy_all_rt_task_data() comes from -- confirm).
source(here("helper/clean_data.R"))

# Location of the merged trial-level data, resolved from the project root.
MERGED_DATA_PATH <- here("data/01_merged_data/merged_data.csv")

# Participant roster; filtered and binned by age further down.
bing_d <- here("data/bing_info.csv") %>%
  read_csv()
## Rows: 212 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): bing_id, redcap_event_name, redcap_repeat_instrument
## dbl (4): redcap_repeat_instance, study_name_frank, child_age_today_scheduli...
## date (1): date_of_test
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reduce the roster to the children enrolled in study 16 and bin each child's
# age at scheduling into a one-year age group stored as a character label.
# NOTE(review): assumes child_age_today_scheduling is expressed in years --
# confirm against the roster export.
bing_d <- bing_d %>%
  filter(study_name_frank == 16) %>%
  select(bing_id, child_age_today_scheduling) %>%
  mutate(
    # Half-open bins [k, k + 1): a child who is exactly k years old belongs to
    # group "k". The previous (k, k + 1] bins labelled an exact 4.0 as "3" and
    # left an exact 3.0 without any group. Ages outside [3, 6) stay NA.
    child_age_group = case_when(
      child_age_today_scheduling >= 3 & child_age_today_scheduling < 4 ~ "3",
      child_age_today_scheduling >= 4 & child_age_today_scheduling < 5 ~ "4",
      child_age_today_scheduling >= 5 & child_age_today_scheduling < 6 ~ "5"
    )
  ) %>%
  # Keep only the first row per bing_id so each child appears once.
  distinct(bing_id, .keep_all = TRUE)
# Read the merged trial-level data and attach the roster columns by bing_id.
# A left join keeps every trial row; rows without a roster match get NA in the
# roster columns.
raw_df <- MERGED_DATA_PATH %>%
  read_csv() %>%
  left_join(y = bing_d, by = "bing_id")
## Rows: 9472 Columns: 28
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): trial_type, internal_node_id, subject, responses, key_press, block...
## dbl (10): trial_index, time_elapsed, rt, minimum_viewing_duration, trial_loo...
## lgl (2): success, trial_stimulus
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
preprocessed df
# Turn the raw merged data into the analysis table using the project helper
# (its definition is not visible in this file).
main_d <- raw_df %>%
  tidy_all_rt_task_data()
## Adding missing grouping variables: `subject`
# Adult comparison data; tagged with the age-group label "adult" so it can be
# stacked with the children's data.
adult_d <- here("data/adult_data.csv") %>%
  read_csv() %>%
  mutate(child_age_group = "adult")
## Rows: 18198 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): subject, block_type, task_type, trial_type, stimulus_displayed, tas...
## dbl (5): block_number, deviant_position, trial_number, trial_looking_time, t...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the adult rows under the children's rows, then derive two labels:
#   participant_type       -- "adult" for rows tagged "adult", otherwise "kids"
#                             (this includes rows whose age group is NA)
#   deviant_position_print -- facet label describing where the deviant occurred
main_d <- bind_rows(main_d, adult_d) %>%
  mutate(
    # %in% never yields NA, so a missing age group falls through to "kids".
    participant_type = if_else(child_age_group %in% "adult", "adult", "kids"),
    deviant_position_print = case_when(
      deviant_position == 2 ~ "Deviant at 2nd trial",
      deviant_position == 4 ~ "Deviant at 4th trial",
      deviant_position == 6 ~ "Deviant at 6th trial",
      # Any other value, including NA, is treated as a block without a deviant.
      TRUE ~ "No Deviant"
    )
  )
# missing: memory question
exclusion criteria:
memory question and memory practice
# Return the subjects whose number of correct memory responses on trials of
# one stimulus type is at or below a threshold.
#
# df              -- trial-level data with columns subject, stimulus_type,
#                    button_pressed and correct_answer
# target_stimulus -- value of stimulus_type to score (e.g. "memory_test")
# max_correct     -- subjects with this many correct responses or fewer are
#                    returned
#
# A response is correct when button 0 was pressed and the correct answer is
# "left", or button 1 was pressed and the correct answer is "right".
# Responses that evaluate to NA are counted as not correct. Without na.rm the
# per-subject sum would itself be NA and that subject would silently escape
# the exclusion filter.
subjects_failing_memory <- function(df, target_stimulus, max_correct) {
  df %>%
    filter(stimulus_type == target_stimulus) %>%
    mutate(
      memory_correct =
        (button_pressed == 0 & correct_answer == "left") |
          (button_pressed == 1 & correct_answer == "right")
    ) %>%
    group_by(subject) %>%
    summarise(sum_correct = sum(memory_correct, na.rm = TRUE)) %>%
    filter(sum_correct <= max_correct) %>%
    pull(subject)
}

# Children with 4 or fewer correct answers on the memory test trials.
failed_memory_test_kids <- subjects_failing_memory(
  raw_df,
  target_stimulus = "memory_test",
  max_correct = 4
)

# Children with no correct answer on the memory practice trials.
failed_memory_practice_kids <- subjects_failing_memory(
  raw_df,
  target_stimulus = "memory_practice",
  max_correct = 0
)
looking time
# Children whose looking times barely vary across trials.
# summarise() (rather than mutate()) yields one row per subject, so the result
# holds each flagged subject once instead of once per trial.
# NOTE(review): the statistic is log(sd(x)), i.e. the log of the raw-scale SD;
# if the intent was the SD of log looking times, this should be sd(log(x)).
# Left unchanged -- confirm the intended criterion.
# NOTE(review): this vector is not referenced by the participant-exclusion
# filter that follows in this file -- confirm whether that is deliberate.
flat_looking_time <- main_d %>%
  filter(participant_type == "kids") %>%
  group_by(subject) %>%
  summarise(sd_lt = log(sd(trial_looking_time))) %>%
  filter(sd_lt < 0.15) %>%
  pull(subject)
exclude participants
# Drop every participant who failed either memory-based criterion.
main_d <- main_d %>%
  filter(
    !(subject %in% c(failed_memory_test_kids, failed_memory_practice_kids))
  )
exclude trial
# Trial-level outlier bounds for children, on the log looking-time scale:
# median plus/minus 3 times mad() (called with its default arguments).
summary_lt_d <- main_d %>%
  filter(participant_type == "kids") %>%
  mutate(log_lt = log(trial_looking_time)) %>%
  summarise(
    median = median(log_lt),
    mad = mad(log_lt),
    upper = median + 3 * mad,
    lower = median - 3 * mad
  )

# Keep all adult trials; keep a child's trial only when its log looking time
# lies strictly between the two bounds.
main_d <- main_d %>%
  filter(
    participant_type == "adult" |
      (participant_type == "kids" &
        log(trial_looking_time) > summary_lt_d$lower &
        log(trial_looking_time) < summary_lt_d$upper)
  )
final sample size
# Number of distinct participants in each age group after exclusions.
# .groups = "keep" leaves the result grouped by child_age_group.
main_d %>%
  distinct(subject, child_age_group) %>%
  group_by(child_age_group) %>%
  summarise(n = n(), .groups = "keep")
## # A tibble: 4 × 2
## # Groups: child_age_group [4]
## child_age_group n
## <chr> <int>
## 1 3 18
## 2 4 26
## 3 5 20
## 4 adult 380
# Distribution of children's log looking times, one panel per trial type,
# with overlapping semi-transparent densities for each age group.
ggplot(
  data = filter(main_d, participant_type == "kids"),
  mapping = aes(
    x = log(trial_looking_time),
    fill = as.factor(child_age_group)
  )
) +
  geom_density(alpha = 0.3) +
  facet_wrap(vars(trial_type))

kids only
# Children only: mean log looking time per trial number with bootstrapped
# confidence intervals, one panel per deviant position.
main_d %>%
  filter(participant_type == "kids") %>%
  ggplot(aes(x = trial_number, y = log(trial_looking_time))) +
  # Points with bootstrapped confidence intervals. No colour or group
  # aesthetic is mapped here, so the earlier position_dodge() and colour scale
  # had nothing to act on and have been removed.
  stat_summary(fun.data = "mean_cl_boot") +
  # The connecting line only needs the mean, so skip a second bootstrap.
  stat_summary(geom = "line", fun = mean) +
  facet_wrap(~deviant_position_print) +
  xlab("Trial Number") +
  # Fixes the "msc" typo. NOTE(review): assumes looking time is recorded in
  # milliseconds -- confirm.
  ylab("Looking Time (Log ms)") +
  theme_classic()

kids vs adults
# Children vs. adults: mean log looking time per trial number with
# bootstrapped confidence intervals, coloured by participant type, one panel
# per deviant position.
main_d %>%
  ggplot(aes(x = trial_number, y = log(trial_looking_time))) +
  stat_summary(
    aes(color = as.factor(participant_type)),
    fun.data = "mean_cl_boot",
    position = position_dodge(width = .2)
  ) +
  # The connecting line only needs the mean, so skip a second bootstrap.
  stat_summary(
    aes(color = as.factor(participant_type)),
    geom = "line",
    fun = mean,
    position = position_dodge(width = .2)
  ) +
  facet_wrap(~deviant_position_print) +
  xlab("Trial Number") +
  # Fixes the "msc" typo. NOTE(review): assumes looking time is recorded in
  # milliseconds -- confirm.
  ylab("Looking Time (Log ms)") +
  theme_classic() +
  langcog::scale_color_solarized(name = "Participant Type")

3 vs 4 vs 5
# Children split by age group: mean log looking time per trial number with
# bootstrapped confidence intervals, one panel per deviant position.
main_d %>%
  filter(participant_type == "kids") %>%
  ggplot(aes(x = trial_number, y = log(trial_looking_time))) +
  stat_summary(
    aes(color = as.factor(child_age_group)),
    fun.data = "mean_cl_boot",
    position = position_dodge(width = .2)
  ) +
  # The connecting line only needs the mean, so skip a second bootstrap.
  stat_summary(
    aes(color = as.factor(child_age_group)),
    geom = "line",
    fun = mean,
    position = position_dodge(width = .2)
  ) +
  facet_wrap(~deviant_position_print) +
  xlab("Trial Number") +
  # Fixes the "msc" typo. NOTE(review): assumes looking time is recorded in
  # milliseconds -- confirm.
  ylab("Looking Time (Log ms)") +
  theme_classic() +
  langcog::scale_color_solarized(name = "Age Group")

3 vs 4 vs 5 vs adults
# All participants, coloured by age group (the three child groups plus
# "adult"): mean log looking time per trial number with bootstrapped
# confidence intervals, one panel per deviant position.
main_d %>%
  ggplot(aes(x = trial_number, y = log(trial_looking_time))) +
  stat_summary(
    aes(color = as.factor(child_age_group)),
    fun.data = "mean_cl_boot",
    position = position_dodge(width = .2)
  ) +
  # The connecting line only needs the mean, so skip a second bootstrap.
  stat_summary(
    aes(color = as.factor(child_age_group)),
    geom = "line",
    fun = mean,
    position = position_dodge(width = .2)
  ) +
  facet_wrap(~deviant_position_print) +
  xlab("Trial Number") +
  # Fixes the "msc" typo. NOTE(review): assumes looking time is recorded in
  # milliseconds -- confirm.
  ylab("Looking Time (Log ms)") +
  theme_classic() +
  langcog::scale_color_solarized(name = "Age Group")
