── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1.9000 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
here() starts at /Users/visuallearninglab/Documents/visvocab
Note: The package "relayer" is highly experimental. Use at your own risk.
Loading required package: viridisLite
# Function to summarize whether a trial is usable based on whether the subject is looking at the screen for greater than 50% of the critical window
summarize_subj_usable_trials <- function(data, critical_window, suffix, additional_fields=NULL) {
paste("hi")
additional_fields <- additional_fields %||% list()
data %>%
filter(time_normalized_corrected >= critical_window[1] &
time_normalized_corrected <= critical_window[2]) %>%
group_by(SubjectInfo.subjID, Trials.trialID, Trials.ordinal, Trials.trialType) %>%
summarize(
length = n(),
useable_frames = sum(not_looking_away, na.rm = TRUE),
percent_usable = useable_frames / length,
usable = ifelse(percent_usable >= 0.5, 1, 0), # usable if at least 50% looking
mean_target_looking = mean(accuracy, na.rm = TRUE),
!!!additional_fields,
) %>%
rename_with(~ paste0(., "_", suffix), -c(SubjectInfo.subjID, Trials.trialID, Trials.ordinal, Trials.trialType))
}
# Function to compute whether a trial is usable based on whether both the critical window and the baseline window are usable
compute_usable_trial <- function(baseline_col, critical_col) {
case_when(
is.na(baseline_col) ~ 0,
is.na(critical_col) ~ 0,
baseline_col == 1 & critical_col == 1 ~ 1,
TRUE ~ 0
)
}
# Calculate mean, standard deviation, standard error and confidence intervals for data grouped across two variables
summarized_data <- function(data, x_var, y_var, group_var) {
return(data |>
group_by(across(all_of(c(x_var, group_var)))) |>
summarize(
#across(everything(), ~ if (n_distinct(.) == 1) first(.) else NA),
mean_value = mean(.data[[y_var]], na.rm = TRUE),
sd_value = sd(.data[[y_var]], na.rm = TRUE),
N = n(),
se = sd_value / sqrt(n()),
ci=qt(0.975, N-1)*sd_value/sqrt(N),
lower_ci=mean_value-ci,
upper_ci=mean_value+ci,
.groups = 'drop') |>
select(where(~ !all(is.na(.))))
)
}
# make aesthetics aware size scale, also use better scaling
scale_size_c <- function(name = waiver(), breaks = waiver(), labels = waiver(),
limits = NULL, range = c(1, 6), trans = "identity", guide = "legend", aesthetics = "size")
{
continuous_scale(aesthetics, "area", scales::rescale_pal(range), name = name,
breaks = breaks, labels = labels, limits = limits, trans = trans,
guide = guide)
}
# summarize target looking by input condition
summarize_data <- function(data,summary_field) {
return(data |>
summarize(N=n(),
#mean_age = mean(age),
#mean_age_mo = mean(age_mo),
average_corrected_target_looking=mean(corrected_target_looking,na.rm=TRUE),
se=sd(corrected_target_looking,na.rm=T)/sqrt(N),
ci=qt(0.975, N-1)*sd(corrected_target_looking,na.rm=T)/sqrt(N),
lower_ci=average_corrected_target_looking-ci,
upper_ci=average_corrected_target_looking+ci,
lower_se=average_corrected_target_looking-se,
upper_se=average_corrected_target_looking+se,
average_critical_window_looking=mean(mean_target_looking_critical_window,na.rm=TRUE),
critical_window_ci = qt(0.975, N-1)*sd(mean_target_looking_critical_window,na.rm=T)/sqrt(N),
critical_window_lower_ci=average_critical_window_looking-critical_window_ci,
critical_window_upper_ci=average_critical_window_looking+critical_window_ci) |>
rename_with(~ paste0(., "_", suffix), -c(SubjectInfo.subjID, Trials.trialID, Trials.ordinal, Trials.trialType)))
}
#stolen from peekbank/ peekds
#https://github.com/langcog/peekds/blob/master/R/generate_aoi.R
resample_aoi_trial <- function(df_trial, sample_duration=1000/30) {
print(paste0("Subject Number: ",unique(df_trial$sub_num), "; Trial Number: ", unique(df_trial$Trials.ordinal)))
t_origin <- df_trial$t_norm
data_origin <- df_trial$aoi
# create the new timestamps for resampling
t_start <- min(t_origin) - (min(t_origin) %% sample_duration)
t_resampled <- seq(from = t_start, to = max(t_origin),
by = sample_duration)
# exchange strings values with integers for resampling
# this step critical for interpolating missing vals quickly and correctly
aoi_num <- data_origin %>%
dplyr::recode(target = 1, distractor = 2, other = 3, missing = 4)
# start resampling with approx
aoi_resampled <- stats::approx(x = t_origin, y = aoi_num, xout = t_resampled,
method = "constant", rule = 2,
ties = "ordered")$y
aoi_resampled_recoded <- aoi_resampled %>%
dplyr::recode("1" = "target", "2" = "distractor",
"3" = "other", "4" = "missing")
# adding back the columns to match schema
dplyr::tibble(t_norm = t_resampled,
aoi = aoi_resampled_recoded,
trial_id = df_trial$trial_id[1],
administration_id = df_trial$administration_id[1])
}
resample_times <- function(df_table, sample_duration) {
# first check if this data frame has all the correct columns required for
# re-sampling
required_columns <- c("trial_id", "administration_id", "t_norm", "aoi")
# re-zero and normalize times first
# this is mandatory, comes from our decision that not linking resampling and
# centering causes a lot of problems
if (!all(required_columns %in% colnames(df_table))) {
stop(.msg("Resample times function requires the following columns to be
present in the dataframe:
{paste(required_columns, collapse = ', ')}. Times should be
re-zeroed and normalized first before being resampled!"))
}
# main resampling call
# start resampling process by iterating through every trial within every
# administration
df_out <- df_table %>%
dplyr::mutate(admin_trial_id = paste(.data$administration_id,
.data$trial_id, sep = "_")) %>%
split(.$admin_trial_id) %>%
purrr::map_df(resample_aoi_trial, sample_duration=sample_duration) %>%
dplyr::arrange(.data$administration_id, .data$trial_id)
return(df_out)
}Load data
looking_time_resampled_clean <- read.csv(here("data","processed_data","looking_times_resampled_clean.csv"))
all_looking_times <- read.csv(here("data","processed_data","looking_times_with_exclusion_info.csv"))
trial_metadata <- read.csv(here("data","metadata","trial_metadata.csv"))
trial_summary_data <- read.csv(here("data","processed_data","trial_summary_data.csv"))Overall timecourse plot of proportion target looking
#summarizing within subject for each time point
summarize_subj <- looking_time_resampled_clean %>%
filter(trial_exclusion == 0 & exclude_participant ==0 & exclude_participant_insufficient_data == 0) %>%
group_by(SubjectInfo.subjID, time_normalized_corrected) %>%
summarize(N=n(),
non_na_n = sum(!is.na(accuracy_transformed)),
mean_accuracy=mean(accuracy_transformed,na.rm=TRUE),
sd_accuracy=sd(accuracy_transformed,na.rm=TRUE),
se_accuracy=sd_accuracy/sqrt(non_na_n),
ci=qt(0.975, non_na_n-1)*sd(accuracy_transformed,na.rm=T)/sqrt(non_na_n),
lower_ci=mean_accuracy-ci,
upper_ci=mean_accuracy+ci) %>%
filter(non_na_n > 10) %>%
ungroup()Warning: There were 88 warnings in `summarize()`.
The first warning was:
ℹ In argument: `ci = qt(0.975, non_na_n - 1) * sd(accuracy_transformed, na.rm =
T)/sqrt(non_na_n)`.
ℹ In group 1: `SubjectInfo.subjID = "AWMRW7"` and `time_normalized_corrected =
-3700`.
Caused by warning in `qt()`:
! NaNs produced
ℹ Run `dplyr::last_dplyr_warnings()` to see the 87 remaining warnings.
`summarise()` has grouped output by 'SubjectInfo.subjID'. You can override
using the `.groups` argument.
#summarizing across subjects for each time point
summarize_across_subj <- summarize_subj %>%
group_by(time_normalized_corrected) %>%
dplyr::summarize(n=n(),
accuracy=mean(mean_accuracy,na.rm=TRUE),
sd_accuracy=sd(mean_accuracy,na.rm=TRUE),
se_accuracy=sd_accuracy/sqrt(n),
ci=qt(0.975, non_na_n-1)*sd(mean_accuracy,na.rm=T)/sqrt(non_na_n)) %>%
filter(n > 2)Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
always returns an ungrouped data frame and adjust accordingly.
`summarise()` has grouped output by 'time_normalized_corrected'. You can
override using the `.groups` argument.
looking_times <- ggplot(summarize_across_subj,aes(time_normalized_corrected,accuracy))+
xlim(-2000,4000)+
geom_errorbar(aes(ymin=accuracy-ci,ymax=accuracy+ci),width=0, alpha=0.2)+
#geom_point(alpha=0.2)+
geom_smooth(method="gam")+
geom_vline(xintercept=0,size=1.5)+
geom_hline(yintercept=0.5,size=1.2,linetype="dashed")+
geom_vline(xintercept=300,linetype="dotted")+
ylim(0,1)+
xlab("Time (normalized to target word onset) in ms")+
ylab("Proportion Target Looking")Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
looking_times`geom_smooth()` using formula = 'y ~ s(x, bs = "cs")'
Warning: Removed 127 rows containing non-finite outside the scale range
(`stat_smooth()`).
ggsave(here("data","figures","prop_looking_across_time.png"),looking_times,width=9,height=6,bg = "white")`geom_smooth()` using formula = 'y ~ s(x, bs = "cs")'
Warning: Removed 127 rows containing non-finite outside the scale range
(`stat_smooth()`).
Descriptive pilot plots
Usable trial stats
#Overall baseline-corrected proportion target looking by condition
looking_data_summarized <- trial_summary_data |>
filter(trial_exclusion == 0 & exclude_participant == 0 & exclude_participant_insufficient_data == 0) |>
left_join(trial_metadata) |>
arrange(AoA_Est_target)Joining with `by = join_by(Trials.trialID, Trials.targetImage,
Trials.distractorImage, Trials.imagePair)`
looking_data_by_subject_trial_type <- trial_summary_data |>
mutate(trial_exclusion_reason = as.factor(ifelse(is.na(trial_exclusion_reason), "included", trial_exclusion_reason))) |>
summarize(N = n(),
.by=c(SubjectInfo.subjID, Trials.trialType, trial_exclusion_reason))
looking_data_by_subject_trial_type$trial_exclusion_reason <- factor(looking_data_by_subject_trial_type$trial_exclusion_reason, levels = c( setdiff(unique(looking_data_by_subject_trial_type$trial_exclusion_reason), "included"), "included"))
ggplot(looking_data_by_subject_trial_type, aes(x = Trials.trialType, y = N, fill = trial_exclusion_reason)) +
geom_bar(stat = "identity", position = "stack") + # Use stacked bar chart
facet_wrap(~ SubjectInfo.subjID, ncol = 5) + # Create separate bars for each subject
labs(x = "Trial Type", y = "Count (N)", title = "Trial Exclusions by Trial Type and Subject", fill = "Trial Exclusion Reason") +
theme_minimal() + # Apply minimal theme
theme(axis.text.x = element_text(angle = 45, hjust = 1), strip.text = element_text(size = 8)) +
scale_fill_brewer(palette = "Set3")Subject-level timecourses
looking_times <- ggplot(summarize_subj,aes(time_normalized_corrected,mean_accuracy,color=SubjectInfo.subjID))+
xlim(-2000,4000)+
geom_errorbar(aes(ymin=mean_accuracy-ci,ymax=mean_accuracy+ci),width=0, alpha=0.2)+
#geom_point(alpha=0.2)+
geom_smooth(method="gam")+
geom_vline(xintercept=0,size=1.5)+
geom_hline(yintercept=0.5,size=1.2,linetype="dashed")+
geom_vline(xintercept=300,linetype="dotted")+
ylim(0,1)+
xlab("Time (normalized to target word onset) in ms")+
ylab("Proportion Target Looking") +
scale_color_brewer(palette = "Set3", name="Participant ID")
looking_times`geom_smooth()` using formula = 'y ~ s(x, bs = "cs")'
Warning: Removed 133 rows containing non-finite outside the scale range
(`stat_smooth()`).
ggsave(here("data","figures","prop_looking_across_time_and_subject.png"),looking_times,width=9,height=6,bg = "white")`geom_smooth()` using formula = 'y ~ s(x, bs = "cs")'
Warning: Removed 133 rows containing non-finite outside the scale range
(`stat_smooth()`).
Estimating item and subject-level noise
target_looking_item_subject_level <- summarized_data(looking_data_summarized, "Trials.targetImage", "corrected_target_looking", c("SubjectInfo.subjID", "SubjectInfo.testAge", "AoA_Est_target"))Warning: There were 67 warnings in `summarize()`.
The first warning was:
ℹ In argument: `ci = qt(0.975, N - 1) * sd_value/sqrt(N)`.
ℹ In group 1: `Trials.targetImage = "acorn"`, `SubjectInfo.subjID = "AWMRW7"`,
`SubjectInfo.testAge = 720`, `AoA_Est_target = 5.95`.
Caused by warning in `qt()`:
! NaNs produced
ℹ Run `dplyr::last_dplyr_warnings()` to see the 66 remaining warnings.
target_looking_item_level <- summarized_data(target_looking_item_subject_level |> rename(mean_target_looking = mean_value), "Trials.targetImage", "mean_target_looking", "AoA_Est_target")
target_looking_subject_level <- summarized_data(looking_data_summarized, "SubjectInfo.subjID", "corrected_target_looking", "SubjectInfo.testAge")Subject-level performance
subj_performances <- ggplot(target_looking_subject_level,aes(reorder(SubjectInfo.subjID, mean_value),mean_value))+
geom_hline(yintercept=0,linetype="dashed")+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0)+
(geom_point(aes(age_size=SubjectInfo.testAge/30)) |> rename_geom_aes(new_aes = c("size" = "age_size")))+
(geom_jitter(data=target_looking_item_subject_level |> mutate(Trials.targetImage = reorder(Trials.targetImage, AoA_Est_target)), aes(x=SubjectInfo.subjID, y=mean_value, color=Trials.targetImage, aoa_size = AoA_Est_target), alpha=0.3, width=0.2) |> rename_geom_aes(new_aes = c("size" = "aoa_size"))) +
xlab("Participant ID")+
ylab("Proportion of time looking at the target over the distractor")+
ggtitle("Mean proportion of target looking across subjects") +
theme(axis.title.x = element_text(face="bold", size=15, vjust=-1),
axis.text.x = element_text(size=10,angle=0,vjust=0.5),
axis.title.y = element_text(face="bold", size=15),
axis.text.y = element_text(size=10),
strip.text.x = element_text(size = 10,face="bold")
) +
scale_y_continuous(breaks = seq(-1, 1, by = 0.2)) +
scale_size_c(aesthetics = "age_size",name = "Age of participant in months", range=c(2,4), guide = guide_legend(order = 2)) +
scale_size_c( aesthetics = "aoa_size",name = "Est. AoA of target words in years", guide = guide_legend(order = 1)) +
scale_color_viridis_d(name = "Target words",option="D") +
guides(
size = guide_legend(position = "bottom", order = 2),
color = guide_legend(position = "right", order = 1)
)Warning in geom_point(aes(age_size = SubjectInfo.testAge/30)): Ignoring unknown
aesthetics: age_size
Warning in geom_jitter(data = mutate(target_looking_item_subject_level, :
Ignoring unknown aesthetics: aoa_size
Warning: The `scale_name` argument of `continuous_scale()` is deprecated as of ggplot2
3.5.0.
Warning: The `trans` argument of `continuous_scale()` is deprecated as of ggplot2 3.5.0.
ℹ Please use the `transform` argument instead.
subj_performancesggsave(here("data","figures","subj_performances.png"),subj_performances,width=9,height=7,bg = "white")Item-level performance
item_performances <- ggplot(target_looking_item_level, aes(reorder(Trials.targetImage, mean_value), mean_value)) +
geom_hline(yintercept=0,linetype="dashed")+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0, alpha=0.2)+
(geom_point(aes(aoa_size=AoA_Est_target)) |> rename_geom_aes(new_aes = c("size" = "aoa_size")))+
(geom_jitter(data=target_looking_item_subject_level, aes(x=Trials.targetImage, y=mean_value, color=SubjectInfo.subjID, age_size = SubjectInfo.testAge/30), alpha=0.3, width=0.2) |> rename_geom_aes(new_aes = c("size" = "age_size"))) +
xlab("Target image")+
ylab("Proportion of target looking")+
ggtitle("Mean proportion of target looking across target items") +
theme(axis.title.x = element_text(face="bold", size=15, vjust=-1),
axis.text.x = element_text(size=10,angle=0,vjust=0.5),
axis.title.y = element_text(face="bold", size=15),
axis.text.y = element_text(size=10),
strip.text.x = element_text(size = 10,face="bold"),
plot.margin = margin(t = 10, r = 10, b = 30, l = 10)
) +
scale_y_continuous(breaks = seq(-1, 1, by = 0.2)) +
scale_size_c(aesthetics = "age_size",name = "Age of participant in months", range=c(2,4), guide = guide_legend(order = 2)) +
scale_size_c( aesthetics = "aoa_size",name = "Est. AoA of target words in years", guide = guide_legend(order = 1)) +
scale_color_viridis_d(name = "Participant IDs",option="D") +
guides(
color = "none"
) Warning in geom_point(aes(aoa_size = AoA_Est_target)): Ignoring unknown
aesthetics: aoa_size
Warning in geom_jitter(data = target_looking_item_subject_level, aes(x =
Trials.targetImage, : Ignoring unknown aesthetics: age_size
item_performancesggsave(here("data","figures","item_performances.png"),item_performances,width=15,height=10,bg = "white")
trial_performances <- ggplot(summarized_data(looking_data_summarized, "Trials.trialID", "corrected_target_looking", c("Trials.trialID", "AoA_Est_target")), aes(reorder(Trials.trialID, mean_value), mean_value)) +
geom_hline(yintercept=0,linetype="dashed")+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0, alpha=0.2)+
(geom_point(aes(aoa_size=AoA_Est_target)) |> rename_geom_aes(new_aes = c("size" = "aoa_size")))+
(geom_jitter(data=looking_data_summarized,aes(x=Trials.trialID, y=corrected_target_looking, color=SubjectInfo.subjID, age_size = SubjectInfo.testAge/30), alpha=0.3, width=0.2) |> rename_geom_aes(new_aes = c("size" = "age_size"))) +
xlab("Trial type")+
ylab("Proportion of target looking")+
ggtitle("Mean proportion of target looking across trial types") +
scale_size_c(aesthetics = "age_size",name = "Age of participant in months", range=c(2,4), guide = guide_legend(order = 2)) +
scale_size_c( aesthetics = "aoa_size",name = "Est. AoA of target words in years", guide = guide_legend(order = 1)) +
scale_color_viridis_d(name = "Participant IDs",option="D") +
coord_cartesian(ylim = c(-1, 1)) +
theme(axis.title.x = element_text(face="bold", size=15, vjust=-1),
axis.text.x = element_text(size=8,angle=45,hjust=1),
axis.title.y = element_text(face="bold", size=15),
axis.text.y = element_text(size=15),
strip.text.x = element_text(size = 10,face="bold"),
aspect.ratio = 1,
plot.margin = margin(t = 10, r = 10, b = 30, l = 10)
) +
guides(
color = "none"
) Warning in geom_point(aes(aoa_size = AoA_Est_target)): Ignoring unknown
aesthetics: aoa_size
Warning in geom_jitter(data = looking_data_summarized, aes(x = Trials.trialID,
: Ignoring unknown aesthetics: age_size
trial_performancesggsave(here("data","figures","trial_performances.png"),trial_performances,width=15,height=10,bg = "white")Condition-based performance by subject
looking_data_by_condition <- summarized_data(looking_data_summarized, "Trials.trialType", "corrected_target_looking", "SubjectInfo.subjID")
looking_by_part_condition <- ggplot(looking_data_by_condition, aes(x = Trials.trialType, y = mean_value)) +
geom_bar(stat = "identity", aes(fill = ifelse(!xor(lower_ci > 0, upper_ci > 0), "Above chance", "Chance"))) +
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0, alpha=0.3) +
geom_jitter(data = looking_data_summarized, aes(x=Trials.trialType, y=corrected_target_looking, aoa_size = AoA_Est_target), width=0.1, alpha=0.3, size=2) +
facet_wrap(~ SubjectInfo.subjID, ncol = 5) + # Create separate bars for each subject
labs(x = "Trial type", y = "Prop. of target looking over the distractor", title = "Proportion of target looking by participant and condition type", fill = "Directional looking pattern") +
theme_minimal() + # Apply minimal theme
theme(axis.text.x = element_text(), strip.text = element_text(size = 8)) +
scale_fill_brewer(palette = "Set4")Warning in geom_jitter(data = looking_data_summarized, aes(x =
Trials.trialType, : Ignoring unknown aesthetics: aoa_size
Warning: Unknown palette: "Set4"
looking_by_part_conditionggsave(here("data","figures","looking_by_participant_condition.png"),looking_by_part_condition,width=15,height=5,bg = "white")Easy vs hard trial plots
Proportion of target looking for easy vs hard trials
avg_corrected_target_looking_by_condition <- looking_data_summarized %>%
group_by(SubjectInfo.subjID, Trials.trialType) %>%
summarize(N=n(),
#mean_age = mean(age),
#mean_age_mo = mean(age_mo),
average_corrected_target_looking=mean(corrected_target_looking,na.rm=TRUE),
se=sd(corrected_target_looking,na.rm=T)/sqrt(N),
ci=qt(0.975, N-1)*sd(corrected_target_looking,na.rm=T)/sqrt(N),
lower_ci=average_corrected_target_looking-ci,
upper_ci=average_corrected_target_looking+ci,
lower_se=average_corrected_target_looking-se,
upper_se=average_corrected_target_looking+se,
average_critical_window_looking=mean(mean_target_looking_critical_window,na.rm=TRUE),
critical_window_ci = qt(0.975, N-1)*sd(mean_target_looking_critical_window,na.rm=T)/sqrt(N),
critical_window_lower_ci=average_critical_window_looking-critical_window_ci,
critical_window_upper_ci=average_critical_window_looking+critical_window_ci)`summarise()` has grouped output by 'SubjectInfo.subjID'. You can override
using the `.groups` argument.
#baseline-corrected target looking summarized overall
overall_corrected_target_looking_by_condition <- avg_corrected_target_looking_by_condition %>%
group_by(Trials.trialType) %>%
summarize(N=n(),
corrected_target_looking=mean(average_corrected_target_looking,na.rm=TRUE),
ci=qt(0.975, N-1)*sd(average_corrected_target_looking,na.rm=T)/sqrt(N),
lower_ci=corrected_target_looking-ci,
upper_ci=corrected_target_looking+ci)
overall_corrected_target_looking_by_condition %>%
knitr::kable()| Trials.trialType | N | corrected_target_looking | ci | lower_ci | upper_ci |
|---|---|---|---|---|---|
| easy | 4 | -0.0536085 | 0.3000025 | -0.3536110 | 0.2463940 |
| easy-distractor | 4 | 0.1586855 | 0.3223870 | -0.1637015 | 0.4810725 |
| hard | 4 | 0.0856846 | 0.1124564 | -0.0267717 | 0.1981410 |
| hard-distractor | 4 | 0.0503629 | 0.3174820 | -0.2671192 | 0.3678449 |
set.seed(2)
avg_corrected_target_looking_by_condition <- avg_corrected_target_looking_by_condition |> filter(Trials.trialType == "easy" | Trials.trialType == "hard")
overall_corrected_target_looking_by_condition <- overall_corrected_target_looking_by_condition |>
filter(Trials.trialType == "easy" | Trials.trialType == "hard")
jitterer <- position_jitter(width = .05,seed=1)
overall_condition_plot <- ggplot(avg_corrected_target_looking_by_condition, aes(x=Trials.trialType,y=average_corrected_target_looking, fill=Trials.trialType))+
geom_half_violin(data=filter(avg_corrected_target_looking_by_condition, Trials.trialType=="easy"),position = position_nudge(x = -.1, y = 0), width=1,trim = FALSE, alpha = .8,color=NA,side="l")+
geom_half_violin(data=filter(avg_corrected_target_looking_by_condition, Trials.trialType=="hard"),position = position_nudge(x = .1, y = 0), width=1,trim = FALSE, alpha = .8,color=NA,side="r")+
geom_path(aes(group=SubjectInfo.subjID),color="black",fill=NA,alpha=0.15,size=0.75,position=jitterer)+ geom_point(aes(color=Trials.trialType,group=SubjectInfo.subjID), size = 2.5, alpha=0.15,position=jitterer)+
geom_point(data=overall_corrected_target_looking_by_condition,aes(y=corrected_target_looking),color="black",size=5)+
geom_line(data=overall_corrected_target_looking_by_condition,aes(y=corrected_target_looking,group=1),color="black",size=3)+
geom_errorbar(data=overall_corrected_target_looking_by_condition,aes(y=corrected_target_looking,ymin=lower_ci,ymax=upper_ci),width=0,size=1.2,color="black")+
geom_hline(yintercept=0,linetype="dashed")+
theme(legend.position="none")+
xlab("Distractor Difficulty Condition")+
ylab("Baseline-Corrected\nProportion Target Looking")+
theme(axis.title.x = element_text(face="bold", size=20),
axis.text.x = element_text(size=14),
axis.title.y = element_text(face="bold", size=20),
axis.text.y = element_text(size=16),
strip.text.x = element_text(size = 16,face="bold"))Warning in geom_path(aes(group = SubjectInfo.subjID), color = "black", fill =
NA, : Ignoring unknown parameters: `fill`
overall_condition_plotggsave(here("data","figures","condition_based_looking.png"),overall_condition_plot,width=9,height=6,bg = "white")Target image difficulty
# summarize average accuracy within participant (by word alone)
condition_based_looking <- looking_data_summarized |>
filter(!grepl("distractor", Trials.trialType)) |>
distinct(SubjectInfo.subjID, Trials.trialID, Trials.targetImage, corrected_target_looking)
target_looking_by_target_word <- looking_data_summarized |>
filter(!grepl("distractor", Trials.trialType)) |>
group_by(SubjectInfo.subjID, Trials.targetImage) |>
summarize(target_looking_diff = corrected_target_looking[match("easy", Trials.trialType)] - corrected_target_looking[match("hard", Trials.trialType)],
baseline_window_looking_diff = mean_target_looking_baseline_window[match("easy", Trials.trialType)] - mean_target_looking_baseline_window[match("hard", Trials.trialType)],
critical_window_looking_diff = mean_target_looking_critical_window[match("easy", Trials.trialType)] - mean_target_looking_critical_window[match("hard", Trials.trialType)])`summarise()` has grouped output by 'SubjectInfo.subjID'. You can override
using the `.groups` argument.
#clean names for individual images for plot
overall_target_looking_by_word <- target_looking_by_target_word %>%
filter(!is.na(target_looking_diff)) %>%
group_by(Trials.targetImage) %>%
summarize(N=n(),
corrected_target_looking=mean(target_looking_diff,na.rm=TRUE),
ci=qt(0.975, N-1)*sd(target_looking_diff,na.rm=T)/sqrt(N),
lower_ci=corrected_target_looking-ci,
upper_ci=corrected_target_looking+ci
)Warning: There were 2 warnings in `summarize()`.
The first warning was:
ℹ In argument: `ci = qt(0.975, N - 1) * sd(target_looking_diff, na.rm =
T)/sqrt(N)`.
ℹ In group 1: `Trials.targetImage = "acorn"`.
Caused by warning in `qt()`:
! NaNs produced
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
word_prefs <- ggplot(overall_target_looking_by_word,aes(reorder(Trials.targetImage,corrected_target_looking),corrected_target_looking))+
geom_hline(yintercept=0,linetype="dashed")+
geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0)+
geom_point(aes(size=N))+
xlab("Target Word")+
ylab("Easy trial prefered target looking")+
theme(axis.title.x = element_text(face="bold", size=15),
axis.text.x = element_text(size=10,angle=90,vjust=0.5,hjust=1),
axis.title.y = element_text(face="bold", size=15),
axis.text.y = element_text(size=10),
strip.text.x = element_text(size = 10,face="bold")
) +
scale_y_continuous(breaks = seq(-1, 1, by = 0.2)) +
scale_size_continuous(name = "Number of participants")
word_prefsggsave(here("data","figures","easy_trial_pref_by_word.png"),word_prefs,width=9,height=6,bg = "white")Baseline image-pair preferences
saliency_effects <- looking_data_summarized |>
# Calculating the proportion of time looking at the target word even if it isn't the target word in that particular study
mutate(original_target_looking_baseline_window = ifelse(grepl("distractor", Trials.trialType), 1 - mean_target_looking_baseline_window, mean_target_looking_baseline_window),
original_target_looking_critical_window = ifelse(grepl("distractor", Trials.trialType), 1 - mean_target_looking_critical_window, mean_target_looking_critical_window)) |>
summarize(mean_baseline_looking = mean(original_target_looking_baseline_window),
mean_critical_looking = mean(original_target_looking_critical_window),
.by = Trials.imagePair)CLIP similarity plots
Comparing proportion of looking time to CLIP cosine similarities
proportions <- looking_data_summarized |>
filter(!is.na(corrected_target_looking)) |>
summarize(N=n(), prop_target_looking = mean(corrected_target_looking+0.5),
prop_distractor_looking = mean(-corrected_target_looking+0.5),
.by = c(Trials.trialID, Trials.targetImage, Trials.distractorImage)) |>
filter(N > 2)
ggplot(summarized_data(looking_data_summarized, "Trials.trialID", "corrected_target_looking", c("Trials.trialID", "Trials.targetImage", "Trials.distractorImage", "text_similarity", "cor")), aes(x=cor, y = mean_value)) +
geom_point() +
geom_smooth(method="lm") +
geom_label_repel(aes(label=paste(Trials.targetImage, "-", Trials.distractorImage))) +
ylab("Probability of looking at target") +
xlab("CLIP text similarity") +
ggpubr::stat_cor(method = "pearson")`geom_smooth()` using formula = 'y ~ x'
Examining linear correlation between similarity types
cor(data.frame(trial_metadata$multimodal_similarity, trial_metadata$image_similarity, trial_metadata$text_similarity, trial_metadata$cor)) trial_metadata.multimodal_similarity
trial_metadata.multimodal_similarity 1.0000000
trial_metadata.image_similarity 0.9421492
trial_metadata.text_similarity 0.9069870
trial_metadata.cor 0.7947394
trial_metadata.image_similarity
trial_metadata.multimodal_similarity 0.9421492
trial_metadata.image_similarity 1.0000000
trial_metadata.text_similarity 0.7721763
trial_metadata.cor 0.6225654
trial_metadata.text_similarity
trial_metadata.multimodal_similarity 0.9069870
trial_metadata.image_similarity 0.7721763
trial_metadata.text_similarity 1.0000000
trial_metadata.cor 0.8804124
trial_metadata.cor
trial_metadata.multimodal_similarity 0.7947394
trial_metadata.image_similarity 0.6225654
trial_metadata.text_similarity 0.8804124
trial_metadata.cor 1.0000000
cor.test(trial_metadata$image_similarity, trial_metadata$text_similarity)
Pearson's product-moment correlation
data: trial_metadata$image_similarity and trial_metadata$text_similarity
t = 6.6562, df = 30, p-value = 2.264e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5795199 0.8830943
sample estimates:
cor
0.7721763