Rows: 4008 Columns: 25
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): object, globalID, category, childSiblingAges, feedback, age_half
dbl (16): aoa, sessionID, Frequency_normalized_val, TotalCount_normalized_v...
dttm (3): startTimestamp, endTimestamp, session1_time
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 49 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): object, category
dbl (4): mean_item, sd_item, margin_of_error, cv
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Collect the base names of the numeric question columns: every column whose
# name contains "numeric_", with the trailing session suffix ("_1" / "_2")
# stripped so each question appears once.
numeric_cols <- names(sessions_data)
base_names <- unique(
  gsub("_[12]$", "", numeric_cols[grepl("numeric_", numeric_cols)])
)

# Create plots for each numeric column pair: session 1 on the x axis against
# session 2 on the y axis, with a linear fit and the correlation annotated.
plots <- map(base_names, function(base) {
  col1 <- paste0(base, "_1")
  col2 <- paste0(base, "_2")

  # `aes_string()` is deprecated; the `.data` pronoun looks the columns up by
  # their string names inside a regular `aes()` call.
  ggplot(sessions_data, aes(x = .data[[col1]], y = .data[[col2]])) +
    geom_jitter() +
    # Spelling out the default formula silences the per-plot
    # "`geom_smooth()` using formula = 'y ~ x'" message.
    geom_smooth(method = "lm", formula = y ~ x) +
    labs(
      x = "Session 1",
      y = "Session 2",
      title = paste("Session 1 vs. 2:", base)
    ) +
    theme_minimal() +
    ggpubr::stat_cor()
})
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
# Lay out every per-question scatter plot together, three plots per row.
plot_grid(
  plotlist = plots,
  ncol = 3
)
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Correlation by participant
# Reshape the numeric question columns to long format: one row per
# participant x object x question x session, dropping missing responses.
# The column name is split at its last underscore into question and session.
numeric_data <- sessions_data |>
  select(globalID, object, category, contains("numeric")) |>
  pivot_longer(
    cols = contains("numeric"),
    names_to = c("question", "session"),
    names_pattern = "(.*)_(.*)"
  ) |>
  filter(!is.na(value))

# Spread the sessions back out so each row carries the session values in
# columns named after the session labels (referenced below as `1` and `2`).
wide_data <- numeric_data |>
  pivot_wider(
    names_from = session,
    values_from = value
  )

# Group by globalID and question, and compute correlation across objects.
# `cor.test()` drops incomplete pairs on its own (it has no `use` argument),
# errors with fewer than 3 complete pairs, and only reports a confidence
# interval with at least 4, so groups below that threshold are removed first.
correlation_data <- wide_data |>
  group_by(globalID, question) |>
  filter(sum(!is.na(`1`) & !is.na(`2`)) >= 4) |>
  summarise(
    n = n(),
    cor_test = list(cor.test(`1`, `2`)),
    .groups = "drop"
  ) |>
  mutate(
    correlation_value = map_dbl(cor_test, \(test) test$estimate),
    p_value = map_dbl(cor_test, \(test) test$p.value),
    significant = p_value < 0.05,
    ci_low = map_dbl(cor_test, \(test) test$conf.int[1]),
    ci_high = map_dbl(cor_test, \(test) test$conf.int[2])
  )

# Plot with custom y-axis breaks and significance labeling. Bars, stars and
# error bars share one dodge width so they stay aligned within a participant.
dodge <- position_dodge(width = 0.8)

ggplot(
  correlation_data,
  aes(x = globalID, y = correlation_value, fill = question)
) +
  geom_col(position = dodge) +
  # A red star marks correlations with p < 0.05.
  geom_text(
    aes(label = ifelse(significant, "*", "")),
    vjust = -0.5,
    position = dodge,
    size = 4,
    color = "red"
  ) +
  geom_errorbar(
    aes(ymin = ci_low, ymax = ci_high),
    width = 0.2,
    position = dodge
  ) +
  scale_y_continuous(
    breaks = seq(-1, 1, by = 0.2),
    limits = c(-1, 1),
    expand = c(0, 0) # Remove space at the top and bottom
  ) +
  theme_minimal() +
  labs(
    title = "Session 1 vs 2 Correlation Across Objects by Participant",
    x = "Participant ID",
    y = "Session 1-Session 2 correlation",
    fill = "Question"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.ticks = element_blank() # Remove axis ticks
  )
Correlation by object
# Keep only object x question x category groups with at least 5 complete
# session-1 / session-2 pairs, so every correlation below is estimable.
group_counts <- wide_data |>
  group_by(object, question, category) |>
  summarise(n_pairs = sum(!is.na(`1`) & !is.na(`2`)), .groups = "drop") |>
  filter(n_pairs >= 5)

filtered_data <- wide_data |>
  inner_join(group_counts, by = c("object", "question", "category"))

# Session 1 vs. session 2 correlation for each object and question.
# `cor.test()` discards incomplete pairs itself; it has no `use` argument.
cor_per_object_variable <- filtered_data |>
  group_by(object, question, category) |>
  summarise(
    n = n(),
    cor_test = list(cor.test(`1`, `2`)),
    .groups = "drop"
  ) |>
  mutate(
    correlation_value = map_dbl(cor_test, \(test) test$estimate),
    p_value = map_dbl(cor_test, \(test) test$p.value),
    conf_low = map_dbl(cor_test, \(test) test$conf.int[1]),
    conf_high = map_dbl(cor_test, \(test) test$conf.int[2])
  )

# Average the per-question correlations within each object.
# NOTE(review): `ci_low` / `ci_high` are the means of the per-question
# confidence bounds, not a confidence interval of the mean correlation
# (which `t_test$conf.int` would provide) -- confirm which is intended for
# the "95% CI" shown in the plot.
avg_correlation_per_object <- cor_per_object_variable |>
  group_by(object, category) |>
  summarise(
    avg_correlation = mean(correlation_value, na.rm = TRUE),
    sd_correlation = sd(correlation_value, na.rm = TRUE),
    # Must come before `n = n()`: here `n` is still the per-question row count.
    avg_count = mean(n),
    n = n(),
    ci_low = mean(conf_low, na.rm = TRUE),
    ci_high = mean(conf_high, na.rm = TRUE),
    # One-sample t-test of the per-question correlations against zero.
    t_test = list(t.test(correlation_value)),
    .groups = "drop"
  ) |>
  mutate(
    p_value = map_dbl(t_test, \(test) test$p.value),
    significant = p_value < 0.05
  )

# Sort by average correlation and plot with CIs
avg_correlation_per_object <- avg_correlation_per_object |>
  arrange(desc(avg_correlation))
# |> filter(avg_correlation > 0.5 | avg_correlation < 0.3)

ggplot(
  avg_correlation_per_object,
  aes(x = reorder(object, -avg_correlation), y = avg_correlation)
) +
  geom_col(aes(fill = category)) +
  geom_errorbar(
    aes(ymin = ci_low, ymax = ci_high),
    width = 0.2
  ) +
  # A star marks objects whose mean correlation differs from zero (p < 0.05).
  geom_text(
    aes(label = ifelse(significant, "*", "")),
    vjust = -0.5,
    size = 4
  ) +
  # The bars map `category` to `fill`, so the legend title has to be set on
  # the fill scale; a colour scale would have no effect here.
  scale_fill_discrete(name = "Category") +
  scale_y_continuous(
    breaks = seq(-0.5, 1, by = 0.2),
    limits = c(-0.5, 1)
  ) +
  theme_minimal() +
  labs(
    title = "Average Correlation Across Session 1 and 2",
    x = "Object",
    y = "Average Correlation (95% CI)",
    fill = "Category"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 14))