── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.1.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)library(scales)
Attaching package: 'scales'
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
library(viridis)
Loading required package: viridisLite
Attaching package: 'viridis'
The following object is masked from 'package:scales':
viridis_pal
library(here)
here() starts at /Users/visuallearninglab/Documents/kenya_draw
library(stringr)
Helpers
# Helper function to summarize datasummarized_data <-function(data, x_var, y_var, group_var) {return(data %>%group_by_at(c(x_var, group_var)) %>%summarise(mean_value =mean(.data[[y_var]], na.rm =TRUE),sd_value =sd(.data[[y_var]], na.rm =TRUE),n =n(),se = sd_value /sqrt(n()),ci_lower = mean_value -qt(1- (0.05/2), n -1) * se,ci_upper = mean_value +qt(1- (0.05/2), n -1) * se,.groups ='drop') )}# Flexible age plot functionage_plot <-function(df_participant, df_summary, x_var ="age", y_var, group_var ="location",facet_var =NULL, plot_title,plot_subtitle ="Participant means and age means with 95% CIs", y_label,y_format ="number", # "percent" or "number"smooth_method ="loess") {# Create base plot with participant-level data p <-ggplot(data = df_participant, aes_string(color = group_var, x = x_var, y = y_var)) +# Participant-level meansgeom_jitter(alpha =0.3, size =1.5, width =0.1) +# Age-level means with 95% CIgeom_pointrange(data = df_summary,aes(x = .data[[x_var]], y = mean_value,ymin = ci_lower, ymax = ci_upper), size =0.8) +# Smoothed trendgeom_smooth(aes_string(x = x_var, y = y_var),method = smooth_method, se =TRUE, alpha =0.15)#+coord_cartesian(ylim = c(NA, 1))# Add faceting - all plots in single rowif (!is.null(facet_var)) {if (length(facet_var) ==1) { p <- p +facet_wrap(as.formula(paste("~", facet_var)), scales ="free_x", nrow =1) } elseif (length(facet_var) ==2) { p <- p +facet_grid(as.formula(paste(facet_var[1], "~", facet_var[2])), scales ="free_x") } } else { p <- p +facet_wrap(as.formula(paste("~", group_var)), nrow =1) }# Format y-axisif (y_format =="percent") { p <- p +scale_y_continuous(labels =percent_format(accuracy =1)) } else { p <- p +scale_y_continuous(labels =number_format(accuracy =0.01)) }# Add labels and theme p <- p +labs(title = plot_title,subtitle = plot_subtitle,x ="Age (years)",y = y_label ) +scale_color_viridis_d(begin =0, end =0.5) +theme_classic(base_size =12)# Add correlation if single facet#if (is.null(facet_var) || length(facet_var) == 1) {# p <- p + ggpubr::stat_cor(alpha = 0.4)#}return(p)}
library(dplyr)library(ggplot2)library(scales) # percent_format# 1) participant‑level meansdf_part <- df %>%group_by(participant_id, age, location) %>%# keep location for facetssummarise(recognizability =mean(recognizability, na.rm =TRUE),distance =mean(distance, na.rm=TRUE),distance_euclidean =mean(distance_euclidan, na.rm=TRUE),.groups ="drop") |>filter(age <10)# 2) age‑wise mean + CI within each locationdf_age_rec <-summarized_data(df_part, "age", "recognizability", "location")# Create recognizability plotsrec.plot1 <-age_plot(df_participant = df_part,df_summary = df_age_rec,y_var ="recognizability",plot_title ="Drawing Recognizability by Age Across Locations",y_label ="Recognizability",y_format ="number")
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.
df_part_cat <- df |>group_by(participant_id, age, location, drawing_category) |>summarise(recognizability =mean(recognizability, na.rm =TRUE),distance =mean(distance, na.rm=TRUE),distance_euclidean =mean(distance_euclidan, na.rm=TRUE),.groups ="drop") |>filter(age <10)df_age_cat <-summarized_data(df_part_cat, "age", "recognizability", c("location", "drawing_category"))rec.plot2 <-age_plot(df_participant = df_part_cat,df_summary = df_age_cat,y_var ="recognizability",plot_title ="Drawing Recognizability by Age Across Locations and Categories",plot_subtitle ="Participant means and age means with 95% CIs",y_label ="Recognizability",facet_var=c("location", "drawing_category"),y_format ="number")
Quantifying how similar the drawings are one to another. Lower values indicate drawings are more consistent within that bucket.
# For distance plot with categories:df_age_dist <-summarized_data(df_part, "age", "distance", c("location"))df_age_dist_euclidean <-summarized_data(df_part, "age", "distance_euclidean", c("location"))df_age_dist_cat <-summarized_data(df_part_cat, "age", "distance", c("location", "drawing_category"))centroid.plot1 <-age_plot(df_participant = df_part,df_summary = df_age_dist_euclidean,y_var ="distance_euclidean",plot_title ="Drawing Euclidean distance from Centroid by Age\n Across Locations and Categories",y_label ="Euclidean distance",y_format ="number")centroid.plot2 <-age_plot(df_participant = df_part,df_summary = df_age_dist,y_var ="distance",plot_title ="Drawing cosine dissimilarity from Centroid by Age\n Across Locations and Categories",y_label ="Cosine dissimilarity",y_format ="number")centroid.plot3 <-age_plot(df_participant = df_part_cat,df_summary = df_age_dist_cat,y_var ="distance",plot_title ="Drawing dissimilarity from Centroid by Age\n Across Locations and Categories",plot_subtitle ="Participant means and age means with 95% CIs",y_label ="Cosine distance from centroid",facet_var=c("location", "drawing_category"),y_format ="number")