── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.3.0
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.1.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the summary table from disk; readr guesses the column types.
df_summ <- readr::read_csv(file = "df_summ.csv")
Rows: 111669 Columns: 15
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): dataset_id, role, target, structure, language
dbl (10): trial_id, condition_id, room_num, stage_num, trial_num, rep_num, o...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
You may have “default” preferences
Do something basic
# Baseline scatter plot: total number of words against trial number.
df_summ |>
  ggplot(mapping = aes(trial_num, total_num_words)) +
  geom_point()
# Same scatter plot, with points coloured by dataset.
df_summ |>
  ggplot(
    mapping = aes(trial_num, total_num_words, colour = dataset_id)
  ) +
  geom_point()
What geom?
Some to consider:
geom_point() geom_smooth() stat_summary()
# Per-dataset GLM trend of word count over repetition number (confidence
# ribbon disabled), overlaid with stat_summary()'s default summary, which is
# mean_se() when no summary function is supplied.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
ggplot(df_summ, aes(x = rep_num, y = total_num_words, color = dataset_id)) +
  geom_smooth(method = "glm", se = FALSE) +
  stat_summary()
`geom_smooth()` using formula = 'y ~ x'
No summary function supplied, defaulting to `mean_se()`
You can specify the details: which summary function to use, what functional form to fit, and how to group the data.
# One linear fit on log(x) per dataset-by-stage combination, coloured by
# dataset, with the raw (uncoloured) points drawn on top of the fits.
df_summ |>
  ggplot(mapping = aes(rep_num, total_num_words)) +
  geom_smooth(
    mapping = aes(
      colour = dataset_id,
      group = interaction(dataset_id, stage_num)
    ),
    formula = y ~ log(x),
    method = "lm"
  ) +
  geom_point()
What axes?
Trial number versus repetition number
# Word count against repetition number: one linear fit on log(x) per
# dataset-by-stage combination, coloured by dataset.
df_summ |>
  ggplot(
    mapping = aes(rep_num, total_num_words, colour = dataset_id)
  ) +
  geom_smooth(
    mapping = aes(group = interaction(dataset_id, stage_num)),
    formula = y ~ log(x),
    method = "lm"
  )
Percentage vs. raw number vs. percentage change
# Build `pct_change`: for every row, the ratio of its word count to the word
# count on the previous repetition within the same group.
#
# Columns added:
#   last_words - total_num_words of the previous row in the group
#   last_trial - trial_id of the previous row in the group
#   change     - total_num_words / last_words (multiplicative change)
pct_change <- df_summ |>
  filter(dataset_id != "mankewitz2025_compositional") |>
  group_by(
    role, room_num, target, stage_num, option_size,
    game_id, group_size, structure, language
  ) |>
  # arrange() ignores grouping by default, so this orders the whole table by
  # rep_num; lag() below then walks each group in repetition order.
  arrange(rep_num) |>
  mutate(
    last_words = lag(total_num_words),
    last_trial = lag(trial_id)
  ) |>
  # Drop rows with no previous word count (e.g. the first row of each group).
  filter(!is.na(last_words)) |>
  mutate(change = total_num_words / last_words) |>
  # Release the grouping so it does not leak into downstream verbs.
  ungroup()
# Log of the multiplicative change against repetition number, with one
# linear fit on log(x) per dataset-by-stage combination.
ggplot(
  data = pct_change,
  mapping = aes(rep_num, log(change), colour = dataset_id)
) +
  geom_smooth(
    mapping = aes(group = interaction(dataset_id, stage_num)),
    formula = y ~ log(x),
    method = "lm"
  )
Here’s a crazier one: it looks at how much shorter the next description is as a function of the length of the previous one.
# Log of the multiplicative change against the previous description's word
# count, with one linear fit on log(x) per dataset.
ggplot(
  data = pct_change,
  mapping = aes(last_words, log(change), colour = dataset_id)
) +
  geom_smooth(formula = y ~ log(x), method = "lm")
What data?
Do you want stage 1 only? Do you have any exclusions to apply? Describer only?
Are there categories that should be collapsed?
# Restrict to describer rows in stage 1, then smooth log(change) against
# log(last_words) separately for each dataset (default smoother).
describer_stage1 <- filter(pct_change, role == "describer", stage_num == 1)

ggplot(
  data = describer_stage1,
  mapping = aes(log(last_words), log(change), colour = dataset_id)
) +
  geom_smooth()
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Facets & groupings
# Describer rows in stage 1: one smooth per group size, with group size
# converted to a factor so that it gets a discrete colour scale.
pct_change |>
  filter(role == "describer", stage_num == 1) |>
  ggplot(
    mapping = aes(
      log(last_words),
      log(change),
      colour = as.factor(group_size)
    )
  ) +
  geom_smooth()
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Describer rows in stage 1: one smooth per condition, with one panel per
# group size.
pct_change |>
  filter(role == "describer", stage_num == 1) |>
  ggplot(
    mapping = aes(
      log(last_words),
      log(change),
      colour = as.factor(condition_id)
    )
  ) +
  geom_smooth() +
  # Alternative layout: facet_grid(group_size ~ option_size)
  facet_wrap(vars(group_size))
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Think about color!
Loading required package: viridisLite
# Describer rows in stage 1: one smooth per group size, coloured with the
# discrete viridis palette.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
pct_change |>
  filter(role == "describer") |>
  filter(stage_num == 1) |>
  ggplot(
    aes(
      x = log(last_words),
      y = log(change),
      color = as.factor(group_size)
    )
  ) +
  geom_smooth() +
  scale_color_viridis(discrete = TRUE)
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Add back some rawer data or representation of spread
# Describer rows in stage 1: highly transparent raw points (coloured by
# condition) underneath the per-condition smooths, one panel per group size.
pct_change |>
  filter(role == "describer", stage_num == 1) |>
  ggplot(
    mapping = aes(
      log(last_words),
      log(change),
      colour = as.factor(condition_id)
    )
  ) +
  geom_point(alpha = 0.01) +
  geom_smooth() +
  facet_wrap(vars(group_size))
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# As above, but the raw points are drawn in a fixed grey so that only the
# smooths carry the condition colours.
pct_change |>
  filter(role == "describer", stage_num == 1) |>
  ggplot(
    mapping = aes(
      log(last_words),
      log(change),
      colour = as.factor(condition_id)
    )
  ) +
  geom_point(colour = "grey", alpha = 0.01) +
  geom_smooth() +
  facet_wrap(vars(group_size))
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Describer rows in stage 1: grey raw points plus one smooth per condition
# (via `group`), coloured by group size with the discrete viridis palette.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
pct_change |>
  filter(role == "describer") |>
  filter(stage_num == 1) |>
  ggplot(
    aes(
      x = log(last_words),
      y = log(change),
      group = condition_id,
      color = as.factor(group_size)
    )
  ) +
  geom_point(alpha = .01, color = "grey") +
  geom_smooth() +
  scale_color_viridis(discrete = TRUE)
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Annotations, label, themes
Are there any “zero-points” or baselines that should be marked?
# Describer rows in stage 1, with a dashed reference line at log(change) = 0,
# i.e. where a description has the same length as the previous one.
pct_change |>
  filter(role == "describer", stage_num == 1) |>
  ggplot(
    mapping = aes(
      log(last_words),
      log(change),
      colour = as.factor(condition_id)
    )
  ) +
  geom_point(colour = "grey", alpha = 0.01) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_smooth() +
  facet_wrap(vars(group_size))
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Here we are still doing log scale, but labelling with actual numbers
# Same picture with the raw variables mapped and log10 axis scales instead of
# log() inside aes(), so tick labels show actual values. The "no change"
# reference line therefore moves from 0 to a ratio of 1.
pct_change |>
  filter(role == "describer", stage_num == 1) |>
  ggplot(
    mapping = aes(
      last_words,
      change,
      colour = as.factor(condition_id)
    )
  ) +
  geom_point(colour = "grey", alpha = 0.01) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  geom_smooth() +
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(vars(group_size))
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Good labels for things
# Log10-scaled plot with readable axis titles; the legend is hidden.
pct_change |>
  filter(role == "describer", stage_num == 1) |>
  ggplot(
    mapping = aes(
      last_words,
      change,
      colour = as.factor(condition_id)
    )
  ) +
  geom_point(colour = "grey", alpha = 0.01) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  geom_smooth() +
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(vars(group_size)) +
  labs(
    x = "Words on previous repetition",
    y = "Multiplicative change in length"
  ) +
  theme(legend.position = "none")
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
This is not a strict ordering; you can revisit earlier decisions at any point!
# Describer rows in stage 1 on log10 axes: one smooth per repetition number,
# coloured with the discrete viridis palette, legend at the bottom.
# The dashed line at 1 marks "same length as the previous repetition".
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
pct_change |>
  filter(role == "describer") |>
  filter(stage_num == 1) |>
  ggplot(aes(x = last_words, y = change, color = as.factor(rep_num))) +
  scale_x_log10() +
  scale_y_log10() +
  geom_point(alpha = .01, color = "grey") +
  geom_hline(yintercept = 1, lty = "dashed") +
  geom_smooth() +
  scale_color_viridis(discrete = TRUE) +
  labs(
    x = "Words on previous repetition",
    y = "Multiplicative change in length",
    color = "Repetition number"
  ) +
  theme(legend.position = "bottom")
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Describer rows in stage 1 on log10 axes: one smooth per repetition number,
# with one panel per group size and the legend at the bottom.
# The dashed line at 1 marks "same length as the previous repetition".
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
pct_change |>
  filter(role == "describer") |>
  filter(stage_num == 1) |>
  ggplot(aes(x = last_words, y = change, color = as.factor(rep_num))) +
  scale_x_log10() +
  scale_y_log10() +
  geom_point(alpha = .01, color = "grey") +
  geom_hline(yintercept = 1, lty = "dashed") +
  geom_smooth() +
  facet_wrap(~ group_size) +
  scale_color_viridis(discrete = TRUE) +
  labs(
    x = "Words on previous repetition",
    y = "Multiplicative change in length",
    color = "Repetition number"
  ) +
  theme(legend.position = "bottom")
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
For really nice graphs, sometimes need to do things to legends
# Final version of the faceted plot, with the colour legend keys restyled:
# override.aes draws the legend lines thicker (linewidth = 5) and removes
# the fill from the keys (fill = NA).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
pct_change |>
  filter(role == "describer") |>
  filter(stage_num == 1) |>
  ggplot(aes(x = last_words, y = change, color = as.factor(rep_num))) +
  scale_x_log10() +
  scale_y_log10() +
  geom_point(alpha = .01, color = "grey") +
  geom_hline(yintercept = 1, lty = "dashed") +
  geom_smooth() +
  facet_wrap(~ group_size) +
  scale_color_viridis(discrete = TRUE) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 5, fill = NA))
  ) +
  labs(
    x = "Words on previous repetition",
    y = "Multiplicative change in length",
    color = "Repetition number"
  ) +
  theme(legend.position = "bottom")
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'