Packages
Loading required package: tidyverse
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Loading required package: glue
Loading required package: here
here() starts at /Users/foreilly/Documents/levante-pilots
Loading required package: viridis
Loading required package: viridisLite
Loading required package: quarto
Loading required package: ggthemes
Loading required package: GGally
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
Helper functions
Get trog data
Code
# Load the data for the "trog" task through the project helper.
# NOTE(review): load_task_data() is defined outside this view; the printed
# result below appears to hold one row per trial -- confirm against the helper.
trog <- load_task_data ("trog" )
Joining with `by = join_by(site, run_id)`
Code
# A tibble: 43,083 × 15
site task_id dataset user_id run_id trial_id trial_number item_uid
<chr> <chr> <chr> <chr> <chr> <chr> <int> <chr>
1 ca_pilot trog ca_western_pi… 6Pxwzw… mjqOp… EorkQZi… 95 trog_co…
2 ca_pilot trog ca_western_pi… nHKQ5o… AEGug… UNLxJn1… 95 trog_co…
3 ca_pilot trog ca_western_pi… BJJ8ph… z3wtQ… CHxnCH4… 95 trog_co…
4 ca_pilot trog ca_western_pi… Tyqqwm… Bolov… oJWrD3N… 95 trog_co…
5 ca_pilot trog ca_western_pi… HTGMsm… LMtew… MMMY4Eq… 95 trog_co…
6 ca_pilot trog ca_western_pi… flAV0x… 88aBE… UxcuTZB… 95 trog_co…
7 ca_pilot trog ca_western_pi… s1woiH… RBmbZ… CTygipV… 95 trog_co…
8 ca_pilot trog ca_western_pi… XBQzGK… HFUML… EEX7AFO… 95 trog_co…
9 ca_pilot trog ca_western_pi… XBQzGK… raenP… LDKchCO… 95 trog_co…
10 ca_pilot trog ca_western_pi… NvrKIV… TVpt6… BpOaDfs… 95 trog_co…
# ℹ 43,073 more rows
# ℹ 7 more variables: item_group <chr>, item <chr>, correct <lgl>, rt <chr>,
# rt_numeric <dbl>, timestamp <dttm>, age <dbl>
Exploring item types
Code
# Count how many rows fall under each item group.
base::table(trog[["item_group"]])
2combo 3combo abovebelow additive
2193 2177 1818 356
adjective causal comparative compprepcond
2207 322 2071 115
conditional conjcoord depclause disjunctive
987 390 333 1198
embedding genderpronoun gerund inon
1638 84 275 1962
negative neither notonly noun
2186 1732 1786 1483
pluralmorph pluralpronoun postmod preploc
2096 2150 2185 371
prepphrase relclause revactive revpassive
339 1907 2109 2028
revpassrelclause temporal verb xnoty
136 355 2253 1841
Code
# Alphabetical list of the distinct item groups.
trog$item_group |>
  unique() |>
  sort()
[1] "2combo" "3combo" "abovebelow" "additive"
[5] "adjective" "causal" "comparative" "compprepcond"
[9] "conditional" "conjcoord" "depclause" "disjunctive"
[13] "embedding" "genderpronoun" "gerund" "inon"
[17] "negative" "neither" "notonly" "noun"
[21] "pluralmorph" "pluralpronoun" "postmod" "preploc"
[25] "prepphrase" "relclause" "revactive" "revpassive"
[29] "revpassrelclause" "temporal" "verb" "xnoty"
Code
# Alphabetical list of the distinct item identifiers.
trog$item_uid |>
  unique() |>
  sort()
[1] "trog_2combo_big_cup"
[2] "trog_2combo_boy_run"
[3] "trog_2combo_dog_sit"
[4] "trog_2combo_red_ball"
[5] "trog_3combo_boy_jump_box"
[6] "trog_3combo_girl_sit_table"
[7] "trog_3combo_man_eat_apple"
[8] "trog_3combo_woman_carry_bag"
[9] "trog_abovebelow_comb_below_spoon"
[10] "trog_abovebelow_pencil_above_flower"
[11] "trog_abovebelow_square_below_star"
[12] "trog_abovebelow_star_above_circle"
[13] "trog_additive_hose_drink_sheep_eat"
[14] "trog_adjective_black"
[15] "trog_adjective_long"
[16] "trog_adjective_red"
[17] "trog_adjective_tall"
[18] "trog_causal_she_trip_rock_drop_book"
[19] "trog_comparative_box_bigger_cup"
[20] "trog_comparative_fork_longer_pencil"
[21] "trog_comparative_horse_taller_wall"
[22] "trog_comparative_shoe_bigger_bird"
[23] "trog_compprepcond_instead_homework_she_do_puzzle"
[24] "trog_conditional_teacher_give_if_stand_line"
[25] "trog_conditional_we_dance_if_music_play"
[26] "trog_conditional_we_picnic_if_park_sunny"
[27] "trog_conjcoord_kid_clean_but_forget"
[28] "trog_conjcoord_monkey_eat_nor_swing"
[29] "trog_conjcoord_say_sunny_however_rain"
[30] "trog_depclause_she_gardener_wear_hat_flower"
[31] "trog_disjunctive_although_hot_i_wear"
[32] "trog_disjunctive_despite_noise_she_focus"
[33] "trog_disjunctive_he_like_however_choose"
[34] "trog_disjunctive_he_wear_despite_size"
[35] "trog_embedding_book_pencil_on_red"
[36] "trog_embedding_boy_dog_chase_big"
[37] "trog_embedding_cat_cow_chase_black"
[38] "trog_embedding_circle_star_in_red"
[39] "trog_genderpronoun_he_sit_tree"
[40] "trog_genderpronoun_horse_look_her"
[41] "trog_genderpronoun_lady_carry_him"
[42] "trog_genderpronoun_she_sit_chair"
[43] "trog_gerund_bump_table_case_book_fall"
[44] "trog_inon_circle_in_star"
[45] "trog_inon_cup_in_box"
[46] "trog_inon_fork_on_shoe"
[47] "trog_inon_pencil_on_box"
[48] "trog_negative_boy_not_run"
[49] "trog_negative_dog_not_drink"
[50] "trog_negative_dog_not_sit"
[51] "trog_negative_girl_not_jump"
[52] "trog_neither_boy_hat_nor_shoe"
[53] "trog_neither_boy_nor_horse_run"
[54] "trog_neither_dog_nor_ball_brown"
[55] "trog_neither_pencil_long_nor_red"
[56] "trog_notonly_box_notonly_big_blue"
[57] "trog_notonly_girl_notonly_food_drink"
[58] "trog_notonly_notonly_bird_flower_blue"
[59] "trog_notonly_notonly_girl_cat_sit"
[60] "trog_noun_apple"
[61] "trog_noun_bird"
[62] "trog_noun_comb"
[63] "trog_noun_shoe"
[64] "trog_pluralmorph_boy_stand_chairs"
[65] "trog_pluralmorph_boys_pick_apples"
[66] "trog_pluralmorph_cats_look_ball"
[67] "trog_pluralmorph_girl_drop_cups"
[68] "trog_pluralpronoun_cow_look_them"
[69] "trog_pluralpronoun_elephant_carry_them"
[70] "trog_pluralpronoun_they_jump_wall"
[71] "trog_pluralpronoun_they_sit_table"
[72] "trog_postmod_boy_chasing_horse_tall"
[73] "trog_postmod_circle_in_star_yellow"
[74] "trog_postmod_cow_chasing_cat_brown"
[75] "trog_postmod_duck_following_turtle_walking"
[76] "trog_postmod_pencil_on_shoe_blue"
[77] "trog_preploc_car_truck_follow_drive"
[78] "trog_preploc_fish_swim_beneath_whale"
[79] "trog_preploc_plane_gray_above_cloud"
[80] "trog_prepphrase_he_find_under_couch"
[81] "trog_relclause_dog_chase_horse_that_brown"
[82] "trog_relclause_girl_chase_dog_that_big"
[83] "trog_relclause_pencil_on_book_that_yellow"
[84] "trog_relclause_person_chase_dog_that_big"
[85] "trog_relclause_square_in_star_that_blue"
[86] "trog_revactive_boy_chase_sheep"
[87] "trog_revactive_cow_push_lady"
[88] "trog_revactive_girl_push_horse"
[89] "trog_revactive_man_chase_dog"
[90] "trog_revpassive_cow_pushed_man"
[91] "trog_revpassive_elephant_pushed_boy"
[92] "trog_revpassive_girl_chased_horse"
[93] "trog_revpassive_horse_chased_man"
[94] "trog_revpassrelclause_girl_wearing_backpack_shown_flower"
[95] "trog_temporal_student_open_notebook_draw_tree"
[96] "trog_verb_eating"
[97] "trog_verb_picking"
[98] "trog_verb_running"
[99] "trog_verb_sitting"
[100] "trog_xnoty_box_not_chair_red"
[101] "trog_xnoty_boy_sit_not_eat"
[102] "trog_xnoty_cat_big_not_black"
[103] "trog_xnoty_horse_not_boy_stand"
Code
# Number of distinct items in the data.
trog$item_uid |>
  unique() |>
  length()
Get the sum scores
Code
[1] "site" "task_id" "dataset" "user_id" "run_id"
[6] "trial_id" "trial_number" "item_uid" "item_group" "item"
[11] "correct" "rt" "rt_numeric" "timestamp" "age"
Code
# Summarise each participant run: proportion of correct trials, mean age,
# and the number of distinct items attempted.
trog_runs <- trog |>
  group_by(site, user_id, run_id) |>
  summarise(
    correct = mean(correct, na.rm = TRUE),
    age = mean(age, na.rm = TRUE),
    n_items = n_distinct(item_uid), # number of unique items completed
    .groups = "drop"
  )

# Accuracy against age, one panel per site.
# The smoother uses a penalised cubic regression spline (bs = "cs"), the
# basis ggplot2 itself uses for method = "gam". The previous bs = "re" is
# mgcv's random-effect basis; with a numeric x it fits only a ridge-penalised
# straight line, not the smooth trend the caption describes.
ggplot(trog_runs, aes(x = age, y = correct)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
  theme_minimal(base_family = "sans") +
  ylim(0, 1) +
  facet_wrap(~site) +
  labs(
    x = "Age (years)",
    y = "Proportion correct",
    caption = "Note: Each point represents a participant run; smoothed trend by site"
  )
Warning: Removed 105 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 105 rows containing missing values or values outside the scale range
(`geom_point()`).
Code
# Note: The lab theme in plotting_helper.R sets "Source Sans Pro" as the base font,
# which isn't available on my system. Overriding with theme_minimal() for compatibility.
Item completion patterns across countries
Code
# Distribution of the number of distinct items completed per run,
# shown separately for each site.
trog_runs |>
  ggplot(aes(x = n_items)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(vars(site)) +
  theme_minimal(base_family = "sans")
Examining by item group
Code
# Proportion correct and mean age per run within each item group.
# .groups = "drop" returns an ungrouped tibble, so later verbs do not
# silently operate per site/user/run (and the summarise() message goes away).
trog_type <- trog |>
  group_by(site, user_id, run_id, item_group) |>
  summarise(
    correct = mean(correct, na.rm = TRUE),
    age = mean(age, na.rm = TRUE),
    .groups = "drop"
  )
`summarise()` has grouped output by 'site', 'user_id', 'run_id'. You can
override using the `.groups` argument.
Code
# Linear age trend in accuracy for every item group, one panel per site.
trog_type |>
  ggplot(aes(x = age, y = correct, colour = item_group)) +
  geom_smooth(method = "lm", se = FALSE) +
  viridis::scale_color_viridis(discrete = TRUE) +
  facet_wrap(vars(site)) +
  theme_minimal(base_family = "sans") # replaces the lab theme
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 1947 rows containing non-finite outside the scale range
(`stat_smooth()`).
Grouping items by grammatical structure
Code
[1] "compprepcond" "conjcoord" "disjunctive" "gerund"
[5] "postmod" "preploc" "relclause" "revpassrelclause"
[9] "verb" "adjective" "2combo" "negative"
[13] "3combo" "pluralpronoun" "revactive" "pluralmorph"
[17] "comparative" "revpassive" "inon" "xnoty"
[21] "abovebelow" "notonly" "neither" "embedding"
[25] "additive" "temporal" "conditional" "prepphrase"
[29] "depclause" "causal" "noun" "genderpronoun"
Code
# Map each fine-grained item_group onto a broader grammar_group.
# Item groups not listed in any arm are left as NA (the case_when default).
# A former duplicate "complex sentences" arm was removed: it listed a strict
# subset of the arm kept below, so it never changed the result.
trog_grouped <- trog |>
  mutate(grammar_group = case_when(
    item_group %in% c("noun", "verb", "adjective") ~ "single words",
    item_group %in% c("pluralmorph", "pluralpronoun") ~ "morphology",
    item_group %in% c(
      "2combo", "3combo", "prepphrase", "additive",
      "inon", "abovebelow", "comparative", "preploc"
    ) ~ "composition and prepositions",
    item_group %in% c(
      "negative", "temporal", "causal", "conditional", "genderpronoun"
    ) ~ "basic sentences",
    item_group %in% c("revactive", "revpassive") ~ "reversible constructions",
    item_group %in% c(
      "relclause", "embedding", "depclause", "postmod", "gerund",
      "revpassrelclause", "compprepcond"
    ) ~ "complex sentences",
    item_group %in% c(
      "disjunctive", "neither", "notonly", "xnoty", "conjcoord"
    ) ~ "logical structures",
    item_group == "stimulus" ~ "stimulus"
  ))

# Sanity check: number of trials assigned to each grammar group.
trog_grouped |>
  count(grammar_group)
# A tibble: 7 × 2
grammar_group n
<chr> <int>
1 basic sentences 3934
2 complex sentences 6589
3 composition and prepositions 11287
4 logical structures 6947
5 morphology 4246
6 reversible constructions 4137
7 single words 5943
Code
library(ggthemes)

# Per-run accuracy within each grammar group.
# `correct` is NA when a run has no scored trials in a group; a plain
# if/else is used because the condition is a single value per group
# (ifelse() is meant for vectors).
trog_grammar_blocks <- trog_grouped |>
  group_by(site, user_id, run_id, grammar_group) |>
  summarise(
    correct = if (all(is.na(correct))) {
      NA_real_
    } else {
      mean(correct, na.rm = TRUE)
    },
    age = mean(age, na.rm = TRUE),
    .groups = "drop"
  ) |>
  filter(!is.na(grammar_group)) |>
  # Order the facets by accuracy, highest first.
  mutate(grammar_group = fct_reorder(grammar_group, correct, .desc = TRUE))

# Accuracy by age for each grammar group, coloured by site.
# The dashed line sits at 0.25.
# NOTE(review): 0.25 presumably marks chance for four response options -- confirm.
# `span` was dropped from geom_smooth(): it only applies to loess and is
# ignored when method = "lm".
ggplot(trog_grammar_blocks, aes(x = age, y = correct, col = site)) +
  geom_point(aes(alpha = !is.na(correct)), size = 1, show.legend = FALSE) +
  scale_alpha_manual(values = c(`TRUE` = 0.6, `FALSE` = 0.1)) +
  geom_smooth(
    data = trog_grammar_blocks |> filter(!is.na(correct)),
    method = "lm", se = FALSE
  ) +
  geom_hline(yintercept = 0.25, lty = 2, colour = "black") +
  ggthemes::scale_color_solarized() +
  theme_minimal(base_family = "sans") +
  ylim(0, 1) +
  facet_wrap(~grammar_group)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 628 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 628 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 26 rows containing missing values or values outside the scale range
(`geom_smooth()`).
Now grouping items by difficulty (instead of grammar group)
Code
# Proportion correct for every item at every site, reshaped to one row per
# item with one column per site.
# The grouping is dropped before reshaping so pivot_wider() receives a plain
# tibble instead of one still grouped by `site`.
item_difficulty <- trog |>
  group_by(site, item_uid) |>
  summarise(p_correct = mean(correct, na.rm = TRUE), .groups = "drop") |>
  pivot_wider(names_from = site, values_from = p_correct)
`summarise()` has grouped output by 'site'. You can override using the
`.groups` argument.
Code
# Average each item's accuracy over the site columns (everything except the ID).
item_difficulty <- mutate(
  item_difficulty,
  mean_accuracy = rowMeans(across(!item_uid), na.rm = TRUE)
)

# Sort items from lowest to highest mean accuracy and cut them into
# consecutive blocks of four.
trog_difficulty_raw <- item_difficulty |>
  arrange(mean_accuracy) |>
  mutate(block = ceiling(row_number() / 4))
Line plot of mean accuracy by difficulty block
Code
# Mean item accuracy within each block.
trog_difficulty_raw_summary <- summarise(
  group_by(trog_difficulty_raw, block),
  mean_difficulty = mean(mean_accuracy, na.rm = TRUE)
)

# Connected points showing how mean accuracy changes from block to block.
trog_difficulty_raw_summary |>
  ggplot(aes(x = block, y = mean_difficulty)) +
  geom_line() +
  geom_point() +
  labs(
    x = "Block",
    y = "Average Item Accuracy",
    title = "Average difficulty by block, (based on proportion correct across all sites)"
  ) +
  theme_minimal()
Examining difficulty block v grammar group [Bar chart showing how items from different grammar groups are distributed across difficulty blocks]
Code
# Attach each item's grammar group to the item-level difficulty table.
# trog_grouped holds one row per trial, so it is first reduced to distinct
# item/grammar pairs. Joining it directly would repeat every item once per
# trial and make the chart below reflect trial counts, not item counts.
# relationship = "many-to-one" makes the join fail loudly if an item ever
# maps to more than one grammar group.
trog_difficulty_raw <- trog_difficulty_raw |>
  left_join(
    distinct(trog_grouped, item_uid, grammar_group),
    by = "item_uid",
    relationship = "many-to-one"
  )

# Share of items from each grammar group within every difficulty block.
ggplot(trog_difficulty_raw, aes(x = factor(block), fill = grammar_group)) +
  geom_bar(position = "fill") +
  scale_fill_viridis_d() +
  labs(
    x = "Block", y = "Proportion", fill = "Grammar Group",
    title = "Grammar Group Composition Across Difficulty Blocks"
  ) +
  theme_minimal()
Difficulty patterns across sites
Code
library(GGally)

# Scatterplot matrix of all columns of item_difficulty except the item ID,
# i.e. the per-site accuracies together with mean_accuracy.
ggpairs(
  select(item_difficulty, -item_uid),
  title = "Pairwise Site-Level Item Difficulties"
) +
  theme_minimal(base_family = "sans")
Warning: Removed 8 rows containing non-finite outside the scale range
(`stat_density()`).
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 8 rows containing missing values
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 4 rows containing missing values
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 4 rows containing non-finite outside the scale range
(`stat_density()`).
Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
Removed 4 rows containing missing values
Warning: Removed 8 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_point()`).
Figuring out the file
Code
# Peek at multigroup_task_rxx.rds; per the author's note this is not the
# file being looked for.
here("02_scoring_outputs", "irt_outputs", "multigroup_task_rxx.rds") |>
  readRDS() # not this
# A tibble: 27 × 5
site task_id item_type model_type rxx
<chr> <chr> <chr> <dbl> <dbl>
1 ca_pilot egma-math Rasch 1 0.863
2 co_pilot egma-math Rasch 1 0.931
3 de_pilot egma-math Rasch 1 0.880
4 ca_pilot hearts-and-flowers 2PL 1 0.903
5 co_pilot hearts-and-flowers 2PL 1 0.768
6 de_pilot hearts-and-flowers 2PL 1 0.797
7 ca_pilot matrix-reasoning Rasch 1 0.951
8 co_pilot matrix-reasoning Rasch 1 0.927
9 de_pilot matrix-reasoning Rasch 1 0.878
10 ca_pilot memory-game Rasch 1 0.855
# ℹ 17 more rows
Code
library(here)

# Peek at the item coefficient table.
here("02_scoring_outputs", "irt_outputs", "item_coefs.rds") |>
  readRDS()
# A tibble: 7,950 × 8
site task_id dims params item term value model
<chr> <chr> <dbl> <fct> <chr> <fct> <dbl> <fct>
1 ca_pilot egma-math 1 Rasch math_identify_2_1 difficulty 3.76 Rasch
2 ca_pilot egma-math 1 Rasch math_identify_9_1 difficulty 3.42 Rasch
3 ca_pilot egma-math 1 Rasch math_identify_0_1 difficulty 4.72 Rasch
4 ca_pilot egma-math 1 Rasch math_line_1_10_1 difficulty 0.607 Rasch
5 ca_pilot egma-math 1 Rasch math_line_25_100_1 difficulty 0.443 Rasch
6 ca_pilot egma-math 1 Rasch math_line_2_10_1 difficulty 0.612 Rasch
7 ca_pilot egma-math 1 Rasch math_line_30_100_1 difficulty 0.482 Rasch
8 ca_pilot egma-math 1 Rasch math_line_33_100_1 difficulty -0.990 Rasch
9 ca_pilot egma-math 1 Rasch math_line_4_10_1 difficulty 0.872 Rasch
10 ca_pilot egma-math 1 Rasch math_line_5_10_1 difficulty 1.25 Rasch
# ℹ 7,940 more rows
Code
# Peek at the joint task results (a nested tibble, one row per fitted model).
here("02_scoring_outputs", "irt_outputs", "joint_task_results.rds") |>
  readRDS()
# A tibble: 18 × 14
task_id data data_filtered data_wide data_prepped groups guess model_type
<chr> <list> <list> <list> <list> <list> <lgl> <dbl>
1 egma-m… <tibble> <tibble> <df> <df> <chr> NA 1
2 egma-m… <tibble> <tibble> <df> <df> <chr> NA 1
3 mental… <tibble> <tibble> <df> <df> <chr> NA 1
4 mental… <tibble> <tibble> <df> <df> <chr> NA 1
5 same-d… <tibble> <tibble> <df> <df> <chr> NA 1
6 same-d… <tibble> <tibble> <df> <df> <chr> NA 1
7 trog <tibble> <tibble> <df> <df> <chr> NA 1
8 trog <tibble> <tibble> <df> <df> <chr> NA 1
9 vocab <tibble> <tibble> <df> <df> <chr> NA 1
10 vocab <tibble> <tibble> <df> <df> <chr> NA 1
11 hearts… <tibble> <tibble> <df> <df> <chr> NA 1
12 hearts… <tibble> <tibble> <df> <df> <chr> NA 1
13 matrix… <tibble> <tibble> <df> <df> <chr> NA 1
14 matrix… <tibble> <tibble> <df> <df> <chr> NA 1
15 memory… <tibble> <tibble> <df> <df> <chr> NA 1
16 memory… <tibble> <tibble> <df> <df> <chr> NA 1
17 theory… <tibble> <tibble> <df> <df> <chr> NA 1
18 theory… <tibble> <tibble> <df> <df> <chr> NA 1
# ℹ 6 more variables: item_type <chr>, model_str <chr>, mod <list>,
# coefs <list>, scores <list>, bic <dbl>
Code
# Peek at the independent-model item coefficient table.
here("02_scoring_outputs", "irt_outputs", "independent_item_coefs.rds") |>
  readRDS()
# A tibble: 7,950 × 8
site task_id dims params item term value model
<chr> <chr> <dbl> <fct> <chr> <fct> <dbl> <fct>
1 ca_pilot egma-math 1 Rasch math_identify_2_1 difficulty 3.76 Rasch
2 ca_pilot egma-math 1 Rasch math_identify_9_1 difficulty 3.42 Rasch
3 ca_pilot egma-math 1 Rasch math_identify_0_1 difficulty 4.72 Rasch
4 ca_pilot egma-math 1 Rasch math_line_1_10_1 difficulty 0.607 Rasch
5 ca_pilot egma-math 1 Rasch math_line_25_100_1 difficulty 0.443 Rasch
6 ca_pilot egma-math 1 Rasch math_line_2_10_1 difficulty 0.612 Rasch
7 ca_pilot egma-math 1 Rasch math_line_30_100_1 difficulty 0.482 Rasch
8 ca_pilot egma-math 1 Rasch math_line_33_100_1 difficulty -0.990 Rasch
9 ca_pilot egma-math 1 Rasch math_line_4_10_1 difficulty 0.872 Rasch
10 ca_pilot egma-math 1 Rasch math_line_5_10_1 difficulty 1.25 Rasch
# ℹ 7,940 more rows
Load results from IRT model
Code
# previous version
# best_multigroup <- readRDS(here("02_scoring_outputs", "irt_outputs", "multigroup_best_outputs.rds"))

# Item-level coefficients from the IRT models.
best_multigroup <- here("02_scoring_outputs", "irt_outputs", "item_coefs.rds") |>
  readRDS()

# Per-run scores from the multigroup models.
multigroup_scores <- here("02_scoring_outputs", "scores", "scores_multigroup.rds") |>
  readRDS()
Multigroup models
Developmental trends in trog ability estimates by site
This plot shows how IRT-derived ability estimates (theta scores, i.e. estimated latent ability) from the TROG task vary with age across different sites.
Code
# One row per run with the participant's age.
run_ages <- trog |>
  select(site, run_id, user_id, age) |>
  distinct()

# Theta scores (computed per person, per task, per site), plotted against age.
# The join keys are spelled out so the join cannot silently change if either
# table later gains another column with a shared name.
multigroup_scores_trog <- multigroup_scores |>
  filter(task_id == "trog") |>
  select(site, task_id, user_id, run_id, metric_type, metric_value) |>
  left_join(run_ages, by = join_by(site, user_id, run_id))
Joining with `by = join_by(site, user_id, run_id)`
Code
# List the columns available after the join.
names(multigroup_scores_trog)
[1] "site" "task_id" "user_id" "run_id" "metric_type"
[6] "metric_value" "age"
Code
# Score against age with a default smoother, coloured by site.
multigroup_scores_trog |>
  ggplot(aes(x = age, y = metric_value, colour = site)) +
  geom_point() +
  geom_smooth() +
  theme_minimal(base_family = "sans")
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 102 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 102 rows containing missing values or values outside the scale range
(`geom_point()`).
Organizing trog items into difficulty-based blocks using IRT parameters
Code
library(dplyr)
library(stringr)
library(glue)

# Keep only the difficulty parameters of the TROG items.
trog_coefs <- filter(
  best_multigroup,
  task_id == "trog",
  term == "difficulty",
  str_detect(item, "^trog_")
)

# Average each item's difficulty estimate over sites.
irt_difficulty <- summarise(
  group_by(trog_coefs, item),
  mean_d = mean(value, na.rm = TRUE),
  .groups = "drop"
)

# One row per item with its grammar group, keyed by the item name used in
# the coefficient table (assumes those names are the item_uid plus "_1").
trog_groups <- trog_grouped |>
  distinct(item_uid, item_group, grammar_group) |>
  mutate(item = glue("{item_uid}_1")) |>
  select(item, grammar_group)

# Combine grammar labels with the IRT estimates and add a sign-flipped copy
# so that larger values mean harder items.
trog_difficulty_irt <- trog_groups |>
  left_join(irt_difficulty, by = "item") |>
  mutate(mean_d_reversed = -mean_d)
# Horizontal boxplots of reversed item difficulty, one box per grammar group,
# ordered by the group's typical reversed difficulty.
trog_difficulty_irt |>
  ggplot(aes(
    x = fct_reorder(grammar_group, mean_d_reversed),
    y = mean_d_reversed,
    fill = grammar_group
  )) +
  geom_boxplot(alpha = 0.8) +
  scale_fill_viridis_d(option = "D", begin = 0.1, end = 0.9) +
  labs(
    x = "Grammar Group",
    y = "Reversed IRT Difficulty (higher = harder)",
    title = "Reversed TROG Item Difficulty by Grammar Group"
  ) +
  coord_flip() +
  theme_minimal(base_family = "sans") +
  theme(legend.position = "none")
Code
# Note: the narrow spread for some categories (e.g., logical structures) suggests consistent item behavior.
Plot
Code
# One point per item, ordered by mean difficulty and coloured by grammar group.
trog_difficulty_irt |>
  ggplot(aes(
    x = reorder(item, mean_d),
    y = mean_d,
    colour = grammar_group
  )) +
  geom_point() +
  scale_color_viridis_d() + # same palette as the bar chart
  labs(
    x = "Item (ordered by mean difficulty)",
    y = "Mean Difficulty (d)",
    title = "TROG Item Difficulty (Mean Across Sites)",
    colour = "Grammar Group"
  ) +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 6)) # small labels so all items fit
Comparing IRT and raw difficulty
Code
# Pair each item's raw accuracy with its IRT difficulty estimate.
# trog_difficulty_raw may contain one row per trial (it was joined to the
# trial-level data), so it is first collapsed to one row per item. Without
# that step every item is repeated once per trial, and the scatter, the
# fitted lines and any correlation are weighted by trial counts.
difficulty_compare <- trog_difficulty_raw |>
  select(all_of(names(item_difficulty)), block, grammar_group) |>
  distinct() |>
  mutate(item = glue("{item_uid}_1")) |>
  left_join(trog_difficulty_irt, by = "item")

# Raw accuracy against IRT difficulty, with a linear fit per grammar group.
# grammar_group.y is the grammar label that came from trog_difficulty_irt.
ggplot(
  difficulty_compare,
  aes(x = mean_accuracy, y = mean_d, colour = grammar_group.y)
) +
  geom_point() +
  scale_color_viridis_d() +
  geom_smooth(method = "lm") +
  labs(
    x = "Raw Accuracy",
    y = "IRT Difficulty",
    title = "Raw vs IRT Difficulty",
    colour = "Grammar Group"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
Code
# List the columns of the comparison table.
names(difficulty_compare)
[1] "item_uid" "ca_pilot" "co_pilot" "de_pilot"
[5] "mean_accuracy" "block" "site" "task_id"
[9] "dataset" "user_id" "run_id" "trial_id"
[13] "trial_number" "item_group" "item" "correct"
[17] "rt" "rt_numeric" "timestamp" "age"
[21] "grammar_group.x" "grammar_group.y" "mean_d" "mean_d_reversed"
Code
# Pearson correlation between raw accuracy and IRT difficulty, using only
# rows where both values are present.
with(
  difficulty_compare,
  cor(mean_accuracy, mean_d, use = "complete.obs")
)