Code
library(tidyverse)
library(here)
here() starts at /Users/mcfrank/Projects/levante-pilots
Code
library(glue)
library(ggforce)
library(ggthemes)
source(here("03_summaries", "plotting_helper.R"))
library(tidyverse)
library(here)
here() starts at /Users/mcfrank/Projects/levante-pilots
library(glue)
library(ggforce)
library(ggthemes)
source(here("03_summaries", "plotting_helper.R"))
<- c("co_pilot")
sites
<- read_rds(here("00_prepped_data","co_pilot","participants.rds")) |>
participant_runs unnest(ages) |>
unnest(groups) |>
rename(group_name = name)
Combine all task scores into one plot!
<- list.files(here("02_scored_data"), pattern = "*.rds",
score_files full.names = TRUE)
<- score_files |> map(read_rds)
score_list # exclude_tasks <- c("hostile-attribution", "pa-es")
# score_list <- read_rds("scores/combined_scores.rds")
<- participant_runs |>
participants_info mutate(school = group_name |> gsub(pattern="Coleg", replacement="Col") |> str_extract("Col.")) |>
group_by(user_id) |>
select(user_id, school, grade, age, sex, teacher_id) |>
mutate(age = round(age, 1)) |>
rename(db_grade = grade) |>
distinct()
Merge in grade info. Note 1000 is used for missing in Year/Month data.
<- read_csv(here("03_summaries","score_reports","data_co","grade_info_co.csv")) |>
grade_info ::clean_names() |>
janitormutate(month = ifelse(year == 1000, NA, month),
year = ifelse(year == 1000, NA, year),
school = group |> gsub(pattern="Coleg", replacement="Col") |> str_extract("Col.")) |>
rename(user_id = uid) |>
mutate(approx_age = as.numeric(ym("2024-06") - ym(paste(year, month)))/365) |>
select(user_id, school, grade, year, month, approx_age)
Rows: 1404 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): School, email, password, uid, group
dbl (5): Grade, Bundle, month, year, id
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `approx_age = as.numeric(ym("2024-06") - ym(paste(year,
month)))/365`.
Caused by warning:
! 533 failed to parse.
<- left_join(participants_info, grade_info) participants
Joining with `by = join_by(user_id, school)`
unique(participants$school)
[1] "ColD" "ColC" "ColA" "ColB" "ColE"
unique(participants$grade)
[1] 6 5 NA 4 2 3 1 0
table(participants$school, participants$grade)
0 1 2 3 4 5 6
ColA 0 0 0 0 0 0 0
ColB 13 30 21 30 32 22 22
ColC 22 36 66 43 17 17 37
ColD 33 27 31 36 27 27 25
ColE 0 0 0 0 0 0 0
table(participants$school, round(participants$age))
5 6 7 8 9 10 11 12 13 14 15
ColA 0 0 0 3 5 6 1 5 0 0 0
ColB 4 18 24 29 24 37 30 22 3 0 0
ColC 1 0 22 52 32 24 7 2 1 0 0
ColD 10 37 24 34 29 27 27 16 1 0 1
ColE 2 18 10 17 26 22 25 19 7 2 0
<- score_list |>
all_scores bind_rows() |>
rename(task = task_id) |>
left_join(participants) |>
filter(!is.na(age), age >= 5, age <= 12)
Joining with `by = join_by(user_id)`
<- all_scores |>
scores mutate(metric_type = if_else(str_detect(metric_type, "ability"), "ability", metric_type)) |>
filter(!is.na(metric_value)) |>
inner_join(task_metrics) |>
left_join(task_categories) |>
mutate(score = metric_value)
Joining with `by = join_by(task, metric_type)`
Joining with `by = join_by(task)`
<- levels(scores$task_category)
task_categories_vec <- ptol_pal()(length(task_categories_vec)) |> set_names(task_categories_vec) task_pal
Scores with missing grade info.
|>
scores group_by(task) |>
filter(grade == "") |>
count()
# A tibble: 0 × 2
# Groups: task [0]
# ℹ 2 variables: task <chr>, n <int>
<- scores |>
dist_all_schools group_by(task_category, task) |>
summarise(n = n(), Mean = mean(score),
Median = median(score), SD = sd(score)) |>
rename(c("Category"="task_category", "Task"="task"))
`summarise()` has grouped output by 'task_category'. You can override using the
`.groups` argument.
dist_all_schools
# A tibble: 14 × 6
# Groups: Category [7]
Category Task n Mean Median SD
<fct> <chr> <int> <dbl> <dbl> <dbl>
1 Executive function hearts-and-flowers 164 0.0000159 -0.00166 0.884
2 Executive function mefs 144 70.7 71 16.1
3 Executive function memory-game 164 -0.0551 0.0559 1.59
4 Executive function same-different-selection 186 -0.165 -0.237 0.862
5 Math egma-math 581 -0.0626 -0.0445 0.905
6 Reasoning matrix-reasoning 206 -0.0118 0.0671 0.426
7 Spatial cognition mental-rotation 168 -0.0751 -0.231 0.713
8 Language trog 253 -0.100 -0.0200 0.982
9 Language vocab 150 -0.138 0.0201 0.949
10 Reading pa-es 168 0.658 0.55 0.250
11 Reading sre-es 297 0.110 0.106 0.0673
12 Reading swr-es 353 0.0839 0.619 2.09
13 Social cognition hostile-attribution 187 0.222 0.167 0.217
14 Social cognition theory-of-mind 190 -0.0249 -0.0475 0.885
All scores combining all schools in one plot
ggplot(scores, aes(x = "", y = score)) +
::facet_nested_wrap(vars(task_category, task), nrow = 2,
ggh4xnest_line = element_line(), solo_line = TRUE,
axes = "x",scales = "free_y") +
geom_sina(aes(colour = task_category), alpha = 0.8) +
scale_colour_manual(values = task_pal) +
labs(y = "Score") +
guides(colour = "none") +
theme(axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
strip.text = element_text(size = 10))
ggsave("plots_co/all_scores_all_schools.png", width = 14, height = 8)
Just math and reading
ggplot(scores |> filter(task_category %in% c("Math", "Reading")),
aes(x = "", y = score)) +
::facet_nested_wrap(vars(task_category, task), nrow = 1,
ggh4xnest_line = element_line(), solo_line = TRUE,
axes = "x",scales = "free_y") +
geom_sina(aes(colour = task_category), alpha = 0.8) +
scale_colour_manual(values = task_pal) +
labs(y = "Score") +
guides(colour = "none") +
theme(axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
strip.text = element_text(size = 15))
ggsave("plots/reports/math_read_all_schools.png", width = 14, height = 8)
<- scores |>
dist_by_school group_by(school, task_category, task) |>
summarise(n = n(), Mean = mean(score),
Median = median(score), SD = sd(score)) |>
rename(c("Category"="task_category", "Task"="task"))
`summarise()` has grouped output by 'school', 'task_category'. You can override
using the `.groups` argument.
dist_by_school
# A tibble: 61 × 7
# Groups: school, Category [33]
school Category Task n Mean Median SD
<chr> <fct> <chr> <int> <dbl> <dbl> <dbl>
1 ColA Executive function hearts-and-flowers 4 0.579 0.552 0.136
2 ColA Executive function mefs 4 76 72.5 8.04
3 ColA Executive function memory-game 4 1.25 1.20 0.724
4 ColA Executive function same-different-selection 4 0.236 -0.220 1.05
5 ColA Math egma-math 17 0.380 0.153 0.633
6 ColA Reasoning matrix-reasoning 9 0.242 0.223 0.351
7 ColA Spatial cognition mental-rotation 9 0.392 0.414 0.665
8 ColA Language vocab 4 0.692 0.993 0.653
9 ColA Reading pa-es 4 0.5 0.5 0
10 ColA Reading sre-es 9 0.164 0.156 0.0888
# ℹ 51 more rows
All scores, sina plots
ggplot(scores, aes(x = school, y = score)) +
::facet_nested_wrap(vars(task_category, task), nrow = 2,
ggh4xnest_line = element_line(), solo_line = TRUE,
axes = "x",scales = "free_y") +
geom_sina(aes(color = task_category), alpha = 0.8) +
scale_color_manual(values = task_pal) +
labs(x="School", y = "Score") +
guides(color = "none") +
theme(strip.text = element_text(size = 10))
ggsave("plots/reports/all_scores_by_school_sina.png", width = 14, height = 8)
All scores, violin plots
ggplot(scores, aes(x = school, y = score)) +
::facet_nested_wrap(vars(task_category, task), nrow = 2,
ggh4xnest_line = element_line(), solo_line = TRUE,
axes = "x",scales = "free_y") +
geom_violin(trim = F, aes(fill = task_category), alpha = 0.8) +
geom_boxplot(width=0.1) +
scale_fill_manual(values = task_pal) +
labs(x="School", y = "Score") +
guides(fill = "none") +
theme(strip.text = element_text(size = 10))
ggsave("plots/reports/all_scores_by_school_violin.png", width = 14, height = 8)
All scores, one school on one plot
<- function(sch) {
school_plot |> filter(school==sch) |> group_by(task) |>
scores mutate(n = n_distinct(user_id), task_label = glue("{task}\n(n = {n})")) |>
ungroup() |>
ggplot(aes(x = "", y = score)) +
::facet_nested_wrap(vars(task_category, task), nrow = 2,
ggh4xnest_line = element_line(), solo_line = TRUE,
axes = "x",scales = "free_y") +
geom_sina(aes(colour = task_category), alpha = 0.8) +
scale_colour_manual(values = task_pal) +
labs(y = "Score") +
guides(colour = "none") +
theme(axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
strip.text = element_text(size = 10))
}
school_plot("ColA")
# ggsave("plots_co/all_scores_schoolA.png", width = 14, height = 8)
school_plot("ColB")
# ggsave("plots_co/all_scores_schoolB.png", width = 14, height = 8)
school_plot("ColC")
# ggsave("plots_co/all_scores_schoolC.png", width = 14, height = 8)
school_plot("ColD")
# ggsave("plots_co/all_scores_schoolD.png", width = 14, height = 8)
school_plot("ColE")
# ggsave("plots_co/all_scores_schoolE.png", width = 14, height = 8)
Just math & reading scores, one school on one plot
<- function(sch) {
school_math_read_plot |>
scores filter(school == sch, task_category %in% c("Math", "Reading")) |>
group_by(task) |>
mutate(n = n_distinct(user_id), task_label = glue("{task}\n(n = {n})")) |>
ungroup() |>
ggplot(aes(x = "", y = score)) +
::facet_nested_wrap(vars(task_category, task), nrow = 1,
ggh4xnest_line = element_line(), solo_line = TRUE,
axes = "x",scales = "free_y") +
geom_sina(aes(colour = task_category), alpha = 0.8) +
scale_colour_manual(values = task_pal) +
labs(y = "Score") +
guides(colour = "none") +
theme(axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
strip.text = element_text(size = 10))
}
school_math_read_plot("ColA")
ggsave("plots_co/math_read_schoolA.png", width = 14, height = 8)
school_math_read_plot("ColB")
ggsave("plots_co/math_read_schoolB.png", width = 14, height = 8)
school_math_read_plot("ColC")
ggsave("plots_co/math_read_schoolC.png", width = 14, height = 8)
school_math_read_plot("ColD")
ggsave("plots_co/math_read_schoolD.png", width = 14, height = 8)
school_math_read_plot("ColE")
ggsave("plots_co/math_read_schoolE.png", width = 14, height = 8)
Combine all distributions into one csv
<- dist_all_schools |> mutate(school = "all")
dist_all_schools <- rbind(dist_all_schools, dist_by_school)
all_dist write_csv(all_dist, "data_co/scores_summary_reports.csv")
Top 5% in each task from each school
<- function(df) {
top_5_percent <- quantile(df$score, 0.95)
threshold return(df |> filter(score >= threshold))
}
<- scores |>
top_5_percent_users group_by(school, task) |>
do(top_5_percent(.)) |>
select(school, task_category, task, user_id, score)
write_csv(top_5_percent_users, "data_co/top5perc_task_school.csv")
Bottom 5% in each task from each school
<- function(df) {
bottom_5_percent <- quantile(df$score, 0.05)
threshold return(df |> filter(score <= threshold))
}
<- scores |>
bottom_5_percent_users group_by(school, task) |>
do(bottom_5_percent(.)) |>
select(school, task_category, task, user_id, score)
write_csv(bottom_5_percent_users, "data_co/bottom5perc_task_school.csv")
I wonder if, for example, you could send us the percentage of students per school and grade in the different terciles or quartiles of each measure. I believe differences between grades will be appreciated.
|>
scores filter(task %in% c("egma-math","sre-es","swr-es"), grade != "") |>
ggplot(aes(x = grade, y = score)) +
::facet_nested_wrap(vars(task_category, task), nrow = 1,
ggh4xnest_line = element_line(), solo_line = TRUE,
axes = "x",scales = "free_y") +
geom_jitter(alpha = .4, width = .2)+
geom_quantile(aes(group = 1, col = factor(..quantile..)), method = "rq") +
labs(y = "Score", x = "Grade") +
::scale_color_solarized(name = "Quantile") ggthemes
Warning: The dot-dot notation (`..quantile..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(quantile)` instead.
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
|>
scores filter(task %in% c("egma-math","sre-es","swr-es"), grade != "",
%in% c("ColC", "ColD")) |>
school ggplot(aes(x = grade, y = score)) +
facet_grid(task ~ school, scales = "free_y") +
# ggh4x::facet_nested_grid(vars(task_category, task), nrow = 1,
# nest_line = element_line(), solo_line = TRUE,
# axes = "x",scales = "free_y") +
geom_jitter(alpha = .4, width = .2)+
geom_quantile(aes(group = 1, col = factor(..quantile..)), method = "rq") +
labs(y = "Score", x = "Grade") +
::scale_color_solarized(name = "Quantile") ggthemes
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
<- scores |>
percentile_scores group_by(task) |>
filter(task_category %in% c("Math", "Reading"),
!= "pa-es",
task !is.na(grade), grade != "") |>
mutate(percentile = cut(score,
breaks = quantile(score, probs = seq(0, 1, 0.2)),
include.lowest = TRUE,
labels = c("0-20","20-40","40-60","60-80","80-100")),
fct_grade = as_factor(grade))
<- percentile_scores |>
percentile_summary filter(task_category %in% c("Math", "Reading")) |>
filter(!is.na(percentile)) |>
group_by(task_category, task, school, fct_grade, grade, percentile) |>
::count(.drop = FALSE) |>
dplyrgroup_by(task_category, task, school, grade) |>
mutate(proportion = n / sum(n)) |>
select(-n) |>
pivot_wider(names_from = "percentile", values_from = "proportion", values_fill = 0)
#
# ggplot(percentile_summary, aes(x = grade, y = percentile)) +
# ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
# nest_line = element_line(), solo_line = TRUE,
# axes = "x",scales = "free_y") +
# # geom_point(aes(colour = task_category), alpha = 0.8) +
# geom_tile(aes(fill= proportion)) +
# # scale_colour_manual(values = task_pal) +
# # coord_flip() +
# viridis::scale_fill_viridis() +
# labs(y = "Percentile") +
# guides(colour = "none") +
# theme(axis.ticks.x = element_blank(),
# axis.title.x = element_blank(),
# strip.text = element_text(size = 10))
::kable(filter(percentile_summary, school == "ColB"), digits = 2) knitr
task_category | task | school | fct_grade | grade | 0-20 | 20-40 | 40-60 | 60-80 | 80-100 |
---|---|---|---|---|---|---|---|---|---|
Math | egma-math | ColB | 0 | 0 | 0.92 | 0.08 | 0.00 | 0.00 | 0.00 |
Math | egma-math | ColB | 1 | 1 | 0.43 | 0.36 | 0.18 | 0.04 | 0.00 |
Math | egma-math | ColB | 2 | 2 | 0.14 | 0.71 | 0.07 | 0.00 | 0.07 |
Math | egma-math | ColB | 3 | 3 | 0.00 | 0.38 | 0.33 | 0.19 | 0.10 |
Math | egma-math | ColB | 4 | 4 | 0.00 | 0.16 | 0.26 | 0.05 | 0.53 |
Math | egma-math | ColB | 5 | 5 | 0.00 | 0.20 | 0.20 | 0.40 | 0.20 |
Math | egma-math | ColB | 6 | 6 | 0.00 | 0.00 | 0.00 | 0.25 | 0.75 |
Reading | sre-es | ColB | 2 | 2 | 0.44 | 0.31 | 0.19 | 0.06 | 0.00 |
Reading | sre-es | ColB | 3 | 3 | 0.12 | 0.19 | 0.19 | 0.38 | 0.12 |
Reading | sre-es | ColB | 4 | 4 | 0.12 | 0.12 | 0.12 | 0.38 | 0.25 |
Reading | sre-es | ColB | 5 | 5 | 0.05 | 0.24 | 0.33 | 0.19 | 0.19 |
Reading | sre-es | ColB | 6 | 6 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 |
Reading | swr-es | ColB | 2 | 2 | 0.60 | 0.10 | 0.10 | 0.10 | 0.10 |
Reading | swr-es | ColB | 3 | 3 | 0.05 | 0.21 | 0.37 | 0.32 | 0.05 |
Reading | swr-es | ColB | 4 | 4 | 0.00 | 0.11 | 0.00 | 0.32 | 0.58 |
Reading | swr-es | ColB | 5 | 5 | 0.00 | 0.00 | 0.38 | 0.12 | 0.50 |
Reading | swr-es | ColB | 6 | 6 | 0.00 | 0.00 | 0.00 | 0.17 | 0.83 |
::kable(filter(percentile_summary, school == "ColC"), digits = 2) knitr
task_category | task | school | fct_grade | grade | 0-20 | 20-40 | 40-60 | 60-80 | 80-100 |
---|---|---|---|---|---|---|---|---|---|
Math | egma-math | ColC | 0 | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Math | egma-math | ColC | 1 | 1 | 0.00 | 1.00 | 0.00 | 0.00 | 0.00 |
Math | egma-math | ColC | 2 | 2 | 0.06 | 0.18 | 0.19 | 0.32 | 0.24 |
Math | egma-math | ColC | 3 | 3 | 0.07 | 0.10 | 0.40 | 0.20 | 0.22 |
Math | egma-math | ColC | 4 | 4 | 0.06 | 0.00 | 0.12 | 0.47 | 0.35 |
Math | egma-math | ColC | 5 | 5 | 0.00 | 0.08 | 0.17 | 0.42 | 0.33 |
Math | egma-math | ColC | 6 | 6 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 |
Reading | sre-es | ColC | 2 | 2 | 0.41 | 0.26 | 0.15 | 0.15 | 0.03 |
Reading | sre-es | ColC | 3 | 3 | 0.05 | 0.26 | 0.32 | 0.21 | 0.16 |
Reading | sre-es | ColC | 4 | 4 | 0.00 | 0.17 | 0.17 | 0.33 | 0.33 |
Reading | sre-es | ColC | 5 | 5 | 0.00 | 0.14 | 0.14 | 0.14 | 0.57 |
Reading | sre-es | ColC | 6 | 6 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 |
Reading | swr-es | ColC | 2 | 2 | 0.25 | 0.31 | 0.19 | 0.12 | 0.12 |
Reading | swr-es | ColC | 3 | 3 | 0.09 | 0.18 | 0.41 | 0.23 | 0.09 |
Reading | swr-es | ColC | 4 | 4 | 0.09 | 0.18 | 0.09 | 0.09 | 0.55 |
Reading | swr-es | ColC | 5 | 5 | 0.00 | 0.44 | 0.11 | 0.22 | 0.22 |
::kable(filter(percentile_summary, school == "ColD"), digits = 2) knitr
task_category | task | school | fct_grade | grade | 0-20 | 20-40 | 40-60 | 60-80 | 80-100 |
---|---|---|---|---|---|---|---|---|---|
Math | egma-math | ColD | 0 | 0 | 0.86 | 0.07 | 0.04 | 0.00 | 0.04 |
Math | egma-math | ColD | 1 | 1 | 0.42 | 0.46 | 0.08 | 0.04 | 0.00 |
Math | egma-math | ColD | 2 | 2 | 0.33 | 0.17 | 0.28 | 0.22 | 0.00 |
Math | egma-math | ColD | 3 | 3 | 0.16 | 0.12 | 0.34 | 0.12 | 0.25 |
Math | egma-math | ColD | 4 | 4 | 0.07 | 0.22 | 0.26 | 0.26 | 0.19 |
Math | egma-math | ColD | 5 | 5 | 0.04 | 0.15 | 0.19 | 0.41 | 0.22 |
Math | egma-math | ColD | 6 | 6 | 0.00 | 0.13 | 0.07 | 0.27 | 0.53 |
Reading | sre-es | ColD | 2 | 2 | 0.79 | 0.07 | 0.14 | 0.00 | 0.00 |
Reading | sre-es | ColD | 3 | 3 | 0.42 | 0.26 | 0.16 | 0.11 | 0.05 |
Reading | sre-es | ColD | 4 | 4 | 0.15 | 0.15 | 0.23 | 0.31 | 0.15 |
Reading | sre-es | ColD | 5 | 5 | 0.00 | 0.07 | 0.27 | 0.47 | 0.20 |
Reading | sre-es | ColD | 6 | 6 | 0.17 | 0.17 | 0.17 | 0.08 | 0.42 |
Reading | swr-es | ColD | 2 | 2 | 0.81 | 0.10 | 0.00 | 0.05 | 0.05 |
Reading | swr-es | ColD | 3 | 3 | 0.21 | 0.32 | 0.26 | 0.21 | 0.00 |
Reading | swr-es | ColD | 4 | 4 | 0.12 | 0.41 | 0.18 | 0.29 | 0.00 |
Reading | swr-es | ColD | 5 | 5 | 0.20 | 0.07 | 0.33 | 0.27 | 0.13 |
Reading | swr-es | ColD | 6 | 6 | 0.07 | 0.07 | 0.21 | 0.21 | 0.43 |