Score Reports

Author

LEVANTE DCC Team

Published

January 22, 2025

Code

library(tidyverse)
library(here)

here() starts at /Users/mcfrank/Projects/levante-pilots

Code

library(glue)
library(ggforce)
library(ggthemes)
source(here("03_summaries", "plotting_helper.R"))

Code

sites <- c("co_pilot")

participant_runs <- read_rds(here("00_prepped_data","co_pilot","participants.rds")) |>
  unnest(ages) |>
  unnest(groups) |>
  rename(group_name = name)

Combine all task scores into one plot!

Code

score_files <- list.files(here("02_scored_data"), pattern = "*.rds",
                          full.names = TRUE)
score_list <- score_files |> map(read_rds)
# exclude_tasks <- c("hostile-attribution", "pa-es")
# score_list <- read_rds("scores/combined_scores.rds")

Code

participants_info <- participant_runs |>
  mutate(school = group_name |> gsub(pattern="Coleg", replacement="Col") |> str_extract("Col.")) |>
  group_by(user_id) |>
  select(user_id, school, grade, age, sex, teacher_id) |>
  mutate(age = round(age, 1)) |>
  rename(db_grade = grade) |>
  distinct()

Merge in grade info. Note 1000 is used for missing in Year/Month data.

Code

grade_info <- read_csv(here("03_summaries","score_reports","data_co","grade_info_co.csv")) |>
  janitor::clean_names() |>
  mutate(month = ifelse(year == 1000, NA, month), 
         year = ifelse(year == 1000, NA, year), 
         school = group |> gsub(pattern="Coleg", replacement="Col") |> str_extract("Col.")) |>
  rename(user_id = uid) |>
  mutate(approx_age = as.numeric(ym("2024-06") - ym(paste(year, month)))/365) |>
  select(user_id, school, grade, year, month, approx_age)

Rows: 1404 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): School, email, password, uid, group
dbl (5): Grade, Bundle, month, year, id

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `approx_age = as.numeric(ym("2024-06") - ym(paste(year,
  month)))/365`.
Caused by warning:
!  533 failed to parse.

Code

participants <- left_join(participants_info, grade_info)

Joining with `by = join_by(user_id, school)`

Code

unique(participants$school)

[1] "ColD" "ColC" "ColA" "ColB" "ColE"

Code

unique(participants$grade)

[1]  6  5 NA  4  2  3  1  0

Students per school and grade/age

Code

table(participants$school, participants$grade)

      
        0  1  2  3  4  5  6
  ColA  0  0  0  0  0  0  0
  ColB 13 30 21 30 32 22 22
  ColC 22 36 66 43 17 17 37
  ColD 33 27 31 36 27 27 25
  ColE  0  0  0  0  0  0  0

Code

table(participants$school, round(participants$age))

      
        5  6  7  8  9 10 11 12 13 14 15
  ColA  0  0  0  3  5  6  1  5  0  0  0
  ColB  4 18 24 29 24 37 30 22  3  0  0
  ColC  1  0 22 52 32 24  7  2  1  0  0
  ColD 10 37 24 34 29 27 27 16  1  0  1
  ColE  2 18 10 17 26 22 25 19  7  2  0

Load all scores

Code

all_scores <- score_list |>
  bind_rows() |>
  rename(task = task_id) |>
  left_join(participants) |>
  filter(!is.na(age), age >= 5, age <= 12)

Joining with `by = join_by(user_id)`

Code

scores <- all_scores |>
  mutate(metric_type = if_else(str_detect(metric_type, "ability"), "ability", metric_type)) |>
  filter(!is.na(metric_value)) |>
  inner_join(task_metrics) |>
  left_join(task_categories) |>
  mutate(score = metric_value)

Joining with `by = join_by(task, metric_type)`
Joining with `by = join_by(task)`

Code

task_categories_vec <- levels(scores$task_category)
task_pal <- ptol_pal()(length(task_categories_vec)) |> set_names(task_categories_vec)

Scores with missing grade info.

Code

scores |>
  group_by(task) |> 
  filter(grade == "") |>
  count()

# A tibble: 0 × 2
# Groups:   task [0]
# ℹ 2 variables: task <chr>, n <int>

Distributions across all schools

Code

dist_all_schools <- scores |> 
  group_by(task_category, task) |>
  summarise(n = n(), Mean = mean(score), 
            Median = median(score), SD = sd(score)) |>
  rename(c("Category"="task_category", "Task"="task"))

`summarise()` has grouped output by 'task_category'. You can override using the
`.groups` argument.

Code

dist_all_schools

# A tibble: 14 × 6
# Groups:   Category [7]
   Category           Task                         n       Mean   Median      SD
   <fct>              <chr>                    <int>      <dbl>    <dbl>   <dbl>
 1 Executive function hearts-and-flowers         164  0.0000159 -0.00166  0.884 
 2 Executive function mefs                       144 70.7       71       16.1   
 3 Executive function memory-game                164 -0.0551     0.0559   1.59  
 4 Executive function same-different-selection   186 -0.165     -0.237    0.862 
 5 Math               egma-math                  581 -0.0626    -0.0445   0.905 
 6 Reasoning          matrix-reasoning           206 -0.0118     0.0671   0.426 
 7 Spatial cognition  mental-rotation            168 -0.0751    -0.231    0.713 
 8 Language           trog                       253 -0.100     -0.0200   0.982 
 9 Language           vocab                      150 -0.138      0.0201   0.949 
10 Reading            pa-es                      168  0.658      0.55     0.250 
11 Reading            sre-es                     297  0.110      0.106    0.0673
12 Reading            swr-es                     353  0.0839     0.619    2.09  
13 Social cognition   hostile-attribution        187  0.222      0.167    0.217 
14 Social cognition   theory-of-mind             190 -0.0249    -0.0475   0.885

All scores combining all schools in one plot

Code

ggplot(scores, aes(x = "", y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 2,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_sina(aes(colour = task_category), alpha = 0.8) +
  scale_colour_manual(values = task_pal) +
  labs(y = "Score") +
  guides(colour = "none") +
  theme(axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        strip.text = element_text(size = 10))

Code

ggsave("plots_co/all_scores_all_schools.png", width = 14, height = 8)

Just math and reading

Code

ggplot(scores |> filter(task_category %in% c("Math", "Reading")), 
       aes(x = "", y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_sina(aes(colour = task_category), alpha = 0.8) +
  scale_colour_manual(values = task_pal) +
  labs(y = "Score") +
  guides(colour = "none") +
  theme(axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        strip.text = element_text(size = 15))

Code

ggsave("plots/reports/math_read_all_schools.png", width = 14, height = 8)

Distributions by school

Code

dist_by_school <- scores |> 
  group_by(school, task_category, task) |>
  summarise(n = n(), Mean = mean(score), 
            Median = median(score), SD = sd(score)) |>
  rename(c("Category"="task_category", "Task"="task"))

`summarise()` has grouped output by 'school', 'task_category'. You can override
using the `.groups` argument.

Code

dist_by_school

# A tibble: 61 × 7
# Groups:   school, Category [33]
   school Category           Task                         n   Mean Median     SD
   <chr>  <fct>              <chr>                    <int>  <dbl>  <dbl>  <dbl>
 1 ColA   Executive function hearts-and-flowers           4  0.579  0.552 0.136 
 2 ColA   Executive function mefs                         4 76     72.5   8.04  
 3 ColA   Executive function memory-game                  4  1.25   1.20  0.724 
 4 ColA   Executive function same-different-selection     4  0.236 -0.220 1.05  
 5 ColA   Math               egma-math                   17  0.380  0.153 0.633 
 6 ColA   Reasoning          matrix-reasoning             9  0.242  0.223 0.351 
 7 ColA   Spatial cognition  mental-rotation              9  0.392  0.414 0.665 
 8 ColA   Language           vocab                        4  0.692  0.993 0.653 
 9 ColA   Reading            pa-es                        4  0.5    0.5   0     
10 ColA   Reading            sre-es                       9  0.164  0.156 0.0888
# ℹ 51 more rows

All scores, sina plots

Code

ggplot(scores, aes(x = school, y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 2,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_sina(aes(color = task_category), alpha = 0.8) + 
  scale_color_manual(values = task_pal) +
  labs(x="School", y = "Score") +
  guides(color = "none") +
  theme(strip.text = element_text(size = 10))

Code

ggsave("plots/reports/all_scores_by_school_sina.png", width = 14, height = 8)

All scores, violin plots

Code

ggplot(scores, aes(x = school, y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 2,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_violin(trim = F, aes(fill = task_category), alpha = 0.8) + 
  geom_boxplot(width=0.1) +
  scale_fill_manual(values = task_pal) +
  labs(x="School", y = "Score") +
  guides(fill = "none") +
  theme(strip.text = element_text(size = 10))

Code

ggsave("plots/reports/all_scores_by_school_violin.png", width = 14, height = 8)

All scores, one school on one plot

Code

school_plot <- function(sch) {
  scores |> filter(school==sch) |> group_by(task) |>
    mutate(n = n_distinct(user_id), task_label = glue("{task}\n(n = {n})")) |>
    ungroup() |>
    ggplot(aes(x = "", y = score)) +
    ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 2,
                             nest_line = element_line(), solo_line = TRUE,
                             axes = "x",scales = "free_y") +
    geom_sina(aes(colour = task_category), alpha = 0.8) +
    scale_colour_manual(values = task_pal) +
    labs(y = "Score") +
    guides(colour = "none") +
    theme(axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          strip.text = element_text(size = 10))
}

school_plot("ColA")

Code

# ggsave("plots_co/all_scores_schoolA.png", width = 14, height = 8)

school_plot("ColB")

Code

# ggsave("plots_co/all_scores_schoolB.png", width = 14, height = 8)

school_plot("ColC")

Code

# ggsave("plots_co/all_scores_schoolC.png", width = 14, height = 8)

school_plot("ColD")

Code

# ggsave("plots_co/all_scores_schoolD.png", width = 14, height = 8)

school_plot("ColE")

Code

# ggsave("plots_co/all_scores_schoolE.png", width = 14, height = 8)

Just math & reading scores, one school on one plot

Code

school_math_read_plot <- function(sch) {
  scores |> 
    filter(school == sch, task_category %in% c("Math", "Reading")) |> 
    group_by(task) |> 
    mutate(n = n_distinct(user_id), task_label = glue("{task}\n(n = {n})")) |>
    ungroup() |>
    ggplot(aes(x = "", y = score)) +
    ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
                             nest_line = element_line(), solo_line = TRUE,
                             axes = "x",scales = "free_y") +
    geom_sina(aes(colour = task_category), alpha = 0.8) +
    scale_colour_manual(values = task_pal) +
    labs(y = "Score") +
    guides(colour = "none") +
    theme(axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          strip.text = element_text(size = 10))
}

school_math_read_plot("ColA")

Code

ggsave("plots_co/math_read_schoolA.png", width = 14, height = 8)

school_math_read_plot("ColB")

Code

ggsave("plots_co/math_read_schoolB.png", width = 14, height = 8)

school_math_read_plot("ColC")

Code

ggsave("plots_co/math_read_schoolC.png", width = 14, height = 8)

school_math_read_plot("ColD")

Code

ggsave("plots_co/math_read_schoolD.png", width = 14, height = 8)

school_math_read_plot("ColE")

Code

ggsave("plots_co/math_read_schoolE.png", width = 14, height = 8)

Save distribution files

Combine all distributions into one csv

Code

dist_all_schools <- dist_all_schools |> mutate(school = "all")
all_dist <- rbind(dist_all_schools, dist_by_school)
write_csv(all_dist, "data_co/scores_summary_reports.csv")

Top 5% in each task from each school

Code

top_5_percent <- function(df) {
  threshold <- quantile(df$score, 0.95)
  return(df |> filter(score >= threshold))
}

top_5_percent_users <- scores |>
  group_by(school, task) |>
  do(top_5_percent(.)) |>
  select(school, task_category, task, user_id, score)
write_csv(top_5_percent_users, "data_co/top5perc_task_school.csv")

Bottom 5% in each task from each school

Code

bottom_5_percent <- function(df) {
  threshold <- quantile(df$score, 0.05)
  return(df |> filter(score <= threshold))
}

bottom_5_percent_users <- scores |>
  group_by(school, task) |>
  do(bottom_5_percent(.)) |>
  select(school, task_category, task, user_id, score)

write_csv(bottom_5_percent_users, "data_co/bottom5perc_task_school.csv")

Percentiles across grades

I wonder if, for example, you could send us the percentage of students per school and grade in the different terciles or quartiles of each measure. I believe differences between grades will be appreciated.

Code

scores |>
  filter(task %in% c("egma-math","sre-es","swr-es"), grade != "") |>
  ggplot(aes(x = grade, y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_jitter(alpha = .4, width = .2)+ 
  geom_quantile(aes(group = 1, col = factor(..quantile..)), method = "rq") + 
  labs(y = "Score", x = "Grade") +
  ggthemes::scale_color_solarized(name = "Quantile")

Warning: The dot-dot notation (`..quantile..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(quantile)` instead.

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

Code

scores |>
  filter(task %in% c("egma-math","sre-es","swr-es"), grade != "", 
         school %in% c("ColC", "ColD")) |>
  ggplot(aes(x = grade, y = score)) +
  facet_grid(task ~ school, scales = "free_y") + 
  # ggh4x::facet_nested_grid(vars(task_category, task), nrow = 1,
  #                          nest_line = element_line(), solo_line = TRUE,
  #                          axes = "x",scales = "free_y") +
  geom_jitter(alpha = .4, width = .2)+ 
  geom_quantile(aes(group = 1, col = factor(..quantile..)), method = "rq") + 
  labs(y = "Score", x = "Grade") +
  ggthemes::scale_color_solarized(name = "Quantile")

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique

Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique

Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique

Code

percentile_scores <- scores |>
  group_by(task) |>
  filter(task_category %in% c("Math", "Reading"), 
         task != "pa-es",
         !is.na(grade), grade != "") |>
  mutate(percentile = cut(score, 
                          breaks = quantile(score, probs = seq(0, 1, 0.2)),
                          include.lowest = TRUE,
                          labels = c("0-20","20-40","40-60","60-80","80-100")), 
         fct_grade = as_factor(grade))

percentile_summary <- percentile_scores |>
  filter(task_category %in% c("Math", "Reading")) |>
  filter(!is.na(percentile)) |>
  group_by(task_category, task, school, fct_grade, grade, percentile) |>
  dplyr::count(.drop = FALSE) |>
  group_by(task_category, task, school, grade) |>
  mutate(proportion = n / sum(n)) |>
  select(-n) |>
  pivot_wider(names_from = "percentile", values_from = "proportion", values_fill = 0)

#   
#   ggplot(percentile_summary, aes(x = grade, y = percentile)) +
#   ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
#                            nest_line = element_line(), solo_line = TRUE,
#                            axes = "x",scales = "free_y") +
#   # geom_point(aes(colour = task_category), alpha = 0.8) +
#   geom_tile(aes(fill= proportion)) + 
#   # scale_colour_manual(values = task_pal) +
#   # coord_flip() + 
#   viridis::scale_fill_viridis() +
#   labs(y = "Percentile") +
#   guides(colour = "none") +
#   theme(axis.ticks.x = element_blank(),
#         axis.title.x = element_blank(),
#         strip.text = element_text(size = 10))

Code

knitr::kable(filter(percentile_summary, school == "ColB"), digits = 2)

task_category	task	school	fct_grade	grade	0-20	20-40	40-60	60-80	80-100
Math	egma-math	ColB	0	0	0.92	0.08	0.00	0.00	0.00
Math	egma-math	ColB	1	1	0.43	0.36	0.18	0.04	0.00
Math	egma-math	ColB	2	2	0.14	0.71	0.07	0.00	0.07
Math	egma-math	ColB	3	3	0.00	0.38	0.33	0.19	0.10
Math	egma-math	ColB	4	4	0.00	0.16	0.26	0.05	0.53
Math	egma-math	ColB	5	5	0.00	0.20	0.20	0.40	0.20
Math	egma-math	ColB	6	6	0.00	0.00	0.00	0.25	0.75
Reading	sre-es	ColB	2	2	0.44	0.31	0.19	0.06	0.00
Reading	sre-es	ColB	3	3	0.12	0.19	0.19	0.38	0.12
Reading	sre-es	ColB	4	4	0.12	0.12	0.12	0.38	0.25
Reading	sre-es	ColB	5	5	0.05	0.24	0.33	0.19	0.19
Reading	sre-es	ColB	6	6	0.00	0.00	0.00	0.00	1.00
Reading	swr-es	ColB	2	2	0.60	0.10	0.10	0.10	0.10
Reading	swr-es	ColB	3	3	0.05	0.21	0.37	0.32	0.05
Reading	swr-es	ColB	4	4	0.00	0.11	0.00	0.32	0.58
Reading	swr-es	ColB	5	5	0.00	0.00	0.38	0.12	0.50
Reading	swr-es	ColB	6	6	0.00	0.00	0.00	0.17	0.83

Code

knitr::kable(filter(percentile_summary, school == "ColC"), digits = 2)

task_category	task	school	fct_grade	grade	0-20	20-40	40-60	60-80	80-100
Math	egma-math	ColC	0	0	1.00	0.00	0.00	0.00	0.00
Math	egma-math	ColC	1	1	0.00	1.00	0.00	0.00	0.00
Math	egma-math	ColC	2	2	0.06	0.18	0.19	0.32	0.24
Math	egma-math	ColC	3	3	0.07	0.10	0.40	0.20	0.22
Math	egma-math	ColC	4	4	0.06	0.00	0.12	0.47	0.35
Math	egma-math	ColC	5	5	0.00	0.08	0.17	0.42	0.33
Math	egma-math	ColC	6	6	0.00	0.00	0.00	0.00	1.00
Reading	sre-es	ColC	2	2	0.41	0.26	0.15	0.15	0.03
Reading	sre-es	ColC	3	3	0.05	0.26	0.32	0.21	0.16
Reading	sre-es	ColC	4	4	0.00	0.17	0.17	0.33	0.33
Reading	sre-es	ColC	5	5	0.00	0.14	0.14	0.14	0.57
Reading	sre-es	ColC	6	6	0.00	0.00	0.00	0.00	1.00
Reading	swr-es	ColC	2	2	0.25	0.31	0.19	0.12	0.12
Reading	swr-es	ColC	3	3	0.09	0.18	0.41	0.23	0.09
Reading	swr-es	ColC	4	4	0.09	0.18	0.09	0.09	0.55
Reading	swr-es	ColC	5	5	0.00	0.44	0.11	0.22	0.22

Code

knitr::kable(filter(percentile_summary, school == "ColD"), digits = 2)

task_category	task	school	fct_grade	grade	0-20	20-40	40-60	60-80	80-100
Math	egma-math	ColD	0	0	0.86	0.07	0.04	0.00	0.04
Math	egma-math	ColD	1	1	0.42	0.46	0.08	0.04	0.00
Math	egma-math	ColD	2	2	0.33	0.17	0.28	0.22	0.00
Math	egma-math	ColD	3	3	0.16	0.12	0.34	0.12	0.25
Math	egma-math	ColD	4	4	0.07	0.22	0.26	0.26	0.19
Math	egma-math	ColD	5	5	0.04	0.15	0.19	0.41	0.22
Math	egma-math	ColD	6	6	0.00	0.13	0.07	0.27	0.53
Reading	sre-es	ColD	2	2	0.79	0.07	0.14	0.00	0.00
Reading	sre-es	ColD	3	3	0.42	0.26	0.16	0.11	0.05
Reading	sre-es	ColD	4	4	0.15	0.15	0.23	0.31	0.15
Reading	sre-es	ColD	5	5	0.00	0.07	0.27	0.47	0.20
Reading	sre-es	ColD	6	6	0.17	0.17	0.17	0.08	0.42
Reading	swr-es	ColD	2	2	0.81	0.10	0.00	0.05	0.05
Reading	swr-es	ColD	3	3	0.21	0.32	0.26	0.21	0.00
Reading	swr-es	ColD	4	4	0.12	0.41	0.18	0.29	0.00
Reading	swr-es	ColD	5	5	0.20	0.07	0.33	0.27	0.13
Reading	swr-es	ColD	6	6	0.07	0.07	0.21	0.21	0.43