Score Reports

Author

LEVANTE DCC Team

Published

January 22, 2025

Code
library(tidyverse)
library(here)
here() starts at /Users/mcfrank/Projects/levante-pilots
Code
library(glue)
library(ggforce)
library(ggthemes)
source(here("03_summaries", "plotting_helper.R"))
Code
sites <- c("co_pilot")

participant_runs <- read_rds(here("00_prepped_data","co_pilot","participants.rds")) |>
  unnest(ages) |>
  unnest(groups) |>
  rename(group_name = name)

Combine all task scores into one plot!

Code
score_files <- list.files(here("02_scored_data"), pattern = "*.rds",
                          full.names = TRUE)
score_list <- score_files |> map(read_rds)
# exclude_tasks <- c("hostile-attribution", "pa-es")
# score_list <- read_rds("scores/combined_scores.rds")
Code
participants_info <- participant_runs |>
  mutate(school = group_name |> gsub(pattern="Coleg", replacement="Col") |> str_extract("Col.")) |>
  group_by(user_id) |>
  select(user_id, school, grade, age, sex, teacher_id) |>
  mutate(age = round(age, 1)) |>
  rename(db_grade = grade) |>
  distinct()

Merge in grade info. Note 1000 is used for missing in Year/Month data.

Code
grade_info <- read_csv(here("03_summaries","score_reports","data_co","grade_info_co.csv")) |>
  janitor::clean_names() |>
  mutate(month = ifelse(year == 1000, NA, month), 
         year = ifelse(year == 1000, NA, year), 
         school = group |> gsub(pattern="Coleg", replacement="Col") |> str_extract("Col.")) |>
  rename(user_id = uid) |>
  mutate(approx_age = as.numeric(ym("2024-06") - ym(paste(year, month)))/365) |>
  select(user_id, school, grade, year, month, approx_age) 
Rows: 1404 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): School, email, password, uid, group
dbl (5): Grade, Bundle, month, year, id

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `approx_age = as.numeric(ym("2024-06") - ym(paste(year,
  month)))/365`.
Caused by warning:
!  533 failed to parse.
Code
participants <- left_join(participants_info, grade_info)
Joining with `by = join_by(user_id, school)`
Code
unique(participants$school)
[1] "ColD" "ColC" "ColA" "ColB" "ColE"
Code
unique(participants$grade)
[1]  6  5 NA  4  2  3  1  0

Students per school and grade/age

Code
table(participants$school, participants$grade)
      
        0  1  2  3  4  5  6
  ColA  0  0  0  0  0  0  0
  ColB 13 30 21 30 32 22 22
  ColC 22 36 66 43 17 17 37
  ColD 33 27 31 36 27 27 25
  ColE  0  0  0  0  0  0  0
Code
table(participants$school, round(participants$age))
      
        5  6  7  8  9 10 11 12 13 14 15
  ColA  0  0  0  3  5  6  1  5  0  0  0
  ColB  4 18 24 29 24 37 30 22  3  0  0
  ColC  1  0 22 52 32 24  7  2  1  0  0
  ColD 10 37 24 34 29 27 27 16  1  0  1
  ColE  2 18 10 17 26 22 25 19  7  2  0

Load all scores

Code
all_scores <- score_list |>
  bind_rows() |>
  rename(task = task_id) |>
  left_join(participants) |>
  filter(!is.na(age), age >= 5, age <= 12) 
Joining with `by = join_by(user_id)`
Code
scores <- all_scores |>
  mutate(metric_type = if_else(str_detect(metric_type, "ability"), "ability", metric_type)) |>
  filter(!is.na(metric_value)) |>
  inner_join(task_metrics) |>
  left_join(task_categories) |>
  mutate(score = metric_value)
Joining with `by = join_by(task, metric_type)`
Joining with `by = join_by(task)`
Code
task_categories_vec <- levels(scores$task_category)
task_pal <- ptol_pal()(length(task_categories_vec)) |> set_names(task_categories_vec)

Scores with missing grade info.

Code
scores |>
  group_by(task) |> 
  filter(grade == "") |>
  count()
# A tibble: 0 × 2
# Groups:   task [0]
# ℹ 2 variables: task <chr>, n <int>

Distributions across all schools

Code
dist_all_schools <- scores |> 
  group_by(task_category, task) |>
  summarise(n = n(), Mean = mean(score), 
            Median = median(score), SD = sd(score)) |>
  rename(c("Category"="task_category", "Task"="task"))
`summarise()` has grouped output by 'task_category'. You can override using the
`.groups` argument.
Code
dist_all_schools
# A tibble: 14 × 6
# Groups:   Category [7]
   Category           Task                         n       Mean   Median      SD
   <fct>              <chr>                    <int>      <dbl>    <dbl>   <dbl>
 1 Executive function hearts-and-flowers         164  0.0000159 -0.00166  0.884 
 2 Executive function mefs                       144 70.7       71       16.1   
 3 Executive function memory-game                164 -0.0551     0.0559   1.59  
 4 Executive function same-different-selection   186 -0.165     -0.237    0.862 
 5 Math               egma-math                  581 -0.0626    -0.0445   0.905 
 6 Reasoning          matrix-reasoning           206 -0.0118     0.0671   0.426 
 7 Spatial cognition  mental-rotation            168 -0.0751    -0.231    0.713 
 8 Language           trog                       253 -0.100     -0.0200   0.982 
 9 Language           vocab                      150 -0.138      0.0201   0.949 
10 Reading            pa-es                      168  0.658      0.55     0.250 
11 Reading            sre-es                     297  0.110      0.106    0.0673
12 Reading            swr-es                     353  0.0839     0.619    2.09  
13 Social cognition   hostile-attribution        187  0.222      0.167    0.217 
14 Social cognition   theory-of-mind             190 -0.0249    -0.0475   0.885 

All scores combining all schools in one plot

Code
ggplot(scores, aes(x = "", y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 2,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_sina(aes(colour = task_category), alpha = 0.8) +
  scale_colour_manual(values = task_pal) +
  labs(y = "Score") +
  guides(colour = "none") +
  theme(axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        strip.text = element_text(size = 10))

Code
ggsave("plots_co/all_scores_all_schools.png", width = 14, height = 8)

Just math and reading

Code
ggplot(scores |> filter(task_category %in% c("Math", "Reading")), 
       aes(x = "", y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_sina(aes(colour = task_category), alpha = 0.8) +
  scale_colour_manual(values = task_pal) +
  labs(y = "Score") +
  guides(colour = "none") +
  theme(axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        strip.text = element_text(size = 15))

Code
ggsave("plots/reports/math_read_all_schools.png", width = 14, height = 8)

Distributions by school

Code
dist_by_school <- scores |> 
  group_by(school, task_category, task) |>
  summarise(n = n(), Mean = mean(score), 
            Median = median(score), SD = sd(score)) |>
  rename(c("Category"="task_category", "Task"="task"))
`summarise()` has grouped output by 'school', 'task_category'. You can override
using the `.groups` argument.
Code
dist_by_school
# A tibble: 61 × 7
# Groups:   school, Category [33]
   school Category           Task                         n   Mean Median     SD
   <chr>  <fct>              <chr>                    <int>  <dbl>  <dbl>  <dbl>
 1 ColA   Executive function hearts-and-flowers           4  0.579  0.552 0.136 
 2 ColA   Executive function mefs                         4 76     72.5   8.04  
 3 ColA   Executive function memory-game                  4  1.25   1.20  0.724 
 4 ColA   Executive function same-different-selection     4  0.236 -0.220 1.05  
 5 ColA   Math               egma-math                   17  0.380  0.153 0.633 
 6 ColA   Reasoning          matrix-reasoning             9  0.242  0.223 0.351 
 7 ColA   Spatial cognition  mental-rotation              9  0.392  0.414 0.665 
 8 ColA   Language           vocab                        4  0.692  0.993 0.653 
 9 ColA   Reading            pa-es                        4  0.5    0.5   0     
10 ColA   Reading            sre-es                       9  0.164  0.156 0.0888
# ℹ 51 more rows

All scores, sina plots

Code
ggplot(scores, aes(x = school, y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 2,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_sina(aes(color = task_category), alpha = 0.8) + 
  scale_color_manual(values = task_pal) +
  labs(x="School", y = "Score") +
  guides(color = "none") +
  theme(strip.text = element_text(size = 10))

Code
ggsave("plots/reports/all_scores_by_school_sina.png", width = 14, height = 8)

All scores, violin plots

Code
ggplot(scores, aes(x = school, y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 2,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_violin(trim = F, aes(fill = task_category), alpha = 0.8) + 
  geom_boxplot(width=0.1) +
  scale_fill_manual(values = task_pal) +
  labs(x="School", y = "Score") +
  guides(fill = "none") +
  theme(strip.text = element_text(size = 10))

Code
ggsave("plots/reports/all_scores_by_school_violin.png", width = 14, height = 8)

All scores, one school on one plot

Code
school_plot <- function(sch) {
  scores |> filter(school==sch) |> group_by(task) |>
    mutate(n = n_distinct(user_id), task_label = glue("{task}\n(n = {n})")) |>
    ungroup() |>
    ggplot(aes(x = "", y = score)) +
    ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 2,
                             nest_line = element_line(), solo_line = TRUE,
                             axes = "x",scales = "free_y") +
    geom_sina(aes(colour = task_category), alpha = 0.8) +
    scale_colour_manual(values = task_pal) +
    labs(y = "Score") +
    guides(colour = "none") +
    theme(axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          strip.text = element_text(size = 10))
}

school_plot("ColA")

Code
# ggsave("plots_co/all_scores_schoolA.png", width = 14, height = 8)

school_plot("ColB")

Code
# ggsave("plots_co/all_scores_schoolB.png", width = 14, height = 8)

school_plot("ColC")

Code
# ggsave("plots_co/all_scores_schoolC.png", width = 14, height = 8)

school_plot("ColD")

Code
# ggsave("plots_co/all_scores_schoolD.png", width = 14, height = 8)

school_plot("ColE")

Code
# ggsave("plots_co/all_scores_schoolE.png", width = 14, height = 8)

Just math & reading scores, one school on one plot

Code
school_math_read_plot <- function(sch) {
  scores |> 
    filter(school == sch, task_category %in% c("Math", "Reading")) |> 
    group_by(task) |> 
    mutate(n = n_distinct(user_id), task_label = glue("{task}\n(n = {n})")) |>
    ungroup() |>
    ggplot(aes(x = "", y = score)) +
    ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
                             nest_line = element_line(), solo_line = TRUE,
                             axes = "x",scales = "free_y") +
    geom_sina(aes(colour = task_category), alpha = 0.8) +
    scale_colour_manual(values = task_pal) +
    labs(y = "Score") +
    guides(colour = "none") +
    theme(axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          strip.text = element_text(size = 10))
}

school_math_read_plot("ColA")

Code
ggsave("plots_co/math_read_schoolA.png", width = 14, height = 8)

school_math_read_plot("ColB")

Code
ggsave("plots_co/math_read_schoolB.png", width = 14, height = 8)

school_math_read_plot("ColC")

Code
ggsave("plots_co/math_read_schoolC.png", width = 14, height = 8)

school_math_read_plot("ColD")

Code
ggsave("plots_co/math_read_schoolD.png", width = 14, height = 8)

school_math_read_plot("ColE")

Code
ggsave("plots_co/math_read_schoolE.png", width = 14, height = 8)

Save distribution files

Combine all distributions into one csv

Code
dist_all_schools <- dist_all_schools |> mutate(school = "all")
all_dist <- rbind(dist_all_schools, dist_by_school)
write_csv(all_dist, "data_co/scores_summary_reports.csv")

Top 5% in each task from each school

Code
top_5_percent <- function(df) {
  threshold <- quantile(df$score, 0.95)
  return(df |> filter(score >= threshold))
}

top_5_percent_users <- scores |>
  group_by(school, task) |>
  do(top_5_percent(.)) |>
  select(school, task_category, task, user_id, score)
write_csv(top_5_percent_users, "data_co/top5perc_task_school.csv")

Bottom 5% in each task from each school

Code
bottom_5_percent <- function(df) {
  threshold <- quantile(df$score, 0.05)
  return(df |> filter(score <= threshold))
}

bottom_5_percent_users <- scores |>
  group_by(school, task) |>
  do(bottom_5_percent(.)) |>
  select(school, task_category, task, user_id, score)

write_csv(bottom_5_percent_users, "data_co/bottom5perc_task_school.csv")

Percentiles across grades

I wonder if, for example, you could send us the percentage of students per school and grade in the different terciles or quartiles of each measure. I believe differences between grades will be appreciated.

Code
scores |>
  filter(task %in% c("egma-math","sre-es","swr-es"), grade != "") |>
  ggplot(aes(x = grade, y = score)) +
  ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
                           nest_line = element_line(), solo_line = TRUE,
                           axes = "x",scales = "free_y") +
  geom_jitter(alpha = .4, width = .2)+ 
  geom_quantile(aes(group = 1, col = factor(..quantile..)), method = "rq") + 
  labs(y = "Score", x = "Grade") +
  ggthemes::scale_color_solarized(name = "Quantile") 
Warning: The dot-dot notation (`..quantile..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(quantile)` instead.
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x

Code
scores |>
  filter(task %in% c("egma-math","sre-es","swr-es"), grade != "", 
         school %in% c("ColC", "ColD")) |>
  ggplot(aes(x = grade, y = score)) +
  facet_grid(task ~ school, scales = "free_y") + 
  # ggh4x::facet_nested_grid(vars(task_category, task), nrow = 1,
  #                          nest_line = element_line(), solo_line = TRUE,
  #                          axes = "x",scales = "free_y") +
  geom_jitter(alpha = .4, width = .2)+ 
  geom_quantile(aes(group = 1, col = factor(..quantile..)), method = "rq") + 
  labs(y = "Score", x = "Grade") +
  ggthemes::scale_color_solarized(name = "Quantile") 
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique

Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Smoothing formula not specified. Using: y ~ x
Warning in rq.fit.br(wx, wy, tau = tau, ...): Solution may be nonunique

Code
percentile_scores <- scores |>
  group_by(task) |>
  filter(task_category %in% c("Math", "Reading"), 
         task != "pa-es",
         !is.na(grade), grade != "") |>
  mutate(percentile = cut(score, 
                          breaks = quantile(score, probs = seq(0, 1, 0.2)),
                          include.lowest = TRUE,
                          labels = c("0-20","20-40","40-60","60-80","80-100")), 
         fct_grade = as_factor(grade))

percentile_summary <- percentile_scores |>
  filter(task_category %in% c("Math", "Reading")) |>
  filter(!is.na(percentile)) |>
  group_by(task_category, task, school, fct_grade, grade, percentile) |>
  dplyr::count(.drop = FALSE) |>
  group_by(task_category, task, school, grade) |>
  mutate(proportion = n / sum(n)) |>
  select(-n) |>
  pivot_wider(names_from = "percentile", values_from = "proportion", values_fill = 0)

#   
#   ggplot(percentile_summary, aes(x = grade, y = percentile)) +
#   ggh4x::facet_nested_wrap(vars(task_category, task), nrow = 1,
#                            nest_line = element_line(), solo_line = TRUE,
#                            axes = "x",scales = "free_y") +
#   # geom_point(aes(colour = task_category), alpha = 0.8) +
#   geom_tile(aes(fill= proportion)) + 
#   # scale_colour_manual(values = task_pal) +
#   # coord_flip() + 
#   viridis::scale_fill_viridis() +
#   labs(y = "Percentile") +
#   guides(colour = "none") +
#   theme(axis.ticks.x = element_blank(),
#         axis.title.x = element_blank(),
#         strip.text = element_text(size = 10))
Code
knitr::kable(filter(percentile_summary, school == "ColB"), digits = 2)
task_category task school fct_grade grade 0-20 20-40 40-60 60-80 80-100
Math egma-math ColB 0 0 0.92 0.08 0.00 0.00 0.00
Math egma-math ColB 1 1 0.43 0.36 0.18 0.04 0.00
Math egma-math ColB 2 2 0.14 0.71 0.07 0.00 0.07
Math egma-math ColB 3 3 0.00 0.38 0.33 0.19 0.10
Math egma-math ColB 4 4 0.00 0.16 0.26 0.05 0.53
Math egma-math ColB 5 5 0.00 0.20 0.20 0.40 0.20
Math egma-math ColB 6 6 0.00 0.00 0.00 0.25 0.75
Reading sre-es ColB 2 2 0.44 0.31 0.19 0.06 0.00
Reading sre-es ColB 3 3 0.12 0.19 0.19 0.38 0.12
Reading sre-es ColB 4 4 0.12 0.12 0.12 0.38 0.25
Reading sre-es ColB 5 5 0.05 0.24 0.33 0.19 0.19
Reading sre-es ColB 6 6 0.00 0.00 0.00 0.00 1.00
Reading swr-es ColB 2 2 0.60 0.10 0.10 0.10 0.10
Reading swr-es ColB 3 3 0.05 0.21 0.37 0.32 0.05
Reading swr-es ColB 4 4 0.00 0.11 0.00 0.32 0.58
Reading swr-es ColB 5 5 0.00 0.00 0.38 0.12 0.50
Reading swr-es ColB 6 6 0.00 0.00 0.00 0.17 0.83
Code
knitr::kable(filter(percentile_summary, school == "ColC"), digits = 2)
task_category task school fct_grade grade 0-20 20-40 40-60 60-80 80-100
Math egma-math ColC 0 0 1.00 0.00 0.00 0.00 0.00
Math egma-math ColC 1 1 0.00 1.00 0.00 0.00 0.00
Math egma-math ColC 2 2 0.06 0.18 0.19 0.32 0.24
Math egma-math ColC 3 3 0.07 0.10 0.40 0.20 0.22
Math egma-math ColC 4 4 0.06 0.00 0.12 0.47 0.35
Math egma-math ColC 5 5 0.00 0.08 0.17 0.42 0.33
Math egma-math ColC 6 6 0.00 0.00 0.00 0.00 1.00
Reading sre-es ColC 2 2 0.41 0.26 0.15 0.15 0.03
Reading sre-es ColC 3 3 0.05 0.26 0.32 0.21 0.16
Reading sre-es ColC 4 4 0.00 0.17 0.17 0.33 0.33
Reading sre-es ColC 5 5 0.00 0.14 0.14 0.14 0.57
Reading sre-es ColC 6 6 0.00 0.00 0.00 0.00 1.00
Reading swr-es ColC 2 2 0.25 0.31 0.19 0.12 0.12
Reading swr-es ColC 3 3 0.09 0.18 0.41 0.23 0.09
Reading swr-es ColC 4 4 0.09 0.18 0.09 0.09 0.55
Reading swr-es ColC 5 5 0.00 0.44 0.11 0.22 0.22
Code
knitr::kable(filter(percentile_summary, school == "ColD"), digits = 2)
task_category task school fct_grade grade 0-20 20-40 40-60 60-80 80-100
Math egma-math ColD 0 0 0.86 0.07 0.04 0.00 0.04
Math egma-math ColD 1 1 0.42 0.46 0.08 0.04 0.00
Math egma-math ColD 2 2 0.33 0.17 0.28 0.22 0.00
Math egma-math ColD 3 3 0.16 0.12 0.34 0.12 0.25
Math egma-math ColD 4 4 0.07 0.22 0.26 0.26 0.19
Math egma-math ColD 5 5 0.04 0.15 0.19 0.41 0.22
Math egma-math ColD 6 6 0.00 0.13 0.07 0.27 0.53
Reading sre-es ColD 2 2 0.79 0.07 0.14 0.00 0.00
Reading sre-es ColD 3 3 0.42 0.26 0.16 0.11 0.05
Reading sre-es ColD 4 4 0.15 0.15 0.23 0.31 0.15
Reading sre-es ColD 5 5 0.00 0.07 0.27 0.47 0.20
Reading sre-es ColD 6 6 0.17 0.17 0.17 0.08 0.42
Reading swr-es ColD 2 2 0.81 0.10 0.00 0.05 0.05
Reading swr-es ColD 3 3 0.21 0.32 0.26 0.21 0.00
Reading swr-es ColD 4 4 0.12 0.41 0.18 0.29 0.00
Reading swr-es ColD 5 5 0.20 0.07 0.33 0.27 0.13
Reading swr-es ColD 6 6 0.07 0.07 0.21 0.21 0.43