Load & merge data

Load libraries

Load processed 24-25 data

### 0.4 Set data folder Path
#Folder path
raw_data_folder_path <- "District Partners/Nashville/Nashville - HS Pathways - 2025/1. Data & Analysis - Secure"

### 0.5 Load data
# Loading our final analysis file
course_data_unified <- ers_read_sharepoint(folder_path = raw_data_folder_path,
                    file_name_with_extension = "/Course Schedule Analysis/09_cs_post_analysis_24-25_with_avid_FERPA.csv")

# Rename
school_data <- course_data_unified

Modify data

Update teacher period count

  • M_num_periods is calculated in the 24-25 09-script before Study Hall and Independent Study are filtered out. So, I want to recalculate periods taught per teacher after these courses are filtered out to see if this changes their utilization.

  • Why do we care? With the new bell schedule (ERS 23-24), MNPS teachers receive supplemental pay for teaching 6 periods (instead of 5). I’m not sure teaching one section/period of Independent Study/Study Hall would trigger this supplemental pay, which would impact which teachers trigger this in our cost per pathway analysis.

# check a teacher teaching with 6+ periods
employee_id <- 143369

### IMPT!!! M_num_periods is calculated in the 24-25 09-script *BEFORE* Study Hall and Independent Study are filtered out
tchr_period_check_before <- course_data_unified %>% 
  filter(D_employee_id == employee_id) %>% 
  group_by(D_employee_id, D_period, C_course_name_final, C_class_id, M_num_periods, M_courses_prepped, M_class_size) %>% 
  summarise(stu_count = n_distinct(D_stu_id))

# calculate new period count per teacher
school_data <- school_data %>% 
  group_by(D_employee_id) %>% 
  mutate(M_num_periods_new = n_distinct(D_period))

# check the same teacher
tchr_period_check_after <- school_data %>%
  filter(D_employee_id == employee_id) %>% 
  group_by(D_employee_id, D_period, C_course_name_final, C_class_id, M_num_periods, M_num_periods_new, M_courses_prepped, M_class_size) %>% 
  summarise(stu_count = n_distinct(D_stu_id))

# check how many teachers this impacts
tchr_period_no_match <- school_data %>% 
  group_by(D_employee_id,
           M_num_periods,
           M_num_periods_new) %>% 
  mutate(match_flag = if_else(M_num_periods == M_num_periods_new,
                               "Match",
                               "No Match")) %>% 
  filter(match_flag == "No Match") %>% 
  group_by(D_employee_id, D_job_title, D_period, C_course_name_final, C_class_id, M_num_periods, M_num_periods_new, M_courses_prepped, M_class_size) %>% 
  summarise(row = n())

cat("Teachers where period count doesn't match: ",
    n_distinct(tchr_period_no_match$D_employee_id), "\n", sep = "")
## Teachers where period count doesn't match: 17

Filter out uncredited courses

# Filter out rows with C_course_subject_area that is Uncredited: ACT Prep, HS Placeholder, College Career Readiness
school_data <- school_data %>%
  filter(C_course_subject_area != "Uncredited" & C_course_subject_area != "EE Principles of Transition")

# Review the job title of teachers assigned to CBL courses
course_data_unified %>% 
  filter(str_detect(C_course_name_clean, "CBL")) %>% 
  group_by(D_job_title) %>% 
  summarise(stu_count = n_distinct(D_stu_id),
            class_count = n_distinct(C_class_id),
            tchr_count = n_distinct(D_employee_id)) %>% 
  arrange(desc(stu_count))
## # A tibble: 14 × 4
##    D_job_title                                 stu_count class_count tchr_count
##    <chr>                                           <int>       <int>      <int>
##  1 Coach College and Career Readiness                 98          15          3
##  2 Teacher CTE Health Sciences                        32           3          1
##  3 Teacher Social Studies History                     22           1          1
##  4 Teacher Business Education                         20           1          1
##  5 Teacher Ex Ed Interventionist                      20           2          2
##  6 Teacher Recovery A Plus Lab                        18           2          1
##  7 Teacher Math                                       14           1          1
##  8 Teacher Ex Ed Comprehensive                        13           6          1
##  9 Teacher Health Physical Education Secondary        11           1          1
## 10 Teacher Arts Drama and Dance                       10           1          1
## 11 Coach Academy                                       8           2          1
## 12 Teacher JAGTN                                       6           1          1
## 13 <NA>                                                3           4          1
## 14 Teacher CTE Business and Marketing                  1           1          1

School names

# create list of school names and ids
school_ids <- school_data %>% 
  group_by(D_location_id,
           D_location_name) %>% 
  summarise(count = n())

# create shorter school name fields
# 1) create mapping table for C_primary_school
school_map_1 <- tribble(
  ~C_primary_school,                      ~C_primary_school_clean,
  "MNPS Antioch High",                    "Antioch",
  "MNPS Cane Ridge High",                 "Cane Ridge",
  "MNPS Glencliff High",                  "Glencliff",
  "MNPS Hillsboro High",                  "Hillsboro",
  "MNPS Hunters Lane High",               "Hunters Lane",
  "MNPS James Lawson High",               "Lawson",
  "MNPS Maplewood High",                  "Maplewood",
  "MNPS McGavock High",                   "McGavock",
  "MNPS Overton High",                    "Overton",
  "MNPS Pearl-Cohn High",                 "Pearl-Cohn",
  "MNPS Stratford STEM Magnet High",      "Stratford STEM",
  "MNPS Whites Creek High",               "Whites Creek")

# 2) create school name table for D_location_name
school_map_2 <- tribble(
  ~D_location_name,                ~D_location_name_clean,
  "Antioch High",                    "Antioch",
  "Cane Ridge High",                 "Cane Ridge",
  "Glencliff High",                  "Glencliff",
  "Hillsboro High",                  "Hillsboro",
  "Hunters Lane High",               "Hunters Lane",
  "James Lawson High",               "Lawson",
  "Maplewood High",                  "Maplewood",
  "McGavock High",                   "McGavock",
  "Overton High",                    "Overton",
  "Pearl-Cohn High",                 "Pearl-Cohn",
  "Stratford STEM Magnet High",      "Stratford STEM",
  "Whites Creek High",               "Whites Creek")

# update school_data with mergable school names
school_data <- school_data %>% 
  left_join(school_map_1, by = "C_primary_school")

school_data <- school_data %>% 
  left_join(school_map_2, by = "D_location_name")

Load pathways offerings by school

# load data
pathway_offerings <- ers_read_sharepoint(
  folder_path = raw_data_folder_path,
  file_name_with_extension =
    "/Sources of Truth/Pathway Offerings 24-25 & 25-26.xlsx")

# remove the 25-26 change details field
pathway_offerings <- pathway_offerings %>% 
  select(-c(C_labor_market_data,
            `C_school_pathway_include_flag_25-26`,
            `C_change_details_25-26`)) %>% 
  rename(C_academy_name = C_school_academy_name,
         C_pathway_name = C_school_pathway_name,
         C_pathway_type = C_school_pathway_type,
         C_pathway_status = "C_school_pathway_status_25-26",
         C_pathway_include_flag = "C_school_pathway_include_flag_24-25")

Load pathway completer data

# create a historical completer weights table based on the rate PER SCHOOL
# load pathway completer data
pathway_completers <- ers_read_sharepoint(
  folder_path = raw_data_folder_path,
  file_name_with_extension = "/Sources of Truth/2024-25 Pathway Completers.xlsx")

# check for duplicate students
dup_stu_ids <- pathway_completers %>% 
  group_by(D_stu_id) %>% 
  mutate(stu_count = n()) %>% 
  filter(stu_count > 1)

# summarize by pathway by school
pathway_completers_summary <- pathway_completers %>% 
  group_by(D_location_id,
           D_location_name,
           C_pathway_name) %>% 
  summarise(
    completer_count = n_distinct(D_stu_id[C_pathway_completion == "YES"]),
    not_completer_count = n_distinct(D_stu_id[C_pathway_completion == "NO"]),
    pathway_count = n_distinct(D_stu_id))

Load pathway course coding

# load pathway course coding data
pathway_course_coding <- ers_read_sharepoint(
  folder_path = raw_data_folder_path,
  file_name_with_extension = "/Sources of Truth/2025 Pathway Course Coding.xlsx")

# count number of duplicate MNPS and state course codes
pathway_course_coding <- pathway_course_coding %>%
  group_by(D_course_id) %>% 
  mutate(mnps_count = n()) %>% 
  group_by(C_pathway_course_id) %>% 
  mutate(state_count = n())

# check: there should only be one row per mnps D_course_id (confirmed!)
pathway_course_coding %>% 
  group_by(mnps_count) %>% 
  summarise(course_count = n())
## # A tibble: 1 × 2
##   mnps_count course_count
##        <int>        <int>
## 1          1          228
# check how many D_course_ids count towards each state_course_id
# e.g., there are 4 state courses where 3 diff mnps D_course_ids count towards it
pathway_course_coding %>%
  group_by(C_pathway_course_id) %>% 
  summarise(mnps_id_count = n_distinct(D_course_id)) %>% 
  group_by(mnps_id_count) %>% 
  summarise(pathway_id_count = n_distinct(C_pathway_course_id))
## # A tibble: 4 × 2
##   mnps_id_count pathway_id_count
##           <int>            <int>
## 1             1              131
## 2             2               39
## 3             3                5
## 4             4                1

Load state and SPOS pathway data

State data

# load TN programs of study data
state_pathways <- ers_read_sharepoint(
  folder_path = raw_data_folder_path,
  file_name_with_extension =
    "/Sources of Truth/2024-25 TN CTE Programs of Study Guide.xlsx")

# trim course names (just to be safe!) and create pathway type flag
state_pathways <- state_pathways %>%
  rename(C_pathway_course_name = C_state_course_name,
         C_pathway_name = C_state_pathway,
         C_pathway_course_id = C_state_course_id,
         C_pathway_year_1 = C_state_course_year_1,
         C_pathway_year_2 = C_state_course_year_2,
         C_pathway_year_3 = C_state_course_year_3,
         C_pathway_year_4 = C_state_course_year_4,
         C_pathway_elective = C_state_course_elective,) %>% 
  mutate(C_pathway_course_name = trimws(C_pathway_course_name),
         C_pathway_name = trimws(C_pathway_name),
         C_pathway_course_id = trimws(C_pathway_course_id),
         C_pathway_type = "TISA")

# calculate number of possible pathways per course
state_pathways_clean <- state_pathways %>% 
  group_by(C_pathway_course_id) %>% 
  mutate(num_pathways = n()) %>%
  # remove notes column
  select(-"C_state_course_notes") %>% 
  # filter away courses with more than 7 possible pathways
  ## see summary of courses below!
  filter(num_pathways <= 7)

# count the number of unique course names and ids
cat("Unique state IDs: ",
    n_distinct(state_pathways_clean$C_pathway_course_id), "\n",
    "Unique state course names: ",
    n_distinct(state_pathways_clean$C_pathway_course_name), "\n", sep = "")
## Unique state IDs: 885
## Unique state course names: 885
# for reference: excluded courses that count towards more than 7 pathways
state_pathways %>% 
  group_by(C_pathway_course_name) %>% 
  summarise(num_pathways = n()) %>%
  filter(num_pathways > 7) %>% 
  arrange(C_pathway_course_name)
## # A tibble: 6 × 2
##   C_pathway_course_name                                  num_pathways
##   <chr>                                                         <int>
## 1 Introduction to Geographical Information Systems (GIS)           13
## 2 JAG TN Course I                                                  57
## 3 JAG TN Course II                                                 57
## 4 JAG TN Course III                                                57
## 5 JAG TN Course IV                                                 57
## 6 Preparing for the ACT, Postsecondary, and Career                 57

SPOS data

# load MNPS SPOS courses
spos_pathways <- ers_read_sharepoint(
  folder_path = raw_data_folder_path,
  file_name_with_extension =
    "/Sources of Truth/2025-26 SPOS Pathway Courses.xlsx")

# trim course names (just to be safe!) and create pathway type flag
spos_pathways <- spos_pathways %>%
  rename(C_pathway_course_id = C_state_course_id,
         C_pathway_year_1 = C_pathway_course_year_1,
         C_pathway_year_2 = C_pathway_course_year_2,
         C_pathway_year_3 = C_pathway_course_year_3,
         C_pathway_year_4 = C_pathway_course_year_4) %>% 
  mutate(C_pathway_course_name = trimws(C_pathway_course_name),
         C_pathway_name = trimws(C_pathway_name),
         C_pathway_course_id = trimws(C_pathway_course_id),
         C_pathway_type = "SPOS")

# calculate number of possible pathways per course
spos_pathways <- spos_pathways %>% 
  group_by(C_pathway_course_id) %>% 
  mutate(num_pathways = 1) %>% 
  select(-c(C_course_subject_area,
            D_course_id,
            D_course_name,
            count,
            C_pathway_course_year))

Merge state and SPOS data

# append state and spos data into one sheet
total_pathways <- bind_rows(
  state_pathways_clean,
  spos_pathways) %>% 
  # create column to code pathway level
  mutate(
    C_pathway_level = case_when(
      C_pathway_year_1 == "X" ~ "Level I",
      C_pathway_year_2 == "X" ~ "Level II",
      C_pathway_year_3 == "X" ~ "Level III",
      C_pathway_year_4 == "X" ~ "Level IV",
      C_pathway_elective == "X" ~ "Elective",
      TRUE ~ NA_character_))
# build column order for final output
cols_order <- c(
  "C_pathway_course_id",
  "C_pathway_course_name",
  "C_pathway_type",
  "num_pathways",
  unlist(lapply(1:7, function(i)
    paste0("pathway_", i, c("_cluster", "_pathway", "_level")))))

# transform data so each state course id has only one row and data for each pathway is transposed into columns
total_pathways_wide <- total_pathways %>%
  mutate(.orig_row = row_number()) %>%
  select(C_pathway_course_id,
         C_pathway_course_name,
         C_pathway_type,
         num_pathways,
         C_state_cluster,
         C_pathway_name,
         C_pathway_level, .orig_row) %>%
  distinct() %>%
  group_by(C_pathway_course_id,
           C_pathway_course_name,
           C_pathway_type,
           num_pathways) %>%
  arrange(C_state_cluster, .orig_row, .by_group = TRUE) %>%
  mutate(pw = row_number()) %>%
  filter(pw <= 7) %>%
  ungroup() %>%
  
  # drop row index so it doesn't become an ID in the pivot
  select(-.orig_row) %>%
  rename(
    cluster     = C_state_cluster,
    pathway     = C_pathway_name,
    level       = C_pathway_level) %>%
  tidyr::pivot_wider(
    id_cols    = c(C_pathway_course_id,
                   C_pathway_course_name,
                   C_pathway_type,
                   num_pathways),
    names_from = pw,
    values_from = c(cluster, pathway, level),
    names_glue = "pathway_{pw}_{.value}") %>%
  
  # ensure exactly one row per course id
  group_by(C_pathway_course_id) %>% slice(1) %>% ungroup() %>%

  # add any missing pathway columns (if some n never appears) and reorder exactly
  { missing <- setdiff(cols_order, names(.))
    if (length(missing) > 0) {
      . <- dplyr::bind_cols(.,tibble::as_tibble(setNames(rep(list(NA), length(missing)), missing))) }
    dplyr::select(., any_of(cols_order)) }

# ensure the wide table truly has 1 row per state course id
cat("Unique state IDs: ",
    n_distinct(total_pathways_wide$C_pathway_course_id), "\n",
    "Unique state course names: ",
    n_distinct(total_pathways_wide$C_pathway_course_name), "\n",
    "Unique state (ID, name) pairs: ",
    nrow(distinct(total_pathways_wide,
                  C_pathway_course_id,
                  C_pathway_course_name)), "\n", sep = "")
## Unique state IDs: 907
## Unique state course names: 907
## Unique state (ID, name) pairs: 907
# export to double-check in excel
#ers_write_sharepoint(total_pathways_wide, 
#                     folder_path = raw_data_folder_path, 
#                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_total_pathways_wide_2025.xlsx")

Merge pathway & state data

# merge state pathway data with one row per course with the MNPS pathway course coding data

# Join: keep all rows from pathway_course_coding; repeat wide cols per match
pathway_course_coding_wide <- pathway_course_coding %>%
  # select columns we want to join
  select(-any_of(c("D_course_name",
                   "C_pathway_course_name",
                   "count",
                   "mnps_count",
                   "state_count"))) %>%
  left_join(
    total_pathways_wide,
    by = "C_pathway_course_id")

# check for any IDs/names that didn't match
## should only be 3 JAG TN courses (confirmed!) - these were filtered out because they have 57 (!!!) possible pathways
### NEXT STEP: check which school(s) offer these courses, can manually assign these to a pathway later
pathway_course_coding %>%
  select(-any_of(c("D_course_name", "count", "mnps_count", "state_count"))) %>%
  anti_join(
    total_pathways_wide,
    by = "C_pathway_course_id") %>% 
  select(-C_course_subject_area)
## # A tibble: 3 × 5
## # Groups:   C_pathway_course_id [3]
##   D_course_id D_course_name_clean C_pathway_course_id C_pathway_course_name
##   <chr>       <chr>               <chr>               <chr>                
## 1 JAG2520S1   JAG TN I            C25H20              JAG TN Course I      
## 2 JAG2521S1   JAG TN II           C25H21              JAG TN Course II     
## 3 JAG2522S1   JAG TN III          C25H22              JAG TN Course III    
## # ℹ 1 more variable: C_course_EPSO_type <chr>
# filter out 3 JAG TN courses
pathway_course_coding_wide <- pathway_course_coding_wide %>% 
  filter(!C_pathway_course_id %in% c("C25H20",
                                     "C25H21",
                                     "C25H22"))


# summary
pathway_course_coding_wide %>% 
  group_by(num_pathways) %>% 
  summarise(course_count = n())
## # A tibble: 6 × 2
##   num_pathways course_count
##          <dbl>        <int>
## 1            1          181
## 2            2           18
## 3            3            9
## 4            4            6
## 5            6            5
## 6            7            6
# number of duplicate D_course_ids
dup_count <- pathway_course_coding_wide %>%
  count(D_course_id) %>%
  filter(n > 1) %>%
  nrow()

cat("Number of duplicate D_course_ids: ", dup_count, "\n", sep = "")
## Number of duplicate D_course_ids: 0
# pathway coure coding long
# create a list of columns from state pathway data
coding_cols <- grep("^pathway_[1-7]_(cluster|pathway|level)$",
                  names(pathway_course_coding_wide), value = TRUE)

# move pathway columns from wide to long so each class has a row for each possible state pathway (e.g., a Accounting I class has a row for pathways Accounting, Banking & Finance, and Business Management)
pathway_course_coding_long <- pathway_course_coding_wide %>%
  pivot_longer(
    cols = all_of(coding_cols),
    names_to = c("which_pathway", "field"),
    names_pattern = "pathway_(\\d+)_(cluster|pathway|level)",
    values_to = "val",
    values_drop_na = TRUE) %>%
    # spread "field" so we get two columns: cluster, pathway (plus which_pathway)
  pivot_wider(names_from = field, values_from = val)

# rename columns
pathway_course_coding_long <- pathway_course_coding_long %>% 
  rename(C_state_cluster = cluster,
         C_pathway_name = pathway,
         C_pathway_level = level)

# export to double-check in excel
#ers_write_sharepoint(pathway_course_coding_long, 
#                     folder_path = raw_data_folder_path, 
#                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_pathway_course_coding_long_2025.xlsx")

Merge with school data

# Join so everything in school_data stays, and if there’s no match in pathway_course_coding_joined for the same D_course_id, you’ll get NA

# drop the colliding columns from the right table
coding_for_join <- pathway_course_coding_wide %>%
  select(-D_course_name_clean, -C_course_subject_area, -C_course_EPSO_type)

# left join: keep all rows in school_data, join on D_course_id, non-matches -> NA
school_data_joined <- school_data %>%
  left_join(coding_for_join, by = "D_course_id")

Calculate pathway metrics

# create final course name for coded pathway courses
school_data_joined <- school_data_joined %>%
  group_by(C_class_id) %>% 
  mutate(C_pathway_course_name_final = str_c(sort(unique(C_pathway_course_name)),
                                       collapse = " / ")) %>% 
  ungroup()

# count students in per course per C_class_id
school_data_joined <- school_data_joined %>%
  group_by(C_class_id,
           D_course_id,
           C_pathway_course_id) %>% 
  mutate(M_stu_per_course = n_distinct(D_stu_id),
         C_state_course_flag = if_else(is.na(C_pathway_course_id),
                                       FALSE,
                                       TRUE)) %>% 
  ungroup()

# count students in pathway, EPSO, and SWD courses
school_data_joined <- school_data_joined %>%
  group_by(C_class_id) %>% 
  mutate(M_stu_per_pw_course =
           n_distinct(D_stu_id[C_state_course_flag == TRUE]),
         M_stu_per_epso_course =
           n_distinct(D_stu_id[C_course_EPSO == TRUE]),
         M_stu_per_swd_course =
           n_distinct(D_stu_id[C_course_swd_flag == TRUE]),
         M_stu_per_eld_course =
           n_distinct(D_stu_id[C_course_eld_flag == TRUE])) %>% 
  ungroup()

# check teachers teaching with 6+ periods
tchr_check_after <- school_data_joined %>% 
  filter(D_employee_id == 143369) %>% 
  group_by(D_period,
           C_class_id,
           C_course_name_final,
           C_pathway_course_name_final,
           C_class_size_exclude,
           M_class_size,
           M_num_periods,
           M_num_periods_new) %>% 
  summarise(stu_count = n_distinct(D_stu_id))

Standard roll-up tables

Check double bookings

# check to see how many classes have a different number of unique students vs. class size after double-bookings have been resolved

db_check <- school_data_joined %>%
  filter(C_class_size_exclude != "Exclude") %>% 
  group_by(C_class_id, C_course_name, C_class_size_exclude) %>%
  summarise(class_M_num_students = mean(M_num_students),
            class_M_class_size = mean(M_class_size)) %>% 
  group_by(C_class_id, C_course_name, C_class_size_exclude, class_M_num_students, class_M_class_size) %>%
  summarise(db_size = class_M_num_students - class_M_class_size)

# create summary
db_check_summary <- db_check %>% 
  group_by(db_size) %>% 
  summarise(count = n())

db_check_summary
## # A tibble: 9 × 2
##   db_size count
##     <dbl> <int>
## 1       0  4488
## 2       1    49
## 3       2    12
## 4       3     3
## 5       6     2
## 6       7     1
## 7       8     2
## 8      10     1
## 9      16     2

Pathway roll-up tables

All Sections Table

# create a list of all state pathways
list_of_pathways <- state_pathways %>% 
  group_by(C_state_cluster, C_pathway_name) %>% 
  summarise(course_count = n_distinct(C_pathway_course_id))

# create roll-up by C_class_id for ALL COURSES
### COME BACK TO: excluding CLASSES that are primarily SWD (e.g., AAD/AL)

all_sections_table <- school_data_joined %>%
  ungroup() %>%
  # count the number of students per C_class_id and pathway
  group_by(C_class_id,
           across(matches("^pathway_[1-7]_(cluster|pathway)$"))) %>% 
  mutate(stu_count_in_pathway = n_distinct(D_stu_id)) %>%
  # filter for sections we want to include
  filter(C_class_size_exclude == "Include",
         C_course_swd_flag != TRUE) %>%
  # count the number of pathway D_course_ids per C_class_id
  group_by(C_class_id) %>% 
  mutate(D_course_id_count = n_distinct(D_course_id),
         C_pathway_course_id_final = str_c(sort(unique(C_pathway_course_id)),
                                       collapse = " / "),
         C_pathway_course_name_final = str_c(sort(unique(C_pathway_course_name)),
                                       collapse = " / ")) %>% 
  arrange(C_class_id, desc(C_course_EPSO_type != "Not EPSO")) %>%
  group_by(D_location_id,
           D_location_name_clean,
           C_class_id,
           D_period,
           M_class_size,
           M_num_students,
           M_stu_per_pw_course,
           M_stu_per_epso_course,
           M_stu_per_swd_course,
           M_stu_per_eld_course,
           D_course_id_count,
           C_course_name_final,
           C_pathway_course_id_final,
           C_pathway_course_name_final,
#           C_course_EPSO_type,
           C_primary_school_clean,
           D_employee_id,
           D_job_title,
           M_num_periods,
           M_num_periods_new,
           M_courses_prepped,
           C_teacher_load_exclude,
           C_teachers_to_exclude,
           num_pathways,
           across(matches("^pathway_[1-7]_(cluster|pathway)$"))) %>%
  summarise(row_count = n()) %>% 
  # create flag for duplicate class ids
  mutate(class_count = 1)

# print summary before filter
cat("\nSummary before:\n")
## 
## Summary before:
cat("Rows: ",
    sum(all_sections_table$class_count), "\n",
    "Unique class ids: ",
    n_distinct(all_sections_table$C_class_id), "\n", sep = "")
## Rows: 4226
## Unique class ids: 4222
# Create class ID count flag
all_sections_table <- all_sections_table %>% 
  group_by(C_class_id) %>% 
  mutate(sum_row = sum(class_count)) %>% 
  ungroup() %>% 
  mutate(filter_flag = if_else(sum_row > 1 & is.na(num_pathways),
                               "Exclude",
                               "Include"))

# Filter for duplicates to review
dup_class_ids_check <- all_sections_table %>% 
  ungroup() %>% 
  filter(sum_row == 2)

# Filter away class_ids with duplicates
all_sections_table <- all_sections_table %>% 
  filter(filter_flag != "Exclude")

# Filter away consolidated class_ids where the pathway data 
all_sections_table <- all_sections_table %>%
  group_by(C_class_id) %>%
  mutate(max_row_count = max(row_count, na.rm = TRUE)) %>%
  filter(!(sum_row == 2 & !is.na(row_count) & row_count < max_row_count)) %>%
  ungroup() %>%
  select(-max_row_count)

# filter summary after filter
cat("\nSummary after:\n")
## 
## Summary after:
cat("Rows: ",
    sum(all_sections_table$class_count), "\n",
    "Unique class ids: ",
    n_distinct(all_sections_table$C_class_id), "\n", sep = "")
## Rows: 4222
## Unique class ids: 4222
# remove added columns
all_sections_table <- all_sections_table %>% 
  select(-c(row_count, sum_row, class_count, filter_flag))

# export to double-check in excel
#ers_write_sharepoint(all_sections_table, 
#                     folder_path = raw_data_folder_path, 
#                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_all_sections_table_2025.xlsx")

Checks

# check the number of unique class ids (should match the number of rows in the df)
cat("Unique class ids: ",
    n_distinct(all_sections_table$C_class_id), "\n",
    "Unique teachers: ",
    n_distinct(all_sections_table$D_employee_id), "\n",
    "Unique teachers to include: ",
    n_distinct(all_sections_table$D_employee_id[
      all_sections_table$C_teachers_to_exclude == 0]), "\n",
    "Unique teachers-school pairs: ",
    n_distinct(data.frame(
      all_sections_table$C_primary_school_clean[
        all_sections_table$C_teachers_to_exclude == 0],
      all_sections_table$D_employee_id[
        all_sections_table$C_teachers_to_exclude == 0])), "\n",
    "Unique teachers of pathway courses: ",
    n_distinct(data.frame(
      all_sections_table$D_employee_id[
        all_sections_table$M_stu_per_pw_course > 0])), "\n",
    sep = "")
## Unique class ids: 4222
## Unique teachers: 880
## Unique teachers to include: 878
## Unique teachers-school pairs: 878
## Unique teachers of pathway courses: 205
# check for duplicate C_class_ids
### COME BACK TO THIS! Currently *1 DUPLICATE* to resolve
dup_class_ids <- all_sections_table %>% 
  mutate(row_count = 1) %>% 
  group_by(C_class_id) %>% 
  mutate(sum_row = sum(row_count)) %>%
  filter(sum_row > 1)

# check to see how many classes have a different number of unique students vs. class size after double-bookings have been resolved
# One row per class, then compute the diff
db_check_2 <- all_sections_table %>%
  group_by(C_class_id) %>%
  summarise(
    C_course_name_final   = first(C_course_name_final),
    class_M_num_students  = mean(M_num_students, na.rm = TRUE),
    class_M_class_size    = mean(M_class_size, na.rm = TRUE),
    .groups = "drop") %>%
  mutate(db_size = class_M_num_students - class_M_class_size)

# Tally the differences
db_check_summary <- db_check_2 %>%
  count(db_size, name = "count") %>%
  arrange(db_size)

db_check_summary
## # A tibble: 8 × 2
##   db_size count
##     <dbl> <int>
## 1       0  4191
## 2       1    21
## 3       2     4
## 4       3     1
## 5       7     1
## 6       8     1
## 7      10     1
## 8      16     2

All Sections Table - Long

# create a list of columns from state pathway data
pathway_cols <- grep("^pathway_[1-7]_(cluster|pathway)$",
                  names(all_sections_table), value = TRUE)

# filter away non-pathway courses to join back in later
all_sections_non_pw <- all_sections_table %>% 
  filter(is.na(num_pathways))

all_sections_pw <- all_sections_table %>% 
  filter(!is.na(num_pathways))

# move pathway columns from wide to long so each class has a row for each possible state pathway (e.g., a Accounting I class has a row for pathways Accounting, Banking & Finance, and Business Management)
all_sections_pw_long <- all_sections_pw %>%
  pivot_longer(
    cols = matches("^pathway_\\d+_(cluster|pathway)$"),
    names_to = c("which_pathway", "field"),
    names_pattern = "pathway_(\\d+)_(cluster|pathway)",
    values_to = "val",
    values_drop_na = TRUE) %>%
  pivot_wider(
    names_from = field,
    values_from = val) %>%
  rename(
    C_state_cluster = cluster,
    C_pathway_name  = pathway)

wide_cols_rx <- "^pathway_\\d+_(cluster|pathway)$"

# remove old wide pathway cols from the non-PW slice, then pad long-only cols
all_sections_non_pw_padded <- all_sections_non_pw %>%
  select(-matches(wide_cols_rx)) %>%
  mutate(
    which_pathway   = NA_character_,
    C_state_cluster = NA_character_,
    C_pathway_name  = NA_character_)

# unify columns and stack
common_cols <- union(names(all_sections_pw_long),
                     names(all_sections_non_pw_padded))

all_sections_table_long <- bind_rows(
  all_sections_pw_long %>% select(any_of(common_cols)),
  all_sections_non_pw_padded %>% select(any_of(common_cols))) %>%
  # drop any columns that are entirely NA (including any leftover wide cols)
  select(where(~ !all(is.na(.))))

Completer summary

# join 1) academies_and_pathways (-> school_list), 2) pathway_completers_summary, and 3) stu_per_pw_per_schl

# figure out which school-pathway combos are in each dataframe
keys <- c("D_location_id", "C_pathway_name")

# 1) Normalize column names and de-dup
a <- pathway_offerings %>%
  distinct(D_location_id, C_pathway_name)

b <- pathway_completers_summary %>%
  group_by(D_location_id, C_pathway_name) %>% 
  summarise(n = n()) %>% 
  distinct(D_location_id, C_pathway_name)

c <- all_sections_table_long %>%
  group_by(D_location_id, C_pathway_name) %>% 
  summarise(n = n()) %>% 
  distinct(D_location_id, C_pathway_name)

d <- total_pathways %>% 
  group_by(C_pathway_name, C_pathway_type) %>% 
  summarise(n = n()) %>% 
  distinct(C_pathway_name, C_pathway_type)

# 2) Audit with three presence flags
pathway_completer_audit <- full_join(mutate(a, in_school_list = TRUE),
                                     mutate(b, in_completers  = TRUE),
                                     by = keys) %>%
  full_join(mutate(c, in_students = TRUE), by = keys) %>%
  mutate(
    in_school_list = coalesce(in_school_list, FALSE),
    in_completers  = coalesce(in_completers,  FALSE),
    in_students    = coalesce(in_students,    FALSE),
    status = case_when(
      in_school_list & in_completers & in_students ~ "all_three",
      in_school_list & in_completers               ~ "school+completers",
      in_school_list & in_students                 ~ "school+students",
      in_completers  & in_students                 ~ "completers+students",
      in_school_list                               ~ "only_school_list",
      in_completers                                ~ "only_completer_data",
      in_students                                  ~ "only_student_data")) %>%
  arrange(D_location_id, status)

# quick counts
pathway_completer_audit %>%
  group_by(status) %>%
  summarise(pw_count = n(), .groups = "drop")
## # A tibble: 7 × 2
##   status              pw_count
##   <chr>                  <int>
## 1 all_three                 89
## 2 completers+students        5
## 3 only_completer_data       17
## 4 only_school_list           6
## 5 only_student_data        212
## 6 school+completers          3
## 7 school+students            8
# 6) merge in completer count per pathway per school
pathway_completer_audit <- pathway_completer_audit %>% 
  left_join(pathway_completers_summary, by = c("D_location_id",
                                               "C_pathway_name"))

Pathway weights

Merge in completer data
# select columns to merge
pathway_completer_merge <- pathway_completer_audit %>% 
  select(D_location_id,
         C_pathway_name,
         audit_status = status,
         completer_count,
         not_completer_count)

# merge
all_sections_table_long <- all_sections_table_long %>% 
  left_join(pathway_completer_merge, by = c("D_location_id",
                                            "C_pathway_name")) %>% 
  tidyr::replace_na(list(completer_count = 0))
Merge in pathway offerings data
# join in other info from Pathway Offerings 2024-25 & 2025-26
all_sections_table_long <- all_sections_table_long %>%
  left_join(
    pathway_offerings,
    by = c("D_location_id", "C_pathway_name")) %>% 
  # create flag from source of truth: Pathway Offerings 2024-25
  mutate(pw_offered = if_else(C_pathway_include_flag == "Include",
                              1,
                              0))
Calculate weights per pathway per class
# calculate weights based on:
# 1) completer counts by pathway/school
# 2) our Source of Truth: Pathway Offerings list by pathway/school

all_sections_table_long <- all_sections_table_long %>%
  mutate(
    # 1. completer_count:
    # Fill in missing values (NA) with 0 so that all rows have a valid number.
    # Without this, sums and weights could break when NAs appear.
    completer_count = coalesce(completer_count, 0),
    # 2. not_completer_count:
    not_completer_count = coalesce(not_completer_count, 0),
    # 3. pw_offered:
    # Convert the school-pathway "offered" flag into a clean 0/1 column.
    # If pw_offered is missing (NA), treat it as 0 = not offered.
    pw_offered      = coalesce(pw_offered, 0L),
    # 4. pw_include:
    # A logical (TRUE/FALSE) flag for whether a row should be included in
    # weight calculations. For now, this is the same as pw_offered,
    # but could update it (e.g., also check audit_status).
    pw_include        = (pw_offered == 1L)) %>%
  
  group_by(C_class_id) %>%
  mutate(
    # 1. num_pw_offered:
    # Count how many pathways are actually "included" (pw_include == TRUE)
    # for this class across the school. This gives the number of valid
    # pathways to consider when splitting students.
    num_pw_offered = sum(pw_include, na.rm = TRUE),

    # 2. cc_included:
    # Keep completer_count only for included pathways, set to 0 otherwise.
    # This way, only eligible pathways contribute completer data.
    cc_included     = if_else(pw_include, completer_count, 0),
    
    # 3. sum_cc_included:
    # Total number of completers across all included pathways
    # for this class. Drives proportional weighting.
    sum_cc_included = sum(cc_included, na.rm = TRUE),

    # 4. raw_weight:
    # Initial weighting before normalization.
    raw_weight = case_when(
      # If no pathways are offered → 0
      num_pw_offered == 0 ~ 0,
      # If exactly 1 pathway offered → give it all the weight
      num_pw_offered == 1 ~ if_else(pw_include, 1, 0),
      # If completers exist → proportional by completer_count
      sum_cc_included > 0 ~ cc_included,
      # If no completers but >1 offered → equal split among included
      TRUE ~ if_else(pw_include, 1, 0)),

    # 5. denom:
    # Sum of raw weights across included pathways.
    # Used to normalize into final weights.
    denom  = sum(raw_weight, na.rm = TRUE),
    
    # 6. M_pw_weight:
    # Final normalized weight for this row (between 0 and 1).
    # Guarantees weights across a class sum to 1.
    M_pw_weight = if_else(denom > 0, raw_weight / denom, 0),
    
    # 7. M_pw_stu_count:
    # Apply the weight to the class’s total student count.
    # This produces the allocated number of students for this pathway.
    M_pw_stu_count = M_num_students * M_pw_weight) %>%
  ungroup() %>%
  
  # 8. M_pw_include:
  # Final include/exclude flag for each class–pathway row.
  # Logic:
  #   - "Include" if pathway is in our Pathway Offerings for that school
  #   - AND it received a positive student allocation (M_pw_stu_count > 0).
  #   - Otherwise "Exclude" (either not in Pathway Offerings or got 0 students).
  mutate(M_pw_include = if_else(
    pw_include & M_pw_stu_count > 0,
    "Include",
    "Exclude"))
Checks
  • Multi-pathway (offered) classes with no completers among offered pathways
    • Ideally: an EMPTY result (no such classes)

    • If large counts show up: likely data entry issues (missing completer audits) or a true signal that these courses have no completers yet.

# multi-pathway (offered) classes with no completers among offered pathways
all_sections_table_long %>%
  filter(num_pw_offered > 1,          # multiple offered pathways
         sum_cc_included == 0) %>%    # no completers among offered
  group_by(C_pathway_course_name_final) %>%
  summarise(class_count = n_distinct(C_class_id), .groups = "drop") %>%
  arrange(desc(class_count))
## # A tibble: 0 × 2
## # ℹ 2 variables: C_pathway_course_name_final <chr>, class_count <int>
  • Check multi-pathway courses by Pathway Offerings flag
    • For pw_offered == 1:
      • Most classes should land in “Include” after allocation
      • Some “Exclude” may occur (e.g., all weight legitimately went elsewhere), but should be rare
    • For pw_offered == 0:
      • All rows should be “Exclude” (not offered → not included).
      • If you see many pw_offered == 1 with “Exclude”: re-check weighting rules and inputs.
# check which multi-pathway courses by Pathway Offerings flag 
all_sections_table_long %>%
  group_by(M_pw_include, pw_offered) %>%
  summarise(class_count = n_distinct(C_class_id), .groups = "drop") %>%
  arrange(pw_offered, M_pw_include)
## # A tibble: 3 × 3
##   M_pw_include pw_offered class_count
##   <chr>             <dbl>       <int>
## 1 Exclude               0        3846
## 2 Exclude               1          17
## 3 Include               1         504
# 1) Top pathway/weight per class computed on ALL rows (no filter!)
top_by_class <- all_sections_table_long %>%
  arrange(C_class_id, C_pathway_name) %>%                     # tie-breaker: name
  group_by(C_class_id) %>%
  slice_max(M_pw_weight, with_ties = FALSE) %>%               # pick the single max
  ungroup() %>%
  transmute(
    C_class_id,
    top_pathway = C_pathway_name,
    top_weight  = M_pw_weight,
    top_completer_count = completer_count)

# 2) Reasons table: filter AFTER, then join the top info
reasons <- all_sections_table_long %>%
  filter(
    pw_offered == 1,
    M_pw_include == "Exclude") %>%
  left_join(top_by_class, by = "C_class_id") %>%
  mutate(
    reason = dplyr::case_when(
      num_pw_offered == 1 ~
        "BUG: single offered should get all students",
      audit_status ==
        "only_student_data" ~ "Excluded by audit_status",
      sum_cc_included > 0 & completer_count == 0 ~
        "Proportional: other pathways had completers",
      sum_cc_included == 0 & num_pw_offered > 1 ~
        "Equal split: weight went to other offered row(s)",
      TRUE ~ "Other")) %>%
  group_by(
    D_location_name_clean,
    C_pathway_name,
    num_pw_offered,
    audit_status,
    completer_count,
    not_completer_count,     # your new column
    sum_cc_included,
    top_pathway,             # from the full-class calc
    top_weight,              # from the full-class calc
    top_completer_count,
    reason) %>%
  summarise(class_count = n_distinct(C_class_id)) %>% 
  select(D_location_name_clean, C_pathway_name, class_count, everything())
  • Do weights over included rows sum to 1 when there is at least one offered pathway?
    • A single row: ok == TRUE with count equal to the total number of classes (from All Sections Table -> Checks from earlier!)
    • Any ok == FALSE indicates a normalization bug (weights didn’t sum to 1 when they should).
# Per-class: do weights over included rows sum to 1 when there is at least one offered pathway?
all_sections_table_long %>%
  group_by(C_class_id) %>%
  summarise(
    num_pw_offered = max(num_pw_offered, na.rm = TRUE),
    wt_sum_included = sum(M_pw_weight[pw_include], na.rm = TRUE)
  ) %>%
  mutate(ok = (num_pw_offered == 0 & wt_sum_included == 0) |
              (num_pw_offered > 0  & abs(wt_sum_included - 1) < 1e-9)) %>%
  count(ok)
## # A tibble: 1 × 2
##   ok        n
##   <lgl> <int>
## 1 TRUE   4222
  • Per-class: ensure at least one offered row got students
    • For num_pw_offered == 0: all FALSE (no possible pathways offered = nothing to include).
    • For num_pw_offered > 0: nearly all rows should be any_included == TRUE.
    • If many FALSEs here, investigate: offered pathways might all be getting zero allocation.
# Per-class: ensure at least one offered row got students
all_sections_table_long %>%
  group_by(C_class_id) %>%
  summarise(
    num_pw_offered = max(num_pw_offered, na.rm = TRUE),
    any_included = any(M_pw_include == "Include")) %>%
  count(num_pw_offered, any_included) %>%
  arrange(num_pw_offered, desc(any_included))
## # A tibble: 5 × 3
##   num_pw_offered any_included     n
##            <int> <lgl>        <int>
## 1              0 FALSE         3718
## 2              1 TRUE           446
## 3              2 TRUE            50
## 4              3 TRUE             5
## 5              4 TRUE             3
  • Per-class: ensure at least one offered pathway received students
    • For each C_class_id, confirms whether any pathway-school combo from our Pathway Offerings (pw_offered == 1) ended up included (M_pw_include == "Include")

    • num_pw_offered == 0 → all FALSE (nothing offered, nothing to include).

    • num_pw_offered == 1 → all TRUE (the single offered pathway should get all students).

    • num_pw_offered > 1 → nearly all TRUE (at least one offered pathway should receive students).

    • Note on findings: the 17 classes where offered pathways (included == TRUE) did not receive any students (excluded == TRUE), this aligns with our check from earlier: the other offered pathway had completers so the full weight went to the other pathway. This is likely because those are newly offered pathways at those schools.

# 
all_sections_table_long %>%
  group_by(C_class_id) %>%
  summarise(
    num_pw_offered   = max(num_pw_offered, na.rm = TRUE),
    num_included     = sum(M_pw_include == "Include", na.rm = TRUE),
    num_excluded     = sum(pw_offered == 1 & M_pw_include == "Exclude",
                           na.rm = TRUE), .groups = "drop") %>%
  mutate(
    any_included = num_included > 0,
    any_excluded = num_excluded > 0) %>%
  count(num_pw_offered, any_included, any_excluded) %>%
  arrange(num_pw_offered, desc(any_included), desc(any_excluded))
## # A tibble: 6 × 4
##   num_pw_offered any_included any_excluded     n
##            <int> <lgl>        <lgl>        <int>
## 1              0 FALSE        FALSE         3718
## 2              1 TRUE         FALSE          446
## 3              2 TRUE         TRUE            17
## 4              2 TRUE         FALSE           33
## 5              3 TRUE         FALSE            5
## 6              4 TRUE         FALSE            3
  • List of pathways included vs. excluded per school
    • Create a list to export and double-check against source of truth
pathway_offerings_check <- all_sections_table_long %>% 
  group_by(D_location_id,
           D_location_name_clean,
           C_pathway_name,
           C_pathway_include_flag) %>% 
  summarise(M_stu_per_pw_course = sum(M_stu_per_pw_course),
            class_count = n_distinct(C_class_id),
            M_pw_weight = sum(M_pw_weight),
            M_pw_stu_count = sum(M_pw_stu_count))

Teacher FTE per pathway

# --- 1) Collapse to TEACHER × CLASS (one row per period slot) ---
# Each C_class_id represents a single period. We compute how much of that period
# is counted toward pathways (summing only included rows). This will be 0 or 1.
teacher_class_rollup <- all_sections_table_long %>%
  group_by(D_employee_id, C_class_id) %>%
  summarise(
    # 1 if this class contributes to any included pathway; otherwise 0
    period_equiv_pw = sum(M_pw_weight[pw_include], na.rm = TRUE),
    # sanity: cap at 1 (in case of edge cases)
    period_equiv_pw = pmin(period_equiv_pw, 1),
    # each class id = one period slot
    period_equiv_all = 1,
    # teacher-level period count (repeated in raw; take max/first)
    M_num_periods_teacher = max(M_num_periods, na.rm = TRUE),
    .groups = "drop")

# --- 2) TEACHER totals (periods and FTE) ---
teacher_totals <- teacher_class_rollup %>%
  group_by(D_employee_id) %>%
  summarise(
    # total periods taught from class ids
    total_periods_from_classes = sum(period_equiv_all, na.rm = TRUE),
    # total *pathway* periods (only included)
    total_pw_periods = sum(period_equiv_pw, na.rm = TRUE),

    # FTEs (1.0 FTE = 5 periods)
    M_total_fte    = total_periods_from_classes / 5,
    M_total_pw_fte = total_pw_periods / 5,

    # optional audit: teacher-reported periods (from the repeated column)
    M_num_periods_reported = max(M_num_periods_teacher, na.rm = TRUE),
    M_total_fte_reported  = M_num_periods_reported / 5,
    .groups = "drop")

# --- 3) TEACHER × PATHWAY FTE (uses rows that were allocated students) ---
teacher_pathway_fte <- all_sections_table_long %>%
  filter(M_pw_include != "Exclude") %>%
  group_by(D_employee_id, C_pathway_name) %>%
  summarise(
    M_sections_per_pw = sum(M_pw_weight, na.rm = TRUE), # period-equivalents
    M_fte_per_pw      = M_sections_per_pw / 5,
    .groups = "drop")

# --- 4) Merge back into the main section ---
all_sections_table_long <- all_sections_table_long %>%
  left_join(teacher_totals,      by = "D_employee_id") %>%
  left_join(teacher_pathway_fte, by = c("D_employee_id","C_pathway_name")) %>%
  mutate(M_sections_per_pw = coalesce(M_sections_per_pw, 0),
         M_fte_per_pw      = coalesce(M_fte_per_pw, 0))
Checks
# A) Per teacher: sum of pathway FTEs equals teacher pathway FTE total
check_sum_pw_fte <- teacher_pathway_fte %>%
  group_by(D_employee_id) %>%
  summarise(sum_pathway_fte = sum(M_fte_per_pw, na.rm = TRUE),
            .groups = "drop") %>%
  left_join(select(teacher_totals, D_employee_id, M_total_pw_fte),
            by = "D_employee_id") %>%
  mutate(ok = abs(sum_pathway_fte - M_total_pw_fte) < 1e-9)

check_sum_pw_fte %>% 
  group_by(ok) %>% 
  summarise(tchr_count = n_distinct(D_employee_id))
## # A tibble: 1 × 2
##   ok    tchr_count
##   <lgl>      <int>
## 1 TRUE         116
# 1) Teachers who actually show up in the pathway analysis (allocated students)
pathway_teachers <- all_sections_table_long %>%
  filter(M_pw_include == "Include") %>%
  distinct(D_employee_id)

# 2) Build mismatches directly from teacher_totals, limited to pathway teachers
mismatch_in_pathways <- teacher_totals %>%
  select(D_employee_id, total_periods_from_classes, M_num_periods_reported) %>%
  semi_join(pathway_teachers, by = "D_employee_id") %>%
  mutate(
    # diff > 0 → class rows show MORE periods than reported (duplicates/splits?)
    # diff < 0 → reported periods exceed class rows (advisory/duty/prep missing?)
    diff = total_periods_from_classes - M_num_periods_reported) %>%
  filter(!is.na(diff) & diff != 0) %>%
  arrange(diff)

# 3) Summary: how many pathway teachers by diff size
cat("Teachers of included pathway courses with mismatched period counts: ",
    n_distinct(mismatch_in_pathways$D_employee_id), "\n", sep = "")
## Teachers of included pathway courses with mismatched period counts: 7
mismatch_in_pathways %>%
  count(diff, name = "teacher_count") %>%
  arrange(diff)
## # A tibble: 4 × 2
##    diff teacher_count
##   <dbl>         <int>
## 1    -3             2
## 2    -2             1
## 3    -1             3
## 4     4             1
# 4) Teacher drilldown rows for further inspection
mismatch_in_pathways_check <- all_sections_table_long %>%
  filter(D_employee_id %in% mismatch_in_pathways$D_employee_id)
# 1) Teachers originally flagged as teachers of pathway courses
orig_pathway_teachers <- all_sections_table %>%
  ungroup() %>% 
  filter(M_stu_per_pw_course > 0) %>%
  distinct(D_employee_id)

# 2) Teachers that remain after school-pathway weighting and coding
weighted_pathway_teachers <- all_sections_table_long %>%
  filter(M_pw_include == "Include") %>%
  distinct(D_employee_id)

# 3) Find the teachers who were dropped based on our school-pathway coding
dropped_teachers <- orig_pathway_teachers %>%
  anti_join(weighted_pathway_teachers, by = "D_employee_id")

# 4) Build a diagnostic dataframe for those 65
dropped_pathway_teachers <- all_sections_table_long %>%
  semi_join(dropped_teachers, by = "D_employee_id") %>%
  filter(M_stu_per_pw_course != 0) %>%
  select(D_employee_id,
         D_location_id,
         D_location_name_clean,
         C_primary_school_clean,
         D_job_title,
         C_class_id,
         C_course_name_final,
         C_pathway_course_name_final,
         M_stu_per_pw_course,
         num_pathways,
         which_pathway,
         C_pathway_name,
         pw_offered,
         M_pw_weight,
         M_pw_stu_count) %>%
  arrange(
    case_when(
      grepl("CTE", D_job_title, ignore.case = TRUE)  ~ 1,
      grepl("ROTC", D_job_title, ignore.case = TRUE) ~ 2,
      TRUE                                           ~ 3),
    D_job_title)

# 5) Print summary
cat("# pathway teachers before pathway weighting: ",
    n_distinct(orig_pathway_teachers$D_employee_id), "\n",
    "# pathway teachers after pathway weighting: ",
    n_distinct(weighted_pathway_teachers$D_employee_id), "\n", sep = "")
## # pathway teachers before pathway weighting: 205
## # pathway teachers after pathway weighting: 116

Student:FTE ratios

# Helper: aggregate then compute ratio safely
calc_stu_fte_ratio <- function(df, group_vars, extra_cols = NULL) {
  df %>%
    group_by(across(all_of(group_vars))) %>%
    summarise(
      total_students = sum(M_pw_stu_count, na.rm = TRUE),
      total_fte      = sum(M_pw_weight, na.rm = TRUE) / 5,
      .groups = "drop"
    ) %>%
    mutate(
      stu_fte_ratio = if_else(total_fte > 0, total_students / total_fte, NA_real_)
    ) %>%
    relocate(any_of(extra_cols), .after = 1)
}

# Base: allocated-only rows
base_alloc <- all_sections_table_long %>%
  filter(M_pw_include == "Include",
         M_courses_prepped < 10)

# 1) Pathway
by_pathway <- calc_stu_fte_ratio(
  base_alloc,
  group_vars = c("C_pathway_name"),
  extra_cols = NULL
) %>% mutate(level = "pathway")

# 2) School–Pathway (include school name if helpful)
by_school_pathway <- calc_stu_fte_ratio(
  base_alloc,
  group_vars = c("D_location_id", "D_location_name_clean", "C_pathway_name"),
  extra_cols = c("D_location_name_clean")
) %>% mutate(level = "school_pathway")

# 3) Cluster
by_cluster <- calc_stu_fte_ratio(
  base_alloc,
  group_vars = c("C_state_cluster"),
  extra_cols = NULL
) %>% mutate(level = "cluster")

# 4) School–Cluster
by_school_cluster <- calc_stu_fte_ratio(
  base_alloc,
  group_vars = c("D_location_id", "D_location_name_clean", "C_state_cluster"),
  extra_cols = c("D_location_name_clean")
) %>% mutate(level = "school_cluster")

# Stack all levels into one tidy table
stu_fte_all_levels <- bind_rows(
  by_pathway,
  by_school_pathway,
  by_cluster,
  by_school_cluster
) %>%
  # nice ordering: level then keys
  arrange(level, across(where(is.character)))

Files for export

Organize columns

# FORCE STOP
#stop("Force stop here — nothing below this line will run.")

# get column names
columns <- data.frame(column_names = colnames(all_sections_table_long))

# change column order
all_sections_table_final <- all_sections_table_long %>% 
  select(
    D_location_id,
    D_location_name_clean,
    C_class_id,
    D_period,
    M_class_size,
    M_num_students,
    M_stu_per_pw_course,
    M_stu_per_epso_course,
    M_stu_per_swd_course,
    M_stu_per_eld_course,
    C_course_name_final,
    C_pathway_course_id_final,
    C_pathway_course_name_final,
    C_primary_school_clean,
    D_employee_id,
    D_job_title,
    M_num_periods,
    M_num_periods_new,
    M_courses_prepped,
    C_teacher_load_exclude,
    C_teachers_to_exclude,
    num_pathways,
    C_state_cluster,
    C_pathway_name,
    audit_status,
    completer_count,
    not_completer_count,
    pw_offered,
    pw_include,
    num_pw_offered,
    cc_included,
    sum_cc_included,
    M_pw_weight,
    M_pw_stu_count,
    M_pw_include,
    total_periods_from_classes,
    total_pw_periods,
    M_total_fte,
    M_total_pw_fte,
    M_num_periods_reported,
    M_total_fte_reported,
    M_sections_per_pw,
    M_fte_per_pw)

Export

# export to double-check in excel
ers_write_sharepoint(pathway_course_coding_wide, 
                     folder_path = raw_data_folder_path, 
                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_pathway_course_coding_wide_2025.xlsx")
## [1] "Uploaded /Course Schedule Analysis/00. Testing/01_pathway_course_coding_wide_2025.xlsx to: 'District Partners/Nashville/Nashville - HS Pathways - 2025/1. Data & Analysis - Secure'"
# export to excel
ers_write_sharepoint(all_sections_table_final, 
                     folder_path = raw_data_folder_path, 
                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_all_sections_table_final_2025.xlsx")
## [1] "Uploaded /Course Schedule Analysis/00. Testing/01_all_sections_table_final_2025.xlsx to: 'District Partners/Nashville/Nashville - HS Pathways - 2025/1. Data & Analysis - Secure'"
ers_write_sharepoint(dropped_pathway_teachers, 
                     folder_path = raw_data_folder_path, 
                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_dropped_pathway_teachers_2025.xlsx")
## [1] "Uploaded /Course Schedule Analysis/00. Testing/01_dropped_pathway_teachers_2025.xlsx to: 'District Partners/Nashville/Nashville - HS Pathways - 2025/1. Data & Analysis - Secure'"
ers_write_sharepoint(school_rollup_table, 
                     folder_path = raw_data_folder_path, 
                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_school_rollup_table_2025.xlsx")
## [1] "Uploaded /Course Schedule Analysis/00. Testing/01_school_rollup_table_2025.xlsx to: 'District Partners/Nashville/Nashville - HS Pathways - 2025/1. Data & Analysis - Secure'"
ers_write_sharepoint(stu_fte_all_levels, 
                     folder_path = raw_data_folder_path, 
                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_stu_fte_all_levels_2025.xlsx")
## [1] "Uploaded /Course Schedule Analysis/00. Testing/01_stu_fte_all_levels_2025.xlsx to: 'District Partners/Nashville/Nashville - HS Pathways - 2025/1. Data & Analysis - Secure'"
ers_write_sharepoint(pathway_offerings_check, 
                     folder_path = raw_data_folder_path, 
                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_pathway_offerings_check_2025.xlsx")
## [1] "Uploaded /Course Schedule Analysis/00. Testing/01_pathway_offerings_check_2025.xlsx to: 'District Partners/Nashville/Nashville - HS Pathways - 2025/1. Data & Analysis - Secure'"
ers_write_sharepoint(sections_rollup_table, 
                     folder_path = raw_data_folder_path, 
                     file_name_with_extension = "/Course Schedule Analysis/00. Testing/01_sections_rollup_table_2025.xlsx")
## [1] "Uploaded /Course Schedule Analysis/00. Testing/01_sections_rollup_table_2025.xlsx to: 'District Partners/Nashville/Nashville - HS Pathways - 2025/1. Data & Analysis - Secure'"