9.2_Special_Education

Loading in Data

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(tidyr)

The data were downloaded from https://rptsvr1.tea.texas.gov/adhocrpt/adste.html (Student Program and Special Populations Reports).

# County lookup table; joined onto the final output by `Location`.
locations <- read.csv(file = "../../location_countyNames copy.csv")

# Student program counts by county, one row per county.
student_programs <- read.csv(file = "../StudPgmStateCounty24state.csv")

# Preview the first rows of the raw program data.
student_programs |>
  head()

  SCOPE AGGREGATION.LEVEL      YEAR      COUNTY.NAME REGION ALL.ENROLLMENT
1 STATE      COUNTYREGION 2023-2024  ANDERSON COUNTY      7           7858
2 STATE      COUNTYREGION 2023-2024   ANDREWS COUNTY     18           4269
3 STATE      COUNTYREGION 2023-2024  ANGELINA COUNTY      7          15871
4 STATE      COUNTYREGION 2023-2024   ARANSAS COUNTY      2           2968
5 STATE      COUNTYREGION 2023-2024    ARCHER COUNTY      9           2149
6 STATE      COUNTYREGION 2023-2024 ARMSTRONG COUNTY     16            319
  AT.RISK BILINGUAL DYSLEXIC ECONOMICALLY.DISADVANTAGED
1    3440        79      412                       5439
2    2117       484      271                       2112
3    8503       445     1261                      11411
4    1910         0      264                       1808
5     583         0      165                        709
6      89         0       17                         83
  EMERGENT.BILINGUAL.ENGLISH.LEARNER  ESL FOSTER.CARE GIFTED...TALENTED
1                                776  256          35               337
2                                764  308        -999               293
3                               2205  908          75              1377
4                                180  179          25               141
5                                 52   57        -999               157
6                               -999 -999           0                23
  HOMELESS MILITARY.CONNECTED SECTION.504 SPECIAL.EDUCATION TITLE.I
1       85                285         593              1224    7454
2       64                119         395               594    4222
3      266                329        1555              2416   12286
4      229                 21         290               449    2968
5       23                102         236               286    1057
6     -999                 24          17                40     319

# Title-case county names and restore the internal capital that title-casing
# removes from a few names (e.g. "Dewitt" -> "DeWitt").
#
# Vectorised: `name` may be a single string or a character vector, so callers
# no longer need to wrap the call in sapply()/vapply(). NA inputs stay NA
# instead of raising an error in `if`.
#
# name: character vector of county names (any case, without "County" suffix).
# Returns: unnamed character vector of the same length as `name`.
normalize_county_name <- function(name) {
  name <- str_to_title(name)

  # Title-cased spelling -> preferred spelling.
  special_cases <- c(
    Dewitt = "DeWitt",
    Mcculloch = "McCulloch",
    Mclennan = "McLennan",
    Mcmullen = "McMullen"
  )

  hit <- match(name, names(special_cases))
  fixed <- !is.na(hit)
  name[fixed] <- unname(special_cases[hit[fixed]])
  name
}

# Build a placeholder row for a location that has no data: `Nums` and `Pct`
# are NA, while the location name and time frame are filled in.
#
# county_name: value for the `Location` column.
# time_frame:  value for the `TimeFrame` column.
# Returns: a data.frame with columns Location, Nums, Pct, TimeFrame.
make_data_row <- function(county_name, time_frame) {
  placeholder_cols <- list(
    Location = county_name,
    Nums = NA,
    Pct = NA,
    TimeFrame = time_frame
  )
  do.call(data.frame, placeholder_cols)
}

Cleaning data

The data contain values of -999.00. Per previous documentation, TEA advised replacing these values with 2.5, so that replacement is part of the cleaning process.

# Keep only the columns needed for this indicator, then replace the -999
# placeholder with 2.5. Selecting first limits the replacement to the numeric
# count columns instead of running it over unrelated text columns.
clean_start <- student_programs |>
  select(COUNTY.NAME, SPECIAL.EDUCATION, ALL.ENROLLMENT) |>
  mutate(across(where(is.numeric), \(x) ifelse(x == -999, 2.5, x)))

# Build one row per county with special-education and total enrollment counts.
cleaned_data <- clean_start |>
  mutate(
    # "ANDERSON COUNTY" -> "Anderson". Only leading/trailing/repeated
    # whitespace is removed: stripping every space would turn multi-word
    # names such as "Deaf Smith" into "DeafSmith".
    # NOTE(review): assumes `locations$Location` spells multi-word counties
    # with a space -- confirm against the lookup file.
    Location = str_squish(
      gsub(" County", "", str_to_title(COUNTY.NAME), fixed = TRUE)
    ),
    # Restore spellings such as "DeWitt" / "McLennan"; vapply() keeps the
    # result a plain character vector whatever the input length.
    Location = vapply(
      Location, normalize_county_name, character(1),
      USE.NAMES = FALSE
    )
  ) |>
  group_by(Location) |>
  summarise(
    Total_SPED = sum(SPECIAL.EDUCATION),
    Total_Num = sum(ALL.ENROLLMENT)
  ) |>
  # Share of enrolled students in special education, as a proportion (0-1).
  mutate(Pct_SPED = Total_SPED / Total_Num)

# State row: county totals summed, with the rate recomputed from those sums.
texas <- with(
  cleaned_data,
  data.frame(
    Location = "Texas",
    Nums = sum(Total_SPED),
    Pct = sum(Total_SPED) / sum(Total_Num),
    TimeFrame = "2023-2024"
  )
)

# Placeholder row (NA values) for Loving.
loving <- make_data_row("Loving", "2023-2024")

# County rows in the output column layout, followed by Loving and Texas.
joined <- cleaned_data |>
  select(Location, Nums = Total_SPED, Pct = Pct_SPED) |>
  mutate(TimeFrame = "2023-2024") |>
  bind_rows(loving, texas)

# Long format: one set of rows for counts ...
nums <- joined |>
  select(Location, Data = Nums, TimeFrame) |>
  mutate(DataFormat = "Number")

# ... and one set of rows for rates.
pcts <- joined |>
  select(Location, Data = Pct, TimeFrame) |>
  mutate(DataFormat = "Percent")

# Stack both formats, tag the location type, put the state row first, and
# attach the columns from the county lookup table.
final_data <- bind_rows(nums, pcts) |>
  mutate(LocationType = if_else(Location == "Texas", "State", "County")) |>
  arrange(desc(LocationType), Location, DataFormat) |>
  left_join(locations, by = "Location")

write.csv(
  final_data,
  file = "CLEANED_9.2_Special_Education_2023_2024.csv",
  row.names = FALSE
)