Loading in Data
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tidyr)
The data were downloaded from https://rptsvr1.tea.texas.gov/adhocrpt/adste.html (the Student Program and Special Populations Reports).
# County lookup table; joined onto the final output by `Location`.
locations <- read.csv(file = "../../location_countyNames copy.csv")

# Student program / special population counts, one row per county.
student_programs <- read.csv(file = "../StudPgmStateCounty24state.csv")
SCOPE AGGREGATION.LEVEL YEAR COUNTY.NAME REGION ALL.ENROLLMENT
1 STATE COUNTYREGION 2023-2024 ANDERSON COUNTY 7 7858
2 STATE COUNTYREGION 2023-2024 ANDREWS COUNTY 18 4269
3 STATE COUNTYREGION 2023-2024 ANGELINA COUNTY 7 15871
4 STATE COUNTYREGION 2023-2024 ARANSAS COUNTY 2 2968
5 STATE COUNTYREGION 2023-2024 ARCHER COUNTY 9 2149
6 STATE COUNTYREGION 2023-2024 ARMSTRONG COUNTY 16 319
AT.RISK BILINGUAL DYSLEXIC ECONOMICALLY.DISADVANTAGED
1 3440 79 412 5439
2 2117 484 271 2112
3 8503 445 1261 11411
4 1910 0 264 1808
5 583 0 165 709
6 89 0 17 83
EMERGENT.BILINGUAL.ENGLISH.LEARNER ESL FOSTER.CARE GIFTED...TALENTED
1 776 256 35 337
2 764 308 -999 293
3 2205 908 75 1377
4 180 179 25 141
5 52 57 -999 157
6 -999 -999 0 23
HOMELESS MILITARY.CONNECTED SECTION.504 SPECIAL.EDUCATION TITLE.I
1 85 285 593 1224 7454
2 64 119 395 594 4222
3 266 329 1555 2416 12286
4 229 21 290 449 2968
5 23 102 236 286 1057
6 -999 24 17 40 319
# Title-case county names and restore the capitalisation that title-casing
# flattens for the "De"/"Mc" counties (e.g. "Mcculloch" -> "McCulloch").
#
# name: character vector of county names; a single name works as before.
# Returns a character vector the same length as `name`. Names without a
# listed correction are returned title-cased; NA stays NA.
normalize_county_name <- function(name) {
  name <- str_to_title(name)
  corrections <- c(
    Dewitt = "DeWitt",
    Mcculloch = "McCulloch",
    Mclennan = "McLennan",
    Mcmullen = "McMullen"
  )
  # Vectorised lookup: the previous `if (name == ...)` chain accepted only a
  # single non-missing string and errored on NA or on longer vectors.
  needs_fix <- name %in% names(corrections)
  name[needs_fix] <- unname(corrections[name[needs_fix]])
  name
}
# Build a placeholder row for a location that has no reported values.
#
# county_name: value for the `Location` column.
# time_frame:  value for the `TimeFrame` column.
# Returns a data.frame with columns Location, Nums, Pct, TimeFrame, where
# Nums and Pct are NA.
make_data_row <- function(county_name, time_frame) {
  data.frame(
    Location = county_name,
    Nums = NA,
    Pct = NA,
    TimeFrame = time_frame
  )
}
Cleaning data
The data contain values of -999.00. Per previous documentation, TEA advises replacing these with 2.5, so that replacement is part of the cleaning process.
# Keep only the columns needed for the special-education indicator, then
# recode the -999 placeholder to 2.5 in the two count columns.
clean_start <- student_programs |>
  select(COUNTY.NAME, SPECIAL.EDUCATION, ALL.ENROLLMENT) |>
  mutate(
    across(
      c(SPECIAL.EDUCATION, ALL.ENROLLMENT),
      \(count) ifelse(count == -999.00, 2.5, count)
    )
  )
# One row per county: special-education count, total enrollment, and the
# special-education share of enrollment.
cleaned_data <- clean_start |>
  mutate(
    # "ANDERSON COUNTY" -> "Anderson".
    COUNTY.NAME = gsub(" County", "", str_to_title(COUNTY.NAME)),
    # Only trim the ends and collapse repeated whitespace. Removing every
    # space (as before) fused multi-word names such as "El Paso" into
    # "ElPaso".
    COUNTY.NAME = gsub("\\s+", " ", trimws(COUNTY.NAME)),
    # vapply() guarantees an unnamed character vector; sapply() attached the
    # input values as names on the column.
    COUNTY.NAME = vapply(
      COUNTY.NAME,
      normalize_county_name,
      character(1),
      USE.NAMES = FALSE
    )
  ) |>
  rename(Location = COUNTY.NAME) |>
  group_by(Location) |>
  # summarise() keeps only the grouping column and the totals below, so no
  # extra columns need to be added or selected around it.
  summarise(
    Total_SPED = sum(SPECIAL.EDUCATION),
    Total_Num = sum(ALL.ENROLLMENT)
  ) |>
  mutate(Pct_SPED = Total_SPED / Total_Num)
# State row: sum the county totals and take the share of the summed
# enrollment.
texas <- with(
  cleaned_data,
  data.frame(
    Location = "Texas",
    Nums = sum(Total_SPED),
    Pct = sum(Total_SPED) / sum(Total_Num),
    TimeFrame = "2023-2024"
  )
)
# Placeholder row (NA values) for Loving.
loving <- make_data_row("Loving", "2023-2024")

# County rows in output shape, followed by the Loving placeholder and the
# state row.
joined <- cleaned_data |>
  mutate(
    Nums = Total_SPED,
    Pct = Pct_SPED
  ) |>
  select(Location, Nums, Pct) |>
  mutate(TimeFrame = "2023-2024") |>
  rbind(loving, texas)
# Long-format halves of the output: one table of counts, one of shares, each
# tagged with its DataFormat.
nums <- joined |>
  select(Location, Data = Nums, TimeFrame) |>
  mutate(DataFormat = "Number")

pcts <- joined |>
  select(Location, Data = Pct, TimeFrame) |>
  mutate(DataFormat = "Percent")
# Stack counts and shares, label each row as state or county, sort with the
# state row first, and attach the columns from the location lookup table.
final_data <- rbind(nums, pcts) |>
  mutate(
    LocationType = ifelse(Location != "Texas", "County", "State")
  ) |>
  arrange(desc(LocationType), Location, DataFormat) |>
  left_join(locations, by = join_by(Location))

# Write the result to disk without row names.
write.csv(
  final_data,
  file = "CLEANED_9.2_Special_Education_2023_2024.csv",
  row.names = FALSE
)