9.6_Economically_Disadvantaged

Loading Data

Data was downloaded from: https://rptsvr1.tea.texas.gov/adhocrpt/adstc.html

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(tidyr)

# County-level economically disadvantaged report for 2023-2024.
econ_dis_23_24 <- read.csv(
  "Economically Disadvantaged Report_Statewide_Counties_2023-2024.csv"
)

# Lookup table; only the county name and its id are kept for the final join.
locations <- select(
  read.csv("../../location_countyNames copy.csv"),
  Location,
  LocationId
)

# Preview the first rows of the raw report.
head(econ_dis_23_24)

        County.Name Eligible.For.Free.Meals.Count
1   ANDERSON COUNTY                          4349
2    ANDREWS COUNTY                          1886
3   ANGELINA COUNTY                          9006
4    ARANSAS COUNTY                           332
5     ARCHER COUNTY                           571
6  ARMSTRONG COUNTY                          -999
  Eligible.For.Free.Meals.Percent Eligible.For.Reduced.Price.Meals.Count
1                           55.34                                    366
2                           44.18                                    214
3                           56.75                                    640
4                           11.19                                      0
5                           26.57                                    138
6                         -999.00                                   -999
  Eligible.For.Reduced.Price.Meals.Percent
1                                     4.66
2                                     5.01
3                                     4.03
4                                     0.00
5                                     6.42
6                                  -999.00
  Other.Economically.Disadvantaged.Count
1                                    724
2                                     12
3                                   1765
4                                   1476
5                                      0
6                                      0
  Other.Economically.Disadvantaged.Percent Not.Economically.Disadvantaged.Count
1                                     9.21                                 2419
2                                     0.28                                 2157
3                                    11.12                                 4460
4                                    49.73                                 1160
5                                     0.00                                 1440
6                                     0.00                                  236
  Not.Economically.Disadvantaged.Percent Total.Count
1                                  30.78        7858
2                                  50.53        4269
3                                  28.10       15871
4                                  39.08        2968
5                                  67.01        2149
6                                  73.98         319

# Title-case county names and restore the internal capital letter that
# title-casing lowers in "DeWitt", "McCulloch", "McLennan" and "McMullen".
#
# name: character vector of county names (a single string also works).
# Returns a character vector the same length as `name`. NA and zero-length
# input are passed through instead of raising an error in `if ()`.
normalize_county_name <- function(name) {
  name <- str_to_title(name)

  # Title-cased spelling -> spelling with the internal capital restored.
  special_cases <- c(
    Dewitt = "DeWitt",
    Mcculloch = "McCulloch",
    Mclennan = "McLennan",
    Mcmullen = "McMullen"
  )

  # match() is vectorised and returns NA for names that need no correction.
  idx <- match(name, names(special_cases))
  hit <- !is.na(idx)
  name[hit] <- unname(special_cases[idx[hit]])
  name
}

# Build a one-row placeholder for a location that has no data in the report.
#
# county_name: value for the Location column.
# time_frame:  value for the TimeFrame column.
# Returns a data.frame with columns Location, Nums, Pct, TimeFrame, where
# Nums and Pct are NA.
make_data_row <- function(county_name, time_frame) {
  data.frame(
    Location = county_name,
    Nums = NA,
    Pct = NA,
    TimeFrame = time_frame
  )
}

Cleaning data

The data contains values of -999.00. Per previous documentation, TEA advised replacing these values with 2.5, so that replacement is part of the cleaning process.

# Keep only the columns needed downstream, then recode the -999 sentinel
# to 2.5 in every remaining column except the county name.
clean_start <- econ_dis_23_24 |>
  select(
    County.Name,
    Not.Economically.Disadvantaged.Count,
    Not.Economically.Disadvantaged.Percent,
    Total.Count
  ) |>
  mutate(across(-County.Name, \(col) ifelse(col == -999.00, 2.5, col)))

# Standardise county names and collapse the report to one row per county.
cleaned_data <- clean_start |>
  mutate(
    # "ANDERSON COUNTY" -> "Anderson"
    County.Name = gsub(" County", "", str_to_title(County.Name)),
    # Trim padding and collapse repeated whitespace only. Removing every
    # space would turn multi-word names such as "El Paso" into "ElPaso"
    # (then "Elpaso" after title-casing), which cannot match the lookup.
    # NOTE(review): confirm multi-word names in `locations` are spelled
    # with a single space.
    County.Name = str_squish(County.Name),
    # vapply() is type-stable and, with USE.NAMES = FALSE, does not attach
    # the input strings as element names the way sapply() does.
    County.Name = vapply(
      County.Name,
      normalize_county_name,
      character(1),
      USE.NAMES = FALSE
    )
  ) |>
  rename(Location = County.Name) |>
  group_by(Location) |>
  summarise(
    Total_Not_ED = sum(Not.Economically.Disadvantaged.Count),
    # Summing a percent is only meaningful when each county appears once.
    # NOTE(review): confirm the report has a single row per county.
    Pct_Not_ED = sum(Not.Economically.Disadvantaged.Percent),
    Total_Num = sum(Total.Count)
  )

# state
# Statewide totals are the sums of the county-level counts.
state_total <- sum(cleaned_data$Total_Num)
state_not_ed <- sum(cleaned_data$Total_Not_ED)
state_num <- state_total - state_not_ed

# Express the state share on the same 0-100 scale as the county rows, which
# are computed as 100 - Not.Economically.Disadvantaged.Percent. A bare
# 1 - ratio would put a 0-1 fraction next to 0-100 county values in the
# same "Percent" column.
# NOTE(review): if the consumer of the CSV expects fractions instead, divide
# the county values by 100 rather than reverting this line.
state_pct <- 100 * (1 - (state_not_ed / state_total))

texas <- data.frame(
  Location = "Texas",
  Nums = state_num,
  Pct = state_pct,
  TimeFrame = "2023-2024"
)

# Placeholder row (NA values) for Loving.
loving <- make_data_row(county_name = "Loving", time_frame = "2023-2024")

# County rows: economically disadvantaged = total minus not disadvantaged.
# The Loving placeholder and the Texas row are appended at the end.
joined <- cleaned_data |>
  mutate(
    Nums = Total_Num - Total_Not_ED,
    Pct = 100 - Pct_Not_ED,
    TimeFrame = "2023-2024"
  ) |>
  select(Location, Nums, Pct, TimeFrame) |>
  rbind(loving, texas)

# Long-format rows holding the counts.
nums <- joined |>
  select(Location, Data = Nums, TimeFrame) |>
  mutate(DataFormat = "Number")

# Long-format rows holding the percents.
pcts <- joined |>
  select(Location, Data = Pct, TimeFrame) |>
  mutate(DataFormat = "Percent")

# Stack the number and percent rows, tag each row as State or County, put
# the state rows first, and attach LocationId from the lookup table.
final_data <- rbind(nums, pcts) |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  arrange(desc(LocationType), Location, DataFormat) |>
  left_join(locations, by = "Location")

# left_join() leaves LocationId as NA when a name is absent from the lookup.
# Surface those names instead of silently writing rows without an id.
unmatched <- unique(final_data$Location[is.na(final_data$LocationId)])
if (length(unmatched) > 0) {
  warning(
    "No LocationId found for: ", paste(unmatched, collapse = ", "),
    call. = FALSE
  )
}

write.csv(
  final_data,
  file = "CLEANED_9.6_Economically_Disadvantaged_2023_2024.csv",
  row.names = FALSE
)