4.1_SNAP

Loading in Data

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(tidyr)

Data was obtained through an OR request to HHSC that was submitted on 3/31/2025 and fulfilled in 5/2025.

# Location lookup: keep only the name and its LocationId.
locations <- select(
  read.csv("../../location_countyNames copy.csv"),
  Location, LocationId
)

# Raw counts of children on SNAP, one file per year.
snap_children_23 <- read.csv("raw_data/children_snap_counts_23.csv")
snap_children_24 <- read.csv("raw_data/children_snap_counts_24.csv")

# Raw SNAP totals tables, one file per year.
snap_totals_23 <- read.csv("raw_data/snap_pct_23.csv")
snap_totals_24 <- read.csv("raw_data/snap_pct_24.csv")

# Population tables used as denominators further down; the generic `Data`
# column is renamed so the two populations cannot be confused after a join.
child_pop_23 <- read.csv("raw_data/child_pop_23.csv") |>
  select(Location, child_pop = Data)

total_pop_23 <- read.csv("raw_data/total_pop_23.csv") |>
  select(Location, total_pop = Data)

head(snap_children_23)

        County  X0  X1  X2  X3  X4  X5  X6  X7  X8  X9 X10 X11 X12 X13 X14 X15
1 Call Centers  42  64  59  78  97 100  98 103 114 105 106 117 100 104  96 106
2     Anderson 149 189 182 189 191 203 197 195 175 182 175 159 152 161 174 153
3      Andrews  41  62  50  50  46  42  49  52  41  56  50  43  44  43  41  45
4     Angelina 308 377 399 410 392 395 399 404 376 399 405 384 370 371 329 359
5      Aransas  56  73  71  70  82  69  74  73  75  76  66  78  78  73  61  77
6       Archer   9   8   9  11  10   8   7  11  12  16  17  14   9  12  15  16
  X16 X17 Total.Children.Per.County
1  80  75                     1,643
2 139 128                     3,095
3  44  47                       843
4 317 281                     6,676
5  60  54                     1,266
6  13  12                       207

head(snap_totals_23)

          X Oct.22 Nov.22 Dec.22 Jan.23 Feb.23 Mar.23 Apr.23 May.23 Jun.23
1  Anderson  6,942  7,059  7,093  7,123  6,911  6,844  6,808  6,857  6,686
2   Andrews  1,797  1,853  1,871  1,852  1,761  1,740  1,727  1,641  1,596
3  Angelina 14,587 14,699 14,582 14,556 14,149 13,718 13,714 13,466 13,452
4   Aransas  3,413  3,448  3,488  3,464  3,431  3,371  3,341  3,290  3,122
5    Archer    576    596    594    597    612    592    549    512    475
6 Armstrong    120    122    120    105    100    109    109    100    101
  Jul.23 Aug.23 Sep.23 Average...of.Individuals X..Children.under.18
1  6,760  6,855  6,763                    6,892               44.90%
2  1,603  1,567  1,507                    1,710               49.32%
3 13,482 13,526 13,457                   13,949               47.86%
4  3,057  2,992  2,964                    3,282               38.56%
5    468    477    462                      543               38.22%
6    101    113    122                      110               44.70%

Helper functions for cleaning the data, defined once so the same steps can be reused across years and reproduced easily.

# Normalize county names to title case, then restore the internal capital
# letter that title-casing flattens in "DeWitt" and the "Mc" counties.
#
# Vectorised: `name` may be a single string or a whole character vector
# (e.g. a data-frame column inside mutate()). NA and empty input pass
# through instead of raising an error.
#
# name: character vector of county names in any letter case.
# Returns a character vector of the same length as `name`.
normalize_county_name <- function(name) {
  name <- str_to_title(name)

  # Title-cased spelling -> preferred spelling.
  special_cases <- c(
    Dewitt = "DeWitt",
    Mcculloch = "McCulloch",
    Mclennan = "McLennan",
    Mcmullen = "McMullen"
  )

  # %in% never yields NA, so missing names are simply left untouched.
  needs_fix <- name %in% names(special_cases)
  name[needs_fix] <- unname(special_cases[name[needs_fix]])
  name
}

# Build a placeholder row for a location that has no data.
#
# county_name: value for the Location column.
# time_frame:  value for the TimeFrame column.
# Returns a data frame with columns Location, Nums, Pct and TimeFrame, where
# Nums and Pct are NA.
make_data_row <- function(county_name, time_frame) {
  data.frame(
    Location = county_name,
    Nums = NA,
    Pct = NA,
    TimeFrame = time_frame
  )
}

# Turn a raw children-on-SNAP table into a tidy table of totals per location.
#
# data:       data frame with `County` and `Total.Children.Per.County`
#             columns; the totals may be comma-formatted strings.
# time_frame: value written to the TimeFrame column of every row.
#
# Returns a data frame with Location, Nums (numeric), TimeFrame and
# LocationType, plus the columns joined in from `locations` by Location.
# Note: `locations` is read from the calling environment, not passed in.
clean_total_children <- function(data, time_frame) {
  county_totals <- data |>
    select(Location = County, Nums = Total.Children.Per.County) |>
    # The totals row is relabelled so it becomes the statewide record.
    mutate(
      Location = ifelse(
        Location == "Total Children Per Age Group", "Texas", Location
      )
    ) |>
    filter(Location != "State Office") |>
    # Positional trim: drops the first remaining row and anything after
    # row 256.
    # NOTE(review): presumably row 1 is "Call Centers" and rows 2-256 are the
    # counties plus the totals row - confirm against the raw file layout.
    slice(2:256)

  county_totals |>
    mutate(
      Nums = as.numeric(gsub(",", "", Nums, fixed = TRUE)),
      TimeFrame = time_frame,
      LocationType = ifelse(Location == "Texas", "State", "County")
    ) |>
    left_join(locations, by = "Location")
}

# Compute the share of each location's child population counted in `nums`.
#
# nums: data frame with `Location` and `Nums` columns.
# pop:  data frame with `Location` and `child_pop` columns.
#
# Returns a data frame with Location and Pct, where Pct is Nums / child_pop
# (a proportion; it is not multiplied by 100). Locations with no match in
# `pop` get NA.
clean_pct_children <- function(nums, pop) {
  with_population <- left_join(nums, pop, by = "Location")

  with_population |>
    mutate(Pct = Nums / child_pop) |>
    select(Location, Pct)
}

# Reshape a wide table (one row per location, with `Nums` and `Pct` columns)
# into a long one with a single `Data` column and a `DataFormat` label.
#
# data: data frame with `Location`, `Nums` and `Pct` columns; every other
#       column is carried through unchanged.
#
# Returns two rows per input row ("Number" and "Percent"), with LocationType
# set from Location, sorted so the "State" row comes first, then by Location
# and DataFormat.
combine_and_finalize <- function(data) {
  # Counts: Nums becomes Data, Pct is discarded.
  count_rows <- data |>
    rename(Data = Nums) |>
    select(-Pct) |>
    mutate(DataFormat = "Number")

  # Rates: Pct becomes Data, Nums is discarded.
  rate_rows <- data |>
    rename(Data = Pct) |>
    select(-Nums) |>
    mutate(DataFormat = "Percent")

  stacked <- rbind(count_rows, rate_rows)

  stacked |>
    mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
    arrange(desc(LocationType), Location, DataFormat)
}

Cleaning data

Children

2023

# Counts first, then rates against the 2023 child population.
nums_child_23 <- snap_children_23 |>
  clean_total_children(2023)
pct_child_23 <- nums_child_23 |>
  clean_pct_children(child_pop_23)

# Attach Pct to the counts table, then stack into the long output layout.
joined_23 <- nums_child_23 |>
  left_join(pct_child_23, by = "Location")
final_data_23 <- joined_23 |>
  combine_and_finalize()

head(final_data_23)

  Location         Data TimeFrame LocationType LocationId DataFormat
1    Texas 1.739248e+06      2023        State         45     Number
2    Texas 2.316720e-01      2023        State         45    Percent
3 Anderson 3.095000e+03      2023       County       6515     Number
4 Anderson 2.885781e-01      2023       County       6515    Percent
5  Andrews 8.430000e+02      2023       County       6516     Number
6  Andrews 1.468897e-01      2023       County       6516    Percent

2024

Population numbers are not yet out for 2024, so only counts (no percentages) are reported for this year.

# 2024 children counts. There is no 2024 population table, so no Pct can be
# computed and the table is shaped here rather than through
# combine_and_finalize().
nums_child_24 <- clean_total_children(snap_children_24, 2024)
final_data_24 <- nums_child_24 |>
  # Name the value column `Data`, as in the 2023 output, instead of leaving
  # it as `Nums`; otherwise the two exported CSVs have different schemas.
  rename(Data = Nums) |>
  # LocationType is already set by clean_total_children(), so only the
  # format label needs adding.
  mutate(DataFormat = "Number") |>
  arrange(desc(LocationType), Location, DataFormat)

head(final_data_24)

  Location    Nums TimeFrame LocationType LocationId DataFormat
1    Texas 1630354      2024        State         45     Number
2 Anderson    2974      2024       County       6515     Number
3  Andrews     725      2024       County       6516     Number
4 Angelina    6073      2024       County       6517     Number
5  Aransas    1162      2024       County       6518     Number
6   Archer     178      2024       County       6519     Number

# Export the cleaned children tables, one CSV per year, without row names.
write.csv(
  final_data_23,
  file = "4.1_ChildrenSNAP/CLEANED_4.1_ChildrenSNAP_2023.csv",
  row.names = FALSE
)
write.csv(
  final_data_24,
  file = "4.1_ChildrenSNAP/CLEANED_4.1_ChildrenSNAP_2024.csv",
  row.names = FALSE
)

Total SNAP

2023

# 2023 total SNAP: the "Average # of Individuals" column per location, plus
# its share of the 2023 total population.
total_cleaned_23 <- snap_totals_23 |>
  select(Location = X, Nums = Average...of.Individuals) |>
  mutate(Nums = as.numeric(gsub(",", "", Nums))) |>
  # Drop rows that are not counties or the statewide total.
  filter(
    !is.na(Location),
    !Location %in% c(
      "Call Centers", "State Office", "Statewide % Children under 18"
    )
  ) |>
  mutate(Location = ifelse(Location == "Total", "Texas", Location)) |>
  left_join(total_pop_23, by = "Location") |>
  # TimeFrame is added here so this output carries the year, as the
  # children's tables do.
  mutate(Pct = Nums / total_pop, TimeFrame = 2023) |>
  left_join(locations, by = "Location")

# Keep the same columns, in the same order, as the children's 2023 output;
# this also drops the total_pop helper column.
final_total_23 <- combine_and_finalize(total_cleaned_23) |>
  select(Location, Data, TimeFrame, LocationType, LocationId, DataFormat)

head(final_total_23)

  Location         Data LocationId DataFormat LocationType
1    Texas 3.474927e+06         45     Number        State
2    Texas 1.138715e-01         45    Percent        State
3 Anderson 6.892000e+03       6515     Number       County
4 Anderson 1.198588e-01       6515    Percent       County
5  Andrews 1.710000e+03       6516     Number       County
6  Andrews 9.163005e-02       6516    Percent       County

2024

# 2024 total SNAP: counts only (no population denominator is joined here).
final_total_24 <- snap_totals_24 |>
  select(Location = X, Data = Average...of.Individuals) |>
  mutate(Data = as.numeric(gsub(",", "", Data))) |>
  # Drop rows that are not counties or the statewide total.
  filter(
    !is.na(Location),
    !Location %in% c(
      "Call Centers", "State Office", "Statewide % Children under 18"
    )
  ) |>
  mutate(Location = ifelse(Location == "Total", "Texas", Location)) |>
  left_join(locations, by = "Location") |>
  # TimeFrame is added so this output carries the year, as the children's
  # tables do.
  mutate(
    TimeFrame = 2024,
    LocationType = ifelse(Location == "Texas", "State", "County"),
    DataFormat = "Number"
  ) |>
  arrange(desc(LocationType), Location, DataFormat) |>
  # Same columns, in the same order, as the children's outputs.
  select(Location, Data, TimeFrame, LocationType, LocationId, DataFormat)

head(final_total_24)

  Location    Data LocationId LocationType DataFormat
1    Texas 3286986         45        State     Number
2 Anderson    6732       6515       County     Number
3  Andrews    1477       6516       County     Number
4 Angelina   12747       6517       County     Number
5  Aransas    2976       6518       County     Number
6   Archer     497       6519       County     Number

2024 population data is not out yet, so only numbers are available for 2024.

# Export the cleaned total-SNAP tables, one CSV per year, without row names.
write.csv(
  final_total_23,
  file = "4.1a_TotalSNAP/CLEANED_4.1a_TotalSNAP_2023.csv",
  row.names = FALSE
)
write.csv(
  final_total_24,
  file = "4.1a_TotalSNAP/CLEANED_4.1a_TotalSNAP_2024.csv",
  row.names = FALSE
)