Loading in Data
library (ipumsr)
library (dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library (ggplot2)
library (survey)
Loading required package: grid
Loading required package: Matrix
Loading required package: survival
Attaching package: 'survey'
The following object is masked from 'package:graphics':
dotchart
Attaching package: 'srvyr'
The following object is masked from 'package:stats':
filter
The data were obtained through a data request to HHSC covering individuals under 18 receiving TANF. The request was made in March 2025.
# Raw extracts from the data request, one CSV per fiscal year.
data_2023 <- read.csv("raw_data/FY 2023.csv")
data_2024 <- read.csv("raw_data/FY 2024.csv")

# Lookup from location name to LocationId; only those two columns are kept.
locations <- select(
  read.csv("../../location_countyNames copy.csv"),
  Location, LocationId
)

# 2023 child population by location; the `Data` column is carried forward
# under the name `child_pop`.
child_pop_23 <- read.csv("child_population_2023.csv") |>
  select(Location, child_pop = Data)
Cleaning the Data
2023 - Because TDC child population estimates are available for 2023, the 2023 percentages could be calculated from those estimates together with the counts from the data request.
# Build the 2023 count table: one row per location holding the number of
# children from the data request (`Data`), the location's id, and the 2023
# child population (`child_pop`) that the percent calculation divides by.
cleaned_2023 <- data_2023 |>
  select(County, Total.Children.Per.County) |>
  # Drop the two non-county rows.
  filter(County != "Call Centers" & County != "State Office") |>
  # Keep only the first 255 remaining rows.
  # NOTE(review): presumably 254 counties plus the statewide total row, with
  # anything after that being footer/notes rows -- confirm against the raw file.
  slice_head(n = 255) |>
  mutate(
    # The totals row is relabelled so it joins and reports as the state.
    County = ifelse(County == "Total Children Per Age Group", "Texas", County),
    # Strip thousands separators before converting the counts to numeric.
    Total.Children.Per.County = as.numeric(
      gsub(",", "", Total.Children.Per.County)
    )
  ) |>
  rename(
    Data = Total.Children.Per.County,
    Location = County
  ) |>
  left_join(locations, by = "Location") |>
  # Join key stated explicitly (it was previously inferred from the shared
  # column names), so the join cannot silently pick up extra keys if either
  # table gains a column and no "Joining with ..." message is emitted.
  left_join(child_pop_23, by = "Location") |>
  mutate(DataFormat = "Number") |>
  select(LocationId, Location, DataFormat, Data, child_pop)
Joining with `by = join_by(Location)`
# Percent rows for 2023: same locations as cleaned_2023, but `Data` holds the
# count divided by the child population (a proportion, not multiplied by 100)
# and `DataFormat` is relabelled accordingly.
percents_2023 <- cleaned_2023 |>
  mutate(
    DataFormat = "Percent",
    Data = Data / child_pop
  ) |>
  select(LocationId, Location, DataFormat, Data, child_pop)
# Final 2023 table: count rows stacked on top of percent rows, without the
# population denominator, tagged with the year and the location type.
final_tanf_2023 <- cleaned_2023 |>
  rbind(percents_2023) |>
  select(!child_pop) |>
  mutate(TimeFrame = 2023) |>
  mutate(LocationType = if_else(Location == "Texas", "State", "County")) |>
  # Descending LocationType puts the "State" row(s) ahead of the counties;
  # within each location "Number" sorts before "Percent".
  arrange(desc(LocationType), Location, DataFormat)
2024 - Because TDC population estimates were not yet available at the time of cleaning, only counts (no percentages) could be produced for 2024.
# Build the 2024 count table. No population denominator is joined here, so
# only "Number" rows are produced.
cleaned_2024 <- data_2024 |>
  # Select and rename in one step; the later steps use the new names.
  select(Location = County, Data = Total.Children.Per.County) |>
  # Drop the two non-county rows.
  filter(Location != "Call Centers", Location != "State Office") |>
  # Keep only the first 255 remaining rows.
  # NOTE(review): presumably 254 counties plus the statewide total row --
  # confirm against the raw file.
  slice_head(n = 255) |>
  mutate(
    # The totals row is relabelled as the state.
    Location = ifelse(
      Location == "Total Children Per Age Group", "Texas", Location
    ),
    # Strip thousands separators before converting the counts to numeric.
    Data = as.numeric(gsub(",", "", Data))
  ) |>
  left_join(locations, by = "Location") |>
  mutate(
    TimeFrame = 2024,
    LocationType = ifelse(Location == "Texas", "State", "County"),
    DataFormat = "Number"
  ) |>
  # Descending LocationType puts the "State" row ahead of the counties.
  arrange(desc(LocationType), Location, DataFormat)
Exports
# Write the finished tables to CSV without the row-name column.
# 2023: counts and percents.
write.csv(
  final_tanf_2023,
  file = "CLEANED_2.5_ChildrenReceivingTANF_2023_NumsandPercents.csv",
  row.names = FALSE
)
# 2024: counts only.
write.csv(
  cleaned_2024,
  file = "CLEANED_2.5_ChildrenReceivingTANF_2024_Nums.csv",
  row.names = FALSE
)