Loading in Data
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.2 ✔ stringr 1.5.1
✔ lubridate 1.9.4 ✔ tibble 3.2.1
✔ purrr 1.0.4 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Data was gathered from HHSC. The request for this specific pull was: 03-31-2025_HHSC Data Request_Every_Texan.pdf
# 2024 HHSC enrollment extract; the County, CHIP, and Medicaid columns are
# the ones used further down.
data_2024 <- read.csv(
  file = "Medicaid and CHIP Enrollment Under Age 19 by county_Aug 2024_v(Summary).csv"
)

# Location lookup table, reduced to the id and name columns.
locations <- select(
  read.csv(file = "../location_countyNames copy.csv"),
  LocationId, Location
)

# 2023 inputs: counts are re-read so the 2023 percentages can be recalculated
# now that the 2023 population estimates have been updated.
chip_data_2023 <- read.csv(file = "CLEANED_5.1_CHIP_Numbers_2023.csv")
medi_data_2023 <- read.csv(file = "CLEANED_5.1_Medicaid_Numbers_2023.csv")
child_pop_2023 <- read.csv(file = "child_population_2023.csv")
Functions
#' Convert enrollment counts into shares of the child population
#'
#' Joins `num_data` to `child_pop_data` on `Location` and divides each count
#' by the matching population figure. The result is a plain ratio (it is not
#' multiplied by 100) and is labelled with `DataFormat = "Percent"`.
#'
#' A warning is raised, without changing the returned values, when a location
#' has a missing or zero population (the ratio is then `NA`/`Inf`/`NaN`) or
#' when `child_pop_data` repeats a `Location` (the join then repeats rows).
#'
#' @param num_data Data frame of counts with columns `Location`,
#'   `LocationId`, `LocationType`, and `Data` (the count).
#' @param child_pop_data Data frame with columns `Location` and `Data`
#'   (the population used as the denominator).
#' @param timeFrame Value written to the `TimeFrame` column of the result.
#' @return Data frame with columns `Location`, `LocationId`, `Data`,
#'   `LocationType`, `TimeFrame`, `DataFormat`, one row per joined row.
generate_percents <- function(num_data, child_pop_data, timeFrame) {
  population <- child_pop_data |>
    select(Location, Child_Population = Data)

  if (anyDuplicated(population$Location) > 0) {
    warning(
      "child_pop_data has repeated Location values; ",
      "the join will repeat the matching rows of num_data.",
      call. = FALSE
    )
  }

  joined <- left_join(num_data, population, by = "Location")

  # Flag denominators that cannot produce a meaningful ratio instead of
  # letting NA/Inf flow silently into the output file.
  unusable <- is.na(joined$Child_Population) | joined$Child_Population == 0
  if (any(unusable)) {
    warning(
      "No usable child population (missing or zero) for: ",
      paste(unique(joined$Location[unusable]), collapse = ", "),
      call. = FALSE
    )
  }

  joined |>
    mutate(
      Data = Data / Child_Population,
      DataFormat = "Percent",
      TimeFrame = timeFrame
    ) |>
    select(Location, LocationId, Data, LocationType, TimeFrame, DataFormat)
}
Updating 2023
# Recompute the 2023 CHIP and Medicaid rates against the 2023 child
# population table. Any input row already labelled "Percent" is excluded
# first so an existing rate is never divided by the population a second time.
chip_percents_2023 <- chip_data_2023 |>
  filter(!DataFormat %in% "Percent") |>
  generate_percents(child_pop_2023, 2023)
medicaid_percents_2023 <- medi_data_2023 |>
  filter(!DataFormat %in% "Percent") |>
  generate_percents(child_pop_2023, 2023)

# generate_percents() labels every row it returns as "Percent", so the
# results are sorted directly rather than being stacked on the count rows and
# filtered back out. desc(LocationType) puts the "State" row ahead of the
# "County" rows.
final_chip_2023 <- chip_percents_2023 |>
  arrange(desc(LocationType), Location)
final_medicaid_2023 <- medicaid_percents_2023 |>
  arrange(desc(LocationType), Location)

write.csv(
  final_medicaid_2023,
  file = "cleaned_data/CLEANED_Medicaid_Percents_2023.csv",
  row.names = FALSE
)
write.csv(
  final_chip_2023,
  file = "cleaned_data/CLEANED_CHIP_Percents_2023.csv",
  row.names = FALSE
)
Updating Numbers for 2024
CHIP
# Build the 2024 CHIP count table from the HHSC extract.
chip_data_2024 <- data_2024 |>
  select(Location = County, Data = CHIP) |>
  mutate(
    # Counts may carry thousands separators; strip them before converting.
    Data = as.numeric(gsub(",", "", Data)),
    # The extract's "Total" row is relabelled as the statewide row.
    Location = ifelse(Location == "Total", "Texas", Location),
    TimeFrame = 2024,
    DataFormat = "Number"
  ) |>
  # Attach LocationId from the lookup table by location name.
  left_join(locations, by = "Location") |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  # "State" sorts ahead of "County" under desc(); then alphabetical by name.
  arrange(desc(LocationType), Location)
Medicaid
# Build the 2024 Medicaid count table from the HHSC extract.
medi_data_2024 <- data_2024 |>
  select(Location = County, Data = Medicaid) |>
  mutate(
    # Counts may carry thousands separators; strip them before converting.
    Data = as.numeric(gsub(",", "", Data)),
    # The extract's "Total" row is relabelled as the statewide row.
    Location = ifelse(Location == "Total", "Texas", Location),
    TimeFrame = 2024,
    DataFormat = "Number"
  ) |>
  # Attach LocationId from the lookup table by location name.
  left_join(locations, by = "Location") |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  # "State" sorts ahead of "County" under desc(); then alphabetical by name.
  arrange(desc(LocationType), Location)
# Save the 2024 count tables; row names are omitted from the CSVs.
write.csv(
  medi_data_2024,
  file = "cleaned_data/CLEANED_Medicaid_2024_Numbers.csv",
  row.names = FALSE
)
write.csv(
  chip_data_2024,
  file = "cleaned_data/CLEANED_CHIP_2024_Numbers.csv",
  row.names = FALSE
)