5.1_Medicaid_5.2_CHIP

Loading in Data

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.2     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.4     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

The data was gathered from HHSC. The request for this specific pull was: 03-31-2025_HHSC Data Request_Every_Texan.pdf

# Raw 2024 enrollment extract; the County, CHIP and Medicaid columns are the
# ones used further down.
data_2024 <- read.csv(
  file = "Medicaid and CHIP Enrollment Under Age 19 by county_Aug 2024_v(Summary).csv"
)

# Location lookup, trimmed to the id and name columns.
locations <- select(
  read.csv(file = "../location_countyNames copy.csv"),
  LocationId, Location
)

# 2023 counts and child population: used to recalculate the 2023 percents now
# that the population estimates are updated for 2023.
child_pop_2023 <- read.csv(file = "child_population_2023.csv")
chip_data_2023 <- read.csv(file = "CLEANED_5.1_CHIP_Numbers_2023.csv")
medi_data_2023 <- read.csv(file = "CLEANED_5.1_Medicaid_Numbers_2023.csv")

Functions

# Convert enrollment counts into shares of the child population.
#
# Arguments:
#   num_data       Data frame of counts. Must contain the columns Location,
#                  LocationId, Data (the count) and LocationType.
#   child_pop_data Data frame of child population. Must contain Location and
#                  Data (the population for that location).
#   timeFrame      Value written to the TimeFrame column of every output row.
#
# Returns a data frame with the columns Location, LocationId, Data,
# LocationType, TimeFrame and DataFormat. Data holds count / population as a
# proportion (it is not multiplied by 100) and DataFormat is always "Percent".
# Data is NA when a location has no match in child_pop_data or when its
# population is missing or not positive.
generate_percents <- function(num_data, child_pop_data, timeFrame) {
  # Keep only the join key and the denominator, under an unambiguous name so
  # it cannot collide with the Data column of num_data.
  child_pop_mod <- child_pop_data |>
    select(Location, Child_Population = Data)

  num_data |>
    left_join(child_pop_mod, by = "Location") |>
    mutate(
      # Dividing by a zero population would put Inf/NaN into the output;
      # report those rows as missing instead.
      Data = if_else(
        Child_Population > 0,
        Data / Child_Population,
        NA_real_
      ),
      DataFormat = "Percent",
      TimeFrame = timeFrame
    ) |>
    select(Location, LocationId, Data, LocationType, TimeFrame, DataFormat)
}

Updating 2023

# Recompute the 2023 percents against the updated 2023 child population.
# Rows already marked "Percent" in the inputs are excluded first: they are not
# counts, so dividing them by the population would be meaningless, and keeping
# them would carry previously computed percents into the output.
# `%in%` is used instead of `!=` so rows with a missing DataFormat are kept.
chip_percents_2023 <- chip_data_2023 |>
  filter(!DataFormat %in% "Percent") |>
  generate_percents(child_pop_2023, 2023)

medicaid_percents_2023 <- medi_data_2023 |>
  filter(!DataFormat %in% "Percent") |>
  generate_percents(child_pop_2023, 2023)

# Only percent rows are published, so the freshly computed rows are sorted
# directly: state row first, then counties alphabetically.
final_chip_2023 <- chip_percents_2023 |>
  arrange(desc(LocationType), Location)

final_medicaid_2023 <- medicaid_percents_2023 |>
  arrange(desc(LocationType), Location)

# write.csv() does not create missing directories.
if (!dir.exists("cleaned_data")) {
  dir.create("cleaned_data")
}

write.csv(
  final_medicaid_2023,
  file = "cleaned_data/CLEANED_Medicaid_Percents_2023.csv",
  row.names = FALSE
)
write.csv(
  final_chip_2023,
  file = "cleaned_data/CLEANED_CHIP_Percents_2023.csv",
  row.names = FALSE
)

Updating Nums for 2024

CHIP

# 2024 CHIP counts, one row per county plus the statewide row.
chip_data_2024 <- data_2024 |>
  # Pick the two needed columns and give them their output names in one step.
  select(Location = County, Data = CHIP) |>
  mutate(
    # Strip thousands separators before converting the counts to numbers.
    Data = as.numeric(gsub(",", "", Data)),
    # The "Total" row of the extract is reported under the name "Texas".
    Location = replace(Location, Location == "Total", "Texas"),
    TimeFrame = 2024,
    DataFormat = "Number"
  ) |>
  # Attach LocationId from the lookup table.
  left_join(locations, by = "Location") |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  # "State" sorts before "County" in descending order, so Texas comes first.
  arrange(desc(LocationType), Location)

Medicaid

# 2024 Medicaid counts, one row per county plus the statewide row.
medi_data_2024 <- data_2024 |>
  # Pick the two needed columns and give them their output names in one step.
  select(Location = County, Data = Medicaid) |>
  mutate(
    # Strip thousands separators before converting the counts to numbers.
    Data = as.numeric(gsub(",", "", Data)),
    # The "Total" row of the extract is reported under the name "Texas".
    Location = replace(Location, Location == "Total", "Texas"),
    TimeFrame = 2024,
    DataFormat = "Number"
  ) |>
  # Attach LocationId from the lookup table.
  left_join(locations, by = "Location") |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  # "State" sorts before "County" in descending order, so Texas comes first.
  arrange(desc(LocationType), Location)

# Save the cleaned 2024 counts. write.csv() does not create missing
# directories, so make sure the output folder exists first.
if (!dir.exists("cleaned_data")) {
  dir.create("cleaned_data")
}

write.csv(
  medi_data_2024,
  file = "cleaned_data/CLEANED_Medicaid_2024_Numbers.csv",
  row.names = FALSE
)
write.csv(
  chip_data_2024,
  file = "cleaned_data/CLEANED_CHIP_2024_Numbers.csv",
  row.names = FALSE
)