4.2_WICS

4.2_WICS

library(ipumsr)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(survey)
Loading required package: grid
Loading required package: Matrix
Loading required package: survival

Attaching package: 'survey'
The following object is masked from 'package:graphics':

    dotchart
library(srvyr)  

Attaching package: 'srvyr'
The following object is masked from 'package:stats':

    filter
library(readr)
library(tidyr)

Attaching package: 'tidyr'
The following objects are masked from 'package:Matrix':

    expand, pack, unpack
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ srvyr::filter() masks dplyr::filter(), stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Background: Data were obtained through a data request to HHSC for federal fiscal year (FFY) 2024 (October 2023–September 2024).

Broken out by:

  • Women

  • Infants (under age 1)

  • Children (ages 1-4)

  • Statewide and by county

Loading Data

Rows: 255 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Location, DataFormat
dbl (1): LocationId
lgl (2): TimeFrame, Data

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Functions

#' Normalize a county name to its conventional capitalization.
#'
#' Title-cases the input with `str_to_title()` and then restores the internal
#' capital letter that title-casing removes from DeWitt, McCulloch, McLennan
#' and McMullen.
#'
#' Vectorized: works on a single name or a whole column. `NA` and zero-length
#' inputs are passed through instead of raising an error.
#'
#' @param name Character vector of county names, in any letter case.
#' @return Character vector of the same length as `name`.
normalize_county_name <- function(name) {
  name <- str_to_title(name)

  # Title-cased spelling (names) -> preferred spelling (values).
  special_cases <- c(
    Dewitt = "DeWitt",
    Mcculloch = "McCulloch",
    Mclennan = "McLennan",
    Mcmullen = "McMullen"
  )

  # match() returns NA for names that are not special cases (and for NA
  # input), so only the matched positions are overwritten.
  hit <- match(name, names(special_cases))
  found <- !is.na(hit)
  name[found] <- unname(special_cases[hit[found]])
  name
}

Cleaning Data

# Stack the twelve monthly tables of the fiscal year (October through
# September) into one long table. If all 254 counties are present in every
# month, there should be 254 * 12 = 3048 county rows.
fiscal_year <- rbind(
  october, november, december,
  january, february, march,
  april, may, june,
  july, august, september
) |>
  # Keep only rows that have a women's enrollment value.
  filter(!is.na(Women_Enr)) |>
  rename(Location = "County Name") |>
  # Title-case the county names.
  mutate(Location = str_to_title(Location))

Women

# Keep the women's columns and convert the participation counts to numeric.
# Counts arrive as text: "<10" (presumably a suppressed small count) is
# recorded as 1, "-" is recorded as 0, and other values may contain
# thousands separators.
women <- fiscal_year |>
  select(Location, Women_Enr, Women_Part, Total_Part) |>
  mutate(
    # case_when() evaluates every right-hand side for every row, so calling
    # as.numeric() inside it always warns about "<10" and "-". Recode while
    # the column is still character and convert once at the end; a coercion
    # warning now means a value is genuinely unparseable.
    Women_Part = as.numeric(case_when(
      Women_Part == "<10" ~ "1",
      Women_Part == "-" ~ "0",
      TRUE ~ gsub(",", "", Women_Part)
    ))
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Women_Part = case_when(...)`.
Caused by warning:
! NAs introduced by coercion
# Average monthly participation per county for the fiscal year, rounded down.
# NOTE(review): the total is always divided by 12, even if a county has fewer
# than 12 monthly rows - confirm that is intended.
women_by_county <- women |>
  group_by(Location) |>
  # A county with any NA month gets NA here (no na.rm), as before.
  summarize(avgPerFFY = floor(sum(Women_Part) / 12)) |>
  mutate(
    # vapply() guarantees a plain character vector; sapply() would attach the
    # input values as names to the column.
    Location = vapply(
      Location, normalize_county_name, character(1),
      USE.NAMES = FALSE
    )
  )

# Statewide row: the sum of the county averages, skipping counties whose
# average is NA.
texas_women <- women_by_county |>
  filter(!is.na(avgPerFFY)) |>
  summarize(Location = "Texas", avgPerFFY = sum(avgPerFFY))

# State row first, then counties; add the reporting columns and attach the
# location lookup by name.
final_women <- rbind(texas_women, women_by_county) |>
  rename(Data = avgPerFFY) |>
  mutate(
    TimeFrame = 2024,
    DataFormat = "Number",
    LocationType = ifelse(Location == "Texas", "State", "County")
  ) |>
  left_join(locations, by = "Location") |>
  arrange(desc(LocationType), Location, DataFormat)

write.csv(
  final_women,
  file = "4.2c_WomenWIC/CLEANED_4.2c_WomenWIC_2024.csv",
  row.names = FALSE
)

Children

# Keep the children's columns and convert the participation counts to
# numeric. Counts arrive as text: "<10" (presumably a suppressed small count)
# is recorded as 1, "-" is recorded as 0, and other values may contain
# thousands separators.
children <- fiscal_year |>
  select(Location, Children_Enr, Children_Part, Total_Part) |>
  mutate(
    # case_when() evaluates every right-hand side for every row, so calling
    # as.numeric() inside it always warns about "<10" and "-". Recode while
    # the column is still character and convert once at the end; a coercion
    # warning now means a value is genuinely unparseable.
    Children_Part = as.numeric(case_when(
      Children_Part == "<10" ~ "1",
      Children_Part == "-" ~ "0",
      TRUE ~ gsub(",", "", Children_Part)
    ))
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Children_Part = case_when(...)`.
Caused by warning:
! NAs introduced by coercion
# Average monthly participation per county for the fiscal year, rounded down.
# NOTE(review): the total is always divided by 12, even if a county has fewer
# than 12 monthly rows - confirm that is intended.
children_by_county <- children |>
  group_by(Location) |>
  # A county with any NA month gets NA here (no na.rm), as before.
  summarize(avgPerFFY = floor(sum(Children_Part) / 12)) |>
  mutate(
    # vapply() guarantees a plain character vector; sapply() would attach the
    # input values as names to the column.
    Location = vapply(
      Location, normalize_county_name, character(1),
      USE.NAMES = FALSE
    )
  )

# Statewide row: the sum of the county averages, skipping counties whose
# average is NA.
texas_children <- children_by_county |>
  filter(!is.na(avgPerFFY)) |>
  summarize(avgPerFFY = sum(avgPerFFY)) |>
  mutate(
    LocationId = 45, # presumably the LocationId of Texas - verify in `locations`
    Location = "Texas",
    # The data cover FFY 2024 and are written to a *_2024.csv file, so the
    # rows are labelled 2024 (this was 2023).
    TimeFrame = 2024,
    DataFormat = "Number"
  ) |>
  rename(Data = avgPerFFY) |>
  select(LocationId, Location, TimeFrame, DataFormat, Data)

# County rows: start from the location lookup so that every listed location
# gets a row, even when it has no participation data (Data is then NA).
final_county_data_child <- locations |>
  left_join(children_by_county, by = "Location") |>
  mutate(
    DataFormat = "Number",
    TimeFrame = 2024
  ) |>
  rename(Data = avgPerFFY) |>
  select(LocationId, Location, TimeFrame, DataFormat, Data) |>
  # Drop the lookup's own "Texas" row; the state total is added separately.
  filter(Location != "Texas")

final_children <- rbind(texas_children, final_county_data_child)

# Check for missing values: list the locations that ended up with no Data
# after the join onto the location lookup.
final_children |>
  filter(is.na(Data))
# A tibble: 3 × 5
  LocationId Location TimeFrame DataFormat  Data
       <dbl> <chr>        <dbl> <chr>      <dbl>
1       6531 Borden        2023 Number        NA
2       6649 King          2023 Number        NA
3       6665 Loving        2023 Number        NA
# Final dataset: tag every row with the age group, then export to CSV.
final_children <- mutate(final_children, AgeGroup = "1-4")
write.csv(
  final_children,
  file = "4.2b_ChildrenWIC/CLEANED_4.2b_ChildrenWIC_2024.csv",
  row.names = FALSE
)

Infants

# Keep the infants' columns and convert the participation counts to numeric.
# Counts arrive as text: "<10" (presumably a suppressed small count) is
# recorded as 1, "-" is recorded as 0, and other values may contain
# thousands separators.
infants <- fiscal_year |>
  select(Location, Infants_Enr, Infants_Part, Total_Part) |>
  mutate(
    # case_when() evaluates every right-hand side for every row, so calling
    # as.numeric() inside it always warns about "<10" and "-". Recode while
    # the column is still character and convert once at the end; a coercion
    # warning now means a value is genuinely unparseable.
    Infants_Part = as.numeric(case_when(
      Infants_Part == "<10" ~ "1",
      Infants_Part == "-" ~ "0",
      TRUE ~ gsub(",", "", Infants_Part)
    ))
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Infants_Part = case_when(...)`.
Caused by warning:
! NAs introduced by coercion
# Average monthly participation per county for the fiscal year, rounded down.
# NOTE(review): the total is always divided by 12, even if a county has fewer
# than 12 monthly rows - confirm that is intended.
infants_by_county <- infants |>
  group_by(Location) |>
  # A county with any NA month gets NA here (no na.rm), as before.
  summarize(avgPerFFY = floor(sum(Infants_Part) / 12)) |>
  mutate(
    # vapply() guarantees a plain character vector; sapply() would attach the
    # input values as names to the column.
    Location = vapply(
      Location, normalize_county_name, character(1),
      USE.NAMES = FALSE
    )
  )

# Statewide row: the sum of the county averages, skipping counties whose
# average is NA.
texas_infants <- infants_by_county |>
  filter(!is.na(avgPerFFY)) |>
  summarize(avgPerFFY = sum(avgPerFFY)) |>
  mutate(
    LocationId = 45, # presumably the LocationId of Texas - verify in `locations`
    Location = "Texas",
    # The data cover FFY 2024 and are written to a *_2024.csv file, so the
    # rows are labelled 2024 (this was 2023).
    TimeFrame = 2024,
    DataFormat = "Number"
  ) |>
  rename(Data = avgPerFFY) |>
  select(LocationId, Location, TimeFrame, DataFormat, Data)

# County rows: start from the location lookup so that every listed location
# gets a row, even when it has no participation data (Data is then NA).
final_county_data_infants <- locations |>
  left_join(infants_by_county, by = "Location") |>
  mutate(
    DataFormat = "Number",
    TimeFrame = 2024
  ) |>
  rename(Data = avgPerFFY) |>
  select(LocationId, Location, TimeFrame, DataFormat, Data) |>
  # Drop the lookup's own "Texas" row; the state total is added separately.
  filter(Location != "Texas")

final_infants <- rbind(texas_infants, final_county_data_infants)

# Check for missing values: list the locations that ended up with no Data
# after the join onto the location lookup.
final_infants |>
  filter(is.na(Data))
# A tibble: 3 × 5
  LocationId Location TimeFrame DataFormat  Data
       <dbl> <chr>        <dbl> <chr>      <dbl>
1       6531 Borden        2023 Number        NA
2       6649 King          2023 Number        NA
3       6665 Loving        2023 Number        NA
# Final dataset: tag every row with the age group, then export to CSV.
final_infants <- mutate(final_infants, AgeGroup = "Under 1")
write.csv(
  final_infants,
  file = "4.2a_InfantsWIC/CLEANED_4.2a_InfantsWIC_2024.csv",
  row.names = FALSE
)