Loading in Data

# Read the 2023 poverty CSV from the current working directory.
children_pov_2023 <- read.csv("SAIPE_CP_2023.csv")

# Read the location lookup file and keep only the name and id columns.
locations <- select(
  read.csv("../../location_countyNames copy.csv"),
  Location,
  LocationId
)

Cleaning Data

#' Reshape a poverty extract into long Number/Percent rows with location ids.
#'
#' Drops the "United States" row, removes a trailing " County" suffix from
#' each name, parses comma-formatted counts, divides the percentage column by
#' 100, attaches `LocationId` from the lookup table, and stacks the count and
#' percent measures into a single `Data` column.
#'
#' @param pov_dataset Data frame with columns `Name`, `Year`,
#'   `Number.in.Poverty` (may contain "," thousands separators) and
#'   `Percent.in.Poverty` (numeric; presumably on a 0-100 scale, confirm
#'   against the input file).
#' @param location_lookup Data frame with columns `Location` and
#'   `LocationId`. Defaults to the `locations` object visible where this
#'   function is defined, which is what the function used implicitly before.
#' @return Data frame with columns `LocationId`, `Location`, `Data`,
#'   `LocationType`, `TimeFrame` and `DataFormat`, ordered with the "State"
#'   row(s) first, then by `Location` and `DataFormat`.
clean_poverty_data <- function(pov_dataset, location_lookup = locations) {
  poverty_cleaned <- pov_dataset |>
    filter(Name != "United States") |>
    select(Location = Name, Year, Number.in.Poverty, Percent.in.Poverty) |>
    mutate(
      # Anchored so only a trailing " County" suffix is stripped, never a
      # match in the middle of a name.
      Location = sub(" County$", "", Location),
      Number.in.Poverty = as.numeric(
        gsub(",", "", Number.in.Poverty, fixed = TRUE)
      ),
      Percent.in.Poverty = Percent.in.Poverty / 100,
      LocationType = ifelse(Location == "Texas", "State", "County")
    )

  with_ids <- left_join(poverty_cleaned, location_lookup, by = "Location")

  # A left join keeps unmatched rows with an NA id; report them instead of
  # letting them pass silently into the export.
  unmatched <- unique(with_ids$Location[is.na(with_ids$LocationId)])
  if (length(unmatched) > 0) {
    warning(
      "No LocationId found for: ", paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }

  nums <- with_ids |>
    select(
      LocationId, Location,
      Data = Number.in.Poverty,
      LocationType,
      TimeFrame = Year
    ) |>
    mutate(DataFormat = "Number")

  percents <- with_ids |>
    select(
      LocationId, Location,
      Data = Percent.in.Poverty,
      LocationType,
      TimeFrame = Year
    ) |>
    mutate(DataFormat = "Percent")

  # desc(LocationType) sorts "State" ahead of "County".
  bind_rows(nums, percents) |>
    arrange(desc(LocationType), Location, DataFormat)
}

2023

# Run the cleaning function on the 2023 data loaded above.
final_children_2023 <- children_pov_2023 |>
  clean_poverty_data()

Exports

# Write the cleaned 2023 table to CSV without a row-name column.
write.csv(
  final_children_2023,
  file = "CLEANED_2.2_ChildPoverty_2023.csv",
  row.names = FALSE
)