# select(), filter(), mutate(), rename(), left_join() and arrange() used in
# this script are dplyr verbs; attach dplyr so the script runs in a fresh
# session. The call is a no-op when dplyr is already attached.
library(dplyr)

# Inputs ----
# Poverty extract to be cleaned (file name suggests SAIPE child poverty, 2023).
children_pov_2023 <- read.csv("SAIPE_CP_2023.csv")

# Lookup from location name to LocationId; only these two columns are kept so
# the join in clean_poverty_data() adds nothing but the identifier.
locations <- read.csv("../../location_countyNames copy.csv") |>
  select(Location, LocationId)
#' Reshape a poverty extract into long Number/Percent rows with location ids.
#'
#' Drops the "United States" row, strips a trailing " County" from each name,
#' converts the poverty count and rate to numeric (the rate is divided by 100),
#' labels "Texas" as a State and every other row as a County, attaches
#' `LocationId` from the lookup, and stacks counts and rates into one table.
#'
#' @param pov_dataset Data frame with columns `Name`, `Year`,
#'   `Number.in.Poverty` and `Percent.in.Poverty`.
#' @param location_lookup Data frame with columns `Location` and `LocationId`.
#'   Defaults to the script-level `locations` object, which is what the
#'   function used before this parameter existed.
#' @return Data frame with columns `LocationId`, `Location`, `Data`,
#'   `LocationType`, `TimeFrame`, `DataFormat`, sorted with the State row(s)
#'   first, then by `Location`, then by `DataFormat`. A warning is raised for
#'   any location that has no match in `location_lookup`.
clean_poverty_data <- function(pov_dataset, location_lookup = locations) {
  # Counts/rates may arrive as numbers or as text with thousands separators,
  # depending on how read.csv() guessed the column type.
  to_number <- function(x) {
    if (is.numeric(x)) as.numeric(x) else as.numeric(gsub(",", "", x))
  }

  poverty_cleaned <- pov_dataset |>
    filter(Name != "United States") |>
    select(Name, Year, Number.in.Poverty, Percent.in.Poverty) |>
    mutate(
      # Anchor the pattern so only a trailing " County" suffix is removed.
      Name = sub(" County$", "", Name),
      Number.in.Poverty = to_number(Number.in.Poverty),
      Percent.in.Poverty = to_number(Percent.in.Poverty) / 100,
      LocationType = ifelse(Name == "Texas", "State", "County")
    ) |>
    rename(Location = Name)

  with_ids <- left_join(poverty_cleaned, location_lookup, by = "Location")

  # A left join keeps unmatched rows with an NA id; surface them instead of
  # letting them pass silently into the export.
  unmatched <- unique(with_ids$Location[is.na(with_ids$LocationId)])
  if (length(unmatched) > 0) {
    warning(
      "No LocationId found for: ", paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }

  nums <- with_ids |>
    select(
      LocationId, Location,
      Data = Number.in.Poverty,
      LocationType,
      TimeFrame = Year
    ) |>
    mutate(DataFormat = "Number")

  percents <- with_ids |>
    select(
      LocationId, Location,
      Data = Percent.in.Poverty,
      LocationType,
      TimeFrame = Year
    ) |>
    mutate(DataFormat = "Percent")

  # desc(LocationType) sorts "State" ahead of "County".
  bind_rows(nums, percents) |>
    arrange(desc(LocationType), Location, DataFormat)
}
# 2023 ----
final_children_2023 <- clean_poverty_data(children_pov_2023)

# Exports ----
# row.names = FALSE keeps R's row index out of the exported file.
write.csv(
  final_children_2023,
  file = "CLEANED_2.2_ChildPoverty_2023.csv",
  row.names = FALSE
)