Loading in Data

Data was downloaded from TDC, specifically population estimates.

pop_2023 <- read.csv("../alldata.csv")
locations <- read.csv("../../location_countyNames copy.csv") |>
  select(Location, LocationId)

Cleaning Data

#total nums
pop_cleaned_total <- pop_2023 |>
  filter(Age == "All Ages") |>
  mutate(County = str_to_title(County)) |>
  mutate(County = sub(" County", "", County), 
         County = sub("State Of ", "", County)) 
#total pop nums
total_final <- pop_cleaned_total |>
  select(County, Total) |>
  rename(Location = County) |>
  left_join(locations, by = "Location") |>
  mutate(DataFormat = "Number", 
         TimeFrame = 2023, 
         LocationType = ifelse(Location == "Texas", "State", "County"), 
         Total = as.numeric(Total))
percent_race_eth <- pop_cleaned_total |>
  select(-FIPS, -contains("Female"), -contains("Male")) |>
  mutate(Anglo = NH_White_Total/Total, 
         Black = NH_Black_Total/Total, 
         Hispanic = Hispanic_Total/Total, 
         Asian = NH_Asian_Total/Total, 
         Other = NH_Other_Total/Total) |>
  select(-Age, -Total) |>
  select(County, Anglo, Black, Hispanic, Asian, Other) |>
  pivot_longer(cols = c(Anglo, Black, Hispanic, Asian, Other), 
               names_to = "RaceEthnicity", 
               values_to = "Data") |>
  mutate(DataFormat = "Percent")

num_race_eth <- pop_cleaned_total |>
    select(-FIPS, -contains("Female"), -contains("Male")) |>
    rename(Anglo = NH_White_Total, 
           Black = NH_Black_Total,
           Hispanic = Hispanic_Total, 
           Asian = NH_Asian_Total, 
           Other = NH_Other_Total) |> 
  select(-Age, -Total) |>
    select(County, Anglo, Black, Hispanic, Asian, Other) |>
   pivot_longer(cols = c(Anglo, Black, Hispanic, Asian, Other), 
               names_to = "RaceEthnicity", 
               values_to = "Data") |>
  mutate(DataFormat = "Number")

final_race_eth_2023 <- rbind(percent_race_eth, num_race_eth) |>
  rename(Location = County) |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  left_join(locations, by = "Location") |>
  arrange(desc(LocationType), Location, DataFormat)

NON-DISAGGREGATED

percent_race_eth_dis <- pop_cleaned_total |>
  select(-FIPS, -contains("Female"), -contains("Male")) |>
  mutate(Anglo = NH_White_Total/Total, 
         Black = NH_Black_Total/Total, 
         Hispanic = Hispanic_Total/Total, 
         Other = (NH_Other_Total + NH_Asian_Total)/Total) |>
  select(-Age, -Total) |>
  select(County, Anglo, Black, Hispanic, Other) |>
  pivot_longer(cols = c(Anglo, Black, Hispanic, Other), 
               names_to = "RaceEthnicity", 
               values_to = "Data") |>
  mutate(DataFormat = "Percent")

num_race_eth_dis <- pop_cleaned_total |>
    select(-FIPS, -contains("Female"), -contains("Male")) |>
    rename(Anglo = NH_White_Total, 
           Black = NH_Black_Total,
           Hispanic = Hispanic_Total)|>
  mutate(Other = NH_Other_Total + NH_Asian_Total) |> 
  select(-Age, -Total) |>
    select(County, Anglo, Black, Hispanic, Other) |>
   pivot_longer(cols = c(Anglo, Black, Hispanic, Other), 
               names_to = "RaceEthnicity", 
               values_to = "Data") |>
  mutate(DataFormat = "Number")

final_race_eth_2023_dis <- rbind(percent_race_eth_dis, num_race_eth_dis) |>
  rename(Location = County) |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  left_join(locations, by = "Location") |>
  arrange(desc(LocationType), Location, DataFormat)


Child Population

children <- pop_2023 |>
  mutate(Age = sub("< 1 Year", 0, Age)) |>
  mutate(Age = as.numeric(sub(" Years", "", Age)))|>
  filter(!is.na(Age) & Age < 18) |>
  mutate(County = str_to_title(County)) |>
  mutate(County = sub(" County", "", County), 
         County = sub("State Of ", "", County)) |>
  select(-FIPS, -contains("Female"), -contains("Male")) 
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Age = as.numeric(sub(" Years", "", Age))`.
## Caused by warning:
## ! NAs introduced by coercion
total_children <- children |>
  group_by(County) |>
  summarise(Data = sum(Total)) |>
  rename(Location = County) |>
  left_join(locations, by = "Location") |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County"), 
         DataFormat = "Number")

child_re_clean <- children |>
  group_by(County) |>
  summarise(Total = sum(Total), 
            Anglo = sum(NH_White_Total), 
            Black = sum(NH_Black_Total), 
            Asian = sum(NH_Asian_Total), 
            Hispanic = sum(Hispanic_Total),
            Other = sum(NH_Other_Total))
  
child_percent_race_eth <- child_re_clean |>
  mutate(Anglo = Anglo/Total,
         Black = Black/Total,
         Hispanic = Hispanic/Total,
         Asian = Asian/Total,
         Other = Other/Total) |>
  select( -Total) |>
  select(County, Anglo, Black, Hispanic, Asian, Other) |>
  pivot_longer(cols = c(Anglo, Black, Hispanic, Asian, Other),
               names_to = "RaceEthnicity",
               values_to = "Data") |>
  mutate(DataFormat = "Percent")

child_num_race_eth <- child_re_clean |>
  select(-Total) |>
   pivot_longer(cols = c(Anglo, Black, Hispanic, Asian, Other),
               names_to = "RaceEthnicity",
               values_to = "Data") |>
  mutate(DataFormat = "Number")

child_final_race_eth_2023 <- rbind(child_percent_race_eth, child_num_race_eth) |>
  rename(Location = County) |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  left_join(locations, by = "Location") |>
  arrange(desc(LocationType), Location, DataFormat)
#disaggregated
child_re_clean_dis <- child_re_clean |>
  rename(og_other = Other) |>
  mutate(Other = og_other + Asian) |>
  select(-og_other,-Asian)

child_percent_race_eth_dis <- child_re_clean_dis |>
  mutate(Anglo = Anglo/Total,
         Black = Black/Total,
         Hispanic = Hispanic/Total,
         Other = (Other)/Total) |>
  select( -Total) |>
  select(County, Anglo, Black, Hispanic, Other) |>
  pivot_longer(cols = c(Anglo, Black, Hispanic, Other),
               names_to = "RaceEthnicity",
               values_to = "Data") |>
  mutate(DataFormat = "Percent")

child_num_race_eth_dis <- child_re_clean_dis |>
  select(-Total) |>
   pivot_longer(cols = c(Anglo, Black, Hispanic, Other),
               names_to = "RaceEthnicity",
               values_to = "Data") |>
  mutate(DataFormat = "Number")

child_final_race_eth_2023_dis <- rbind(child_percent_race_eth_dis, child_num_race_eth_dis) |>
  rename(Location = County) |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  left_join(locations, by = "Location") |>
  arrange(desc(LocationType), Location, DataFormat)

Exports

write.csv(file = "CLEANED_1.1_TotalPopulation_2023.csv", total_final, row.names = FALSE)
write.csv(file = "../1.1_TotalPopulation_RaceEthnicity_AsianDisaggregated/CLEANED_1.1_TotalPopulation_RaceEthnicity_2023_DIS.csv", final_race_eth_2023, row.names = FALSE)

write.csv(file = "../1.1_TotalPopulation_RaceEthnicity/CLEANED_1.1_TotalPopulation_RaceEthnicity_2023.csv", final_race_eth_2023_dis, row.names = FALSE)

write.csv(file = "../1.2_ChildPopulation/CLEANED_1.2_ChildPopulation_2023.csv", total_children, row.names = FALSE)

write.csv(file = "../1.2_ChildPopulation_RaceEthnicity_AsianDisaggregated/CLEANED_1.2_ChildPopulation_RaceEthnicity_2023_DIS.csv", child_final_race_eth_2023, row.names = FALSE)

write.csv(file = "../1.2_ChildPopulation_RaceEthnicity/CLEANED_1.2_ChildPopulation_RaceEthnicity_2023.csv", child_final_race_eth_2023_dis, row.names = FALSE)