Loading in Data

exits_from_care_data <- read.csv("CPS_2.8_Exits_from_DFPS_Custody_by_Exit_Type__Avg___Placements__and_Avg_Months_in_Care_FY2015-2024_20250325.csv")
locations <- read.csv("../../location_countyNames copy.csv") |>
  select(Location, LocationId)

Cleaning Data

exits_cleaned <- exits_from_care_data |>
  mutate(Type.of.Exit = case_when(
    Type.of.Exit %in% c("Custody to Relatives without PCA", "Custody to Relatives with PCA") ~ "Relative PMC",
    Type.of.Exit == "Family Reunification" ~ "Reunification",
    Type.of.Exit == "Relative Adoption Consummated" ~ "Relative Adoption",
    Type.of.Exit == "Youth Emancipation" ~ "Emancipation",
    Type.of.Exit == "Non-Relative Adoption Consummated" ~ "Non-relative Adoption",
    TRUE ~ "Other"
  )) |>
  mutate()

missing_counties <- locations |> 
  distinct(Location) |>
  anti_join(exits_cleaned, by = c("Location" = "County")) |>
  filter (Location != "Texas")

exit_types <- c("Reunification", "Relative PMC", "Relative Adoption", "Non-Relative Adoption", "Emancipation", "Other", "Total")

missing_counties_expanded <- missing_counties |> 
  slice(rep(1:n(), each = length(exit_types))) |> 
  mutate(ExitsfromCare = rep(exit_types, times = nrow(missing_counties)), 
         Number = NA, 
         Percent = NA)

missing_counties_long <- missing_counties_expanded |> 
  pivot_longer(cols = c(Number, Percent), 
               names_to = "DataFormat", 
               values_to = "Data") |> 
  arrange(Location, ExitsfromCare) |>
  mutate(LocationType = "County")
#years
efc_2024 <- exits_cleaned |>
  filter(Fiscal.Year == 2024 & County != "Out of State") 
summarize_exits <- function(data, year) {
  summarized_data <- data |> 
    group_by(Type.of.Exit) |> 
    summarize(Number = sum(Exits.During.Fiscal.Year, na.rm = TRUE)) |> 
    ungroup() |> 
    rename(ExitsfromCare = Type.of.Exit)
  
  total_exits <- sum(summarized_data$Number, na.rm = TRUE)
  
  summarized_data <- summarized_data |> 
    add_row(ExitsfromCare = "Total", Number = total_exits)
  
  summarized_data <- summarized_data |> 
    mutate(Percent = ifelse(ExitsfromCare != "Total", Number / total_exits, NA))
  
  summarized_data_long <- summarized_data |> 
    pivot_longer(cols = c(Number, Percent), 
                 names_to = "DataFormat", 
                 values_to = "Data") |> 
    arrange(ExitsfromCare) |>
    mutate(LocationType = "State", 
           Location = "Texas", 
           TimeFrame = year) |>
    select(Location, LocationType, TimeFrame, DataFormat, ExitsfromCare, Data )
  
  return(summarized_data_long)
}
state_2024 <- summarize_exits(efc_2024, 2024)
summarize_exits_by_county <- function(data, year, state_data) {
  exit_types <- c("Reunification", "Relative PMC", "Relative Adoption", "Non-Relative Adoption", "Emancipation", "Other", "Total")

  summarized_data <- data |> 
    group_by(Location, Type.of.Exit) |> 
    summarize(Number = sum(Exits.During.Fiscal.Year, na.rm = TRUE)) |> 
    ungroup() |> 
    rename(ExitsfromCare = Type.of.Exit)

  summarized_with_totals <- summarized_data |> 
    group_by(Location) |> 
    summarize(Total = sum(Number, na.rm = TRUE)) |> 
    mutate(ExitsfromCare = "Total", Number = Total) |> 
    select(-Total)

  full_data <- bind_rows(summarized_data, summarized_with_totals)

  all_combinations <- expand.grid(Location = unique(full_data$Location), ExitsfromCare = exit_types)

  summarized_data <- all_combinations |> 
    left_join(full_data, by = c("Location", "ExitsfromCare")) |> 
    mutate(Number = replace_na(Number, 0)) 

  summarized_data <- summarized_data |> 
    group_by(Location) |> 
    mutate(Percent = ifelse(ExitsfromCare != "Total", Number / sum(Number[ExitsfromCare != "Total"], na.rm = TRUE), NA)) |> 
    ungroup()

  full_data_long <- summarized_data |> 
    pivot_longer(cols = c(Number, Percent), 
                 names_to = "DataFormat", 
                 values_to = "Data") |> 
    mutate(LocationType = "County", 
           TimeFrame = year) |> 
    select(Location, LocationType, TimeFrame, DataFormat, ExitsfromCare, Data)

  missing_counties <- locations |> 
    distinct(Location) |> 
    anti_join(data, by = "Location") |>  
    filter(Location != "Texas")

  missing_counties_expanded <- missing_counties |> 
    slice(rep(1:n(), each = length(exit_types))) |> 
    mutate(ExitsfromCare = rep(exit_types, times = nrow(missing_counties)), 
           Number = NA, 
           Percent = NA)

  missing_counties_long <- missing_counties_expanded |> 
    pivot_longer(cols = c(Number, Percent), 
                 names_to = "DataFormat", 
                 values_to = "Data") |> 
    arrange(Location, ExitsfromCare) |> 
    mutate(LocationType = "County", 
           TimeFrame = year) |> 
    select(Location, LocationType, TimeFrame, DataFormat, ExitsfromCare, Data)

  final_data_counties <- bind_rows(full_data_long, missing_counties_long)
  final_data_all <- bind_rows(final_data_counties, state_data) |> 
    arrange(desc(LocationType), Location, ExitsfromCare, DataFormat)

  return(final_data_all)
}
#should each be 3570 rows, 255, 7, 2
options(scipen = 999)

final_efc_2024 <- efc_2024 |>
  rename(Location = County) |>
  summarize_exits_by_county(2024, state_2024)
## `summarise()` has grouped output by 'Location'. You can override using the
## `.groups` argument.
final_efc_2024_wids <- left_join(final_efc_2024, locations, by = "Location")

Exports

write.csv(file = "CLEANED_6.3_ExitsfromCare_2024.csv", final_efc_2024_wids, row.names = FALSE)