library(tidyverse)
library(purrr)
library(readxl)
library(httr)
library(kableExtra)
library(maps)
library(DT)

Introduction

This report will describe homelessness data from the U.S. Department of Housing and Urban Development (HUD). The objective is to use this data provide an overview of the state of homelessness in the United States.

HUD Data

#Get HUD data from URL
url <- "https://www.huduser.gov/portal/sites/default/files/xls/2007-2022-PIT-Counts-by-CoC.xlsx"
response <- GET(url, write_disk(path <- tempfile(fileext = ".xlsx"))) 

#Read and clean Census data
census_race <- read_csv("ACSDP1Y2022.DP05-2023-10-25T140928.csv")
census_race <- census_race[c(39, 40, 41, 46, 54, 59, 60),1:2]
colnames(census_race) <- c("race", "count")
census_race <- census_race %>% 
  mutate(race = sub("^\\s*[\u00a0]*", "", race),
         pct = round(100 *count / sum(count), 1),
         source = "Census")

#Read each excel sheet into a list
homeless_list <- path %>%
  excel_sheets() %>%
  set_names() %>%
  purrr::map(read_excel, path = path)

#Standardize column names for each list element
for(i in 1:13){
  names(homeless_list[[i]]) <- str_remove(names(homeless_list[[i]]), ", \\d+")
}

homeless <- bind_rows(homeless_list[1:13], .id = "year")       #Bind rows of each list element to one object
names(homeless) <- str_replace_all(names(homeless), " ", "_")  #Replace spaces in variable names
homeless <- homeless %>% filter(CoC_Name != "Total")           #Filter out aggregate rows
rm(homeless_list, i, path, url, response)                      #Remove unnecessary objects

theme_update(plot.title = element_text(hjust = 0.5))           #Center plot titles globally

The homeless dataframe contains Point-In-Time estimates of homelessness from 2010 to 2022. Data is reported by Continuum of Care (CoC) providers. The CoC program is “designed to promote community-wide commitment to the goal of ending homelessness.” The program provides funding for and collects data from various nonprofit and government entities that help combat homelessness in the United States.

Each observation in the data represents local homelessness estimates by a CoC provider for a certain year. There are 396 unique CoC providers, and that has remained mostly constant since 2007. There are 5007 observations in the data.

There are 710 columns in the data that describe various states of homelessness. A few examples include:

homeless %>%
  filter(year == 2022) %>%
  group_by(CoC_Name) %>%
  summarize(total_homeless = sum(Overall_Homeless, na.rm = TRUE)) %>%
  arrange(desc(total_homeless)) %>%
  head(10) %>%
  kbl(col.names = c("CoC Provider", "Total Homeless"), caption = "Highest-Volume CoC Providers in 2022") %>%
  kable_classic_2(html_font = "Serif")
Highest-Volume CoC Providers in 2022
CoC Provider Total Homeless
Los Angeles City & County CoC 65111
New York City CoC 61840
Seattle/King County CoC 13368
San Jose/Santa Clara City & County CoC 10028
Oakland, Berkeley/Alameda County CoC 9747
Sacramento City & County CoC 9278
Phoenix, Mesa/Maricopa County CoC 9026
San Diego City and County CoC 8427
San Francisco CoC 7754
Texas Balance of State CoC 7054
homeless <- homeless %>% 
  filter(year == 2022) %>%
  select(CoC_Name, CoC_Category, CoC_Number) %>%
  right_join(homeless, by = c('CoC_Name', 'CoC_Number'), suffix = c("", "_og")) %>%
  select(-CoC_Category_og)

homeless %>%
  group_by(CoC_Category) %>%
  summarize(CoC_Count = n(), average_homeless = mean(Overall_Homeless, na.rm = TRUE), total_homeless = sum(Overall_Homeless, na.rm = TRUE)) %>%
  arrange(desc(total_homeless)) %>%
  filter(!is.na(CoC_Category)) %>%
  kbl(col.names = c("CoC Category", "CoC Count", "Average Homeless", "Total Homeless"), 
      caption = "CoC Providers by Category in 2022", digits = 1) %>%
  kable_classic_2(html_font = "Serif")
CoC Providers by Category in 2022
CoC Category CoC Count Average Homeless Total Homeless
Major City CoC 624 5523.5 3446667
Largely Suburban CoC 2180 893.9 1948735
Largely Rural CoC 1410 1015.6 1432000
Other Largely Urban CoC 744 724.7 539189

Homeless Demographics

Sheltered vs Unsheltered

Below is a graph of homelessness counts by year, sheltered and unsheltered (which sum to total homelessness). In 2022, the total homeless population in the US was 582,462, the majority of which was sheltered. As you can see, there’s a steep drop in 2021 data, especially unsheltered data. This is due to the COVID-19 pandemic, which made it impossible to safely collect a complete dataset. So in any time-series analysis, 2021 data should be omitted.

counts <- homeless %>%
  group_by(year) %>%
  summarize( sheltered = sum(Sheltered_Total_Homeless),
            unsheltered = sum(Unsheltered_Homeless)) %>%
  pivot_longer(unsheltered:sheltered, names_to = "category", values_to = "pop") %>%
  mutate(category = factor(category, levels = c("unsheltered", "sheltered")))
  

ggplot(counts, aes(x = year, y = pop, fill = category)) +
  geom_bar(stat = "identity", width = 0.7) +
  labs(
    title = "Homelessness in the US",
    x = "Year",
    y = "Population",
    fill = "Category"
  ) +
  scale_fill_manual(values = c("sheltered" = "#4682B4", "unsheltered" = "#B0C4DE"),
                    labels = c("Unsheltered", "Sheltered")) +
  theme(text = element_text(family = "serif")) +
  scale_y_continuous(labels = scales::comma)

Population Areas

As we already know, the vast majority of homeless people live in cities. Below is a graph that plots how homeless populations among population areas has changed over time. It’s interesting that urban homeless populations have declined slightly, while suburban and rural populations have increased.

my_color_palette <- c("#667C84", "#916259", "#6E8D71", "#A0A4A8")

homeless %>% 
  filter(year != 2021) %>%
  group_by(year, CoC_Category) %>% 
  summarize(count = sum(Overall_Homeless, na.rm = TRUE)) %>%
  filter(CoC_Category != "NA") %>%
  mutate(CoC_Category = reorder(CoC_Category, -count)) %>% 
  ggplot(aes(year, count, group = CoC_Category)) +
  geom_line(aes(color = CoC_Category)) +
  geom_point(aes(color = CoC_Category), shape = 17) +
  scale_y_continuous(labels = scales::comma,
                     limits = c(0, 325000),
                     breaks = seq(0, 60000000, by = 50000)) +
  scale_color_manual(values = my_color_palette) +
  labs(title = "Homelessness Over Time by CoC Category", x = "Year", y = "Homeless Count", color = "CoC Category") +
  theme(text = element_text(family = "serif"))

By Sex

Men comprise the majority of the homeless population in the United States. In 2022, there were 352,836 male and 222,970 female recorded homeless people in the US.

homeless %>%
  group_by(year) %>%
  summarize(total_female = sum(`Overall_Homeless_-_Female`, na.rm = TRUE),
            total_male = sum(`Overall_Homeless_-_Male`, na.rm = TRUE)) %>%
  filter(year != 2021, total_female > 0) %>%
  pivot_longer(total_female:total_male, names_to = 'gender', values_to = 'population') %>%
  ggplot(aes(year, population, fill = gender)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_manual(values = c("total_female" = "#D19494", "total_male" = "#94AED1"), labels = c("Total Female", "Total Male")) +
  theme(text = element_text(family = "serif")) +
  labs(x = "Year", y = "Population", fill = "Sex", title = "Homeless Populations by Year and Sex")

It’s also interesting to compare sheltered/unsheltered homeless populations among the sexes. About 45% of homeless men are unsheltered, while under a third of homeless women are. Surely this is due (in part) to the fact that it is much more dangerous to live outside as a woman.

sheltered_sex <- homeless %>% 
  group_by(year) %>% 
  summarize(female_sheltered = sum(`Sheltered_Total_Homeless_-_Female`, na.rm =TRUE),
            female_unsheltered = sum(`Unsheltered_Homeless_-_Female`, na.rm = TRUE),
            male_sheltered = sum(`Sheltered_Total_Homeless_-_Male`, na.rm = TRUE),
            male_unsheltered = sum(`Unsheltered_Homeless_-_Male`, na.rm = TRUE)) %>%
  mutate(pct_female_sheltered = female_sheltered / (female_sheltered + female_unsheltered),
         pct_female_unsheltered = female_unsheltered / (female_sheltered + female_unsheltered),
         pct_male_sheltered = male_sheltered / (male_sheltered + male_unsheltered),
         pct_male_unsheltered = male_unsheltered / (male_sheltered + male_unsheltered)) %>%
  pivot_longer(female_sheltered:pct_male_unsheltered, names_to = "category", values_to = "percent") %>%
  filter(str_detect(category, "pct"), year == 2022) %>%
  mutate(percent = round(percent, 3) * 100,
         sex = ifelse(str_detect(category, "female"), "female", "male"))

ggplot(sheltered_sex, aes(x = 1, y = percent, fill = category)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar(theta = "y") +
  facet_wrap(~ sex, labeller = labeller(sex = c("female" = "Women", "male" = "Men"))) +
  scale_fill_manual(values = c("pct_female_sheltered" = "#9DB5A3",
                               "pct_male_sheltered" = "#9DB5A3",
                               "pct_female_unsheltered" = "#A089A1",
                               "pct_male_unsheltered" = "#A089A1"),
                    labels = c("Female Sheltered",
                               "Female Unsheltered",
                               "Male Sheltered",
                               "Male Unsheltered")) +
  labs(fill = NULL, title = "Sheltered vs Unsheltered Homeless Percentages by Sex in 2022", ) +
  theme(legend.position = "bottom") +
  geom_text(aes(label = paste0(round(percent, 1), "%")), position = position_stack(vjust = 0.5), family = "serif") +
  theme(text = element_text(family = "serif"),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_blank())

Homeless Children and Youths

In 2022, there were 98,244 homeless children, 40,177 homeless young adults (18-24), and 444,041 homeless adults (over 24) recorded in the dataset. Counts of homeless children and young adults have slowly declined in recent years, while counts of homeless adults have increased.

homeless %>%
  group_by(year) %>%
  summarize(homeless_children = sum(`Overall_Homeless_-_Under_18`, na.rm = TRUE),
            homeless_18to25 = sum(`Overall_Homeless_-_Age_18_to_24`, na.rm = TRUE),
            homeless_adults = sum(`Overall_Homeless_-_Over_24`, na.rm = TRUE)) %>%
  pivot_longer(homeless_children:homeless_adults, values_to = "count", names_to = "age_category") %>%
  filter(count > 0, year != 2021) %>%
  mutate(age_category = factor(age_category, levels = c("homeless_adults", "homeless_children", "homeless_18to25"))) %>%
  ggplot(aes(year, count, group = age_category)) +
  geom_line(aes(color = age_category)) +
  geom_point(aes(color = age_category), shape = 17) +
  scale_y_continuous(labels = scales::comma,
                     limits = c(0, 450000)) +
  scale_color_manual(values = c("#667C84", "#916259", "#6E8D71"),
                     labels = c("Adults", "Children", "Ages 18-24")) +
  labs(x = "Year", y = "Count", title = "Homeless Counts by Year and Age Category", color = "Age Category") +
  theme(text = element_text(family = "serif"))

Homeless children are much more likely to be sheltered compared to other cohorts. Just over 10% of homeless children are unsheltered, 10,284 in 2022. There are fewer unsheltered children than unsheltered young adults, despite homeless children being a much larger category overall.

homeless %>%
  group_by(year) %>%
  summarize(unsheltered_children = sum(`Unsheltered_Homeless_-_Under_18`, na.rm = TRUE),
            unsheltered_18to24 = sum(`Unsheltered_Homeless_-_Age_18_to_24`, na.rm = TRUE),
            sheltered_children = sum(`Sheltered_Total_Homeless_-_Under_18`, na.rm = TRUE),
            sheltered_18to24 = sum(`Sheltered_Total_Homeless_-_Age_18_to_24`, na.rm = TRUE)) %>%
  pivot_longer(unsheltered_children:sheltered_18to24, names_to = "category", values_to = "count") %>%
  mutate(category = factor(category, levels = c("sheltered_children", "sheltered_18to24", "unsheltered_18to24", "unsheltered_children"))) %>%
  filter(count > 0, year != 2021) %>%
  ggplot(aes(year, count, group = category, color = category)) +
  geom_line() +
  geom_point(shape = 17) +
  scale_color_manual(values = my_color_palette,
                     labels = c("Sheltered Children", "Sheltered 18-24", "Unsheltered 18-24", "Unsheltered Children")) +
  scale_y_continuous(labels = scales::comma,) +
  labs(x = "Year", y = "Count", title = "Sheltered and Unsheltered Children and Young Adults by Year", color = "Category") +
  theme(text = element_text(family = "serif"))

Homeless Veterans

Homelessness among veterans has declined consistently since 2011, in both sheltered and unsheltered populations.

homeless %>% 
  group_by(year) %>%
  summarize(total_homeless_veterans = sum(Overall_Homeless_Veterans, na.rm = TRUE),
            homeless_veterans_unsheltered = sum(Unsheltered_Homeless_Veterans, na.rm = TRUE),
            homeless_veterans_sheltered = sum(Sheltered_Total_Homeless_Veterans)) %>%
  pivot_longer(total_homeless_veterans:homeless_veterans_sheltered, names_to = "category", values_to = "count") %>%
  filter(count > 0, year != 2021) %>%
  mutate(category = factor(category, levels = c("total_homeless_veterans", "homeless_veterans_sheltered", "homeless_veterans_unsheltered"))) %>%
  ggplot(aes(year, count, group = category, color = category)) +
  geom_line() +
  geom_point(shape = 17) +
  scale_y_continuous(labels = scales::comma,
                     limits = c(0, 70000)) +
  scale_color_manual(values = my_color_palette,
                     labels = c("Total Homeless Veterans",
                                "Sheltered Homeless Veterans",
                                "Unsheltered Homeless Veterans")) +
  labs(y = "Count", x = "Year", title = "Homeless Veteran Counts by Year", color = "Category") +
  theme(text = element_text(family = "serif"))

By Location

California and New York have the highest total homeless populations in America.

states <- map_data("state")
homeless <- homeless %>% 
  mutate(state_abb = substr(CoC_Number, 0, 2))
homeless$state <- str_to_lower(state.name[match(homeless$state_abb, state.abb)])
homeless_map <- homeless %>% 
  filter(year == 2022) %>%
  group_by(state) %>%
  summarize(total_homeless = sum(Overall_Homeless, na.rm = TRUE)) %>%
  filter(!is.na(state)) %>%
  right_join(states, by = c('state' = 'region'))

homeless_map %>% 
  ggplot(aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = total_homeless)) +
  geom_path() +
  scale_fill_distiller() +
  theme_classic() +
  labs(title = "Total Homelessness by U.S. State", fill = "Total Homeless", subtitle = "Heatmap of 2022 homeless estimates by state") +
  theme(axis.title = element_text(size = 0), axis.text = element_text(size = 0), axis.ticks = element_blank(), 
        axis.line = element_blank(), panel.background = element_blank(), text = element_text(family = "serif")) 

homeless %>%
  filter(year == 2022) %>%
  group_by(state) %>%
  summarize('Total Homeless' = sum(Overall_Homeless, na.rm = TRUE),
            'Sheltered Homeless' = sum(Sheltered_Total_Homeless, na.rm = TRUE),
            'Unsheltered Homeless' = sum(Unsheltered_Homeless, na.rm = TRUE)) %>%
  arrange(desc('Total Homeless')) %>%
  filter(!is.na(state)) %>%
  mutate(state = str_to_title(state)) %>%
  rename(State = state) %>%
  datatable(options = list(pageLength = 10, searching = TRUE, ordering = TRUE), caption = htmltools::tags$caption( style = 'caption-side: top; text-align: center; color:black;  font-size:200%; font-family: serif','2022 Homeless Counts by State'))

By Race

What is the racial makeup of the US homeless population in 2022, and how does that compare to the country’s overall racial makeup according to 2022 Census Data?

homeless_race <- homeless %>%
  group_by(year) %>%
  summarize('White' = sum(`Overall_Homeless_-_White`, na.rm = TRUE),
            'Black or African American' = sum(`Overall_Homeless_-_Black,_African_American,_or_African`, na.rm = TRUE),
            'Asian' = sum(`Overall_Homeless_-_Asian_or_Asian_American`, na.rm = TRUE),
            'American Indian and Alaska Native' = sum(`Overall_Homeless_-_American_Indian,_Alaska_Native,_or_Indigenous`, na.rm = TRUE),
            'Native Hawaiian and Other Pacific Islander' = sum(`Overall_Homeless_-_Native_Hawaiian_or_Other_Pacific_Islander`, na.rm = TRUE),
            'Two or More Races' = sum(`Overall_Homeless_-_Multiple_Races`, na.rm = TRUE)) %>%
  filter(year == 2022) %>%
  pivot_longer('White':'Two or More Races', names_to = 'race', values_to = 'count') %>%
  mutate(source = "HUD", pct = round(100 * count / sum(count), 1)) %>%
  select(-year) %>%
  bind_rows(census_race)
my_color_palette <- c("#5E7D81", "#A78C61", "#7E9774", "#A39597", "#6F72A0", "#B58B87", "#7B88A0")

ggplot(homeless_race, aes(x = 1, y = pct, fill = race)) + 
  geom_bar(stat = "identity", width = 1, color = "black") + 
  coord_polar(theta = "y") + 
  facet_wrap(~source, labeller = labeller(source = c("Census" = "General Population","HUD" = "Homeless Population"))) +
  scale_fill_manual(values = my_color_palette) +
  geom_text(aes(label = ifelse(pct < 4, "", paste0(pct, "%")), x = 1.1), 
            position = position_stack(vjust = 0.5), family = "serif") +
  labs(title = "Racial Makeup of the US General vs Homeless Population") +
  theme(text = element_text(family = "serif"),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        axis.text = element_blank(),
        panel.grid = element_blank(),
        legend.position = "bottom",
        legend.title = element_blank())

Pie charts aren’t always the best visualizations, but key takeaways from this graph are that the Black or African American population is significantly over represented among the homeless, making up 37.3% of the homeless population but only 12.2% of the general population in 2022. Native Americans and Native Hawaiians are also over represented, while Asians are significantly underrepresented.

Conclusion

Thanks for reading! Hopefully you learned something about what homelessness looks like in the United States.