1) Import and clean data

Load packages, import Excel files generated by ART reports in Service Point (one for each fiscal year). Load excel file of demographics data for all clients, created with report writer Add a column for fiscal year, change variable names to snake_case

library(tidyverse)
library(lubridate)
library(janitor)
library(readxl)
library(rmarkdown)

SPM2_FY15 <- read_excel("data/SPM2_FY15.xls", 
                        sheet = "Tab B - First Exit Detail") %>%
  as_tibble("SPM2_FY15.xls") %>%
  clean_names() %>%
  mutate(FY = 15)

SPM2_FY16 <- read_excel("data/SPM2_FY16.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY16.xls") %>%
  clean_names() %>%
  mutate(FY = 16)

SPM2_FY17 <- read_excel("data/SPM2_FY17.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY17.xls") %>%
  clean_names()%>%
  mutate(FY = 17)

SPM2_FY18 <- read_excel("data/SPM2_FY18.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY18.xls") %>%
  clean_names()%>%
  mutate(FY = 18)

SPM2_FY19 <- read_excel("data/SPM2_FY19.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY19.xls") %>%
  clean_names()%>%
  mutate(FY = 19)

SPM2_FY20 <- read_excel("data/SPM2_FY20.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY20.xls") %>%
  clean_names()%>%
  mutate(FY = 20)

SPM2_FY21 <- read_excel("data/SPM2_FY21.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY21.xls") %>%
  clean_names()%>%
  mutate(FY = 21)

demographics_FY13_FYTD21 <- read_csv("data/demographics_FY13_FYTD21.csv") %>%
    as_tibble("SPM2_FY21.xls") %>%
  clean_names()

2) Merge & Prepare Data Files

Merge data of SPM2 for all fiscal years into one dataframe Join this data frame with the client demographic information Change variable types, collapse factors, and filter one row per client

d_all_FY <-  bind_rows(SPM2_FY15, SPM2_FY16, SPM2_FY17, SPM2_FY18, SPM2_FY19, SPM2_FY20, SPM2_FY21)
d_all_raw <- left_join(d_all_FY, demographics_FY13_FYTD21, by = c("client_uid" = "client_id"))
d_all <-  d_all_raw %>% 
  group_by(client_uid) %>%
  arrange(client_uid) %>%
  mutate(race_f = factor(primary_race),
         proj_type_f = factor(proj_type),
         provider_f = factor(provider)) %>%
  mutate(race_fc = fct_collapse(race_f,
                                White = "White (HUD)",
                                "Black or African American"= "Black or African American (HUD)",
                                Unknown = c("Client refused (HUD)", "Client doesn't know (HUD)",
                                                 "Data not collected (HUD)"),
                                Asian = "Asian (HUD)",
                                "Native Hawaiian or Pacific Islander" = "Native Hawaiian or Other Pacific Islander (HUD)",
                                "American Indian or Alaska Native" =  "American Indian or Alaska Native (HUD)")) %>%
  mutate(race_3 = fct_collapse(race_f,
                               White = "WHite (HUD)",
                               "Black or African American" = "Black or African American (HUD)",
                               Unknown = c("Client refused (HUD)", "Client doesn't know (HUD)",
                                         "Data not collected (HUD)"),
                               Other = c( "Native Hawaiian or Other Pacific Islander (HUD)",
                                       "American Indian or Alaska Native (HUD)", "Asian (HUD)"))) %>%
    mutate(race_2 = fct_collapse(race_f,
                               White = "White (HUD)",
                               "Black or African American" = "Black or African American (HUD)",
                               "Other or Unknown" = c("Client refused (HUD)", "Client doesn't know (HUD)",
                                         "Data not collected (HUD)", "Native Hawaiian or Other Pacific Islander (HUD)",
                                       "American Indian or Alaska Native (HUD)", "Asian (HUD)"))) %>%
  mutate(race_n = as.numeric(race_fc)) %>%
  mutate(end_date = as.Date(end_date, format = '%Y/%m/%d')) %>%
  mutate(days_to_reappear = replace_na(days_to_reappear, 0)) %>%
  mutate(days_dichot = case_when(is.na(days_to_reappear) ~ "No",
                                 days_to_reappear == 0 ~ "No",
                                 days_to_reappear > 0 ~ "Yes")) %>%
  mutate(start_date = ymd(start_date),
         end_date = ymd(end_date)) %>%
           mutate(year = year(end_date),
                  month = month(end_date)) %>%
           mutate(y_m = paste(year, month, sep = "-")) %>%
           relocate(client_uid, end_date, days_to_reappear, days_dichot,
                    race_fc, race_n) %>%
           slice(1) %>%
  ungroup

3) Summary Tables

To create barplots with categorical variables, we first need to create a table of summary statistics This first Table looks people who returned to homelessness compared to those who did not

library(gt)
sum_days_dichot <- d_all %>% 
  group_by(days_dichot, race_fc) %>%
  select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(days_to_reappear, na.rm = TRUE), 
            max = max(days_to_reappear, na.rm = TRUE), 
            mean = mean(days_to_reappear, na.rm = TRUE), 
            median = median(days_to_reappear, na.rm = TRUE),
            sd = sd(days_to_reappear, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))
  
gt_days_dichot <- gt(sum_days_dichot) %>%
  tab_header(title = "Days to Return to Homelessness") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 6, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             days_dichot ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100),
             days_dichot ~ px(200)) %>%
  cols_align(align = "center") %>%
  cols_label(race_fc = "Race",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci)) 

gt_days_dichot

We can also look at a table by Fiscal year

library(gt)
sum_FY <- d_all %>% 
  group_by(FY) %>%
  select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(days_to_reappear, na.rm = TRUE), 
            max = max(days_to_reappear, na.rm = TRUE), 
            mean = mean(days_to_reappear, na.rm = TRUE), 
            median = median(days_to_reappear, na.rm = TRUE),
            sd = sd(days_to_reappear, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))
  
gt_FY <- gt(sum_FY) %>%
  tab_header(title = "Days to Return to Homelessness") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100)) %>%
  cols_align(align = "center") %>%
  cols_label(FY = "FY",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci)) 


gt_FY
Days to Return to Homelessness
FY n Min Max Mean Median S. Dev SEM 95% Confidence Intervals
Upper Lower
15 72 0 764 49.56 0 160.10 18.87 86.54 12.57
16 328 0 1093 75.38 0 210.91 11.65 98.21 52.56
17 363 0 1066 117.99 0 231.82 12.17 141.84 94.14
18 570 0 1008 100.48 0 228.02 9.55 119.20 81.76
19 674 0 1036 94.74 0 216.68 8.35 111.10 78.38
20 488 0 1041 91.31 0 206.69 9.36 109.65 72.97

Summary by Race

sum_race <- d_all %>% 
  group_by(race_fc) %>%
  select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(days_to_reappear, na.rm = TRUE), 
            max = max(days_to_reappear, na.rm = TRUE), 
            mean = mean(days_to_reappear, na.rm = TRUE), 
            median = median(days_to_reappear, na.rm = TRUE),
            sd = sd(days_to_reappear, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))
  
gt_race <- gt(sum_race) %>%
  tab_header(title = "Days to Return to Homelessness") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100)) %>%
  cols_align(align = "center") %>%
  cols_label(race_fc = "Race",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci)) 


gt_race
Days to Return to Homelessness
Race n Min Max Mean Median S. Dev SEM 95% Confidence Intervals
Upper Lower
Black or African American 1197 0 1093 103.26 0 231.30 6.69 116.37 90.16
White 1259 0 1066 85.68 0 201.88 5.69 96.84 74.53
American Indian or Alaska Native 17 0 937 290.82 162 346.78 84.11 455.67 125.98
Asian 10 0 382 38.20 0 120.80 38.20 113.07 −36.67
Unknown 9 0 0 0.00 0 0.00 0.00 0.00 0.00
Native Hawaiian or Pacific Islander 3 0 0 0.00 0 0.00 0.00 0.00 0.00

Summary Table By Month

This creates a table with 69 rows, which is pretty hard to make sense of

d_month <- d_all %>%
  group_by(year, month) %>%
  mutate(mean_y_m = mean(days_to_reappear, na.rm = TRUE),
         median_y_m = median(days_to_reappear, na.rm = TRUE))

sum_month <- d_month %>% 
  group_by(year, month) %>%
  select(client_uid, days_to_reappear, FY, race_fc, race_f,  end_date, month, year, y_m, mean_y_m, median_y_m)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(days_to_reappear, na.rm = TRUE), 
            max = max(days_to_reappear, na.rm = TRUE), 
            mean = mean(days_to_reappear, na.rm = TRUE), 
            median = median(days_to_reappear, na.rm = TRUE),
            sd = sd(days_to_reappear, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))

gt_month <- gt(sum_month) %>%
  tab_header(title = "Days to Return to Homelessness by Month") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 6, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(100),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100),
             mean ~ px(100)) %>%
  cols_align(align = "center") %>%
  cols_label(year = "Year",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci))
gt_month
Days to Return to Homelessness by Month
month n Min Max Mean Median S. Dev SEM 95% Confidence Intervals
Upper Lower
2012
12 1 558 558 558.00 558.00 NA NA NA NA
2013
2 5 0 764 183.60 0.00 331.24 148.13 473.94 -106.7405100
3 7 0 70 10.00 0.00 26.46 10.00 29.60 -9.6000000
4 6 0 0 0.00 0.00 0.00 0.00 0.00 0.0000000
5 7 0 422 65.57 0.00 157.77 59.63 182.45 -51.3091487
6 5 0 0 0.00 0.00 0.00 0.00 0.00 0.0000000
7 29 0 469 28.34 0.00 107.12 19.89 67.33 -10.6416067
8 11 0 741 67.36 0.00 223.42 67.36 199.40 -64.6690909
9 1 0 0 0.00 0.00 NA NA NA NA
10 11 0 1093 326.18 0.00 469.23 141.48 603.48 48.8851307
11 20 0 1020 207.25 0.00 348.89 78.01 360.16 54.3408491
12 46 0 140 3.04 0.00 20.64 3.04 9.01 -2.9217391
2014
1 22 0 727 44.68 0.00 156.20 33.30 109.95 -20.5884490
2 19 0 111 5.84 0.00 25.47 5.84 17.29 -5.6084211
3 32 0 906 173.66 0.00 331.31 58.57 288.45 58.8646521
4 34 0 166 11.44 0.00 38.49 6.60 24.38 -1.4974783
5 18 0 542 88.61 2.50 172.50 40.66 168.30 8.9203322
6 19 0 683 48.53 0.00 163.11 37.42 121.87 -24.8154329
7 39 0 687 85.59 0.00 201.02 32.19 148.68 22.4981896
8 37 0 651 48.41 0.00 148.02 24.33 96.10 0.7089603
9 31 0 617 69.87 0.00 167.16 30.02 128.72 11.0247087
10 42 0 1066 144.17 0.00 301.24 46.48 235.27 53.0619540
11 22 0 786 110.18 0.00 219.26 46.75 201.81 18.5570182
12 28 0 829 171.32 0.00 241.83 45.70 260.90 81.7469419
2015
1 31 0 695 54.81 0.00 137.13 24.63 103.08 6.5313726
2 45 0 924 194.40 0.00 305.06 45.47 283.53 105.2691626
3 25 0 847 132.92 0.00 249.04 49.81 230.54 35.2961324
4 35 0 685 114.40 0.00 190.91 32.27 177.65 51.1503292
5 14 0 679 48.50 0.00 181.47 48.50 143.56 -46.5600000
6 40 0 644 54.90 0.00 133.37 21.09 96.23 13.5685996
7 39 0 731 152.54 0.00 261.05 41.80 234.47 70.6065698
8 20 0 712 100.20 0.00 217.93 48.73 195.71 4.6886550
9 22 0 460 43.27 0.00 106.75 22.76 87.88 -1.3359624
10 28 0 862 143.71 0.00 242.07 45.75 233.38 54.0502796
11 48 0 1001 67.90 0.00 224.13 32.35 131.30 4.4898908
12 28 0 1008 201.79 0.00 365.78 69.13 337.27 66.2994409
2016
1 21 0 992 216.29 0.00 293.67 64.08 341.89 90.6818942
2 61 0 731 48.93 0.00 156.79 20.07 88.28 9.5874642
3 106 0 894 126.38 0.00 262.46 25.49 176.34 76.4128425
4 41 0 706 45.41 0.00 138.23 21.59 87.73 3.1012825
5 36 0 851 63.56 0.00 176.31 29.39 121.15 5.9607881
6 49 0 848 95.57 0.00 220.41 31.49 157.29 33.8572276
7 31 0 818 159.42 0.00 261.44 46.96 251.45 67.3871158
8 73 0 665 73.92 0.00 175.18 20.50 114.10 33.7312282
9 48 0 628 88.44 0.00 201.33 29.06 145.40 31.4799178
10 45 0 1036 109.82 0.00 249.67 37.22 182.77 36.8748741
11 94 0 1022 115.00 0.00 249.02 25.68 165.34 64.6575722
12 58 0 1020 198.50 0.00 330.77 43.43 283.63 113.3739420
2017
1 47 0 944 113.23 0.00 256.51 37.42 186.57 39.9004117
2 33 0 565 85.48 0.00 168.61 29.35 143.01 27.9550517
3 51 0 823 100.39 0.00 196.49 27.51 154.32 46.4651267
4 51 0 795 90.59 0.00 188.51 26.40 142.33 38.8498787
5 33 0 449 61.79 0.00 132.17 23.01 106.88 16.6909549
6 56 0 768 48.18 0.00 147.44 19.70 86.80 9.5611609
7 69 0 732 102.91 0.00 220.07 26.49 154.84 50.9872288
8 53 0 776 70.30 0.00 175.64 24.13 117.59 23.0153523
9 84 0 726 37.40 0.00 129.48 14.13 65.09 9.7151916
10 54 0 1041 189.39 0.00 308.40 41.97 271.65 107.1324333
11 54 0 902 53.96 0.00 157.35 21.41 95.93 11.9948981
12 43 0 782 79.60 0.00 202.21 30.84 140.05 19.1633616
2018
1 49 0 741 91.82 0.00 181.69 25.96 142.69 40.9433022
2 44 0 430 30.50 0.00 89.40 13.48 56.91 4.0852364
3 27 0 611 69.07 0.00 169.94 32.71 133.18 4.9713051
4 63 0 893 95.02 0.00 211.73 26.68 147.30 42.7320010
5 41 0 687 134.76 0.00 229.78 35.89 205.09 64.4206471
6 25 0 809 82.48 0.00 224.41 44.88 170.45 -5.4902443
7 20 0 779 163.00 0.00 242.74 54.28 269.39 56.6138128
8 41 0 759 51.98 0.00 176.13 27.51 105.89 -1.9385772
9 27 0 487 49.11 0.00 142.06 27.34 102.70 -4.4741040

4) Plots

Graph by Fiscal Year

To get a better picture of what is going on, we can look at a graph. First let’s look at Days to Reappear by Fiscal Year.

f_all <- d_all %>%
  filter(between(days_to_reappear, 1, 600))
  
ggplot(data = sum_FY, aes(FY, mean)) + 
  geom_col()+
  scale_x_discrete(limits=c(15,16,17,18,19,20,21))+
  geom_point(data = f_all, aes(FY, days_to_reappear, color = race_2), 
             position = position_jitter(width = 0.1),
             shape = 1,
             alpha = .7) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci,
                    width = 0.2, )) +
  labs(title = "Days to Return By Fiscal Year",
       x = "Fiscal Year",
       y = "Days To Return",
       col = "Primary Race") +
  theme_light()

Graph by Month

This is much easier to read than the table with 69 rows

f_all <- d_month %>%
  select(client_uid, days_to_reappear, FY, race_fc, race_n, mean_y_m, median_y_m, y_m, year, month, end_date, year, month)%>%
  filter(between(days_to_reappear, 0, 600)) %>%
  na.omit()

ggplot(data = f_all, aes(end_date, mean_y_m, color = mean_y_m)) +
  geom_smooth(aes(color = ..y..), size=1.5, se=FALSE)  +
  geom_line(size = 0.9)+
  scale_colour_gradient2(low = "green4", mid = "goldenrod1" , high = "darkred", 
                         midpoint = 250) +
  labs(title = "Average Length of Time Homeless By Month",
       x = "Month and Year",
       y = "Days Homeless",
       color = "Median 
       Days Homeless") +
  theme_bw()