SPM 2: Days to Return To Homelessness

1) Import and clean data

Load packages, import Excel files generated by ART reports in Service Point (one for each fiscal year). Load excel file of demographics data for all clients, created with report writer Add a column for fiscal year, change variable names to snake_case

library(tidyverse)
library(lubridate)
library(janitor)
library(readxl)
library(rmarkdown)

SPM2_FY15 <- read_excel("data/SPM2_FY15.xls", 
                        sheet = "Tab B - First Exit Detail") %>%
  as_tibble("SPM2_FY15.xls") %>%
  clean_names() %>%
  mutate(FY = 15)

SPM2_FY16 <- read_excel("data/SPM2_FY16.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY16.xls") %>%
  clean_names() %>%
  mutate(FY = 16)

SPM2_FY17 <- read_excel("data/SPM2_FY17.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY17.xls") %>%
  clean_names()%>%
  mutate(FY = 17)

SPM2_FY18 <- read_excel("data/SPM2_FY18.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY18.xls") %>%
  clean_names()%>%
  mutate(FY = 18)

SPM2_FY19 <- read_excel("data/SPM2_FY19.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY19.xls") %>%
  clean_names()%>%
  mutate(FY = 19)

SPM2_FY20 <- read_excel("data/SPM2_FY20.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY20.xls") %>%
  clean_names()%>%
  mutate(FY = 20)

SPM2_FY21 <- read_excel("data/SPM2_FY21.xls", 
                        sheet = "Tab B - First Exit Detail")%>%
  as_tibble("SPM2_FY21.xls") %>%
  clean_names()%>%
  mutate(FY = 21)

demographics_FY13_FYTD21 <- read_csv("data/demographics_FY13_FYTD21.csv") %>%
    as_tibble("SPM2_FY21.xls") %>%
  clean_names()

2) Merge & Prepare Data Files

Merge data of SPM2 for all fiscal years into one dataframe Join this data frame with the client demographic information Change variable types, collapse factors, and filter one row per client

d_all_FY <-  bind_rows(SPM2_FY15, SPM2_FY16, SPM2_FY17, SPM2_FY18, SPM2_FY19, SPM2_FY20, SPM2_FY21)
d_all_raw <- left_join(d_all_FY, demographics_FY13_FYTD21, by = c("client_uid" = "client_id"))
d_all <-  d_all_raw %>% 
  group_by(client_uid) %>%
  arrange(client_uid) %>%
  mutate(race_f = factor(primary_race),
         proj_type_f = factor(proj_type),
         provider_f = factor(provider)) %>%
  mutate(race_fc = fct_collapse(race_f,
                                White = "White (HUD)",
                                "Black or African American"= "Black or African American (HUD)",
                                Unknown = c("Client refused (HUD)", "Client doesn't know (HUD)",
                                                 "Data not collected (HUD)"),
                                Asian = "Asian (HUD)",
                                "Native Hawaiian or Pacific Islander" = "Native Hawaiian or Other Pacific Islander (HUD)",
                                "American Indian or Alaska Native" =  "American Indian or Alaska Native (HUD)")) %>%
  mutate(race_3 = fct_collapse(race_f,
                               White = "WHite (HUD)",
                               "Black or African American" = "Black or African American (HUD)",
                               Unknown = c("Client refused (HUD)", "Client doesn't know (HUD)",
                                         "Data not collected (HUD)"),
                               Other = c( "Native Hawaiian or Other Pacific Islander (HUD)",
                                       "American Indian or Alaska Native (HUD)", "Asian (HUD)"))) %>%
    mutate(race_2 = fct_collapse(race_f,
                               White = "White (HUD)",
                               "Black or African American" = "Black or African American (HUD)",
                               "Other or Unknown" = c("Client refused (HUD)", "Client doesn't know (HUD)",
                                         "Data not collected (HUD)", "Native Hawaiian or Other Pacific Islander (HUD)",
                                       "American Indian or Alaska Native (HUD)", "Asian (HUD)"))) %>%
  mutate(race_n = as.numeric(race_fc)) %>%
  mutate(end_date = as.Date(end_date, format = '%Y/%m/%d')) %>%
  mutate(days_to_reappear = replace_na(days_to_reappear, 0)) %>%
  mutate(days_dichot = case_when(is.na(days_to_reappear) ~ "No",
                                 days_to_reappear == 0 ~ "No",
                                 days_to_reappear > 0 ~ "Yes")) %>%
  mutate(start_date = ymd(start_date),
         end_date = ymd(end_date)) %>%
           mutate(year = year(end_date),
                  month = month(end_date)) %>%
           mutate(y_m = paste(year, month, sep = "-")) %>%
           relocate(client_uid, end_date, days_to_reappear, days_dichot,
                    race_fc, race_n) %>%
           slice(1) %>%
  ungroup

3) Summary Tables

To create barplots with categorical variables, we first need to create a table of summary statistics This first Table looks people who returned to homelessness compared to those who did not

library(gt)
sum_days_dichot <- d_all %>% 
  group_by(days_dichot, race_fc) %>%
  select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(days_to_reappear, na.rm = TRUE), 
            max = max(days_to_reappear, na.rm = TRUE), 
            mean = mean(days_to_reappear, na.rm = TRUE), 
            median = median(days_to_reappear, na.rm = TRUE),
            sd = sd(days_to_reappear, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))
  
gt_days_dichot <- gt(sum_days_dichot) %>%
  tab_header(title = "Days to Return to Homelessness") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 6, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             days_dichot ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100),
             days_dichot ~ px(200)) %>%
  cols_align(align = "center") %>%
  cols_label(race_fc = "Race",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci)) 

gt_days_dichot

We can also look at a table by Fiscal year

library(gt)
sum_FY <- d_all %>% 
  group_by(FY) %>%
  select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(days_to_reappear, na.rm = TRUE), 
            max = max(days_to_reappear, na.rm = TRUE), 
            mean = mean(days_to_reappear, na.rm = TRUE), 
            median = median(days_to_reappear, na.rm = TRUE),
            sd = sd(days_to_reappear, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))
  
gt_FY <- gt(sum_FY) %>%
  tab_header(title = "Days to Return to Homelessness") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100)) %>%
  cols_align(align = "center") %>%
  cols_label(FY = "FY",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci)) 


gt_FY

Days to Return to Homelessness
FY	n	Min	Max	Mean	Median	S. Dev	SEM	95% Confidence Intervals
FY	n	Min	Max	Mean	Median	S. Dev	SEM	Upper	Lower
15	72	0	764	49.56	0	160.10	18.87	86.54	12.57
16	328	0	1093	75.38	0	210.91	11.65	98.21	52.56
17	363	0	1066	117.99	0	231.82	12.17	141.84	94.14
18	570	0	1008	100.48	0	228.02	9.55	119.20	81.76
19	674	0	1036	94.74	0	216.68	8.35	111.10	78.38
20	488	0	1041	91.31	0	206.69	9.36	109.65	72.97

Summary by Race

sum_race <- d_all %>% 
  group_by(race_fc) %>%
  select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(days_to_reappear, na.rm = TRUE), 
            max = max(days_to_reappear, na.rm = TRUE), 
            mean = mean(days_to_reappear, na.rm = TRUE), 
            median = median(days_to_reappear, na.rm = TRUE),
            sd = sd(days_to_reappear, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))
  
gt_race <- gt(sum_race) %>%
  tab_header(title = "Days to Return to Homelessness") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100)) %>%
  cols_align(align = "center") %>%
  cols_label(race_fc = "Race",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci)) 


gt_race

Days to Return to Homelessness
Race	n	Min	Max	Mean	Median	S. Dev	SEM	95% Confidence Intervals
Race	n	Min	Max	Mean	Median	S. Dev	SEM	Upper	Lower
Black or African American	1197	0	1093	103.26	0	231.30	6.69	116.37	90.16
White	1259	0	1066	85.68	0	201.88	5.69	96.84	74.53
American Indian or Alaska Native	17	0	937	290.82	162	346.78	84.11	455.67	125.98
Asian	10	0	382	38.20	0	120.80	38.20	113.07	−36.67
Unknown	9	0	0	0.00	0	0.00	0.00	0.00	0.00
Native Hawaiian or Pacific Islander	3	0	0	0.00	0	0.00	0.00	0.00	0.00

Summary Table By Month

This creates a table with 69 rows, which is pretty hard to make sense of

d_month <- d_all %>%
  group_by(year, month) %>%
  mutate(mean_y_m = mean(days_to_reappear, na.rm = TRUE),
         median_y_m = median(days_to_reappear, na.rm = TRUE))

sum_month <- d_month %>% 
  group_by(year, month) %>%
  select(client_uid, days_to_reappear, FY, race_fc, race_f,  end_date, month, year, y_m, mean_y_m, median_y_m)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(days_to_reappear, na.rm = TRUE), 
            max = max(days_to_reappear, na.rm = TRUE), 
            mean = mean(days_to_reappear, na.rm = TRUE), 
            median = median(days_to_reappear, na.rm = TRUE),
            sd = sd(days_to_reappear, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))

gt_month <- gt(sum_month) %>%
  tab_header(title = "Days to Return to Homelessness by Month") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 6, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(100),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100),
             mean ~ px(100)) %>%
  cols_align(align = "center") %>%
  cols_label(year = "Year",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci))
gt_month

Days to Return to Homelessness by Month
month	n	Min	Max	Mean	Median	S. Dev	SEM	95% Confidence Intervals
month	n	Min	Max	Mean	Median	S. Dev	SEM	Upper	Lower
2012
12	1	558	558	558.00	558.00	NA	NA	NA	NA
2013
2	5	0	764	183.60	0.00	331.24	148.13	473.94	-106.7405100
3	7	0	70	10.00	0.00	26.46	10.00	29.60	-9.6000000
4	6	0	0	0.00	0.00	0.00	0.00	0.00	0.0000000
5	7	0	422	65.57	0.00	157.77	59.63	182.45	-51.3091487
6	5	0	0	0.00	0.00	0.00	0.00	0.00	0.0000000
7	29	0	469	28.34	0.00	107.12	19.89	67.33	-10.6416067
8	11	0	741	67.36	0.00	223.42	67.36	199.40	-64.6690909
9	1	0	0	0.00	0.00	NA	NA	NA	NA
10	11	0	1093	326.18	0.00	469.23	141.48	603.48	48.8851307
11	20	0	1020	207.25	0.00	348.89	78.01	360.16	54.3408491
12	46	0	140	3.04	0.00	20.64	3.04	9.01	-2.9217391
2014
1	22	0	727	44.68	0.00	156.20	33.30	109.95	-20.5884490
2	19	0	111	5.84	0.00	25.47	5.84	17.29	-5.6084211
3	32	0	906	173.66	0.00	331.31	58.57	288.45	58.8646521
4	34	0	166	11.44	0.00	38.49	6.60	24.38	-1.4974783
5	18	0	542	88.61	2.50	172.50	40.66	168.30	8.9203322
6	19	0	683	48.53	0.00	163.11	37.42	121.87	-24.8154329
7	39	0	687	85.59	0.00	201.02	32.19	148.68	22.4981896
8	37	0	651	48.41	0.00	148.02	24.33	96.10	0.7089603
9	31	0	617	69.87	0.00	167.16	30.02	128.72	11.0247087
10	42	0	1066	144.17	0.00	301.24	46.48	235.27	53.0619540
11	22	0	786	110.18	0.00	219.26	46.75	201.81	18.5570182
12	28	0	829	171.32	0.00	241.83	45.70	260.90	81.7469419
2015
1	31	0	695	54.81	0.00	137.13	24.63	103.08	6.5313726
2	45	0	924	194.40	0.00	305.06	45.47	283.53	105.2691626
3	25	0	847	132.92	0.00	249.04	49.81	230.54	35.2961324
4	35	0	685	114.40	0.00	190.91	32.27	177.65	51.1503292
5	14	0	679	48.50	0.00	181.47	48.50	143.56	-46.5600000
6	40	0	644	54.90	0.00	133.37	21.09	96.23	13.5685996
7	39	0	731	152.54	0.00	261.05	41.80	234.47	70.6065698
8	20	0	712	100.20	0.00	217.93	48.73	195.71	4.6886550
9	22	0	460	43.27	0.00	106.75	22.76	87.88	-1.3359624
10	28	0	862	143.71	0.00	242.07	45.75	233.38	54.0502796
11	48	0	1001	67.90	0.00	224.13	32.35	131.30	4.4898908
12	28	0	1008	201.79	0.00	365.78	69.13	337.27	66.2994409
2016
1	21	0	992	216.29	0.00	293.67	64.08	341.89	90.6818942
2	61	0	731	48.93	0.00	156.79	20.07	88.28	9.5874642
3	106	0	894	126.38	0.00	262.46	25.49	176.34	76.4128425
4	41	0	706	45.41	0.00	138.23	21.59	87.73	3.1012825
5	36	0	851	63.56	0.00	176.31	29.39	121.15	5.9607881
6	49	0	848	95.57	0.00	220.41	31.49	157.29	33.8572276
7	31	0	818	159.42	0.00	261.44	46.96	251.45	67.3871158
8	73	0	665	73.92	0.00	175.18	20.50	114.10	33.7312282
9	48	0	628	88.44	0.00	201.33	29.06	145.40	31.4799178
10	45	0	1036	109.82	0.00	249.67	37.22	182.77	36.8748741
11	94	0	1022	115.00	0.00	249.02	25.68	165.34	64.6575722
12	58	0	1020	198.50	0.00	330.77	43.43	283.63	113.3739420
2017
1	47	0	944	113.23	0.00	256.51	37.42	186.57	39.9004117
2	33	0	565	85.48	0.00	168.61	29.35	143.01	27.9550517
3	51	0	823	100.39	0.00	196.49	27.51	154.32	46.4651267
4	51	0	795	90.59	0.00	188.51	26.40	142.33	38.8498787
5	33	0	449	61.79	0.00	132.17	23.01	106.88	16.6909549
6	56	0	768	48.18	0.00	147.44	19.70	86.80	9.5611609
7	69	0	732	102.91	0.00	220.07	26.49	154.84	50.9872288
8	53	0	776	70.30	0.00	175.64	24.13	117.59	23.0153523
9	84	0	726	37.40	0.00	129.48	14.13	65.09	9.7151916
10	54	0	1041	189.39	0.00	308.40	41.97	271.65	107.1324333
11	54	0	902	53.96	0.00	157.35	21.41	95.93	11.9948981
12	43	0	782	79.60	0.00	202.21	30.84	140.05	19.1633616
2018
1	49	0	741	91.82	0.00	181.69	25.96	142.69	40.9433022
2	44	0	430	30.50	0.00	89.40	13.48	56.91	4.0852364
3	27	0	611	69.07	0.00	169.94	32.71	133.18	4.9713051
4	63	0	893	95.02	0.00	211.73	26.68	147.30	42.7320010
5	41	0	687	134.76	0.00	229.78	35.89	205.09	64.4206471
6	25	0	809	82.48	0.00	224.41	44.88	170.45	-5.4902443
7	20	0	779	163.00	0.00	242.74	54.28	269.39	56.6138128
8	41	0	759	51.98	0.00	176.13	27.51	105.89	-1.9385772
9	27	0	487	49.11	0.00	142.06	27.34	102.70	-4.4741040

4) Plots

Graph by Fiscal Year

To get a better picture of what is going on, we can look at a graph. First let’s look at Days to Reappear by Fiscal Year.

f_all <- d_all %>%
  filter(between(days_to_reappear, 1, 600))
  
ggplot(data = sum_FY, aes(FY, mean)) + 
  geom_col()+
  scale_x_discrete(limits=c(15,16,17,18,19,20,21))+
  geom_point(data = f_all, aes(FY, days_to_reappear, color = race_2), 
             position = position_jitter(width = 0.1),
             shape = 1,
             alpha = .7) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci,
                    width = 0.2, )) +
  labs(title = "Days to Return By Fiscal Year",
       x = "Fiscal Year",
       y = "Days To Return",
       col = "Primary Race") +
  theme_light()

Graph by Month

This is much easier to read than the table with 69 rows

f_all <- d_month %>%
  select(client_uid, days_to_reappear, FY, race_fc, race_n, mean_y_m, median_y_m, y_m, year, month, end_date, year, month)%>%
  filter(between(days_to_reappear, 0, 600)) %>%
  na.omit()

ggplot(data = f_all, aes(end_date, mean_y_m, color = mean_y_m)) +
  geom_smooth(aes(color = ..y..), size=1.5, se=FALSE)  +
  geom_line(size = 0.9)+
  scale_colour_gradient2(low = "green4", mid = "goldenrod1" , high = "darkred", 
                         midpoint = 250) +
  labs(title = "Average Length of Time Homeless By Month",
       x = "Month and Year",
       y = "Days Homeless",
       color = "Median 
       Days Homeless") +
  theme_bw()