System Performance Measure 1: Length of Time Homeless

1) Load packages and Import SPM Data

Importing .xls versions of ART700 reports for each fiscal year. These are the default files saved by the Service Point ART700 report. The report for FY19 is split into two excel sheets because there are so many clients. Adding a column to each dataframe for fiscal year.

library(readxl)
library(tidyverse)
library(janitor)
library(lubridate)
library(gt)

SPM1_FYTD15 <- read_excel("data/SPM1_FYTD15.xls", 
  sheet = "Tab B - Detail") %>%
  clean_names() %>%
  as_tibble() %>%
  mutate(client_uid = as.numeric(client_uid),
         FY = 15)
SPM1_FYTD16 <- read_excel("data/SPM1_FYTD16.xls", 
    sheet = "Tab B - Detail") %>%
  clean_names() %>%
  as_tibble() %>%
  mutate(FY = 16)
SPM1_FYTD17 <- read_excel("data/SPM1_FYTD17.xls", 
    sheet = "Tab B - Detail") %>%
  clean_names() %>%
  as_tibble() %>%
  mutate(FY = 17)
SPM1_FYTD18 <- read_excel("data/SPM1_FYTD18.xls", 
    sheet = "Tab B - Detail") %>%
  clean_names() %>%
  as_tibble() %>%
  mutate(FY = 18)
SPM1_FYTD19_a <- read_excel("data/SPM1_FYTD19.xls", 
    sheet = "Tab B - Detail") %>%
  clean_names() %>%
  as_tibble() %>%
  mutate(FY = 19)
SPM1_FYTD19_b <- read_excel("data/SPM1_FYTD19.xls", 
    sheet = "Tab B - Detail(1)") %>%
  clean_names() %>%
  as_tibble() %>%
  mutate(FY = 19)
SPM1_FYTD20 <- read_excel("data/SPM1_FYTD20.xls", 
    sheet = "Tab B - Detail") %>%
  clean_names() %>%
  as_tibble() %>%
  mutate(FY = 20)
SPM1_FYTD21 <- read_excel("data/SPM1_FYTD21.xls", 
    sheet = "Tab B - Detail") %>%
  clean_names() %>%
  as_tibble() %>%
  mutate(FY = 21)

2) Import Demographic Data

This data was generated using report writer. The file includes demographic information on all client and all entry/exits since 2013.

demographics_FY13_FYTD21 <- read_csv("data/demographics_FY13_FYTD21.csv")

3) Merge and Clean Data

First I will join together the two FY19 dataframes Then I’m going to join all of the ART data into one dataframe Then I will join that newly created dataframe with the demographics data I will also clean up the variable names The resulting dataframe is 293,000 rows of 27 variables This is because the dataframe includes multiple rows for some clients I will group the data by client_uid and select the first row for each client I will also collapse race into smaller groups and change date formatting

SPM1_FYTD19_a <- SPM1_FYTD19_a %>%
  select(-overlap_likely_hmi_error) #### This variable is not in the second sheet of the FY19 report
SPM1_FYTD19 <- rbind(SPM1_FYTD19_a,setNames(SPM1_FYTD19_b,names(SPM1_FYTD19_a))) %>%
  mutate(date_move_in = ymd_hms(date_move_in)) #### Only FY19 has date_move_in as a numeric variable
d_all_FY <- bind_rows(SPM1_FYTD15, SPM1_FYTD16, SPM1_FYTD17,
                   SPM1_FYTD18, SPM1_FYTD19, SPM1_FYTD20, SPM1_FYTD21)
d_all <- left_join (d_all_FY, demographics_FY13_FYTD21, by = c("client_uid" = "client_id"))%>%
  group_by(client_uid) %>%
  slice(1) %>%
  arrange(client_uid) %>%
  mutate(race_f = factor(primary_race),
         proj_type_f = factor(proj_type),
         provider_f = factor(provider)) %>%
  mutate(race_fc = fct_collapse(race_f,
                                White = "White (HUD)",
                                "Black or African American"= "Black or African American (HUD)",
                                Unknown = c("Client refused (HUD)", "Client doesn't know (HUD)",
                                                 "Data not collected (HUD)"),
                                Asian = "Asian (HUD)",
                                "Native Hawaiian or Pacific Islander" = 
                                  "Native Hawaiian or Other Pacific Islander (HUD)",
                                "American Indian or Alaska Native" =  
                                  "American Indian or Alaska Native (HUD)")) %>%
  mutate(race_3 = fct_collapse(race_f,
                               White = "WHite (HUD)",
                               "Black or African American" = "Black or African American (HUD)",
                               Unknown = c("Client refused (HUD)", 
                                        "Client doesn't know (HUD)",
                                        "Data not collected (HUD)"),
                               Other = c( "Native Hawaiian or Other Pacific Islander (HUD)",
                                       "American Indian or Alaska Native (HUD)", 
                                       "Asian (HUD)"))) %>%
    mutate(race_2 = fct_collapse(race_f,
                               White = "WHite (HUD)",
                               "Black or African American" = "Black or African American (HUD)",
                               "Other or Unknown" = c("Client refused (HUD)", 
                                      "Client doesn't know (HUD)",
                                      "Data not collected (HUD)", 
                                      "Native Hawaiian or Other Pacific Islander (HUD)",
                                      "American Indian or Alaska Native (HUD)", 
                                      "Asian (HUD)"))) %>%
  mutate(race_n = as.numeric(race_fc)) %>%
  mutate(entry_adj_base = ymd(entry_adj_base),
         exit_adj_base = ymd(exit_adj_base),
         adjusted_cutoff = ymd(adjusted_cutoff)) %>%
  mutate(entry_exit_interval = interval(entry_adj_base, exit_adj_base),
         tran_lot_days = time_length(entry_exit_interval, "day")) %>%
  mutate(tran_lot_sum = sum(tran_lot)) %>%
  mutate(LOT_h = total_es_sh_th) %>%
  mutate(end_date_ymd = as.Date(end_date, format = '%Y/%m/%d')) %>%
  replace_na(list(end_date_ymd = today())) %>%
  mutate(year = year(end_date_ymd),
         month = month(end_date_ymd)) %>%
  mutate(y_m = paste(year, month, sep = "-")) %>%
  relocate(client_uid, entry_adj_base, exit_adj_base, entry_exit_interval,
           tran_lot, tran_lot_days, total_es_sh ,total_es_sh_th, LOT_h) 
head(d_all)

4) Inspect Data

Take a quick look at the first few rows of the new dataframe

head(d_all)

## # A tibble: 6 x 43
## # Groups:   client_uid [6]
##   client_uid entry_adj_base exit_adj_base entry_exit_interval           
##        <dbl> <date>         <date>        <Interval>                    
## 1          3 2015-10-08     2015-10-09    2015-10-08 UTC--2015-10-09 UTC
## 2         33 2013-10-01     2014-04-05    2013-10-01 UTC--2014-04-05 UTC
## 3         37 2016-06-23     2016-06-23    2016-06-23 UTC--2016-06-23 UTC
## 4         42 2019-07-28     2019-07-28    2019-07-28 UTC--2019-07-28 UTC
## 5         59 2019-01-10     2019-01-11    2019-01-10 UTC--2019-01-11 UTC
## 6         61 2018-01-23     2018-02-10    2018-01-23 UTC--2018-02-10 UTC
## # ... with 39 more variables: tran_lot <dbl>, tran_lot_days <dbl>,
## #   total_es_sh <dbl>, total_es_sh_th <dbl>, LOT_h <dbl>, unique_id <chr>,
## #   trans_type <chr>, trans_id <dbl>, provider <chr>, proj_type <chr>,
## #   date_move_in <dttm>, start_date <dttm>, end_date <dttm>, x12 <lgl>,
## #   adjusted_cutoff <date>, overlap_likely_hmi_error <lgl>, FY <dbl>,
## #   last_name <chr>, first_name <chr>, entry_date <chr>, exit_date <chr>,
## #   client_location_always_choose_va_502_unless_directed_otherwise <chr>,
## #   total_monthly_income <dbl>, current_locality <chr>, date_of_birth <chr>,
## #   ethnicity <chr>, primary_race <chr>, race_f <fct>, proj_type_f <fct>,
## #   provider_f <fct>, race_fc <fct>, race_3 <fct>, race_2 <fct>, race_n <dbl>,
## #   tran_lot_sum <dbl>, end_date_ymd <date>, year <dbl>, month <dbl>, y_m <chr>

5) Summarize Data

Now we want to look at descriptive statistics for each fiscal year

library(gt)
sum_FY <- d_all %>% 
  group_by(FY) %>%
  select(client_uid, LOT_h, FY, race_fc, race_f, total_es_sh_th, end_date_ymd, month, year)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(LOT_h, na.rm = TRUE), 
            max = max(LOT_h, na.rm = TRUE), 
            mean = mean(LOT_h, na.rm = TRUE), 
            median = median(LOT_h, na.rm = TRUE),
            sd = sd(LOT_h, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))

gt_FY <- gt(sum_FY) %>%
  tab_header(title = "Length of Time Homeless by Fiscal Year") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100),
             FY ~ px(200)) %>%
  cols_align(align = "center") %>%
  cols_label(FY = "Fiscal Year",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci))

gt_FY

Length of Time Homeless by Fiscal Year
Fiscal Year	n	Min	Max	Mean	Median	S. Dev	SEM	95% Confidence Intervals
Fiscal Year	n	Min	Max	Mean	Median	S. Dev	SEM	Upper	Lower
15	414	1	932	80.90	51.5	98.93	4.86	90.43	71.37
16	1036	1	306	45.94	25.0	53.75	1.67	49.21	42.66
17	743	1	322	37.88	22.0	42.97	1.58	40.97	34.79
18	1338	1	249	28.07	11.0	40.89	1.12	30.26	25.87
19	1328	1	327	28.54	10.0	45.55	1.25	30.99	26.09
20	676	1	473	36.94	14.0	54.95	2.11	41.08	32.80
21	250	1	358	65.83	19.0	88.19	5.58	76.76	54.90

Summary Table by Month

We can also look at Length of Time Homeless for each Month This creates a very long table that is difficult to read

library(gt)
d_all <- d_all %>%
  group_by(year, month) %>%
  mutate(mean_y_m = mean(LOT_h, na.rm = TRUE),
         median_y_m = median(LOT_h, na.rm = TRUE))

sum_month <- d_all %>% 
  group_by(year, month) %>%
  select(client_uid, LOT_h, FY, race_fc, race_f, total_es_sh_th,  end_date_ymd, month, year, y_m, mean_y_m, median_y_m)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(LOT_h, na.rm = TRUE), 
            max = max(LOT_h, na.rm = TRUE), 
            mean = mean(LOT_h, na.rm = TRUE), 
            median = median(LOT_h, na.rm = TRUE),
            sd = sd(LOT_h, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))

gt_month <- gt(sum_month) %>%
  tab_header(title = "Length of Time Homeless by Month") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100),
             mean ~ px(200)) %>%
  cols_align(align = "center") %>%
  cols_label(year = "Year",
             month = "Month",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci))
gt_month

Length of Time Homeless by Month
Month	n	Min	Max	Mean	Median	S. Dev	SEM	95% Confidence Intervals
Month	n	Min	Max	Mean	Median	S. Dev	SEM	Upper	Lower
2014
4	1	202	202.00	202.00000	202.00	NA	NA	NA	NA
5	3	157	283.00	229.33333	248.00	65.04	37.55	302.93	155.732499
6	4	18	232.00	129.25000	133.50	92.44	46.22	219.84	38.660056
7	3	51	183.00	132.66667	164.00	71.36	41.20	213.42	51.914561
8	4	46	168.00	107.50000	108.00	49.92	24.96	156.42	58.581735
9	2	32	370.00	201.00000	201.00	239.00	169.00	532.24	-130.240000
10	36	3	343.00	64.63889	33.00	72.29	12.05	88.25	41.024687
11	26	13	277.00	74.61538	46.00	60.34	11.83	97.81	51.420510
12	29	11	318.00	90.79310	67.00	70.10	13.02	116.31	65.278521
2015
1	26	6	225.00	78.73077	57.00	64.16	12.58	103.39	54.069361
2	28	2	145.00	57.57143	58.00	44.25	8.36	73.96	41.180508
3	25	2	322.00	70.08000	50.00	74.48	14.90	99.28	40.881938
4	44	2	565.00	95.29545	61.50	96.75	14.59	123.88	66.708119
5	22	6	381.00	109.77273	40.00	126.27	26.92	162.54	57.005844
6	34	3	189.00	49.35294	51.50	40.35	6.92	62.92	35.790092
7	29	11	409.00	68.72414	47.00	76.27	14.16	96.48	40.965318
8	29	12	476.00	76.17241	41.00	114.32	21.23	117.78	34.562565
9	29	1	265.00	65.31034	52.00	60.60	11.25	87.37	43.252895
10	138	1	270.00	53.66667	32.00	58.50	4.98	63.43	43.906530
11	78	1	268.00	50.75641	32.50	54.57	6.18	62.87	38.646229
12	65	1	174.00	43.75385	29.00	43.22	5.36	54.26	33.246360
2016
1	71	1	212.00	40.80282	24.00	45.90	5.45	51.48	30.127193
2	72	1	227.00	42.59722	30.50	47.93	5.65	53.67	31.527040
3	63	1	286.00	49.41270	25.00	61.22	7.71	64.53	34.294441
4	100	1	271.00	68.27000	51.00	69.69	6.97	81.93	54.610521
5	119	1	233.00	44.48739	20.00	56.74	5.20	54.68	34.292925
6	99	1	191.00	41.35354	25.00	45.88	4.61	50.39	32.315903
7	75	1	850.00	47.98667	18.00	103.42	11.94	71.39	24.581146
8	90	1	203.00	29.77778	17.50	37.74	3.98	37.58	21.980115
9	78	1	409.00	33.41026	14.50	59.84	6.78	46.69	20.130204
10	63	1	932.00	78.28571	35.00	131.75	16.60	110.82	45.752201
11	49	1	309.00	50.20408	33.00	58.57	8.37	66.60	33.804750
12	60	1	204.00	42.41667	27.00	46.65	6.02	54.22	30.613532
2017
1	74	1	242.00	47.37838	31.50	47.87	5.56	58.29	36.471707
2	59	1	232.00	43.28814	25.00	45.65	5.94	54.94	31.639873
3	60	1	137.00	30.40000	15.00	34.33	4.43	39.09	21.714560
4	63	1	178.00	35.92063	18.00	43.51	5.48	46.67	25.175355
5	70	1	143.00	44.48571	30.50	41.44	4.95	54.19	34.776650
6	55	1	182.00	43.32727	25.00	41.20	5.56	54.22	32.438061
7	58	1	167.00	43.75862	31.00	38.23	5.02	53.60	33.918942
8	80	1	86.00	26.90000	24.00	19.61	2.19	31.20	22.603365
9	79	1	95.00	13.12658	9.00	14.50	1.63	16.32	9.929937
10	41	1	204.00	34.97561	16.00	46.45	7.25	49.19	20.756837
11	27	1	128.00	21.51852	14.00	30.17	5.81	32.90	10.138620
12	31	1	188.00	28.38710	11.00	41.05	7.37	42.84	13.935516
2018
1	103	1	248.00	48.08738	31.00	50.18	4.94	57.78	38.395919
2	178	1	249.00	54.58989	28.50	64.93	4.87	64.13	45.051177
3	162	1	178.00	25.87037	7.00	38.42	3.02	31.79	19.953659
4	123	1	163.00	28.47967	13.00	37.20	3.35	35.05	21.906079
5	117	1	126.00	19.88034	8.00	27.89	2.58	24.93	14.827359
6	137	1	119.00	18.72263	8.00	24.97	2.13	22.90	14.541315
7	121	1	181.00	22.86777	9.00	31.77	2.89	28.53	17.206980
8	140	1	85.00	19.71429	12.50	20.17	1.70	23.06	16.373258
9	151	1	109.00	11.67550	6.00	17.23	1.40	14.42	8.927695
10	119	1	326.00	43.26050	19.00	56.54	5.18	53.42	33.101584
11	130	1	327.00	42.07692	12.50	77.00	6.75	55.31	28.840535
12	95	1	303.00	32.14737	11.00	53.96	5.54	43.00	21.296103
2019
1	102	1	265.00	36.05882	19.00	47.28	4.68	45.23	26.884212
2	108	1	230.00	32.75926	10.00	48.66	4.68	41.94	23.582807
3	117	1	176.00	29.26496	12.00	37.87	3.50	36.13	22.402569
4	109	1	152.00	23.45872	4.00	37.52	3.59	30.50	16.415725
5	103	1	146.00	22.15534	5.00	34.66	3.42	28.85	15.460897
6	94	1	116.00	24.14894	12.00	29.15	3.01	30.04	18.256845
7	118	1	99.00	22.05932	11.00	24.20	2.23	26.43	17.693404
8	116	1	242.00	19.01724	8.00	28.96	2.69	24.29	13.746196
9	104	1	136.00	11.13462	7.00	16.12	1.58	14.23	8.035692
10	100	1	344.00	42.27000	20.00	61.65	6.17	54.35	30.186212
11	97	1	290.00	32.42268	9.00	60.46	6.14	44.45	20.391315
12	83	1	473.00	25.53012	6.00	58.58	6.43	38.13	12.928322
2020
1	102	1	274.00	34.27451	9.00	56.17	5.56	45.17	23.374439
2	57	1	233.00	38.15789	10.00	60.46	8.01	53.85	22.462644
3	46	1	195.00	41.23913	12.00	56.80	8.37	57.65	24.824737
4	18	1	200.00	37.83333	11.50	50.83	11.98	61.32	14.349524
5	19	1	154.00	47.15789	23.00	50.58	11.60	69.90	24.414731
6	36	1	129.00	54.61111	39.00	48.50	8.08	70.45	38.768951
7	30	1	91.00	32.60000	22.50	27.93	5.10	42.60	22.603890
8	41	1	68.00	25.68293	23.00	20.86	3.26	32.07	19.297065
9	33	1	296.00	31.54545	14.00	54.04	9.41	49.98	13.106452
10	33	1	327.00	46.75758	17.00	63.89	11.12	68.56	24.959788
11	34	1	312.00	71.61765	31.00	96.10	16.48	103.92	39.315914
12	38	1	299.00	50.63158	25.50	69.69	11.31	72.79	28.472066
2021
1	35	1	358.00	35.11429	13.00	63.32	10.70	56.09	14.134864
2	26	1	235.00	41.42308	11.00	64.52	12.65	66.22	16.624105
3	34	1	205.00	41.00000	12.50	55.76	9.56	59.74	22.258346
4	30	1	178.00	46.30000	26.00	54.39	9.93	65.76	26.836742
5	21	1	151.00	69.71429	8.00	71.34	15.57	100.23	39.199809
6	34	35	350.00	180.35294	159.50	93.20	15.98	211.68	149.023649

Summary Table by Race

library(gt)
sum_race <- d_all %>% 
  group_by(race_fc) %>%
  select(client_uid, LOT_h, FY, race_fc, race_f, total_es_sh_th, end_date_ymd, month, year)%>%
  na.omit()%>%
  summarise(n = n_distinct(client_uid), 
            min = min(LOT_h, na.rm = TRUE), 
            max = max(LOT_h, na.rm = TRUE), 
            mean = mean(LOT_h, na.rm = TRUE), 
            median = median(LOT_h, na.rm = TRUE),
            sd = sd(LOT_h, na.rm = TRUE),
            sem = sd/sqrt(n()),
            upper_ci = mean + (1.96 * sem),
            lower_ci = mean - (1.96 * sem))%>%
  arrange(desc(n))

gt_race <- gt(sum_race) %>%
  tab_header(title = "Length of Time Homeless by Client Race") %>%
  fmt_number(columns = 7:10, decimals = 2) %>%
  fmt_number(columns = 5, decimals = 2) %>%
  cols_width(upper_ci ~ px(100),
             lower_ci ~ px(100),
             sd ~ px(120),
             n ~ px(50),
             mean ~ px(100),
             median ~ px(100),
             race_fc ~ px(200)) %>%
  cols_align(align = "center") %>%
  cols_label(race_fc = "Race",
             mean = "Mean",
             median = "Median",
             min = "Min",
             max = "Max",
             sd = "S. Dev",
             sem = "SEM",
             upper_ci = "Upper",
             lower_ci = "Lower") %>%
  tab_spanner(label = "95% Confidence Intervals",
              columns = c(upper_ci, lower_ci))

gt_race

Length of Time Homeless by Client Race
Race	n	Min	Max	Mean	Median	S. Dev	SEM	95% Confidence Intervals
Race	n	Min	Max	Mean	Median	S. Dev	SEM	Upper	Lower
White	3462	1	850	38.72	16	57.26	0.97	40.63	36.82
Black or African American	2207	1	932	39.61	20	55.88	1.19	41.94	37.28
American Indian or Alaska Native	53	1	290	49.42	25	67.83	9.32	67.68	31.15
Unknown	36	1	234	34.39	11	54.92	9.15	52.33	16.45
Asian	14	1	122	21.79	6	33.73	9.01	39.45	4.12
Native Hawaiian or Pacific Islander	13	1	132	35.69	11	44.91	12.46	60.10	11.28

6) Plot Data

Graph by Fiscal Year

We can now create plots to show the change in Length of Time Homeless by Fiscal Year To make the plot a little easier to read, I’m going to filter out some outliers and reduce race to three categories

f_all <- d_all %>%
  filter(between(LOT_h, 1, 300))%>%
  drop_na(race_3)
  
ggplot(data = sum_FY, aes(FY, mean)) + 
  geom_col()+
  scale_x_discrete(limits=c(15,16,17,18,19,20,21))+
  geom_point(data = f_all, aes(FY, LOT_h, color = race_2), 
             position = position_jitter(width = 0.1),
             shape = 1,
             alpha = .7) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci,
                    width = 0.2, )) +
  geom_errorbar(aes(ymax = median, ymin = median, linetype = "Median Length of Time Homeless")) +
  labs(title = "Length of Time Homeless By Fiscal Year",
       x = "Fiscal Year",
       y = "Days Homeless",
       col = "Primary Race") +
  theme_light()

ggsave("LOT_homeless x FY .jpg")

Graph by Month

We can also plot Length of Time Homeless by Month First calculate mean and median LOT Homeless for each month, then filter data to make the plot easier to read

f_all <- d_all %>%
  select(client_uid, LOT_h, FY, race_fc, race_n, mean_y_m, end_date_ymd, median_y_m)%>%
  filter(between(LOT_h, 0, 300)) %>%
  na.omit()

ggplot(data = f_all, aes(end_date_ymd, median_y_m, color = median_y_m)) +
  geom_smooth(aes(color=..y..), size=1.5, se=FALSE)  +
  geom_line(size = 0.9)+
  scale_x_date(date_labels = "%y",
               date_breaks = "1 year")+
  scale_colour_gradient2(low = "green4", mid = "goldenrod1" , high = "darkred", 
                         midpoint= 100) +
  labs(title = "Average Length of Time Homeless By Month",
       x = "Month and Year",
       y = "Days Homeless",
       color = "Median 
       Days Homeless") +
  theme_bw()