Load packages, import Excel files generated by ART reports in Service Point (one for each fiscal year). Load excel file of demographics data for all clients, created with report writer Add a column for fiscal year, change variable names to snake_case
library(tidyverse)
library(lubridate)
library(janitor)
library(readxl)
library(rmarkdown)
SPM2_FY15 <- read_excel("data/SPM2_FY15.xls",
sheet = "Tab B - First Exit Detail") %>%
as_tibble("SPM2_FY15.xls") %>%
clean_names() %>%
mutate(FY = 15)
SPM2_FY16 <- read_excel("data/SPM2_FY16.xls",
sheet = "Tab B - First Exit Detail")%>%
as_tibble("SPM2_FY16.xls") %>%
clean_names() %>%
mutate(FY = 16)
SPM2_FY17 <- read_excel("data/SPM2_FY17.xls",
sheet = "Tab B - First Exit Detail")%>%
as_tibble("SPM2_FY17.xls") %>%
clean_names()%>%
mutate(FY = 17)
SPM2_FY18 <- read_excel("data/SPM2_FY18.xls",
sheet = "Tab B - First Exit Detail")%>%
as_tibble("SPM2_FY18.xls") %>%
clean_names()%>%
mutate(FY = 18)
SPM2_FY19 <- read_excel("data/SPM2_FY19.xls",
sheet = "Tab B - First Exit Detail")%>%
as_tibble("SPM2_FY19.xls") %>%
clean_names()%>%
mutate(FY = 19)
SPM2_FY20 <- read_excel("data/SPM2_FY20.xls",
sheet = "Tab B - First Exit Detail")%>%
as_tibble("SPM2_FY20.xls") %>%
clean_names()%>%
mutate(FY = 20)
SPM2_FY21 <- read_excel("data/SPM2_FY21.xls",
sheet = "Tab B - First Exit Detail")%>%
as_tibble("SPM2_FY21.xls") %>%
clean_names()%>%
mutate(FY = 21)
demographics_FY13_FYTD21 <- read_csv("data/demographics_FY13_FYTD21.csv") %>%
as_tibble("SPM2_FY21.xls") %>%
clean_names()
Merge data of SPM2 for all fiscal years into one dataframe Join this data frame with the client demographic information Change variable types, collapse factors, and filter one row per client
d_all_FY <- bind_rows(SPM2_FY15, SPM2_FY16, SPM2_FY17, SPM2_FY18, SPM2_FY19, SPM2_FY20, SPM2_FY21)
d_all_raw <- left_join(d_all_FY, demographics_FY13_FYTD21, by = c("client_uid" = "client_id"))
d_all <- d_all_raw %>%
group_by(client_uid) %>%
arrange(client_uid) %>%
mutate(race_f = factor(primary_race),
proj_type_f = factor(proj_type),
provider_f = factor(provider)) %>%
mutate(race_fc = fct_collapse(race_f,
White = "White (HUD)",
"Black or African American"= "Black or African American (HUD)",
Unknown = c("Client refused (HUD)", "Client doesn't know (HUD)",
"Data not collected (HUD)"),
Asian = "Asian (HUD)",
"Native Hawaiian or Pacific Islander" = "Native Hawaiian or Other Pacific Islander (HUD)",
"American Indian or Alaska Native" = "American Indian or Alaska Native (HUD)")) %>%
mutate(race_3 = fct_collapse(race_f,
White = "WHite (HUD)",
"Black or African American" = "Black or African American (HUD)",
Unknown = c("Client refused (HUD)", "Client doesn't know (HUD)",
"Data not collected (HUD)"),
Other = c( "Native Hawaiian or Other Pacific Islander (HUD)",
"American Indian or Alaska Native (HUD)", "Asian (HUD)"))) %>%
mutate(race_2 = fct_collapse(race_f,
White = "White (HUD)",
"Black or African American" = "Black or African American (HUD)",
"Other or Unknown" = c("Client refused (HUD)", "Client doesn't know (HUD)",
"Data not collected (HUD)", "Native Hawaiian or Other Pacific Islander (HUD)",
"American Indian or Alaska Native (HUD)", "Asian (HUD)"))) %>%
mutate(race_n = as.numeric(race_fc)) %>%
mutate(end_date = as.Date(end_date, format = '%Y/%m/%d')) %>%
mutate(days_to_reappear = replace_na(days_to_reappear, 0)) %>%
mutate(days_dichot = case_when(is.na(days_to_reappear) ~ "No",
days_to_reappear == 0 ~ "No",
days_to_reappear > 0 ~ "Yes")) %>%
mutate(start_date = ymd(start_date),
end_date = ymd(end_date)) %>%
mutate(year = year(end_date),
month = month(end_date)) %>%
mutate(y_m = paste(year, month, sep = "-")) %>%
relocate(client_uid, end_date, days_to_reappear, days_dichot,
race_fc, race_n) %>%
slice(1) %>%
ungroup
To create barplots with categorical variables, we first need to create a table of summary statistics This first Table looks people who returned to homelessness compared to those who did not
library(gt)
sum_days_dichot <- d_all %>%
group_by(days_dichot, race_fc) %>%
select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
na.omit()%>%
summarise(n = n_distinct(client_uid),
min = min(days_to_reappear, na.rm = TRUE),
max = max(days_to_reappear, na.rm = TRUE),
mean = mean(days_to_reappear, na.rm = TRUE),
median = median(days_to_reappear, na.rm = TRUE),
sd = sd(days_to_reappear, na.rm = TRUE),
sem = sd/sqrt(n()),
upper_ci = mean + (1.96 * sem),
lower_ci = mean - (1.96 * sem))
gt_days_dichot <- gt(sum_days_dichot) %>%
tab_header(title = "Days to Return to Homelessness") %>%
fmt_number(columns = 7:10, decimals = 2) %>%
fmt_number(columns = 6, decimals = 2) %>%
cols_width(upper_ci ~ px(100),
days_dichot ~ px(100),
lower_ci ~ px(100),
sd ~ px(120),
n ~ px(50),
mean ~ px(100),
median ~ px(100),
days_dichot ~ px(200)) %>%
cols_align(align = "center") %>%
cols_label(race_fc = "Race",
mean = "Mean",
median = "Median",
min = "Min",
max = "Max",
sd = "S. Dev",
sem = "SEM",
upper_ci = "Upper",
lower_ci = "Lower") %>%
tab_spanner(label = "95% Confidence Intervals",
columns = c(upper_ci, lower_ci))
gt_days_dichot
library(gt)
sum_FY <- d_all %>%
group_by(FY) %>%
select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
na.omit()%>%
summarise(n = n_distinct(client_uid),
min = min(days_to_reappear, na.rm = TRUE),
max = max(days_to_reappear, na.rm = TRUE),
mean = mean(days_to_reappear, na.rm = TRUE),
median = median(days_to_reappear, na.rm = TRUE),
sd = sd(days_to_reappear, na.rm = TRUE),
sem = sd/sqrt(n()),
upper_ci = mean + (1.96 * sem),
lower_ci = mean - (1.96 * sem))
gt_FY <- gt(sum_FY) %>%
tab_header(title = "Days to Return to Homelessness") %>%
fmt_number(columns = 7:10, decimals = 2) %>%
fmt_number(columns = 5, decimals = 2) %>%
cols_width(upper_ci ~ px(100),
lower_ci ~ px(100),
sd ~ px(120),
n ~ px(50),
mean ~ px(100),
median ~ px(100)) %>%
cols_align(align = "center") %>%
cols_label(FY = "FY",
mean = "Mean",
median = "Median",
min = "Min",
max = "Max",
sd = "S. Dev",
sem = "SEM",
upper_ci = "Upper",
lower_ci = "Lower") %>%
tab_spanner(label = "95% Confidence Intervals",
columns = c(upper_ci, lower_ci))
gt_FY
| Days to Return to Homelessness | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| FY | n | Min | Max | Mean | Median | S. Dev | SEM | 95% Confidence Intervals | |
| Upper | Lower | ||||||||
| 15 | 72 | 0 | 764 | 49.56 | 0 | 160.10 | 18.87 | 86.54 | 12.57 |
| 16 | 328 | 0 | 1093 | 75.38 | 0 | 210.91 | 11.65 | 98.21 | 52.56 |
| 17 | 363 | 0 | 1066 | 117.99 | 0 | 231.82 | 12.17 | 141.84 | 94.14 |
| 18 | 570 | 0 | 1008 | 100.48 | 0 | 228.02 | 9.55 | 119.20 | 81.76 |
| 19 | 674 | 0 | 1036 | 94.74 | 0 | 216.68 | 8.35 | 111.10 | 78.38 |
| 20 | 488 | 0 | 1041 | 91.31 | 0 | 206.69 | 9.36 | 109.65 | 72.97 |
sum_race <- d_all %>%
group_by(race_fc) %>%
select(client_uid, FY, race_fc, race_f, month, year, days_dichot, days_to_reappear)%>%
na.omit()%>%
summarise(n = n_distinct(client_uid),
min = min(days_to_reappear, na.rm = TRUE),
max = max(days_to_reappear, na.rm = TRUE),
mean = mean(days_to_reappear, na.rm = TRUE),
median = median(days_to_reappear, na.rm = TRUE),
sd = sd(days_to_reappear, na.rm = TRUE),
sem = sd/sqrt(n()),
upper_ci = mean + (1.96 * sem),
lower_ci = mean - (1.96 * sem))
gt_race <- gt(sum_race) %>%
tab_header(title = "Days to Return to Homelessness") %>%
fmt_number(columns = 7:10, decimals = 2) %>%
fmt_number(columns = 5, decimals = 2) %>%
cols_width(upper_ci ~ px(100),
lower_ci ~ px(100),
sd ~ px(120),
n ~ px(50),
mean ~ px(100),
median ~ px(100)) %>%
cols_align(align = "center") %>%
cols_label(race_fc = "Race",
mean = "Mean",
median = "Median",
min = "Min",
max = "Max",
sd = "S. Dev",
sem = "SEM",
upper_ci = "Upper",
lower_ci = "Lower") %>%
tab_spanner(label = "95% Confidence Intervals",
columns = c(upper_ci, lower_ci))
gt_race
| Days to Return to Homelessness | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| Race | n | Min | Max | Mean | Median | S. Dev | SEM | 95% Confidence Intervals | |
| Upper | Lower | ||||||||
| Black or African American | 1197 | 0 | 1093 | 103.26 | 0 | 231.30 | 6.69 | 116.37 | 90.16 |
| White | 1259 | 0 | 1066 | 85.68 | 0 | 201.88 | 5.69 | 96.84 | 74.53 |
| American Indian or Alaska Native | 17 | 0 | 937 | 290.82 | 162 | 346.78 | 84.11 | 455.67 | 125.98 |
| Asian | 10 | 0 | 382 | 38.20 | 0 | 120.80 | 38.20 | 113.07 | −36.67 |
| Unknown | 9 | 0 | 0 | 0.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
| Native Hawaiian or Pacific Islander | 3 | 0 | 0 | 0.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
This creates a table with 69 rows, which is pretty hard to make sense of
d_month <- d_all %>%
group_by(year, month) %>%
mutate(mean_y_m = mean(days_to_reappear, na.rm = TRUE),
median_y_m = median(days_to_reappear, na.rm = TRUE))
sum_month <- d_month %>%
group_by(year, month) %>%
select(client_uid, days_to_reappear, FY, race_fc, race_f, end_date, month, year, y_m, mean_y_m, median_y_m)%>%
na.omit()%>%
summarise(n = n_distinct(client_uid),
min = min(days_to_reappear, na.rm = TRUE),
max = max(days_to_reappear, na.rm = TRUE),
mean = mean(days_to_reappear, na.rm = TRUE),
median = median(days_to_reappear, na.rm = TRUE),
sd = sd(days_to_reappear, na.rm = TRUE),
sem = sd/sqrt(n()),
upper_ci = mean + (1.96 * sem),
lower_ci = mean - (1.96 * sem))
gt_month <- gt(sum_month) %>%
tab_header(title = "Days to Return to Homelessness by Month") %>%
fmt_number(columns = 7:10, decimals = 2) %>%
fmt_number(columns = 6, decimals = 2) %>%
cols_width(upper_ci ~ px(100),
lower_ci ~ px(100),
sd ~ px(100),
n ~ px(50),
mean ~ px(100),
median ~ px(100),
mean ~ px(100)) %>%
cols_align(align = "center") %>%
cols_label(year = "Year",
mean = "Mean",
median = "Median",
min = "Min",
max = "Max",
sd = "S. Dev",
sem = "SEM",
upper_ci = "Upper",
lower_ci = "Lower") %>%
tab_spanner(label = "95% Confidence Intervals",
columns = c(upper_ci, lower_ci))
gt_month
| Days to Return to Homelessness by Month | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| month | n | Min | Max | Mean | Median | S. Dev | SEM | 95% Confidence Intervals | |
| Upper | Lower | ||||||||
| 2012 | |||||||||
| 12 | 1 | 558 | 558 | 558.00 | 558.00 | NA | NA | NA | NA |
| 2013 | |||||||||
| 2 | 5 | 0 | 764 | 183.60 | 0.00 | 331.24 | 148.13 | 473.94 | -106.7405100 |
| 3 | 7 | 0 | 70 | 10.00 | 0.00 | 26.46 | 10.00 | 29.60 | -9.6000000 |
| 4 | 6 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0000000 |
| 5 | 7 | 0 | 422 | 65.57 | 0.00 | 157.77 | 59.63 | 182.45 | -51.3091487 |
| 6 | 5 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0000000 |
| 7 | 29 | 0 | 469 | 28.34 | 0.00 | 107.12 | 19.89 | 67.33 | -10.6416067 |
| 8 | 11 | 0 | 741 | 67.36 | 0.00 | 223.42 | 67.36 | 199.40 | -64.6690909 |
| 9 | 1 | 0 | 0 | 0.00 | 0.00 | NA | NA | NA | NA |
| 10 | 11 | 0 | 1093 | 326.18 | 0.00 | 469.23 | 141.48 | 603.48 | 48.8851307 |
| 11 | 20 | 0 | 1020 | 207.25 | 0.00 | 348.89 | 78.01 | 360.16 | 54.3408491 |
| 12 | 46 | 0 | 140 | 3.04 | 0.00 | 20.64 | 3.04 | 9.01 | -2.9217391 |
| 2014 | |||||||||
| 1 | 22 | 0 | 727 | 44.68 | 0.00 | 156.20 | 33.30 | 109.95 | -20.5884490 |
| 2 | 19 | 0 | 111 | 5.84 | 0.00 | 25.47 | 5.84 | 17.29 | -5.6084211 |
| 3 | 32 | 0 | 906 | 173.66 | 0.00 | 331.31 | 58.57 | 288.45 | 58.8646521 |
| 4 | 34 | 0 | 166 | 11.44 | 0.00 | 38.49 | 6.60 | 24.38 | -1.4974783 |
| 5 | 18 | 0 | 542 | 88.61 | 2.50 | 172.50 | 40.66 | 168.30 | 8.9203322 |
| 6 | 19 | 0 | 683 | 48.53 | 0.00 | 163.11 | 37.42 | 121.87 | -24.8154329 |
| 7 | 39 | 0 | 687 | 85.59 | 0.00 | 201.02 | 32.19 | 148.68 | 22.4981896 |
| 8 | 37 | 0 | 651 | 48.41 | 0.00 | 148.02 | 24.33 | 96.10 | 0.7089603 |
| 9 | 31 | 0 | 617 | 69.87 | 0.00 | 167.16 | 30.02 | 128.72 | 11.0247087 |
| 10 | 42 | 0 | 1066 | 144.17 | 0.00 | 301.24 | 46.48 | 235.27 | 53.0619540 |
| 11 | 22 | 0 | 786 | 110.18 | 0.00 | 219.26 | 46.75 | 201.81 | 18.5570182 |
| 12 | 28 | 0 | 829 | 171.32 | 0.00 | 241.83 | 45.70 | 260.90 | 81.7469419 |
| 2015 | |||||||||
| 1 | 31 | 0 | 695 | 54.81 | 0.00 | 137.13 | 24.63 | 103.08 | 6.5313726 |
| 2 | 45 | 0 | 924 | 194.40 | 0.00 | 305.06 | 45.47 | 283.53 | 105.2691626 |
| 3 | 25 | 0 | 847 | 132.92 | 0.00 | 249.04 | 49.81 | 230.54 | 35.2961324 |
| 4 | 35 | 0 | 685 | 114.40 | 0.00 | 190.91 | 32.27 | 177.65 | 51.1503292 |
| 5 | 14 | 0 | 679 | 48.50 | 0.00 | 181.47 | 48.50 | 143.56 | -46.5600000 |
| 6 | 40 | 0 | 644 | 54.90 | 0.00 | 133.37 | 21.09 | 96.23 | 13.5685996 |
| 7 | 39 | 0 | 731 | 152.54 | 0.00 | 261.05 | 41.80 | 234.47 | 70.6065698 |
| 8 | 20 | 0 | 712 | 100.20 | 0.00 | 217.93 | 48.73 | 195.71 | 4.6886550 |
| 9 | 22 | 0 | 460 | 43.27 | 0.00 | 106.75 | 22.76 | 87.88 | -1.3359624 |
| 10 | 28 | 0 | 862 | 143.71 | 0.00 | 242.07 | 45.75 | 233.38 | 54.0502796 |
| 11 | 48 | 0 | 1001 | 67.90 | 0.00 | 224.13 | 32.35 | 131.30 | 4.4898908 |
| 12 | 28 | 0 | 1008 | 201.79 | 0.00 | 365.78 | 69.13 | 337.27 | 66.2994409 |
| 2016 | |||||||||
| 1 | 21 | 0 | 992 | 216.29 | 0.00 | 293.67 | 64.08 | 341.89 | 90.6818942 |
| 2 | 61 | 0 | 731 | 48.93 | 0.00 | 156.79 | 20.07 | 88.28 | 9.5874642 |
| 3 | 106 | 0 | 894 | 126.38 | 0.00 | 262.46 | 25.49 | 176.34 | 76.4128425 |
| 4 | 41 | 0 | 706 | 45.41 | 0.00 | 138.23 | 21.59 | 87.73 | 3.1012825 |
| 5 | 36 | 0 | 851 | 63.56 | 0.00 | 176.31 | 29.39 | 121.15 | 5.9607881 |
| 6 | 49 | 0 | 848 | 95.57 | 0.00 | 220.41 | 31.49 | 157.29 | 33.8572276 |
| 7 | 31 | 0 | 818 | 159.42 | 0.00 | 261.44 | 46.96 | 251.45 | 67.3871158 |
| 8 | 73 | 0 | 665 | 73.92 | 0.00 | 175.18 | 20.50 | 114.10 | 33.7312282 |
| 9 | 48 | 0 | 628 | 88.44 | 0.00 | 201.33 | 29.06 | 145.40 | 31.4799178 |
| 10 | 45 | 0 | 1036 | 109.82 | 0.00 | 249.67 | 37.22 | 182.77 | 36.8748741 |
| 11 | 94 | 0 | 1022 | 115.00 | 0.00 | 249.02 | 25.68 | 165.34 | 64.6575722 |
| 12 | 58 | 0 | 1020 | 198.50 | 0.00 | 330.77 | 43.43 | 283.63 | 113.3739420 |
| 2017 | |||||||||
| 1 | 47 | 0 | 944 | 113.23 | 0.00 | 256.51 | 37.42 | 186.57 | 39.9004117 |
| 2 | 33 | 0 | 565 | 85.48 | 0.00 | 168.61 | 29.35 | 143.01 | 27.9550517 |
| 3 | 51 | 0 | 823 | 100.39 | 0.00 | 196.49 | 27.51 | 154.32 | 46.4651267 |
| 4 | 51 | 0 | 795 | 90.59 | 0.00 | 188.51 | 26.40 | 142.33 | 38.8498787 |
| 5 | 33 | 0 | 449 | 61.79 | 0.00 | 132.17 | 23.01 | 106.88 | 16.6909549 |
| 6 | 56 | 0 | 768 | 48.18 | 0.00 | 147.44 | 19.70 | 86.80 | 9.5611609 |
| 7 | 69 | 0 | 732 | 102.91 | 0.00 | 220.07 | 26.49 | 154.84 | 50.9872288 |
| 8 | 53 | 0 | 776 | 70.30 | 0.00 | 175.64 | 24.13 | 117.59 | 23.0153523 |
| 9 | 84 | 0 | 726 | 37.40 | 0.00 | 129.48 | 14.13 | 65.09 | 9.7151916 |
| 10 | 54 | 0 | 1041 | 189.39 | 0.00 | 308.40 | 41.97 | 271.65 | 107.1324333 |
| 11 | 54 | 0 | 902 | 53.96 | 0.00 | 157.35 | 21.41 | 95.93 | 11.9948981 |
| 12 | 43 | 0 | 782 | 79.60 | 0.00 | 202.21 | 30.84 | 140.05 | 19.1633616 |
| 2018 | |||||||||
| 1 | 49 | 0 | 741 | 91.82 | 0.00 | 181.69 | 25.96 | 142.69 | 40.9433022 |
| 2 | 44 | 0 | 430 | 30.50 | 0.00 | 89.40 | 13.48 | 56.91 | 4.0852364 |
| 3 | 27 | 0 | 611 | 69.07 | 0.00 | 169.94 | 32.71 | 133.18 | 4.9713051 |
| 4 | 63 | 0 | 893 | 95.02 | 0.00 | 211.73 | 26.68 | 147.30 | 42.7320010 |
| 5 | 41 | 0 | 687 | 134.76 | 0.00 | 229.78 | 35.89 | 205.09 | 64.4206471 |
| 6 | 25 | 0 | 809 | 82.48 | 0.00 | 224.41 | 44.88 | 170.45 | -5.4902443 |
| 7 | 20 | 0 | 779 | 163.00 | 0.00 | 242.74 | 54.28 | 269.39 | 56.6138128 |
| 8 | 41 | 0 | 759 | 51.98 | 0.00 | 176.13 | 27.51 | 105.89 | -1.9385772 |
| 9 | 27 | 0 | 487 | 49.11 | 0.00 | 142.06 | 27.34 | 102.70 | -4.4741040 |
To get a better picture of what is going on, we can look at a graph. First let’s look at Days to Reappear by Fiscal Year.
f_all <- d_all %>%
filter(between(days_to_reappear, 1, 600))
ggplot(data = sum_FY, aes(FY, mean)) +
geom_col()+
scale_x_discrete(limits=c(15,16,17,18,19,20,21))+
geom_point(data = f_all, aes(FY, days_to_reappear, color = race_2),
position = position_jitter(width = 0.1),
shape = 1,
alpha = .7) +
geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci,
width = 0.2, )) +
labs(title = "Days to Return By Fiscal Year",
x = "Fiscal Year",
y = "Days To Return",
col = "Primary Race") +
theme_light()
This is much easier to read than the table with 69 rows
f_all <- d_month %>%
select(client_uid, days_to_reappear, FY, race_fc, race_n, mean_y_m, median_y_m, y_m, year, month, end_date, year, month)%>%
filter(between(days_to_reappear, 0, 600)) %>%
na.omit()
ggplot(data = f_all, aes(end_date, mean_y_m, color = mean_y_m)) +
geom_smooth(aes(color = ..y..), size=1.5, se=FALSE) +
geom_line(size = 0.9)+
scale_colour_gradient2(low = "green4", mid = "goldenrod1" , high = "darkred",
midpoint = 250) +
labs(title = "Average Length of Time Homeless By Month",
x = "Month and Year",
y = "Days Homeless",
color = "Median
Days Homeless") +
theme_bw()