# Load and clean Data
depression <- read_csv("C:\\Users\\zim13\\Downloads\\NYC EH Data Portal - Depression (adults) (full table).csv") %>%
clean_names()
## Rows: 96 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): TimePeriod, GeoType, Geography, Age-adjusted percent, Percent
## dbl (2): GeoID, GeoRank
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
psychillness <- read_csv("C:\\Users\\zim13\\Downloads\\NYC EH Data Portal - Psychiatric hospitalizations (adults) (full table).csv") %>%
clean_names()
## Rows: 144 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): GeoType, Geography
## dbl (3): TimePeriod, GeoID, GeoRank
## num (3): Age-adjusted rate per 100,000 residents, Number, Rate per 100,000 r...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
seriousdistress <- read_csv("C:\\Users\\zim13\\Downloads\\NYC EH Data Portal - Serious psychological distress (adults) (full table).csv") %>%
clean_names()
## Rows: 192 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): TimePeriod, GeoType, Geography, Age-adjusted percent, Number, Percent
## dbl (2): GeoID, GeoRank
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
evictions <- read_csv("C:\\Users\\zim13\\Downloads\\NYC EH Data Portal - Evictions (court-ordered) (full table).csv") %>%
clean_names()
## Rows: 2122 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): GeoType, Geography, Estimated annual rate per 10,000 homes
## dbl (3): TimePeriod, GeoID, GeoRank
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
crowding <- read_csv("C:\\Users\\zim13\\Downloads\\NYC EH Data Portal - Household crowding (full table).csv") %>%
clean_names()
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
occupiedhomes <- read_csv("C:\\Users\\zim13\\Downloads\\NYC EH Data Portal - Owner-occupied homes (full table).csv") %>%
clean_names()
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
rentburdened <- read_csv("C:\\Users\\zim13\\Downloads\\NYC EH Data Portal - Rent-burdened households (full table).csv") %>%
clean_names()
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
depression <- depression %>%
rename(
depression_percent = percent,
depression_number = number
)
rentburdened <- rentburdened %>%
rename(
rentburdened_percent = percent,
rentburdened_number = number
)
# Filter by year to try to align years as best as possible, Depression only has 2017-18 and 2021-22
RBMatching2017 <- rentburdened %>%
filter(time_period == "2014-18",)
Depression2017 <- depression %>%
filter(time_period == "2017-18",)
# Depression rate was in a weird format and was not numeric
Depression2017 <- Depression2017 %>%
mutate(
depression_percent_clean = as.numeric(str_extract(depression_percent, "^[0-9.]+"))
)
# Left join by neighborhood
depressionxburden2018 <- left_join(Depression2017, RBMatching2017, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
depressionxburden2018UHF42 <- depressionxburden2018 %>%
filter(geo_type == "UHF42")
# Regression Analysis
model1 <- lm(depression_percent_clean ~ rentburdened_percent, data = depressionxburden2018UHF42)
summary(model1)
##
## Call:
## lm(formula = depression_percent_clean ~ rentburdened_percent,
## data = depressionxburden2018UHF42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0315 -2.8398 -0.5448 2.3106 9.0990
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.15338 4.54670 0.254 0.8011
## rentburdened_percent 0.17355 0.08933 1.943 0.0593 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.65 on 39 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.08824, Adjusted R-squared: 0.06486
## F-statistic: 3.775 on 1 and 39 DF, p-value: 0.05928
# Scatterplot
ggplot(depressionxburden2018UHF42, aes(x = rentburdened_percent, y = depression_percent_clean)) +
geom_point(color = "blue") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "red") + # Regression line with confidence interval
labs(
title = "Depression vs Rent Burden",
x = "Number of Rent-Burdened People",
y = "Number of People with Depression"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
# Mapped Data
uhf42shp <- st_read("C:\\Users\\zim13\\Downloads\\UHF_42_DOHMH\\UHF_42_DOHMH.shp")
## Reading layer `UHF_42_DOHMH' from data source
## `C:\Users\zim13\Downloads\UHF_42_DOHMH\UHF_42_DOHMH.shp' using driver `ESRI Shapefile'
## Simple feature collection with 43 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 913090.8 ymin: 120053.5 xmax: 1067310 ymax: 272932
## Projected CRS: NAD83 / New York Long Island (ftUS)
map_data <- left_join(uhf42shp, depressionxburden2018UHF42, by = c("UHF" = "geo_id.x"))
# Depression Map
map1 <- ggplot(map_data) +
geom_sf(aes(fill = depression_percent_clean)) +
scale_fill_viridis_c(name = "Depression") +
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Depression Rates by Area")
# Rent Burdened Map
map2 <- ggplot(map_data) +
geom_sf(aes(fill = rentburdened_percent)) +
scale_fill_viridis_c(name = "Rent Burdened") +
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Rent Burden Rate by Area")
ggarrange(map2, map1, nrow = 1, ncol = 2, align ="h")
crowding2017 <- crowding %>%
filter(time_period == "2014-18",)
depressionxcrowding2018 <- left_join(Depression2017, crowding2017, by = c("geo_type","geography"))
depressionxcrowding2018UHF42 <- depressionxcrowding2018 %>%
filter(geo_type == "UHF42")
model2 <- lm(depression_percent_clean ~ percent, data = depressionxcrowding2018UHF42)
summary(model2)
##
## Call:
## lm(formula = depression_percent_clean ~ percent, data = depressionxcrowding2018UHF42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2403 -3.0550 -0.6823 2.2074 9.1148
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.0999 1.2885 6.286 2.07e-07 ***
## percent 0.2068 0.1310 1.578 0.123
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.706 on 39 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.06005, Adjusted R-squared: 0.03595
## F-statistic: 2.492 on 1 and 39 DF, p-value: 0.1225
ggplot(depressionxcrowding2018UHF42, aes(x = percent, y = depression_percent_clean)) +
geom_point(color = "red") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "blue") + # Regression line with confidence interval
labs(
title = "Depression vs Crowding",
x = "Rate of Crowding",
y = "Rate of Depression"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
map_data2 <- left_join(uhf42shp, depressionxcrowding2018UHF42, by = c("UHF" = "geo_id.x"))
map1.1 <- ggplot(map_data2) +
geom_sf(aes(fill = depression_percent_clean)) +
scale_fill_gradient(name = "Depression", low = "pink", high = "red") +
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Depression Rates by Area")
map3 <- ggplot(map_data2) +
geom_sf(aes(fill = percent)) +
scale_fill_gradient(name = "Crowding", low = "pink", high ="red") +
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Crowding Rates by Area")
ggarrange(map3, map1.1, nrow = 1, ncol = 2, align ="h")
# Eviction rate was not numeric
evictions <- evictions %>%
mutate(
eviction_rate = as.numeric(str_extract(estimated_annual_rate_per_10_000_homes, "\\d+\\.?\\d*"))
)
evictions2018 <- evictions %>%
filter(time_period == "2018",)
depressionxevictions2018 <- left_join(Depression2017, evictions2018, by = c("geo_type","geography"))
depressionxevictions2018UHF42 <- depressionxevictions2018 %>%
filter(geo_type == "UHF42")
model3 <- lm(depression_percent_clean ~ eviction_rate, data = depressionxevictions2018UHF42)
summary(model3)
##
## Call:
## lm(formula = depression_percent_clean ~ eviction_rate, data = depressionxevictions2018UHF42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6615 -2.5718 -0.6506 1.6967 8.5149
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.12265 0.81618 8.727 1.04e-10 ***
## eviction_rate 0.04794 0.01117 4.291 0.000114 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.15 on 39 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3207, Adjusted R-squared: 0.3033
## F-statistic: 18.41 on 1 and 39 DF, p-value: 0.0001136
ggplot(depressionxevictions2018UHF42, aes(x = eviction_rate, y = depression_percent_clean)) +
geom_point(color = "black") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "yellow") + # Regression line with confidence interval
labs(
title = "Depression vs Evictions",
x = "Rate of Evictions",
y = "Rate of Depression"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
map_data3 <- left_join(uhf42shp, depressionxevictions2018UHF42, by = c("UHF" = "geo_id.x"))
map1.2 <- ggplot(map_data3) +
geom_sf(aes(fill = depression_percent_clean)) +
scale_fill_gradient(name = "Depression", low = "lightyellow", high = "yellow") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Depression Rates by Area")
map4 <- ggplot(map_data3) +
geom_sf(aes(fill = eviction_rate)) +
scale_fill_gradient(name = "Evictions", low = "lightyellow", high ="yellow") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Eviction Rates by Area")
ggarrange(map4, map1.2, nrow = 1, ncol = 2, align ="h")
occupiedhomes2018 <- occupiedhomes %>%
filter(time_period == "2014-18", geo_type == "UHF42")
depressionxoccupiedhomes2018 <- left_join(Depression2017, occupiedhomes2018, by = c("geo_type","geography"))
depressionxoccupiedhomes2018 <- depressionxoccupiedhomes2018 %>%
filter(geo_type == "UHF42")
model4 <- lm(depression_percent_clean ~ percent, data = depressionxoccupiedhomes2018)
summary(model4)
##
## Call:
## lm(formula = depression_percent_clean ~ percent, data = depressionxoccupiedhomes2018)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6796 -2.2080 -0.0741 1.6938 7.1311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.0117 0.9429 14.860 < 2e-16 ***
## percent -0.1179 0.0236 -4.996 1.27e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.985 on 39 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3902, Adjusted R-squared: 0.3746
## F-statistic: 24.96 on 1 and 39 DF, p-value: 1.269e-05
ggplot(depressionxoccupiedhomes2018, aes(x = percent, y = depression_percent_clean)) +
geom_point(color = "purple") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "green") + # Regression line with confidence interval
labs(
title = "Depression vs Home Ownership",
x = "Number of Homes Owned",
y = "Number of People with Depression"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
map_data4 <- left_join(uhf42shp, depressionxoccupiedhomes2018, by = c("UHF" = "geo_id.x"))
map1.3 <- ggplot(map_data4) +
geom_sf(aes(fill = depression_percent_clean)) +
scale_fill_gradient(name = "Depression", low = "lightgreen", high = "darkgreen") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Depression Rates by Area")
map5 <- ggplot(map_data4) +
geom_sf(aes(fill = percent)) +
scale_fill_gradient(name = "Occpied by Owners", low = "lightgreen", high ="darkgreen") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Rates of Living in Owned Homes")
ggarrange(map5, map1.3, nrow = 1, ncol = 2, align ="h")
# Combined dataset
combined <- bind_rows(
depressionxevictions2018 %>%
select(depression_percent_clean, percent = eviction_rate) %>%
mutate(type = "Evictions"),
depressionxcrowding2018UHF42 %>%
select(depression_percent_clean, percent = percent) %>%
mutate(type = "Crowding"),
depressionxburden2018UHF42 %>%
select(depression_percent_clean, percent = rentburdened_percent) %>%
mutate(type = "Rent Burden"),
depressionxoccupiedhomes2018 %>%
select(depression_percent_clean, percent) %>%
mutate(type = "Home Ownership")
)
ggplot(combined, aes(x = percent, y = depression_percent_clean, color = type)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Predictor Percent",
y = "Depression Percent",
title = "Regression Lines of Depression % by Housing Factors"
) +
theme_minimal() +
theme(legend.title = element_blank())
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
bigplot1 <- ggplot(combined, aes(x = percent, y = depression_percent_clean, color = type)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Predictor Percent",
y = "Depression Percent",
title = "Regression Lines of Depression % by Housing Factors"
) +
theme_minimal() +
theme(legend.title = element_blank())
combined2 <- bind_rows(
depressionxcrowding2018UHF42 %>%
select(depression_percent_clean, percent = percent) %>%
mutate(type = "Crowding"),
depressionxburden2018UHF42 %>%
select(depression_percent_clean, percent = rentburdened_percent) %>%
mutate(type = "Rent Burden"),
depressionxoccupiedhomes2018 %>%
select(depression_percent_clean, percent) %>%
mutate(type = "Owner Occupied Homes")
)
ggplot(combined2, aes(x = percent, y = depression_percent_clean, color = type)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Predictor Percent",
y = "Depression Percent",
title = "Regression Lines of Depression % by Housing Factors W/O Evictions"
) +
theme_minimal() +
theme(legend.title = element_blank())
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
### Owner occupied homes makes the other factors hard to see
combined3 <- bind_rows(
depressionxcrowding2018UHF42 %>%
select(depression_percent_clean, percent = percent) %>%
mutate(type = "Crowding"),
depressionxburden2018UHF42 %>%
select(depression_percent_clean, percent = rentburdened_percent) %>%
mutate(type = "Rent Burden"),
)
ggplot(combined3, aes(x = percent, y = depression_percent_clean, color = type)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Predictor Percent",
y = "Depression Percent",
title = "Regression Lines of Depression % by Housing Factors W/O Evictions/Owner Occupied Homes"
) +
theme_minimal() +
theme(legend.title = element_blank())
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
depression_df <- depressionxevictions2018UHF42
crowding_df <- depressionxcrowding2018UHF42
burden_df <- depressionxburden2018UHF42
occupied_df <- depressionxoccupiedhomes2018
names(crowding_df)[names(crowding_df) == 'percent'] <- 'crowding_percent'
names(occupied_df)[names(occupied_df) == 'percent'] <- 'occupied_percent'
# Join them step by step (assumes same key column, e.g., "UHF_code")
combined_df <- depression_df %>%
select(geo_id.x, depression_percent_clean) %>%
left_join(depression_df %>% select(geo_id.x, eviction_rate), by ="geo_id.x") %>%
left_join(crowding_df %>% select(geo_id.x, crowding_percent), by = "geo_id.x") %>%
left_join(burden_df %>% select(geo_id.x, rentburdened_percent), by = "geo_id.x") %>%
left_join(occupied_df %>% select(geo_id.x, occupied_percent), by = "geo_id.x")
model <- lm(depression_percent_clean ~ occupied_percent + rentburdened_percent + crowding_percent + eviction_rate, data = combined_df)
summary(model)
##
## Call:
## lm(formula = depression_percent_clean ~ occupied_percent + rentburdened_percent +
## crowding_percent + eviction_rate, data = combined_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8779 -1.4755 -0.6063 1.6966 5.3983
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.38097 4.94700 1.492 0.144409
## occupied_percent -0.11622 0.02845 -4.086 0.000235 ***
## rentburdened_percent 0.15929 0.13522 1.178 0.246533
## crowding_percent -0.34450 0.17652 -1.952 0.058797 .
## eviction_rate 0.02667 0.01457 1.831 0.075406 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.686 on 36 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.4934
## F-statistic: 10.74 on 4 and 36 DF, p-value: 7.826e-06
combined_df1 <- depression_df %>%
select(geo_id.x, depression_percent_clean) %>%
left_join(depression_df %>% select(geo_id.x, eviction_rate), by ="geo_id.x") %>%
left_join(crowding_df %>% select(geo_id.x, crowding_percent), by = "geo_id.x") %>%
left_join(occupied_df %>% select(geo_id.x, occupied_percent), by = "geo_id.x")
model_1 <- lm(depression_percent_clean ~ occupied_percent + crowding_percent + eviction_rate, data = combined_df)
summary(model_1)
##
## Call:
## lm(formula = depression_percent_clean ~ occupied_percent + crowding_percent +
## eviction_rate, data = combined_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9195 -1.6084 -0.5021 1.7613 6.3610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.86863 1.67326 7.691 3.48e-09 ***
## occupied_percent -0.10018 0.02511 -3.990 0.00030 ***
## crowding_percent -0.18721 0.11606 -1.613 0.11523
## eviction_rate 0.03726 0.01153 3.232 0.00258 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.7 on 37 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5265, Adjusted R-squared: 0.4881
## F-statistic: 13.71 on 3 and 37 DF, p-value: 3.621e-06
psychillness <- psychillness %>%
rename(
psychillness_rateper100k = rate_per_100_000_residents,
psychillness_number = number
)
# Filter by year to try to align years as best as possible, Depression only has 2017-18 and 2021-22
rentburndened2019 <- rentburdened %>%
filter(time_period == "2015-19",)
psychillness2019 <- psychillness %>%
filter(time_period == "2019",)
# Left join by neighborhood
illnessxburden <- left_join(psychillness2019, rentburndened2019, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
illnessxburdenufh42 <- illnessxburden %>%
filter(geo_type == "UHF42")
# Regression Analysis
model5 <- lm(psychillness_rateper100k ~ rentburdened_percent, data = illnessxburdenufh42)
summary(model5)
##
## Call:
## lm(formula = psychillness_rateper100k ~ rentburdened_percent,
## data = illnessxburdenufh42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -373.5 -187.5 -34.3 135.8 1121.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -178.669 318.164 -0.562 0.5775
## rentburdened_percent 15.943 6.415 2.485 0.0172 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 283.8 on 40 degrees of freedom
## Multiple R-squared: 0.1338, Adjusted R-squared: 0.1121
## F-statistic: 6.178 on 1 and 40 DF, p-value: 0.01722
ggplot(illnessxburdenufh42, aes(x = rentburdened_percent, y = psychillness_rateper100k)) +
geom_point(color = "blue") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "red") + # Regression line with confidence interval
labs(
title = "Psych Hospitalizations vs Rent Burden",
x = "Number of Rent-Burdened People",
y = "Number of Hospitalized People"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
map_data5 <- left_join(uhf42shp, illnessxburdenufh42, by = c("UHF" = "geo_id.x"))
map1.4 <- ggplot(map_data5) +
geom_sf(aes(fill = psychillness_rateper100k)) +
scale_fill_gradient(name = "Hospitalization", low = "violet", high = "purple") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Hopitalization Rates by Area")
map6 <- ggplot(map_data5) +
geom_sf(aes(fill = rentburdened_percent)) +
scale_fill_gradient(name = "Rent Burden", low = "violet", high ="purple") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Rent Burden Rate by Area")
ggarrange(map6, map1.4, nrow = 1, ncol = 2, align ="h")
# Filter by year to try to align years as best as possible, Depression only has 2017-18 and 2021-22
crowding2019 <- crowding %>%
filter(time_period == "2015-19",)
crowding2019uhf <- crowding2019 %>%
filter(geo_type == "UHF42" )
# Left join by neighborhood
illnessxcrowding <- left_join(psychillness2019, crowding2019, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
illnessxcrowdingufh42 <- illnessxcrowding %>%
filter(geo_type == "UHF42")
# Regression Analysis
model6 <- lm(psychillness_rateper100k ~ percent, data = illnessxcrowdingufh42)
summary(model6)
##
## Call:
## lm(formula = psychillness_rateper100k ~ percent, data = illnessxcrowdingufh42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -350.27 -231.62 -70.95 157.24 1126.43
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 480.28 103.29 4.650 3.59e-05 ***
## percent 14.17 10.54 1.344 0.186
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 298.2 on 40 degrees of freedom
## Multiple R-squared: 0.04322, Adjusted R-squared: 0.01931
## F-statistic: 1.807 on 1 and 40 DF, p-value: 0.1864
ggplot(illnessxcrowdingufh42, aes(x = percent, y = psychillness_rateper100k)) +
geom_point(color = "blue") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "purple") + # Regression line with confidence interval
labs(
title = "Psych Hospitalizations vs Crowding",
x = "Percent of Crowded Homes",
y = "Number of Hospitalized People"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
map_data6 <- left_join(uhf42shp, illnessxcrowdingufh42, by = c("UHF" = "geo_id.x"))
map1.5 <- ggplot(map_data6) +
geom_sf(aes(fill = psychillness_rateper100k)) +
scale_fill_gradient(name = "Hospitalization", low = "pink", high = "purple") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Hopitalization Rates by Area")
map7 <- ggplot(map_data6) +
geom_sf(aes(fill = percent)) +
scale_fill_gradient(name = "Crowding", low = "pink", high ="purple") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Crowding Rate by Area")
ggarrange(map7, map1.5, nrow = 1, ncol = 2, align ="h")
# Filter by year to try to align years as best as possible, Depression only has 2017-18 and 2021-22
evictions2019 <- evictions %>%
filter(time_period == "2019",)
# Left join by neighborhood
illnessxevictions <- left_join(psychillness2019, evictions2019, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
illnessxevictionsufh42 <- illnessxevictions %>%
filter(geo_type == "UHF42")
# Regression Analysis
model7 <- lm(psychillness_rateper100k ~ eviction_rate, data = illnessxevictionsufh42)
summary(model7)
##
## Call:
## lm(formula = psychillness_rateper100k ~ eviction_rate, data = illnessxevictionsufh42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -288.12 -122.22 -72.23 65.42 1093.55
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 347.2102 58.5101 5.934 5.85e-07 ***
## eviction_rate 5.2760 0.9545 5.527 2.18e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 229.6 on 40 degrees of freedom
## Multiple R-squared: 0.433, Adjusted R-squared: 0.4189
## F-statistic: 30.55 on 1 and 40 DF, p-value: 2.18e-06
ggplot(illnessxevictionsufh42, aes(x = eviction_rate, y = psychillness_rateper100k)) +
geom_point(color = "darkblue") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "black") + # Regression line with confidence interval
labs(
title = "Psych Hospitalizations vs Evictions",
x = "Rate of Eviction of Area",
y = "Rate of Hospitalizations"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
map_data7 <- left_join(uhf42shp, illnessxevictionsufh42, by = c("UHF" = "geo_id.x"))
map1.6 <- ggplot(map_data7) +
geom_sf(aes(fill = psychillness_rateper100k)) +
scale_fill_gradient(name = "Hospitalization", low = "lightblue", high = "darkblue") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Hopitalization Rates by Area")
map8 <- ggplot(map_data7) +
geom_sf(aes(fill = eviction_rate)) +
scale_fill_gradient(name = "Evictions", low = "lightblue", high ="darkblue") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Eviction Rate by Area")
ggarrange(map8, map1.6, nrow = 1, ncol = 2, align ="h")
occupiedhomes2019 <- occupiedhomes %>%
filter(time_period == "2015-19",)
# Left join by neighborhood
illnessxoccupiedhomes <- left_join(psychillness2019, occupiedhomes2019, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
illnessxoccupiedhomesufh42 <- illnessxoccupiedhomes %>%
filter(geo_type == "UHF42")
# Regression Analysis
model8 <- lm(psychillness_rateper100k ~ percent, data = illnessxoccupiedhomesufh42)
summary(model8)
##
## Call:
## lm(formula = psychillness_rateper100k ~ percent, data = illnessxoccupiedhomesufh42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -421.80 -185.84 -31.08 148.64 915.05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 848.817 85.053 9.980 2.05e-12 ***
## percent -7.065 2.144 -3.295 0.00207 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 270.4 on 40 degrees of freedom
## Multiple R-squared: 0.2135, Adjusted R-squared: 0.1938
## F-statistic: 10.86 on 1 and 40 DF, p-value: 0.002066
ggplot(illnessxoccupiedhomesufh42, aes(x = percent, y = psychillness_rateper100k)) +
geom_point(color = "brown") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "black") + # Regression line with confidence interval
labs(
title = "Psych Hospitalizations vs Home Ownership",
x = "Rate of Home Ownership of Area",
y = "Rate of Hospitalizations"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
map_data8 <- left_join(uhf42shp, illnessxoccupiedhomesufh42, by = c("UHF" = "geo_id.x"))
map1.7 <- ggplot(map_data8) +
geom_sf(aes(fill = psychillness_rateper100k)) +
scale_fill_gradient(name = "Hospitalization", low = "lightyellow", high = "orange") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Hopitalization Rates by Area")
map9 <- ggplot(map_data8) +
geom_sf(aes(fill = percent)) +
scale_fill_gradient(name = "Owner Occupied Homes", low = "lightyellow", high ="orange") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Rate of Owner Occupied Homes by Area")
ggarrange(map9, map1.7, nrow = 1, ncol = 2, align ="h")
# Combined dataset
combined4 <- bind_rows(
illnessxevictionsufh42 %>%
select(psychillness_rateper100k, percent = eviction_rate) %>%
mutate(type = "Evictions"),
illnessxcrowdingufh42 %>%
select(psychillness_rateper100k, percent = percent) %>%
mutate(type = "Crowding"),
illnessxburdenufh42 %>%
select(psychillness_rateper100k, percent = rentburdened_percent) %>%
mutate(type = "Rent Burden"),
illnessxoccupiedhomesufh42 %>%
select(psychillness_rateper100k, percent) %>%
mutate(type = "Home Ownership")
)
ggplot(combined4, aes(x = percent, y = psychillness_rateper100k, color = type)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Predictor Percent",
y = "Hospitalization Percent",
title = "Regression Lines of Hospitalizations % by Housing Factors"
) +
theme_minimal() +
theme(legend.title = element_blank())
## `geom_smooth()` using formula = 'y ~ x'
bigplot2 <- ggplot(combined4, aes(x = percent, y = psychillness_rateper100k, color = type)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Predictor Percent",
y = "Hospitalization Percent",
title = "Regression Lines of Hospitalization % by Housing Factors"
) +
theme_minimal() +
theme(legend.title = element_blank())
illness_df2 <- illnessxevictionsufh42
crowding_df2 <- illnessxcrowdingufh42
burden_df2 <- illnessxburdenufh42
occupied_df2 <- illnessxoccupiedhomesufh42
names(crowding_df2)[names(crowding_df2) == 'percent'] <- 'crowding_percent'
names(occupied_df2)[names(occupied_df2) == 'percent'] <- 'occupied_percent'
# Join them step by step (assumes same key column, e.g., "UHF_code")
combined_df2 <- illness_df2 %>%
select(geo_id.x, psychillness_rateper100k) %>%
left_join(illness_df2 %>% select(geo_id.x, eviction_rate), by ="geo_id.x") %>%
left_join(crowding_df2 %>% select(geo_id.x, crowding_percent), by = "geo_id.x") %>%
left_join(burden_df2 %>% select(geo_id.x, rentburdened_percent), by = "geo_id.x") %>%
left_join(occupied_df2 %>% select(geo_id.x, occupied_percent), by = "geo_id.x")
model_2 <- lm(psychillness_rateper100k ~ occupied_percent + rentburdened_percent + crowding_percent + eviction_rate, data = combined_df2)
summary(model_2)
##
## Call:
## lm(formula = psychillness_rateper100k ~ occupied_percent + rentburdened_percent +
## crowding_percent + eviction_rate, data = combined_df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -249.10 -131.00 -31.56 39.59 934.43
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 480.449 340.891 1.409 0.16707
## occupied_percent -5.097 2.263 -2.252 0.03034 *
## rentburdened_percent 5.144 9.583 0.537 0.59467
## crowding_percent -20.247 13.087 -1.547 0.13035
## eviction_rate 4.617 1.407 3.281 0.00226 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 220.9 on 37 degrees of freedom
## Multiple R-squared: 0.5144, Adjusted R-squared: 0.4619
## F-statistic: 9.798 on 4 and 37 DF, p-value: 1.654e-05
check_collinearity(model_2)
## # Check for Multicollinearity
##
## Low Correlation
##
## Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
## occupied_percent 1.67 [1.28, 2.61] 1.29 0.60 [0.38, 0.78]
## rentburdened_percent 3.68 [2.48, 5.86] 1.92 0.27 [0.17, 0.40]
## crowding_percent 2.81 [1.95, 4.43] 1.68 0.36 [0.23, 0.51]
## eviction_rate 2.35 [1.68, 3.68] 1.53 0.43 [0.27, 0.60]
combined_df2.1 <- illness_df2 %>%
select(geo_id.x, psychillness_rateper100k) %>%
left_join(illness_df2 %>% select(geo_id.x, eviction_rate), by ="geo_id.x") %>%
left_join(crowding_df2 %>% select(geo_id.x, crowding_percent), by = "geo_id.x") %>%
left_join(occupied_df2 %>% select(geo_id.x, occupied_percent), by = "geo_id.x")
model_2.1 <- lm(psychillness_rateper100k ~ occupied_percent + crowding_percent + eviction_rate, data = combined_df2.1)
summary(model_2.1)
##
## Call:
## lm(formula = psychillness_rateper100k ~ occupied_percent + crowding_percent +
## eviction_rate, data = combined_df2.1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -251.30 -131.92 -25.92 40.61 949.28
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 648.192 134.859 4.806 2.43e-05 ***
## occupied_percent -4.577 2.026 -2.259 0.0297 *
## crowding_percent -15.327 9.254 -1.656 0.1059
## eviction_rate 5.105 1.063 4.803 2.46e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 218.8 on 38 degrees of freedom
## Multiple R-squared: 0.5106, Adjusted R-squared: 0.472
## F-statistic: 13.22 on 3 and 38 DF, p-value: 4.657e-06
seriousdistress <- seriousdistress %>%
rename(
distress_percent = percent,
distress_number = number
)
seriousdistress <- seriousdistress %>%
mutate(
distress_percent_clean = as.numeric(str_extract(distress_percent, "^[0-9.]+"))
)
# Filter by year to try to align years as best as possible, Depression only has 2017-18 and 2021-22
rentburndened2020 <- rentburdened %>%
filter(time_period == "2016-20",)
seriousdistress2020 <- seriousdistress %>%
filter(time_period == "2019-20",)
# Left join by neighborhood
distressxburden <- left_join(seriousdistress2020, rentburndened2020, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
distressxburdenufh42 <- distressxburden %>%
filter(geo_type == "UHF42")
# Regression Analysis
model9 <- lm(distress_percent_clean ~ rentburdened_percent, data = distressxburdenufh42)
summary(model9)
##
## Call:
## lm(formula = distress_percent_clean ~ rentburdened_percent, data = distressxburdenufh42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.3548 -1.2705 -0.1239 1.4838 3.7973
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.85934 2.58876 -0.332 0.7418
## rentburdened_percent 0.13466 0.05207 2.586 0.0137 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.062 on 38 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1497, Adjusted R-squared: 0.1273
## F-statistic: 6.689 on 1 and 38 DF, p-value: 0.01366
ggplot(distressxburdenufh42, aes(x = rentburdened_percent, y = distress_percent_clean)) +
geom_point(color = "orange") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "darkgreen") + # Regression line with confidence interval
labs(
title = "Serious Psychological Distress vs Rent Burden",
x = "Rate of Rent Burden by Area",
y = "Rate of Serious Psychological Distress"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
map_data9 <- left_join(uhf42shp, distressxburdenufh42, by = c("UHF" = "geo_id.x"))
map1.8 <- ggplot(map_data9) +
geom_sf(aes(fill = distress_percent_clean)) +
scale_fill_gradient(name = "Serious Psychological Distress", low = "pink", high = "red") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Serious Psychological Distress")
map10 <- ggplot(map_data9) +
geom_sf(aes(fill = rentburdened_percent)) +
scale_fill_gradient(name = "Rate of Rent Burden", low = "pink", high ="red") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Rate of Rent Burden by Area")
ggarrange(map10, map1.8, nrow = 1, ncol = 2, align ="h")
crowding2020 <- crowding %>%
filter(time_period == "2016-20",)
distressxcrowding <- left_join(seriousdistress2020, crowding2020, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
distressxcrowdingufh42 <- distressxcrowding %>%
filter(geo_type == "UHF42")
# Regression Analysis
model10 <- lm(distress_percent_clean ~ percent, data = distressxcrowdingufh42)
summary(model10)
##
## Call:
## lm(formula = distress_percent_clean ~ percent, data = distressxcrowdingufh42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.555 -1.433 -0.196 1.708 4.368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.94279 0.78896 6.265 2.47e-07 ***
## percent 0.09507 0.08021 1.185 0.243
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.195 on 38 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.03565, Adjusted R-squared: 0.01027
## F-statistic: 1.405 on 1 and 38 DF, p-value: 0.2433
ggplot(distressxcrowdingufh42, aes(x = percent, y = distress_percent_clean)) +
geom_point(color = "orange") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "darkgreen") + # Regression line with confidence interval
labs(
title = "Serious Psychological Distress vs Rent Burden",
x = "Rate of Crowding by Area",
y = "Rate of Serious Psychological Distress"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
map_data10 <- left_join(uhf42shp, distressxcrowdingufh42, by = c("UHF" = "geo_id.x"))
map1.9 <- ggplot(map_data10) +
geom_sf(aes(fill = distress_percent_clean)) +
scale_fill_gradient(name = "Serious Psychological Distress", low = "lightgreen", high = "darkgreen") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Serious Psychological Distress")
map11 <- ggplot(map_data10) +
geom_sf(aes(fill = percent)) +
scale_fill_gradient(name = "Rate of Crowding", low = "lightgreen", high ="darkgreen") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Rate of Crowding by Area")
ggarrange(map11, map1.9, nrow = 1, ncol = 2, align ="h")
evictions2020 <- evictions %>%
filter(time_period == "2020",)
distressxevictions <- left_join(seriousdistress2020, evictions2020, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
distressxevictionsufh42 <- distressxevictions %>%
filter(geo_type == "UHF42")
# Regression Analysis
model11 <- lm(distress_percent_clean ~ eviction_rate, data = distressxevictionsufh42)
summary(model11)
##
## Call:
## lm(formula = distress_percent_clean ~ eviction_rate, data = distressxevictionsufh42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.386 -1.538 -0.164 1.304 3.854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.38828 0.53529 8.198 6.28e-10 ***
## eviction_rate 0.15635 0.04866 3.213 0.00268 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.982 on 38 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.2136, Adjusted R-squared: 0.1929
## F-statistic: 10.32 on 1 and 38 DF, p-value: 0.002676
ggplot(distressxevictionsufh42, aes(x = eviction_rate, y = distress_percent_clean)) +
geom_point(color = "blue") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "purple") + # Regression line with confidence interval
labs(
title = "Serious Psychological Distress vs Evictions",
x = "Rate of Evictions by Area",
y = "Rate of Serious Psychological Distress"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
map_data11 <- left_join(uhf42shp, distressxevictionsufh42, by = c("UHF" = "geo_id.x"))
map1.91 <- ggplot(map_data11) +
geom_sf(aes(fill = distress_percent_clean)) +
scale_fill_gradient(name = "Serious Psychological Distress", low = "lightblue", high = "darkblue") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Serious Psychological Distress")
map12 <- ggplot(map_data11) +
geom_sf(aes(fill = eviction_rate)) +
scale_fill_gradient(name = "Rate of Evictions", low = "lightblue", high ="darkblue") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Rate of Evictions by Area")
ggarrange(map12, map1.91, nrow = 1, ncol = 2, align ="h")
occupiedhomes2020 <- occupiedhomes %>%
filter(time_period == "2016-20",)
distressxoccupiedhomes <- left_join(seriousdistress2020, occupiedhomes2020, by = c("geo_type","geography"))
# Depression data was mostly in UHF42, so we use that
distressxoccupiedhomesufh42 <- distressxoccupiedhomes %>%
filter(geo_type == "UHF42")
# Regression Analysis
model12 <- lm(distress_percent_clean ~ percent, data = distressxoccupiedhomesufh42)
summary(model12)
##
## Call:
## lm(formula = distress_percent_clean ~ percent, data = distressxoccupiedhomesufh42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.782 -1.633 0.019 1.687 4.310
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.101682 0.722905 8.441 3.04e-10 ***
## percent -0.009376 0.018543 -0.506 0.616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.228 on 38 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.006683, Adjusted R-squared: -0.01946
## F-statistic: 0.2557 on 1 and 38 DF, p-value: 0.616
ggplot(distressxoccupiedhomesufh42, aes(x = percent, y = distress_percent_clean)) +
geom_point(color = "blue") + # Scatterplot points
geom_smooth(method = "lm", se = TRUE, color = "purple") + # Regression line with confidence interval
labs(
title = "Serious Psychological Distress vs Owner Occupied Homes",
x = "Rate of Owner Occupied Homes by Area",
y = "Rate of Serious Psychological Distress"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
map_data12 <- left_join(uhf42shp, distressxoccupiedhomesufh42, by = c("UHF" = "geo_id.x"))
map1.92 <- ggplot(map_data12) +
geom_sf(aes(fill = distress_percent_clean)) +
scale_fill_gradient(name = "Serious Psychological Distress", low = "lightyellow", high = "orange") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Serious Psychological Distress")
map13 <- ggplot(map_data12) +
geom_sf(aes(fill = percent)) +
scale_fill_gradient(name = "Rate of Owner Occupied Homes", low = "lightyellow", high ="orange") + # Customize the color scale
theme_minimal() +
theme(legend.position = "bottom") +
labs(title = "Rate of Owner Occupied Homes by Area")
ggarrange(map13, map1.92, nrow = 1, ncol = 2, align ="h")
# Combined dataset
combined5 <- bind_rows(
distressxevictionsufh42 %>%
select(distress_percent_clean, percent = eviction_rate) %>%
mutate(type = "Evictions"),
distressxcrowdingufh42 %>%
select(distress_percent_clean, percent = percent) %>%
mutate(type = "Crowding"),
distressxburdenufh42 %>%
select(distress_percent_clean, percent = rentburdened_percent) %>%
mutate(type = "Rent Burden"),
distressxoccupiedhomesufh42 %>%
select(distress_percent_clean, percent) %>%
mutate(type = "Home Ownership")
)
ggplot(combined5, aes(x = percent, y = distress_percent_clean, color = type)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Predictor Percent",
y = "Psychiatric Distress Percent",
title = "Regression Lines of Psychiatric Distress % by Housing Factors"
) +
theme_minimal() +
theme(legend.title = element_blank())
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 8 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_point()`).
bigplot3 <- ggplot(combined5, aes(x = percent, y = distress_percent_clean, color = type)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE) +
labs(
x = "Predictor Percent",
y = "Psychiatric Distress Percent" ) +
theme_minimal() +
theme(legend.title = element_blank())
distress_df3 <- distressxevictionsufh42
crowding_df3 <- distressxcrowdingufh42
burden_df3 <- distressxburdenufh42
occupied_df3 <- distressxoccupiedhomesufh42
names(crowding_df3)[names(crowding_df3) == 'percent'] <- 'crowding_percent'
names(occupied_df3)[names(occupied_df3) == 'percent'] <- 'occupied_percent'
# Join them step by step (assumes same key column, e.g., "UHF_code")
combined_df3 <- distress_df3 %>%
select(geo_id.x, distress_percent_clean) %>%
left_join(distress_df3 %>% select(geo_id.x, eviction_rate), by ="geo_id.x") %>%
left_join(crowding_df3 %>% select(geo_id.x, crowding_percent), by = "geo_id.x") %>%
left_join(burden_df3 %>% select(geo_id.x, rentburdened_percent), by = "geo_id.x") %>%
left_join(occupied_df3 %>% select(geo_id.x, occupied_percent), by = "geo_id.x")
model_3 <- lm(distress_percent_clean ~ occupied_percent + rentburdened_percent + crowding_percent + eviction_rate, data = combined_df3)
summary(model_3)
##
## Call:
## lm(formula = distress_percent_clean ~ occupied_percent + rentburdened_percent +
## crowding_percent + eviction_rate, data = combined_df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6833 -1.4285 -0.0899 1.4064 3.6992
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.806346 3.691866 0.218 0.828
## occupied_percent 0.004976 0.021930 0.227 0.822
## rentburdened_percent 0.088300 0.103752 0.851 0.401
## crowding_percent -0.079201 0.130455 -0.607 0.548
## eviction_rate 0.129090 0.076465 1.688 0.100
##
## Residual standard error: 2.029 on 35 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.2413, Adjusted R-squared: 0.1546
## F-statistic: 2.783 on 4 and 35 DF, p-value: 0.04161
check_collinearity(model_2)
## # Check for Multicollinearity
##
## Low Correlation
##
## Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
## occupied_percent 1.67 [1.28, 2.61] 1.29 0.60 [0.38, 0.78]
## rentburdened_percent 3.68 [2.48, 5.86] 1.92 0.27 [0.17, 0.40]
## crowding_percent 2.81 [1.95, 4.43] 1.68 0.36 [0.23, 0.51]
## eviction_rate 2.35 [1.68, 3.68] 1.53 0.43 [0.27, 0.60]
model_3.1 <- lm(distress_percent_clean ~ occupied_percent + crowding_percent + eviction_rate, data = combined_df3)
summary(model_3.1)
##
## Call:
## lm(formula = distress_percent_clean ~ occupied_percent + crowding_percent +
## eviction_rate, data = combined_df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4096 -1.4082 -0.1602 1.2577 4.0171
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.758088 1.260333 2.982 0.00511 **
## occupied_percent 0.013816 0.019240 0.718 0.47731
## crowding_percent 0.002753 0.087670 0.031 0.97512
## eviction_rate 0.171544 0.057730 2.972 0.00525 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.021 on 36 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.2256, Adjusted R-squared: 0.1611
## F-statistic: 3.496 on 3 and 36 DF, p-value: 0.02524
require(leaflet)
## Loading required package: leaflet
## Warning: package 'leaflet' was built under R version 4.4.3
library(RColorBrewer)
depressionxhousingsf <- uhf42shp %>%
left_join(combined_df, by = c("UHFCODE" = "geo_id.x"))
depressionxhousingsf <- st_as_sf(depressionxhousingsf)
depressionxhousingsf <- st_transform(depressionxhousingsf, crs = 4326)
# Create color palettes
pal_dep <- colorNumeric("Blues", domain = depressionxhousingsf$depression_percent_clean)
pal_rent <- colorNumeric("Reds", domain = depressionxhousingsf$rentburdened_percent)
pal_crowd <- colorNumeric("Greens", domain = depressionxhousingsf$crowding_percent)
pal_evict <- colorNumeric("Oranges", domain = depressionxhousingsf$eviction_rate)
pal_own <- colorNumeric("Purples", domain = depressionxhousingsf$occupied_percent)
# Build leaflet map
leaflet(depressionxhousingsf) %>%
addTiles() %>%
# Depression layer
addPolygons(
fillColor = ~pal_dep(depression_percent_clean),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Depression (%)",
label = ~paste0("Depression: ", depression_percent_clean, "%")
) %>%
# Rent Burden layer
addPolygons(
fillColor = ~pal_rent(rentburdened_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Rent Burdened (%)",
label = ~paste0("Rent Burdened: ", rentburdened_percent, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_crowd(crowding_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Crowding (%)",
label = ~paste0("Crowding: ", crowding_percent, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_evict(eviction_rate),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Eviction (%)",
label = ~paste0("Eviction: ", eviction_rate, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_own(occupied_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Owner Occupied Homes (%)",
label = ~paste0("Occupied: ", occupied_percent, "%")
) %>%
# Layer control
addLayersControl(
baseGroups = c("Depression (%)", "Rent Burdened (%)", "Crowding (%)", "Evictions (%)", "Owner Occupied Homes (%)"),
options = layersControlOptions(collapsed = FALSE)
)
hospitalizationsxhousingsf <- uhf42shp %>%
left_join(combined_df2, by = c("UHFCODE" = "geo_id.x"))
hospitalizationsxhousingsf <- st_as_sf(hospitalizationsxhousingsf)
hospitalizationsxhousingsf <- st_transform(hospitalizationsxhousingsf, crs = 4326)
# Create color palettes
pal_dep2 <- colorNumeric("Oranges", domain = hospitalizationsxhousingsf$psychillness_rateper100k)
pal_rent <- colorNumeric("Reds", domain = hospitalizationsxhousingsf$rentburdened_percent)
pal_crowd <- colorNumeric("Greens", domain = hospitalizationsxhousingsf$crowding_percent)
pal_evict <- colorNumeric("Blues", domain = hospitalizationsxhousingsf$eviction_rate)
pal_own <- colorNumeric("Purples", domain = hospitalizationsxhousingsf$occupied_percent)
# Build leaflet map
leaflet(hospitalizationsxhousingsf) %>%
addTiles() %>%
# Depression layer
addPolygons(
fillColor = ~pal_dep2(psychillness_rateper100k),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Psychiatric Hospitalization (%)",
label = ~paste0("Psychiatric Hospitalizations: ", psychillness_rateper100k, "%")
) %>%
# Rent Burden layer
addPolygons(
fillColor = ~pal_rent(rentburdened_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Rent Burdened (%)",
label = ~paste0("Rent Burdened: ", rentburdened_percent, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_crowd(crowding_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Crowding (%)",
label = ~paste0("Crowding: ", crowding_percent, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_evict(eviction_rate),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Eviction (%)",
label = ~paste0("Eviction: ", eviction_rate, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_own(occupied_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Owner Occupied Homes (%)",
label = ~paste0("Occupied: ", occupied_percent, "%")
) %>%
# Layer control
addLayersControl(
baseGroups = c("Psychiatric Hospitalizations (%)", "Rent Burdened (%)", "Crowding (%)", "Evictions (%)", "Owner Occupied Homes (%)"),
options = layersControlOptions(collapsed = FALSE)
)
distressxhousingsf <- uhf42shp %>%
left_join(combined_df3, by = c("UHFCODE" = "geo_id.x"))
distressxhousingsf <- st_as_sf(distressxhousingsf)
distressxhousingsf <- st_transform(distressxhousingsf, crs = 4326)
# Create color palettes
pal_dep3 <- colorNumeric("Blues", domain = distressxhousingsf$distress_percent_clean)
pal_rent <- colorNumeric("Reds", domain = distressxhousingsf$rentburdened_percent)
pal_crowd <- colorNumeric("Greens", domain = distressxhousingsf$crowding_percent)
pal_evict <- colorNumeric("Oranges", domain = distressxhousingsf$eviction_rate)
pal_own <- colorNumeric("Purples", domain = distressxhousingsf$occupied_percent)
# Build leaflet map
leaflet(distressxhousingsf) %>%
addTiles() %>%
# Depression layer
addPolygons(
fillColor = ~pal_dep3(distress_percent_clean),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Psychological Distress (%)",
label = ~paste0("Psychological Distress: ", distress_percent_clean, "%")
) %>%
# Rent Burden layer
addPolygons(
fillColor = ~pal_rent(rentburdened_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Rent Burdened (%)",
label = ~paste0("Rent Burdened: ", rentburdened_percent, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_crowd(crowding_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Crowding (%)",
label = ~paste0("Crowding: ", crowding_percent, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_evict(eviction_rate),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Eviction (%)",
label = ~paste0("Eviction: ", eviction_rate, "%")
) %>%
# Crowding layer
addPolygons(
fillColor = ~pal_own(occupied_percent),
color = "white",
weight = 1,
fillOpacity = 0.7,
group = "Owner Occupied Homes (%)",
label = ~paste0("Occupied: ", occupied_percent, "%")
) %>%
# Layer control
addLayersControl(
baseGroups = c("Psychological Distress (%)", "Rent Burdened (%)", "Crowding (%)", "Evictions (%)", "Owner Occupied Homes (%)"),
options = layersControlOptions(collapsed = FALSE)
)