This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
## Rows: 96 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): TimePeriod, GeoType, Geography, Age-adjusted percent, Percent
## dbl (2): GeoID, GeoRank
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 144 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): GeoType, Geography
## dbl (3): TimePeriod, GeoID, GeoRank
## num (3): Age-adjusted rate per 100,000 residents, Number, Rate per 100,000 r...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 192 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): TimePeriod, GeoType, Geography, Age-adjusted percent, Number, Percent
## dbl (2): GeoID, GeoRank
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2122 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): GeoType, Geography, Estimated annual rate per 10,000 homes
## dbl (3): TimePeriod, GeoID, GeoRank
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
depression <- depression %>%
rename(
depression_percent = percent,
depression_number = number
)
Depression2017 <- depression %>%
filter(time_period == "2017-18", geo_type == "UHF42")
# Depression rate was in a weird format and was not numeric
Depression2017 <- Depression2017 %>%
mutate(
depression_percent_clean = as.numeric(str_extract(depression_percent, "^[0-9.]+"))
)
immigrant <- immigrant %>%
rename(
immigrant_percent = percent,
immigrant_number = number
)
immigrant2018 <- immigrant %>%
filter(time_period == "2014-18", geo_type =="UHF42")
crowding2018 <- crowding %>%
filter(time_period == "2014-18", geo_type =="UHF42")
immigrantxcrowding2018 <- left_join(immigrant2018, crowding2018 , by = c("geo_type", "geography"))
immigrantxcrowding2018$crowding_c <- scale(immigrantxcrowding2018$percent, center = TRUE, scale = FALSE)
immigrantxcrowding2018$immigrant_c <- scale(immigrantxcrowding2018$immigrant_percent, center = TRUE, scale = FALSE)
immigrantxcrowding2018$interaction <- immigrantxcrowding2018$crowding_c * immigrantxcrowding2018$immigrant_c
df_new2 <- data.frame(
crowding = immigrantxcrowding2018$percent,
immigrant = immigrantxcrowding2018$immigrant_percent,
depression = Depression2017$depression_percent_clean,
crowding_c = immigrantxcrowding2018$crowding_c,
immigrant_c = immigrantxcrowding2018$immigrant_c,
interaction = immigrantxcrowding2018$interaction
)
# Moderation regression
model_mod1.1 <- lm(depression ~ crowding_c + immigrant_c + interaction, data = df_new2)
summary(model_mod1.1)
##
## Call:
## lm(formula = depression ~ crowding_c + immigrant_c + interaction,
## data = df_new2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.1379 -2.0432 -0.4372 1.7411 7.7311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.33782 0.58269 17.742 <2e-16 ***
## crowding_c 0.40658 0.15646 2.599 0.0135 *
## immigrant_c -0.10719 0.05862 -1.829 0.0757 .
## interaction -0.02622 0.01126 -2.328 0.0256 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.189 on 36 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1992, Adjusted R-squared: 0.1325
## F-statistic: 2.986 on 3 and 36 DF, p-value: 0.04385
plot(lm(depression ~ crowding_c + immigrant_c + interaction, data = df_new2))
hist(resid(model_mod1.1), main="Histogram of Residuals", xlab="Residuals")
shapiro.test(resid(model_mod1.1))
##
## Shapiro-Wilk normality test
##
## data: resid(model_mod1.1)
## W = 0.96986, p-value = 0.3562
seriousdistress <- seriousdistress %>%
rename(
distress_percent = percent,
distress_number = number
)
seriousdistress <- seriousdistress %>%
mutate(
distress_percent_clean = as.numeric(str_extract(distress_percent, "^[0-9.]+"))
)
seriousdistress2020 <- seriousdistress %>%
filter(time_period == "2019-20",geo_type == "UHF42")
immigrant2020 <- immigrant %>%
filter(time_period == "2016-20", geo_type =="UHF42")
crowding2020 <- crowding %>%
filter(time_period == "2016-20", geo_type =="UHF42")
immigrantxcrowding2020 <- left_join(immigrant2020, crowding2020 , by = c("geo_type","geography"))
immigrantxcrowding2020$crowding_c <- scale(immigrantxcrowding2020$percent, center = TRUE, scale = FALSE)
immigrantxcrowding2020$immigrant_c <- scale(immigrantxcrowding2020$immigrant_percent, center = TRUE, scale = FALSE)
immigrantxcrowding2020$interaction <- immigrantxcrowding2020$crowding_c * immigrantxcrowding2020$immigrant_c
df_new3 <- data.frame(
crowding = immigrantxcrowding2020$percent,
immigrant = immigrantxcrowding2020$immigrant_percent,
distress = seriousdistress2020$distress_percent_clean,
crowding_c = immigrantxcrowding2020$crowding_c,
immigrant_c = immigrantxcrowding2020$immigrant_c,
interaction = immigrantxcrowding2020$interaction
)
# Moderation regression
model_mod2.1 <- lm(distress ~ crowding_c + immigrant_c + interaction, data = df_new3)
summary(model_mod2.1)
##
## Call:
## lm(formula = distress ~ crowding_c + immigrant_c + interaction,
## data = df_new3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2354 -1.3483 -0.0792 1.3322 4.4001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.219870 0.375187 16.578 <2e-16 ***
## crowding_c 0.144871 0.099922 1.450 0.1560
## immigrant_c 0.029400 0.037835 0.777 0.4423
## interaction -0.020989 0.007764 -2.703 0.0105 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.05 on 35 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.2095, Adjusted R-squared: 0.1417
## F-statistic: 3.092 on 3 and 35 DF, p-value: 0.03945
plot(lm(distress ~ crowding_c + immigrant_c + interaction, data = df_new3))
hist(resid(model_mod2.1), main="Histogram of Residuals", xlab="Residuals")
shapiro.test(resid(model_mod2.1))
##
## Shapiro-Wilk normality test
##
## data: resid(model_mod2.1)
## W = 0.99093, p-value = 0.986
# Compute SD of immigrant_c
sd_imm <- sd(df_new2$immigrant_c, na.rm = TRUE)
# Create a grid of values
newdata <- expand.grid(
crowding_c = seq(min(df_new2$crowding_c, na.rm=TRUE),
max(df_new2$crowding_c, na.rm=TRUE), length=100),
immigrant_c = c(-sd_imm, 0, sd_imm)
)
# Add interaction term
newdata$interaction <- newdata$crowding_c * newdata$immigrant_c
# Predict depression
model <- lm(depression ~ crowding_c + immigrant_c + interaction, data=df_new2)
newdata$predicted <- predict(model, newdata)
ggplot(newdata, aes(x=crowding_c, y=predicted, color=factor(immigrant_c))) +
geom_line(size=1.2) +
labs(
x = "Crowding (centered)",
y = "Predicted Depression",
color = "Immigrant %\n(Level)"
) +
scale_color_manual(
values=c("blue", "green", "red"),
labels=c("Low", "Mean", "High")
) +
theme_minimal() +
ggtitle("Moderation of Crowding on Depression by Immigrant %")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.