R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Rows: 96 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): TimePeriod, GeoType, Geography, Age-adjusted percent, Percent
## dbl (2): GeoID, GeoRank
## num (1): Number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 144 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): GeoType, Geography
## dbl (3): TimePeriod, GeoID, GeoRank
## num (3): Age-adjusted rate per 100,000 residents, Number, Rate per 100,000 r...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 192 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): TimePeriod, GeoType, Geography, Age-adjusted percent, Number, Percent
## dbl (2): GeoID, GeoRank
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 2122 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): GeoType, Geography, Estimated annual rate per 10,000 homes
## dbl (3): TimePeriod, GeoID, GeoRank
## num (1): Number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 3758 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): TimePeriod, GeoType, Geography
## dbl (3): GeoID, GeoRank, Percent
## num (1): Number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Moderation Analysis - Crowding X Depression (Immigrant Pop as Moderator)

depression <- depression %>%
  rename(
    depression_percent = percent,
    depression_number = number
  )

Depression2017 <- depression %>%
  filter(time_period == "2017-18", geo_type == "UHF42")

# Depression rate was in a weird format and was not numeric

Depression2017 <- Depression2017 %>%
  mutate(
    depression_percent_clean = as.numeric(str_extract(depression_percent, "^[0-9.]+"))
  )

immigrant <- immigrant %>%
  rename(
    immigrant_percent = percent,
    immigrant_number = number
  )

immigrant2018 <- immigrant %>%
  filter(time_period == "2014-18", geo_type =="UHF42")


crowding2018 <- crowding %>%
  filter(time_period == "2014-18", geo_type =="UHF42")

immigrantxcrowding2018 <- left_join(immigrant2018, crowding2018 , by = c("geo_type", "geography"))

immigrantxcrowding2018$crowding_c <- scale(immigrantxcrowding2018$percent, center = TRUE, scale = FALSE)
immigrantxcrowding2018$immigrant_c <- scale(immigrantxcrowding2018$immigrant_percent, center = TRUE, scale = FALSE)

immigrantxcrowding2018$interaction <- immigrantxcrowding2018$crowding_c * immigrantxcrowding2018$immigrant_c

df_new2 <- data.frame(
  crowding = immigrantxcrowding2018$percent,
  immigrant = immigrantxcrowding2018$immigrant_percent,
  depression = Depression2017$depression_percent_clean,
  crowding_c = immigrantxcrowding2018$crowding_c,
  immigrant_c = immigrantxcrowding2018$immigrant_c,
  interaction = immigrantxcrowding2018$interaction
)

# Moderation regression
model_mod1.1 <- lm(depression ~ crowding_c + immigrant_c + interaction, data = df_new2)

summary(model_mod1.1)
## 
## Call:
## lm(formula = depression ~ crowding_c + immigrant_c + interaction, 
##     data = df_new2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1379 -2.0432 -0.4372  1.7411  7.7311 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.33782    0.58269  17.742   <2e-16 ***
## crowding_c   0.40658    0.15646   2.599   0.0135 *  
## immigrant_c -0.10719    0.05862  -1.829   0.0757 .  
## interaction -0.02622    0.01126  -2.328   0.0256 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.189 on 36 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1992, Adjusted R-squared:  0.1325 
## F-statistic: 2.986 on 3 and 36 DF,  p-value: 0.04385

Shows that as Immigration population increases in an area, effect of crowding on depression decreases

Normality Tests

plot(lm(depression ~ crowding_c + immigrant_c + interaction, data = df_new2))

hist(resid(model_mod1.1), main="Histogram of Residuals", xlab="Residuals")

shapiro.test(resid(model_mod1.1))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model_mod1.1)
## W = 0.96986, p-value = 0.3562

Crowding X Distress (Immigrant Pop as Moderator)

seriousdistress <- seriousdistress %>%
  rename(
    distress_percent = percent,
    distress_number = number
  )

seriousdistress <- seriousdistress %>%
mutate(
    distress_percent_clean = as.numeric(str_extract(distress_percent, "^[0-9.]+"))
  )

seriousdistress2020 <- seriousdistress %>%
  filter(time_period == "2019-20",geo_type == "UHF42")

immigrant2020 <- immigrant %>%
  filter(time_period == "2016-20", geo_type =="UHF42")

crowding2020 <- crowding %>%
  filter(time_period == "2016-20", geo_type =="UHF42")

immigrantxcrowding2020 <- left_join(immigrant2020, crowding2020 , by = c("geo_type","geography"))

immigrantxcrowding2020$crowding_c <- scale(immigrantxcrowding2020$percent, center = TRUE, scale = FALSE)
immigrantxcrowding2020$immigrant_c <- scale(immigrantxcrowding2020$immigrant_percent, center = TRUE, scale = FALSE)

immigrantxcrowding2020$interaction <- immigrantxcrowding2020$crowding_c * immigrantxcrowding2020$immigrant_c

df_new3 <- data.frame(
  crowding = immigrantxcrowding2020$percent,
  immigrant = immigrantxcrowding2020$immigrant_percent,
  distress = seriousdistress2020$distress_percent_clean,
  crowding_c = immigrantxcrowding2020$crowding_c,
  immigrant_c = immigrantxcrowding2020$immigrant_c,
  interaction = immigrantxcrowding2020$interaction
)

# Moderation regression
model_mod2.1 <- lm(distress ~ crowding_c + immigrant_c + interaction, data = df_new3)

summary(model_mod2.1)
## 
## Call:
## lm(formula = distress ~ crowding_c + immigrant_c + interaction, 
##     data = df_new3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2354 -1.3483 -0.0792  1.3322  4.4001 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.219870   0.375187  16.578   <2e-16 ***
## crowding_c   0.144871   0.099922   1.450   0.1560    
## immigrant_c  0.029400   0.037835   0.777   0.4423    
## interaction -0.020989   0.007764  -2.703   0.0105 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.05 on 35 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.2095, Adjusted R-squared:  0.1417 
## F-statistic: 3.092 on 3 and 35 DF,  p-value: 0.03945

Same results here but for distress

Normality Tests

plot(lm(distress ~ crowding_c + immigrant_c + interaction, data = df_new3))

hist(resid(model_mod2.1), main="Histogram of Residuals", xlab="Residuals")

shapiro.test(resid(model_mod2.1))
## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model_mod2.1)
## W = 0.99093, p-value = 0.986

Graph for Moderation Analysis

# Compute SD of immigrant_c
sd_imm <- sd(df_new2$immigrant_c, na.rm = TRUE)

# Create a grid of values
newdata <- expand.grid(
  crowding_c = seq(min(df_new2$crowding_c, na.rm=TRUE),
                   max(df_new2$crowding_c, na.rm=TRUE), length=100),
  immigrant_c = c(-sd_imm, 0, sd_imm)
)

# Add interaction term
newdata$interaction <- newdata$crowding_c * newdata$immigrant_c

# Predict depression
model <- lm(depression ~ crowding_c + immigrant_c + interaction, data=df_new2)
newdata$predicted <- predict(model, newdata)

ggplot(newdata, aes(x=crowding_c, y=predicted, color=factor(immigrant_c))) +
  geom_line(size=1.2) +
  labs(
    x = "Crowding (centered)",
    y = "Predicted Depression",
    color = "Immigrant %\n(Level)"
  ) +
  scale_color_manual(
    values=c("blue", "green", "red"),
    labels=c("Low", "Mean", "High")
  ) +
  theme_minimal() +
  ggtitle("Moderation of Crowding on Depression by Immigrant %")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.