library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
library(readxl)
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.5.3
fema_data <- read_excel("fema_data.xlsx", sheet = "Core Survey", skip = 1)
## Warning: Expecting logical in LS1120 / R1120C331: got 'there is no third
## gender'
## Warning: Expecting logical in MO1246 / R1246C353: got 'normal'
## Warning: Expecting logical in MO1414 / R1414C353: got 'Fluid'
## Warning: Expecting logical in LS1963 / R1963C331: got 'Transgender'
## Warning: Expecting logical in MO2184 / R2184C353: got 'Genderfluid'
## Warning: Expecting logical in LS2311 / R2311C331: got 'Trans'
## Warning: Expecting logical in MO2699 / R2699C353: got 'Transgender'
## Warning: Expecting logical in MO2830 / R2830C353: got 'Nothing in particular'
## Warning: Expecting logical in MO3048 / R3048C353: got 'Good'
## Warning: Expecting logical in MO3304 / R3304C353: got 'Transgender'
## Warning: Expecting logical in LS4642 / R4642C331: got 'Transgender Female'
## Warning: Expecting logical in MO5355 / R5355C353: got 'Femal'
## Warning: Expecting logical in LS5609 / R5609C331: got 'There are two sexes,
## male and female. I'm a male.'
## Warning: Expecting logical in MO6038 / R6038C353: got 'Transgender'
## Warning: Expecting logical in LS6358 / R6358C331: got 'Transgender Male (FTM)'
## Warning: Expecting logical in MO7225 / R7225C353: got 'solo ager'
#This command instructs R which sheet to use and to start on row 2 for variable names. Row one is the long form question that was asked in the survey.
#This data set contains 7605 observations and 406 variables
#I am interested in learning more about preparedness of individuals with disabilities and FNSS requirements. Using the Codebook provided with the data set I selected the following variables to investigate.
summary(fema_data$disability)
## Length Class Mode
## 7604 character character
#Unfortunately, these are yes or no questions and will not work easily for this assignment. As such, I will begin with a more basic study of preparedness vs perceived risk.
#level of self perceived preparedness
table(fema_data$dis_soc)
##
## Don't know
## 387
## I am NOT prepared, and I do not intend to prepare in the next year
## 736
## I am NOT prepared, but I intend to get prepared in the next six months
## 1505
## I am NOT prepared, but I intend to start preparing in the next year
## 1252
## I have been prepared for LESS than a year
## 1357
## I have been prepared for MORE than a year and I continue preparing
## 2367
#is there a perceived threat from disaster
table(fema_data$dis_iperception)
##
## No Unknown Yes
## 1457 619 5528
#individual identifies as having a disability
table(fema_data$disability)
##
## Disability No Disability
## 1855 5749
#Viewing levels of preparedness as factors will allow me to understand how the different responses are ordered from least to most prepared.
#I am also removing "Don't know" responses for preparedness, since those become NA when the ordered factor is created.
fema_data$dis_soc <- factor(fema_data$dis_soc,levels = c(
"I am NOT prepared, and I do not intend to prepare in the next year",
"I am NOT prepared, but I intend to start preparing in the next year",
"I am NOT prepared, but I intend to get prepared in the next six months",
"I have been prepared for LESS than a year",
"I have been prepared for MORE than a year and I continue preparing"))
#Check the levels
levels(fema_data$dis_soc)
## [1] "I am NOT prepared, and I do not intend to prepare in the next year"
## [2] "I am NOT prepared, but I intend to start preparing in the next year"
## [3] "I am NOT prepared, but I intend to get prepared in the next six months"
## [4] "I have been prepared for LESS than a year"
## [5] "I have been prepared for MORE than a year and I continue preparing"
#I am not interested in data from individuals who answered "unknown" to whether they perceive a threat from disasters, have a disability, have experienced a disaster in the past, or whether they provide care for someone with disabilities.
fema_compare <- subset(fema_data,dis_iperception %in% c("Yes", "No") &! is.na(dis_soc) & disability %in% c("Disability", "No Disability") & dis_exp %in% c("Yes", "No") & care %in% c("Yes", "No"))
#Convert disability status to numeric
fema_compare$disability_numeric <- ifelse(fema_compare$disability == "Disability", 1, 0)
#Convert perceived threat to numeric
fema_compare$threat_numeric <- ifelse(fema_compare$dis_iperception == "Yes", 1, 0)
#Convert preparedness to numeric
fema_compare$prep_numeric <- as.numeric(fema_compare$dis_soc)
#turn disaster experience into a factor and remove "Don't know" responses
fema_compare$dis_exp <- factor(fema_compare$dis_exp,levels = c("No", "Yes"))
#turn care required or given into a factor and remove "Don't know" responses
fema_compare$care <- factor(fema_compare$care,levels = c("No", "Yes"))
#Verify that the above step worked
summary(fema_compare$prep_numeric)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 4.000 3.517 5.000 5.000
table(fema_compare$disability, useNA = "ifany")
##
## Disability No Disability
## 1580 4977
#Check the relationship between disability and perceived disaster threat
table(fema_compare$disability, fema_compare$dis_iperception)
##
## No Yes
## Disability 259 1321
## No Disability 1064 3913
chisq.test(table(fema_compare$disability, fema_compare$dis_iperception))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(fema_compare$disability, fema_compare$dis_iperception)
## X-squared = 18.203, df = 1, p-value = 1.986e-05
#Adding the provided weight variable allows respondents to contribute proportionally based on how representative they are of the population.
#Weighted mean preparedness for respondents with disabilities
disability_subset <- subset(fema_compare, disability == "Disability" & !is.na(prep_numeric) & !is.na(weight))
weighted.mean(disability_subset$prep_numeric, disability_subset$weight)
## [1] 3.514663
#Weighted mean preparedness for respondents without disabilities
no_disability_subset <- subset(fema_compare, disability == "No Disability" & !is.na(prep_numeric) & !is.na(weight))
weighted.mean(no_disability_subset$prep_numeric, no_disability_subset$weight)
## [1] 3.523527
# Frequency table to identify the most common disability answer
fema_disability_mode_table<-fema_compare %>% group_by(disability) %>% summarize(count_of_x=n()) %>% arrange(-count_of_x)
head(fema_disability_mode_table)
## # A tibble: 2 × 2
## disability count_of_x
## <chr> <int>
## 1 No Disability 4977
## 2 Disability 1580
# Frequency table to identify the most common preparedness level
prep_mode_table <- fema_compare %>% group_by(prep_numeric) %>% summarize(count_of_prep = n()) %>% arrange(-count_of_prep)
prep_mode_table
## # A tibble: 5 × 2
## prep_numeric count_of_prep
## <dbl> <int>
## 1 5 2232
## 2 3 1355
## 3 4 1254
## 4 2 1104
## 5 1 612
ggplot(fema_compare, aes(x = disability, y = prep_numeric)) + geom_boxplot() + labs(title = "Preparedness Levels by Disability Status",x = "Disability Status",y = "Preparedness Level (1–5)")

ggplot(fema_compare, aes(x = dis_iperception, y = prep_numeric)) + geom_boxplot() + labs(title = "Preparedness Levels by Perceived Disaster Risk",x = "Perceived Disaster Risk",y = "Preparedness Level (1–5)")

ggplot(fema_compare, aes(x = dis_exp, y = prep_numeric)) + geom_boxplot() + labs(title = "Preparedness Levels by Disaster Experience",x = "Previous Disaster Experience",y = "Preparedness Level (1–5)")

ggplot(fema_compare, aes(x = care, y = prep_numeric)) + geom_boxplot() + labs(title = "Preparedness Levels by Care Responsibilities",x = "Caregiver Status",y = "Preparedness Level (1–5)") + theme_minimal()

# Mean and median preparedness for respondents with disabilities
disability_mean <- mean(fema_compare$prep_numeric[fema_compare$disability == "Disability"], na.rm = TRUE)
disability_median <- median(fema_compare$prep_numeric[fema_compare$disability == "Disability"], na.rm = TRUE)
# Mean and median preparedness for respondents with no disabilities
no_disability_mean <- mean(fema_compare$prep_numeric[fema_compare$disability == "No Disability"], na.rm = TRUE)
no_disability_median <- median(fema_compare$prep_numeric[fema_compare$disability == "No Disability"], na.rm = TRUE)
#Weighted mean for respondents with disabilities
disability_weighted_mean <- weighted.mean(disability_subset$prep_numeric, disability_subset$weight)
#Weighted mean for respondents with no disabilities
no_disability_weighted_mean <- weighted.mean(no_disability_subset$prep_numeric, no_disability_subset$weight)
# Histogram of preparedness with Disability
hist(fema_compare$prep_numeric[fema_compare$disability == "Disability"],
breaks = 6,
probability = TRUE)
lines(density(fema_compare$prep_numeric[fema_compare$disability == "Disability"]),
col = "red",
lwd = 3)
abline(v = disability_mean, col = "blue", lwd = 5)
abline(v = disability_median, col = "green", lwd = 5)

# Histogram of preparedness with No Disability
hist(fema_compare$prep_numeric[fema_compare$disability == "No Disability"],
breaks = 6,
probability = TRUE)
lines(density(fema_compare$prep_numeric[fema_compare$disability == "No Disability"]),
col = "red",
lwd = 3)
abline(v = no_disability_mean, col = "blue", lwd = 5)
abline(v = no_disability_median, col = "green", lwd = 5)

#Null hypothesis: There is no difference in disaster preparedness levels between individuals with disabilities and those without disabilities.
#Alternative hypothesis: There is a difference in disaster preparedness levels between individuals with disabilities and those without disabilities.
t.test(prep_numeric ~ disability, data = fema_compare)
##
## Welch Two Sample t-test
##
## data: prep_numeric by disability
## t = -0.6884, df = 2574.5, p-value = 0.4913
## alternative hypothesis: true difference in means between group Disability and group No Disability is not equal to 0
## 95 percent confidence interval:
## -0.10547117 0.05065904
## sample estimates:
## mean in group Disability mean in group No Disability
## 3.496203 3.523609
# Logistic model: Does having a disability effect perceived threat?
glm(threat_numeric ~ disability, data = fema_compare, family = "binomial")
##
## Call: glm(formula = threat_numeric ~ disability, family = "binomial",
## data = fema_compare)
##
## Coefficients:
## (Intercept) disabilityNo Disability
## 1.629 -0.327
##
## Degrees of Freedom: 6556 Total (i.e. Null); 6555 Residual
## Null Deviance: 6594
## Residual Deviance: 6575 AIC: 6579
# Linear model: Does having a disability + perceiving threat change preparedness?
lm(prep_numeric ~ disability + threat_numeric, data = fema_compare)
##
## Call:
## lm(formula = prep_numeric ~ disability + threat_numeric, data = fema_compare)
##
## Coefficients:
## (Intercept) disabilityNo Disability threat_numeric
## 2.93756 0.06072 0.66817
#What variables really effect preparedness?
disability_exp_perc_care_model<-lm(prep_numeric ~ dis_perception + dis_exp + disability_numeric + care, data = fema_compare)
summary(disability_exp_perc_care_model)
##
## Call:
## lm(formula = prep_numeric ~ dis_perception + dis_exp + disability_numeric +
## care, data = fema_compare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1143 -0.9830 0.1645 1.1081 2.3389
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.25561 0.03102 104.964 < 2e-16 ***
## dis_perceptionUnlikely -0.42009 0.04305 -9.758 < 2e-16 ***
## dis_perceptionVery likely 0.22233 0.03784 5.875 4.43e-09 ***
## dis_expYes 0.50506 0.03436 14.699 < 2e-16 ***
## disability_numeric -0.17443 0.03949 -4.417 1.01e-05 ***
## careYes 0.13124 0.04281 3.066 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.295 on 6551 degrees of freedom
## Multiple R-squared: 0.08259, Adjusted R-squared: 0.08189
## F-statistic: 118 on 5 and 6551 DF, p-value: < 2.2e-16
#Because testing below indicates that the model violates homoscedasticity and normality, a log model was created in an attempt to mitigate these issues.
disability_exp_perc_care_model_log <- lm(log(prep_numeric) ~ dis_perception + dis_exp + disability_numeric + care,data = fema_compare)
#Is this model legitimate?
#Linearity
plot(disability_exp_perc_care_model,which=1)

raintest(disability_exp_perc_care_model)
##
## Rainbow test
##
## data: disability_exp_perc_care_model
## Rain = 0.96138, df1 = 3279, df2 = 3272, p-value = 0.8701
#With a p-value of greater than 0.05 the model is likely linear.
#Independence of errors
durbinWatsonTest(disability_exp_perc_care_model)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.007347213 2.014606 0.562
## Alternative hypothesis: rho != 0
#Although the model exhibits heteroscedasticity, the Durbin-Watson test indicates no evidence of autocorrelation with a p-value of greater than 0.5. These results suggest that while the variance of residuals is not constant, the independence assumption is still satisfied.
#Homoscedasticity
plot(disability_exp_perc_care_model,which=3)

bptest(disability_exp_perc_care_model)
##
## studentized Breusch-Pagan test
##
## data: disability_exp_perc_care_model
## BP = 151.68, df = 5, p-value < 2.2e-16
#With a p-value less that 0.05, the model is likely heteroscedastic.
#Homoscedasticity mitigation
plot(disability_exp_perc_care_model_log,which=3)

bptest(disability_exp_perc_care_model_log)
##
## studentized Breusch-Pagan test
##
## data: disability_exp_perc_care_model_log
## BP = 258.04, df = 5, p-value < 2.2e-16
#The log transformation was found to be ineffective, likely due to the limited range of preparedness levels (1-5). So, instead of transforming the dependent variable, robust standard errors are used to correct for heteroscedasticity. This approach keeps the original model intact while adjusting the standard errors and p-values to be more reliable in the presence of unequal variance across observations.
coeftest(disability_exp_perc_care_model, vcov = vcovHC(disability_exp_perc_care_model, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.255614 0.031627 102.9380 < 2.2e-16 ***
## dis_perceptionUnlikely -0.420092 0.046877 -8.9616 < 2.2e-16 ***
## dis_perceptionVery likely 0.222325 0.036128 6.1538 8.012e-10 ***
## dis_expYes 0.505065 0.034840 14.4967 < 2.2e-16 ***
## disability_numeric -0.174431 0.040110 -4.3488 1.390e-05 ***
## careYes 0.131242 0.042332 3.1003 0.001942 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#While heteroscedasticity remains present, the adjusted standard errors provide more reliable estimates of statistical significance. All variables
#Normality of residuals
plot(disability_exp_perc_care_model,which=2)

#Due to >5,000 sample size, the Shapiro-Wilk test will not run. Instead, the K-S test was used.
ks.test(disability_exp_perc_care_model$residuals,"pnorm")
## Warning in ks.test.default(disability_exp_perc_care_model$residuals, "pnorm"):
## ties should not be present for the one-sample Kolmogorov-Smirnov test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: disability_exp_perc_care_model$residuals
## D = 0.18735, p-value < 2.2e-16
## alternative hypothesis: two-sided
#Normality of residuals mitigation
plot(disability_exp_perc_care_model_log,which=2)

#After applying the log transformation, the Q-Q plot shows the residuals deviate further from normality. This is likely due to the limited scale of the preparedness variable, which only ranges from 1 to 5. As a result, the transformation does not resolve the issue. Given the large sample size, minor deviations from normality are not considered a major concern.
#3e No multicolinarity
vif(disability_exp_perc_care_model)
## GVIF Df GVIF^(1/(2*Df))
## dis_perception 1.116341 2 1.027896
## dis_exp 1.118157 1 1.057429
## disability_numeric 1.114446 1 1.055673
## care 1.145472 1 1.070267
#A VIF less than 10 for all variables likely means that they are not strongly correlated with one another.
#Does disaster perception interact with another variable to have a synergistic effect on preparedness level?
interaction_model <- lm(prep_numeric ~ dis_perception * disability_numeric + dis_exp + care,
data = fema_compare)
ggplot(fema_compare, aes(x = dis_perception, y = prep_numeric, fill = as.factor(disability_numeric))) + geom_boxplot() + labs(title = "Preparedness by Perceived Threat and Disability Status",x = "Perceived Threat",y = "Preparedness Level",fill = "Disability Status")

summary(interaction_model)
##
## Call:
## lm(formula = prep_numeric ~ dis_perception * disability_numeric +
## dis_exp + care, data = fema_compare)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1418 -1.0045 0.1827 1.1081 2.2669
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 3.25097 0.03210 101.282
## dis_perceptionUnlikely -0.43364 0.04791 -9.051
## dis_perceptionVery likely 0.24918 0.04416 5.643
## disability_numeric -0.15597 0.05561 -2.805
## dis_expYes 0.50435 0.03437 14.673
## careYes 0.13732 0.04298 3.195
## dis_perceptionUnlikely:disability_numeric 0.07172 0.10508 0.683
## dis_perceptionVery likely:disability_numeric -0.09393 0.08411 -1.117
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## dis_perceptionUnlikely < 2e-16 ***
## dis_perceptionVery likely 1.74e-08 ***
## disability_numeric 0.00505 **
## dis_expYes < 2e-16 ***
## careYes 0.00140 **
## dis_perceptionUnlikely:disability_numeric 0.49493
## dis_perceptionVery likely:disability_numeric 0.26412
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.295 on 6549 degrees of freedom
## Multiple R-squared: 0.08294, Adjusted R-squared: 0.08196
## F-statistic: 84.62 on 7 and 6549 DF, p-value: < 2.2e-16
#The interactions were not statistically significant, suggesting that the relationship between perceived risk and preparedness is consistent regardless of disability status. As a result, the simpler additive model was retained for interpretation.