I hypothesize that there is a relationship between marital status (marital_status) and mental health (mental_health). To test this hypothesis, I will analyze responses from a subset of the National Health Interview Survey data set.
library(readr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
# Load the survey extract into a tibble named `data`.
# NOTE(review): the path is absolute and specific to one machine; anyone else
# re-running this script will need to point it at their own copy of the CSV.
data <- read_csv(file = "/Users/rebeccagibble/Downloads/SD2 Data.csv")
## Parsed with column specification:
## cols(
## sex = col_character(),
## race = col_character(),
## marital_status = col_character(),
## poverty_status = col_character(),
## age_range = col_character(),
## health = col_character(),
## bmi_category = col_character(),
## mental_health = col_character(),
## heart_attack_history = col_character(),
## heart_condition_history = col_character(),
## cancer_history = col_character(),
## prediabetes_history = col_character(),
## asthma_history = col_character(),
## hypertension_history = col_character(),
## smoking_history = col_character(),
## birthcontrol_status = col_logical()
## )
# Restrict the data to the two variables under study and to respondents whose
# answers fall in the listed categories. `%in%` is FALSE for NA and for any
# other value, so rows with missing or unlisted responses are dropped.
data <- data %>%
  filter(
    marital_status %in% c(
      "Never Married", "Married", "Widowed", "DivorcedOrSeparated"
    ),
    mental_health %in% c(
      "Low Risk", "Moderate Mental Distress", "Serious Mental Illness"
    )
  ) %>%
  select(marital_status, mental_health)
# Share of respondents in each marital status category, to two decimals.
round(prop.table(table(data$marital_status)), 2)
##
## DivorcedOrSeparated Married Never Married Widowed
## 0.18 0.45 0.29 0.08
# Share of respondents in each mental health category, to two decimals.
round(prop.table(table(data$mental_health)), 2)
##
## Low Risk Moderate Mental Distress Serious Mental Illness
## 0.80 0.16 0.03
# Count respondents in every marital status x mental health combination, then
# express each count as a share of its marital status group.
#
# `.groups = "drop_last"` spells out the grouping that summarise() would
# otherwise apply implicitly (and announce with a message): the result stays
# grouped by marital_status, so sum(n) below is the per-status total and the
# percentages within each marital status add up to 1.
data %>%
  group_by(marital_status, mental_health) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n))
## # A tibble: 12 x 4
## # Groups: marital_status [4]
## marital_status mental_health n percent
## <chr> <chr> <int> <dbl>
## 1 DivorcedOrSeparated Low Risk 34619 0.739
## 2 DivorcedOrSeparated Moderate Mental Distress 9469 0.202
## 3 DivorcedOrSeparated Serious Mental Illness 2770 0.0591
## 4 Married Low Risk 98237 0.852
## 5 Married Moderate Mental Distress 14542 0.126
## 6 Married Serious Mental Illness 2462 0.0214
## 7 Never Married Low Risk 57073 0.774
## 8 Never Married Moderate Mental Distress 14046 0.190
## 9 Never Married Serious Mental Illness 2634 0.0357
## 10 Widowed Low Risk 15983 0.797
## 11 Widowed Moderate Mental Distress 3374 0.168
## 12 Widowed Serious Mental Illness 706 0.0352
# Cross-tabulate marital status (rows) against mental health (columns) and
# convert counts to row proportions, so each marital status row sums to 1.
prop.table(
  table(data$marital_status, data$mental_health),
  margin = 1
)
##
## Low Risk Moderate Mental Distress
## DivorcedOrSeparated 0.73880661 0.20207862
## Married 0.85244835 0.12618773
## Never Married 0.77383971 0.19044649
## Widowed 0.79664058 0.16817026
##
## Serious Mental Illness
## DivorcedOrSeparated 0.05911477
## Married 0.02136392
## Never Married 0.03571380
## Widowed 0.03518915
# Distribution of mental health within each marital status, computed once and
# reused by every plot below (previously the same summary was rebuilt for each
# of the four plots). The result stays grouped by marital_status, so `percent`
# is the share of that status's respondents in each mental health category.
status_summary <- data %>%
  group_by(marital_status, mental_health) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(percent = n / sum(n))

# Bar chart of the mental health distribution for one marital status.
#
# summary_tbl: table with marital_status, mental_health and percent columns.
# status:      the marital_status value to plot (a single string).
# Returns the ggplot object; at top level it is printed automatically.
plot_status_distribution <- function(summary_tbl, status) {
  summary_tbl %>%
    filter(marital_status == status) %>%
    ggplot() +
    geom_col(aes(x = mental_health, y = percent, fill = marital_status))
}

plot_status_distribution(status_summary, "DivorcedOrSeparated")
plot_status_distribution(status_summary, "Married")
plot_status_distribution(status_summary, "Never Married")
plot_status_distribution(status_summary, "Widowed")
# Pearson chi-squared test of independence between marital status and mental
# health, computed from the cross-tabulation of the two columns.
chisq.test(x = data$marital_status, y = data$mental_health)
##
## Pearson's Chi-squared test
##
## data: data$marital_status and data$mental_health
## X-squared = 3856.6, df = 6, p-value < 2.2e-16