library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the NHIS extract.
# By default readr guesses every column's type from the first 1,000 rows only.
# With that default, columns such as Demo_Race were guessed as logical, and
# later text values (e.g. "White", first seen around row 68,557) failed to
# parse and were replaced with NA -- hundreds of thousands of parsing failures.
# Raising guess_max lets readr inspect enough rows to see real values before
# it settles on a column type.
NHIS_Data <- read_csv("Downloads/NHIS Data.csv", guess_max = 1000000)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   Demo_Race = col_logical(),
##   Demo_Hispanic = col_character(),
##   Demo_RaceEthnicity = col_character(),
##   Demo_Region = col_character(),
##   Demo_sex_C = col_character(),
##   Demo_sexorien_C = col_logical(),
##   Demo_agerange_C = col_character(),
##   Demo_marital_C = col_character(),
##   Demo_hourswrk_C = col_character(),
##   MentalHealth_MentalIllnessK6_C = col_character(),
##   MentalHealth_depressionmeds_B = col_logical(),
##   Health_SelfRatedHealth_C = col_character(),
##   Health_diagnosed_STD5yr_B = col_logical(),
##   Health_BirthControlNow_B = col_logical(),
##   Health_EverHavePrediabetes_B = col_logical(),
##   Health_HIVAidsRisk_C = col_character(),
##   Health_BMI_C = col_character(),
##   Health_UsualPlaceHealthcare_C = col_character(),
##   Health_AbnormalPapPast3yr_B = col_logical(),
##   Behav_CigsPerDay_C = col_character()
##   # ... with 1 more columns
## )
## ℹ Use `spec()` for the full column specifications.
## Warning: 683386 parsing failures.
##   row       col           expected                            actual                      file
## 68557 Demo_Race 1/0/T/F/TRUE/FALSE Black or African American         'Downloads/NHIS Data.csv'
## 68558 Demo_Race 1/0/T/F/TRUE/FALSE Asian                             'Downloads/NHIS Data.csv'
## 68559 Demo_Race 1/0/T/F/TRUE/FALSE American Indian or Alaskan Native 'Downloads/NHIS Data.csv'
## 68560 Demo_Race 1/0/T/F/TRUE/FALSE White                             'Downloads/NHIS Data.csv'
## 68561 Demo_Race 1/0/T/F/TRUE/FALSE White                             'Downloads/NHIS Data.csv'
## ..... ......... .................. ................................. .........................
## See problems(...) for more details.

I hypothesize that there is a relationship between Race/Ethnicity and Mental Health Illness.

Crosstab (Null Vs Actual)

# One-way frequency count of respondents per race/ethnicity category
table(NHIS_Data$Demo_RaceEthnicity)
## 
##            American Indian or Alaskan Native Hispanic 
##                                                  1557 
##        American Indian or Alaskan Native Not Hispanic 
##                                                  3136 
##                                        Asian Hispanic 
##                                                  1035 
##                                    Asian Not Hispanic 
##                                                 24733 
## Black or African American (Hispanic Identity Unknown) 
##                                                     2 
##                    Black or African American Hispanic 
##                                                  2725 
##                Black or African American Not Hispanic 
##                                                 77661 
##                      Hispanic (Race Identity Unknown) 
##                                                 11100 
##            Multiple Races (Hispanic Identity Unknown) 
##                                                     1 
##                               Multiple Races Hispanic 
##                                                  1588 
##                           Multiple Races Not Hispanic 
##                                                  6810 
##                  Not Hispanic (Race Identity Unknown) 
##                                                 58278 
##                                   Other Race Hispanic 
##                                                  4807 
##                               Other Race Not Hispanic 
##                                                   151 
##                     White (Hispanic Identity Unknown) 
##                                                    16 
##                                        White Hispanic 
##                                                 81356 
##                                    White Not Hispanic 
##                                                344526
# One-way frequency count of respondents per K6 mental-illness category
table(NHIS_Data$MentalHealth_MentalIllnessK6_C)
## 
## Low Risk      MMD      SMI 
##   487109    97837    21633

Actual Distribution

# Observed counts: race/ethnicity (rows) by K6 mental-illness category (columns)
table(
  NHIS_Data$Demo_RaceEthnicity,
  NHIS_Data$MentalHealth_MentalIllnessK6_C
)
##                                                        
##                                                         Low Risk    MMD    SMI
##   American Indian or Alaskan Native Hispanic                1130    330     78
##   American Indian or Alaskan Native Not Hispanic            2221    626    197
##   Asian Hispanic                                             810    161     48
##   Asian Not Hispanic                                       20541   3031    450
##   Black or African American (Hispanic Identity Unknown)        1      1      0
##   Black or African American Hispanic                        2022    517    124
##   Black or African American Not Hispanic                   59796  13026   2945
##   Hispanic (Race Identity Unknown)                          8409   1944    583
##   Multiple Races (Hispanic Identity Unknown)                   1      0      0
##   Multiple Races Hispanic                                   1078    368    108
##   Multiple Races Not Hispanic                               4591   1605    447
##   Not Hispanic (Race Identity Unknown)                     45845   9589   1977
##   Other Race Hispanic                                       3580    894    252
##   Other Race Not Hispanic                                    120     23      4
##   White (Hispanic Identity Unknown)                           11      3      1
##   White Hispanic                                           64133  12557   3215
##   White Not Hispanic                                      272783  53152  11202
  • The null hypothesis is that Race/Ethnicity and Mental Health Illness are independent of one another.

Row or Column %

# Row proportions: share of each K6 category within every race/ethnicity group
# (margin = 1 normalises across each row, so every row sums to 1).
prop.table(
  table(NHIS_Data$Demo_RaceEthnicity, NHIS_Data$MentalHealth_MentalIllnessK6_C),
  margin = 1
)
##                                                        
##                                                           Low Risk        MMD
##   American Indian or Alaskan Native Hispanic            0.73472042 0.21456437
##   American Indian or Alaskan Native Not Hispanic        0.72963206 0.20565046
##   Asian Hispanic                                        0.79489696 0.15799804
##   Asian Not Hispanic                                    0.85509117 0.12617601
##   Black or African American (Hispanic Identity Unknown) 0.50000000 0.50000000
##   Black or African American Hispanic                    0.75929403 0.19414195
##   Black or African American Not Hispanic                0.78920902 0.17192181
##   Hispanic (Race Identity Unknown)                      0.76892831 0.17776152
##   Multiple Races (Hispanic Identity Unknown)            1.00000000 0.00000000
##   Multiple Races Hispanic                               0.69369369 0.23680824
##   Multiple Races Not Hispanic                           0.69110342 0.24160771
##   Not Hispanic (Race Identity Unknown)                  0.79854035 0.16702374
##   Other Race Hispanic                                   0.75751164 0.18916631
##   Other Race Not Hispanic                               0.81632653 0.15646259
##   White (Hispanic Identity Unknown)                     0.73333333 0.20000000
##   White Hispanic                                        0.80261561 0.15714911
##   White Not Hispanic                                    0.80911618 0.15765698
##                                                        
##                                                                SMI
##   American Indian or Alaskan Native Hispanic            0.05071521
##   American Indian or Alaskan Native Not Hispanic        0.06471748
##   Asian Hispanic                                        0.04710500
##   Asian Not Hispanic                                    0.01873283
##   Black or African American (Hispanic Identity Unknown) 0.00000000
##   Black or African American Hispanic                    0.04656403
##   Black or African American Not Hispanic                0.03886916
##   Hispanic (Race Identity Unknown)                      0.05331017
##   Multiple Races (Hispanic Identity Unknown)            0.00000000
##   Multiple Races Hispanic                               0.06949807
##   Multiple Races Not Hispanic                           0.06728888
##   Not Hispanic (Race Identity Unknown)                  0.03443591
##   Other Race Hispanic                                   0.05332205
##   Other Race Not Hispanic                               0.02721088
##   White (Hispanic Identity Unknown)                     0.06666667
##   White Hispanic                                        0.04023528
##   White Not Hispanic                                    0.03322685
  • Multiple Races (Hispanic Identity Unknown) have the highest percentage of Low Risk cases of mental illness: 100%

  • Black or African American (Hispanic Identity Unknown) respondents have the lowest percentage of Low Risk mental illness: 50%

  • Black or African American (Hispanic Identity Unknown) respondents have the highest percentage of Moderate Mental Distress: 50%

  • Multiple Races (Hispanic Identity Unknown) have the lowest percentage of Moderate Mental Distress: 0%

  • Multiple Races Hispanic (6.9%), Multiple Races Not Hispanic (6.7%), White (Hispanic Identity Unknown) (6.7%), and American Indian or Alaskan Native Not Hispanic (6.5%) have the highest percentages of Serious Mental Illness: roughly 6–7%

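  • Black or African American (Hispanic Identity Unknown) and Multiple Races (Hispanic Identity Unknown) respondents have the lowest percentage of Serious Mental Illness: 0% (these groups contain only 2 and 1 respondents, respectively)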

# Stacked bar chart: share of each K6 mental-illness category within every
# race/ethnicity group.
NHIS_Data %>%
  # table() drops missing values, whereas grouping keeps NA as its own
  # category. Remove missing values first so the plotted shares use the same
  # denominators as the row percentages computed with prop.table() above.
  filter(
    !is.na(Demo_RaceEthnicity),
    !is.na(MentalHealth_MentalIllnessK6_C)
  ) %>%
  # count() returns ungrouped output, so no grouping message is emitted.
  count(Demo_RaceEthnicity, MentalHealth_MentalIllnessK6_C) %>%
  # Percent is computed within each race/ethnicity group, then ungrouped so no
  # grouping leaks into later steps.
  group_by(Demo_RaceEthnicity) %>%
  mutate(percent = n / sum(n)) %>%
  ungroup() %>%
  ggplot() +
  geom_col(
    aes(
      x = Demo_RaceEthnicity,
      y = percent,
      fill = MentalHealth_MentalIllnessK6_C
    )
  )

Chi-Square Statistical Test
  • data: Demo_RaceEthnicity and MentalHealth_MentalIllnessK6_C
# Pearson chi-squared test of independence between race/ethnicity and the
# K6 mental-illness category
chisq.test(
  NHIS_Data$Demo_RaceEthnicity,
  NHIS_Data$MentalHealth_MentalIllnessK6_C
)
## Warning in chisq.test(NHIS_Data$Demo_RaceEthnicity,
## NHIS_Data$MentalHealth_MentalIllnessK6_C): Chi-squared approximation may be
## incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  NHIS_Data$Demo_RaceEthnicity and NHIS_Data$MentalHealth_MentalIllnessK6_C
## X-squared = 1825.8, df = 32, p-value < 2.2e-16
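  • The results of the chi-square test support rejecting the null hypothesis. There is a statistically significant relationship between Race/Ethnicity and Mental Health Illness because the p-value is less than .05; these variables are not independent of one another. Caveat: R warns that the chi-squared approximation may be incorrect. Several race/ethnicity categories contain very few respondents (for example 1, 2, and 16), which makes some expected cell counts very small, so the result should be confirmed after combining or excluding those sparse categories.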