#setwd("/Users/bharathsivaraman/Dropbox/R programming/Prob and statistics/Project1")
library(plyr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(Hmisc)
library(corrplot)

# Load the saved workspace file from the current working directory. The code
# below reads a data frame named `brfss2013`, which this file is expected to
# provide.
load("brfss2013.RData")

# The Behavioral Risk Factor Surveillance System (BRFSS) is an ongoing
# surveillance system designed to measure behavioral risk factors for the
# non-institutionalized adult population (18 years of age and older) residing
# in the US. The BRFSS objective is to collect uniform, state-specific data on
# preventive health practices and risk behaviors that are linked to chronic
# diseases, injuries, and preventable infectious diseases that affect the adult
# population. Factors assessed by the BRFSS in 2013 include tobacco use,
# HIV/AIDS knowledge and prevention, exercise, immunization, health status,
# healthy days — health-related quality of life, health care access, inadequate
# sleep, hypertension awareness, cholesterol awareness, chronic health
# conditions, alcohol consumption, fruits and vegetables consumption, arthritis
# burden, and seatbelt use.
# Research question 1:
# What is the correlation between general health and strength training? Also, do veterans strength train more often than non-veterans?
# Variables used for this analysis:
# Research question 2:
# What is the correlation between mental health, depression, and heart attack, and between mental health and physical health?
# Variables used in this analysis:
# There are two parts to this analysis:
# Research question 3:
# Does the number of hours slept determine anxiety level?
# Variables used in this analysis:
# Reduce brfss2013 to the columns used by the analyses below. The names are
# passed to one_of() as strings. "bphigh4" was previously listed twice; it is
# listed once here, which selects the same column.
veterans.health <- brfss2013 %>%
  select(
    one_of(
      "seqno",
      "X_state",
      "idate",
      "veteran3",
      "genhlth",
      "physhlth",
      "menthlth",
      "poorhlth",
      "strength",
      "bphigh4",
      "sleptim1",
      "alcday5",
      "chckidny",
      "misnervs",
      "addepev2",
      "cvdinfr4",
      "sex"
    )
  )
# Replace the missing entries of a numeric vector with the rounded mean of its
# observed entries.
#
# x: numeric vector, possibly containing NA.
#
# Returns `x` with every NA replaced by round(mean(x, na.rm = TRUE), 0). When
# `x` has no observed values at all (empty or all NA) there is no mean to
# impute, so `x` is returned unchanged instead of being filled with NaN.
impute.mean <- function(x) {
  missing <- is.na(x)
  if (all(missing)) {
    return(x)
  }
  replace(x, missing, round(mean(x, na.rm = TRUE), 0))
}
# Impute the numeric measures state by state: ddply() splits veterans.health
# on X_state, transform() overwrites each listed column with its
# impute.mean() version computed within that state's rows, and the per-state
# pieces are combined back into one data frame.
veterans.health.impute <- ddply(
  .data = veterans.health,
  .variables = .(X_state),
  .fun = transform,
  poorhlth = impute.mean(poorhlth),
  menthlth = impute.mean(menthlth),
  physhlth = impute.mean(physhlth),
  strength = impute.mean(strength),
  sleptim1 = impute.mean(sleptim1),
  alcday5 = impute.mean(alcday5)
)
# Keep only the rows that have no missing value in any column.
veterans.health.impute <-
  veterans.health.impute[complete.cases(veterans.health.impute), ]

# Research question 1:
# One row per (genhlth, veteran3) combination holding the group means of
# strength, physhlth and menthlth. summarize() is namespace-qualified because
# plyr and Hmisc, both attached at the top of the file, export a function of
# the same name. The result is ungrouped so that later operations on
# veterans.health.chart are not silently applied per genhlth group.
veterans.health.chart <- veterans.health.impute %>%
  group_by(genhlth, veteran3) %>%
  dplyr::summarize(
    avg.strength = mean(strength),
    avg.physhlth = mean(physhlth),
    menthlth = mean(menthlth)
  ) %>%
  ungroup()
# Scatter plot of the per-group mean of `strength` (y) against `genhlth` (x);
# point shape distinguishes the veteran3 groups.
ggplot(veterans.health.chart, aes(x = genhlth, y = avg.strength)) +
  geom_point(aes(shape = veteran3), size = 3, color = "firebrick") +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.title = element_text(color = "Black", face = "bold")
  ) +
  labs(
    title = "coorelation between\n general health and strength",
    x = "general health",
    y = "strength training days"
  ) +
  scale_shape_discrete(name = "Veteran Status")

# Observations from Question 1:
# There seems to be a relationship between general health and the number of days of strength training: people in better health have trained on more days.
# Veterans seem to strength train more than non-veterans.
# Veterans have a better health condition than non-veterans.
# Research question 2:
# 1. To draw up a correlation analysis, there cannot be character variables. Convert the character variables into binary (yes = 1, no = 0).
# Assemble the all-numeric table used for the correlation analysis. The two
# yes/no answers (addepev2, cvdinfr4) are recoded to 1 for "yes" (compared
# case-insensitively) and 0 otherwise; a missing answer stays NA.
corr.data <- veterans.health %>%
  select(one_of(c("menthlth", "addepev2", "cvdinfr4", "physhlth"))) %>%
  mutate(
    depression = ifelse(tolower(addepev2) == "yes", 1, 0),
    heartattack = ifelse(tolower(cvdinfr4) == "yes", 1, 0)
  ) %>%
  select(one_of(c("menthlth", "depression", "heartattack", "physhlth")))

# Remove the rows that contain missing values (na.delete comes from Hmisc).
corr.data <- na.delete(corr.data)
# Correlation matrix of the columns in corr.data: computed once, printed, and
# reused for the plot below.
Corr.Chart <- cor(corr.data)
Corr.Chart
# Output recorded in the original write-up:
##               menthlth depression heartattack  physhlth
## menthlth    1.00000000 0.40879519  0.04952884 0.3474583
## depression  0.40879519 1.00000000  0.05276773 0.2385387
## heartattack 0.04952884 0.05276773  1.00000000 0.1570841
## physhlth    0.34745831 0.23853865  0.15708407 1.0000000

# Draw the correlation matrix using the "ellipse" display method.
corrplot(Corr.Chart, method = "ellipse")

# Observations from the correlation plot
# There is a positive correlation between mental health and depression. So the higher the number of days with bad mental health, the higher the depression.
# There is a positive correlation between physical health and mental health: bad physical health goes with bad mental health.
# Research question 3:
# Box plots of `sleptim1` (y) for each `misnervs` category (x), drawn in a
# separate panel for each value of `sex`.
ggplot(veterans.health.impute, aes(x = misnervs, y = sleptim1)) +
  geom_boxplot(fill = "firebrick") +
  facet_wrap(~sex) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.title = element_text(color = "Black", face = "bold")
  ) +
  labs(
    title = "Relationship between sleep and nervousness",
    x = "Frequency of nervousness",
    y = "Number of hours of sleep"
  )

# Observation from research question 3