1 Upfront Data Management and Cleaning

1.1 Calling the necessary libraries

library(ggplot2)
library(ggthemes)
library(grid)
library(readr)
library(dplyr)
library(lubridate)

1.2 Getting the datasets

# Read one BRFSS extract per survey year (2014-2017) into its own tibble.
# NOTE(review): the paths are absolute and machine-specific; the script only
# runs where this Box Sync folder exists.
BRFSS2014 <- read_csv(
  file = "c:/users/valen/Box Sync/mislaelv/BRFSS/BRFSS Datasets by year/BRFSS2014.csv"
)
BRFSS2015 <- read_csv(
  file = "c:/users/valen/Box Sync/mislaelv/BRFSS/BRFSS Datasets by year/BRFSS2015.csv"
)
BRFSS2016 <- read_csv(
  file = "c:/users/valen/Box Sync/mislaelv/BRFSS/BRFSS Datasets by year/BRFSS2016.csv"
)
BRFSS2017 <- read_csv(
  file = "c:/users/valen/Box Sync/mislaelv/BRFSS/BRFSS Datasets by year/BRFSS2017.csv"
)

1.3 Manage datasets

# Reduce each yearly file to the same eight analysis columns, in the same
# order, so the years can be stacked afterwards.
BRFSS2014NEW <- select(
  BRFSS2014,
  `_STATE`, IMONTH, IDAY, IYEAR,
  GENHLTH, PHYSHLTH, MENTHLTH, POORHLTH
)

# The 2015 file uses descriptive column labels instead of the short variable
# names, so the columns are renamed to the short names while being selected.
BRFSS2015NEW <- select(
  BRFSS2015,
  `_STATE`,
  IMONTH = `INTERVIEW MONTH`,
  IDAY = `INTERVIEW DAY`,
  IYEAR = `INTERVIEW YEAR`,
  GENHLTH = `GENERAL HEALTH`,
  PHYSHLTH = `NUMBER OF DAYS PHYSICAL HEALTH NOT GOOD`,
  MENTHLTH = `NUMBER OF DAYS MENTAL HEALTH NOT GOOD`,
  POORHLTH = `POOR PHYSICAL OR MENTAL HEALTH`
)

BRFSS2016NEW <- select(
  BRFSS2016,
  `_STATE`, IMONTH, IDAY, IYEAR,
  GENHLTH, PHYSHLTH, MENTHLTH, POORHLTH
)

BRFSS2017NEW <- select(
  BRFSS2017,
  `_STATE`, IMONTH, IDAY, IYEAR,
  GENHLTH, PHYSHLTH, MENTHLTH, POORHLTH
)

1.4 Append datasets

# Stack the four yearly subsets (2014 first, 2017 last) into one analysis
# table; bind_rows() matches columns by name.
ANALYSISDATA <- BRFSS2014NEW %>%
  bind_rows(BRFSS2015NEW, BRFSS2016NEW, BRFSS2017NEW)

1.5 Create new variables

# Derive the interview date, the hurricane reference date, and a
# before/after indicator.
ANALYSISDATA <- ANALYSISDATA %>%
  mutate(
    # Join month/day/year into "m/d/Y" text and parse it as a Date.
    IDATE = as.Date(
      paste0(IMONTH, "/", IDAY, "/", IYEAR),
      format = "%m/%d/%Y"
    ),
    # Reference date, repeated on every row.
    MARIA = ymd("2017-09-20"),
    # 1 when the interview is strictly later than the reference date, else 0.
    # Rows whose date could not be parsed (NA) stay 0.
    POSTHURRICANE = as.numeric(!is.na(IDATE) & IDATE > MARIA)
  )

1.6 Recode non-response codes as missing

# Recode BRFSS response codes so that they do not distort the averages.
#
# GENHLTH: 7 and 9 are non-response codes and become NA.
#
# MENTHLTH / PHYSHLTH / POORHLTH (number of days out of the last 30):
#   77 and 99 are non-response codes and become NA;
#   88 is the codebook value for "None" (zero days) and becomes 0.
# Treating 88 as missing would drop every respondent with zero bad days and
# inflate the average number of days.
ANALYSISDATA$GENHLTH[ANALYSISDATA$GENHLTH %in% c(7, 9)] <- NA

# Recode one day-count column: non-response -> NA, "None" -> 0.
# %in% never returns NA, so values that are already missing are left alone.
recode_brfss_days <- function(days) {
  days[days %in% c(77, 99)] <- NA
  days[days %in% 88] <- 0
  days
}

day_count_vars <- c("MENTHLTH", "PHYSHLTH", "POORHLTH")
ANALYSISDATA[day_count_vars] <- lapply(
  ANALYSISDATA[day_count_vars],
  recode_brfss_days
)

2 Bar Graphs

2.1 Average Self-reported general health

# Bar chart of mean GENHLTH for the two POSTHURRICANE groups (0 and 1).
ggplot(
  ANALYSISDATA,
  aes(x = POSTHURRICANE, y = GENHLTH, fill = factor(POSTHURRICANE))
) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "bar") +
  theme_minimal() +
  # The x tick labels are hidden; the fill legend identifies the groups.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Average View of Personal Health in General in Puerto Rico ",
    subtitle = "Before Maria vs After Maria",
    x = "Before Maria vs After Maria",
    y = "Average Description of Personal General Health"
  ) +
  scale_fill_manual(
    name = "Time Period",
    values = c("darkgreen", "darkred"),
    labels = c("before the hurricane", "after the hurricane")
  )

2.2 Average days of poor mental health

# Bar chart of mean MENTHLTH for the two POSTHURRICANE groups (0 and 1).
ggplot(
  ANALYSISDATA,
  aes(x = POSTHURRICANE, y = MENTHLTH, fill = factor(POSTHURRICANE))
) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "bar") +
  theme_minimal() +
  # The x tick labels are hidden; the fill legend identifies the groups.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Average Days of Poor Mental Health Per 30 Days in Puerto Rico ",
    subtitle = "Before Maria vs After Maria",
    x = "Before Maria vs After Maria",
    y = "Average Days in Last 30 with Poor Mental Health"
  ) +
  scale_fill_manual(
    name = "Time Period",
    values = c("darkgreen", "darkred"),
    labels = c("before the hurricane", "after the hurricane")
  )

2.3 Average days of poor physical health

# Bar chart of mean PHYSHLTH for the two POSTHURRICANE groups (0 and 1).
ggplot(
  ANALYSISDATA,
  aes(x = POSTHURRICANE, y = PHYSHLTH, fill = factor(POSTHURRICANE))
) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "bar") +
  theme_minimal() +
  # The x tick labels are hidden; the fill legend identifies the groups.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Average Days of Poor Physical Health Per 30 Days in Puerto Rico ",
    subtitle = "Before Maria vs After Maria",
    x = "Before Maria vs After Maria",
    y = "Average Days in Last 30 with Poor Physical Health"
  ) +
  scale_fill_manual(
    name = "Time Period",
    values = c("darkgreen", "darkred"),
    labels = c("before the hurricane", "after the hurricane")
  )

2.4 Average days that health impeded usual activities

# Bar chart of mean POORHLTH for the two POSTHURRICANE groups (0 and 1).
ggplot(
  ANALYSISDATA,
  aes(x = POSTHURRICANE, y = POORHLTH, fill = factor(POSTHURRICANE))
) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "bar") +
  theme_minimal() +
  # The x tick labels are hidden; the fill legend identifies the groups.
  theme(axis.text.x = element_blank()) +
  labs(
    title = "Average Days of Impeded Usual Activities Per 30 Days in Puerto Rico ",
    subtitle = "Before Maria vs After Maria",
    x = "Before Maria vs After Maria",
    y = "Average Days in Last 30 Impeded Usual Activities"
  ) +
  scale_fill_manual(
    name = "Time Period",
    values = c("darkgreen", "darkred"),
    labels = c("before the hurricane", "after the hurricane")
  )

3 Scatterplot Graphs

3.1 General health score

# Jittered scatterplot of GENHLTH against interview date, with two trend
# lines: a dark-red smoother using ggplot2's default method and a linear fit.
ANALYSISDATA %>%
  ggplot(aes(x = IDATE, y = GENHLTH)) +
  geom_jitter(size = 0.5) +
  geom_smooth(colour = "darkred") +
  geom_smooth(method = "lm") +
  labs(
    title = "Self-reported General Health Scores in Puerto Rico Over Time",
    x = "Date",
    y = "Self-reported General Health Score"
  ) +
  theme_minimal()

3.2 Days with poor physical health

# Jittered scatterplot of PHYSHLTH against interview date, with two trend
# lines: a dark-red smoother using ggplot2's default method and a linear fit.
ANALYSISDATA %>%
  ggplot(aes(x = IDATE, y = PHYSHLTH)) +
  geom_jitter(size = 0.5) +
  geom_smooth(colour = "darkred") +
  geom_smooth(method = "lm") +
  labs(
    title = "Self-reported Days of Poor Physical Health in Puerto Rico Over Time",
    subtitle = "Days in last 30 days with poor physical health",
    x = "Date",
    y = "Number of Days with Poor Physical Health"
  ) +
  theme_minimal()

3.3 Days with poor mental health

# Jittered scatterplot of MENTHLTH against interview date, with two trend
# lines: a dark-red smoother using ggplot2's default method and a linear fit.
ANALYSISDATA %>%
  ggplot(aes(x = IDATE, y = MENTHLTH)) +
  geom_jitter(size = 0.5) +
  geom_smooth(colour = "darkred") +
  geom_smooth(method = "lm") +
  labs(
    title = "Self-reported Days of Poor Mental Health in Puerto Rico Over Time",
    subtitle = "Days in last 30 days with poor mental health",
    x = "Date",
    y = "Number of Days with Poor Mental Health"
  ) +
  theme_minimal()

3.4 Days that health impeded usual activities

# Jittered scatterplot of POORHLTH against interview date, with two trend
# lines: a dark-red smoother using ggplot2's default method and a linear fit.
ANALYSISDATA %>%
  ggplot(aes(x = IDATE, y = POORHLTH)) +
  geom_jitter(size = 0.5) +
  geom_smooth(colour = "darkred") +
  geom_smooth(method = "lm") +
  labs(
    title = "Days of Impeded Activities in Puerto Rico Over Time",
    subtitle = "Days in last 30 days that health problems impeded usual activities",
    x = "Date",
    y = "Number of Days with Impeded Activities"
  ) +
  theme_minimal()