library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## -- Conflicts -------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(ggpol)
library(DataExplorer)
library(knitr)
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.6.3
## corrplot 0.84 loaded
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
BRFSS <- read.csv(file="C:/Users/user/Desktop/Capstone Project/Submission/R codes/BRFSS2018-V3.csv", header = TRUE, sep = ",")
plot_intro(BRFSS)

str(BRFSS)
## 'data.frame':    279572 obs. of  34 variables:
##  $ IDAY        : int  5 12 3 10 9 10 10 3 11 12 ...
##  $ MONTH       : Factor w/ 12 levels "Apr","Aug","Dec",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ Region      : Factor w/ 4 levels "Midwest ","Northeast",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ AGE         : Factor w/ 7 levels "18-29","30-39",..: 7 2 5 6 4 3 7 4 3 6 ...
##  $ GENDER      : Factor w/ 2 levels "Female","Male": 1 1 2 1 2 1 2 1 1 1 ...
##  $ RACE        : Factor w/ 6 levels "AIAN","Asian",..: 6 3 6 6 6 3 6 6 6 6 ...
##  $ RELP        : Factor w/ 4 levels "In relationship",..: 4 3 2 1 1 2 1 1 1 4 ...
##  $ EDU         : Factor w/ 4 levels "<8","12-15","16+",..: 3 3 4 3 4 3 4 2 2 4 ...
##  $ EMPL        : Factor w/ 4 levels "Employed","Occupations",..: 1 1 3 2 1 1 3 1 4 3 ...
##  $ INCOME      : Factor w/ 5 levels "$20K-<$35K","$35K-<$50K",..: 2 1 5 4 4 4 1 2 1 1 ...
##  $ RENTHOME    : Factor w/ 3 levels "Other","Own",..: 2 3 2 2 2 3 2 2 2 2 ...
##  $ COMMUN      : Factor w/ 3 levels "Rural","Suburban",..: 1 3 3 3 1 3 3 2 2 3 ...
##  $ ALCpa30     : Factor w/ 3 levels "0","1-9","10-30": 1 2 1 1 2 1 1 3 2 1 ...
##  $ SMOKESTAT   : Factor w/ 4 levels "Daily smoker",..: 3 1 3 3 2 3 2 3 2 3 ...
##  $ EXERpa30    : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 2 1 2 2 1 ...
##  $ SLEPTIM1    : int  7 5 6 8 6 7 10 6 6 6 ...
##  $ GENHLTH     : Factor w/ 5 levels "Excellent","Fair",..: 5 3 1 1 5 3 1 3 5 2 ...
##  $ MENTHLTHpa30: Factor w/ 3 levels "0","1-9","10-30": 1 1 1 1 1 1 1 2 2 1 ...
##  $ PHYSHLTHpa30: Factor w/ 3 levels "0","1-9","10-30": 3 1 1 1 1 2 1 2 2 1 ...
##  $ HLTHPLN     : Factor w/ 2 levels "No","Yes": 2 1 2 2 2 2 2 2 1 2 ...
##  $ CHECKUP     : Factor w/ 4 levels "<1","1-<2","2-<5",..: 1 2 1 1 1 1 1 1 1 1 ...
##  $ MEDICOST    : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 2 1 ...
##  $ ARTHRITIS   : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 1 2 2 ...
##  $ ASTHMA      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
##  $ CANCER      : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 2 1 1 ...
##  $ DEPRESSION  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 1 ...
##  $ DIABETE     : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 2 ...
##  $ KIDNEY      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ PULMOND     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 1 1 1 1 ...
##  $ STROKE      : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ LASTDENTV   : Factor w/ 4 levels "<1","1-<2","2-<5",..: 1 2 3 1 1 1 1 1 1 2 ...
##  $ RMVTETH     : Factor w/ 4 levels ">= 6","0","1-5",..: 3 2 3 2 2 2 1 2 1 3 ...
##  $ BMICAT      : Factor w/ 4 levels "Normal","Obese",..: 1 2 3 1 3 3 3 2 3 2 ...
##  $ MICHD       : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
library(ggridges)
library(ggplot2)

1.2. Building Proportion Tables Example

MICHFreq <- table(BRFSS$MICHD)
MICHFreq
## 
##     No    Yes 
## 254978  24594
prop.table(MICHFreq)
## 
##         No        Yes 
## 0.91202982 0.08797018
  1. Graphical data analysis
library(scales)
library(tidyverse)
library(pander)

Defining some sets of color

col2 <- c("#A9A9A9", "#FF8C00")
col3 <- c("#003f5c", "#bc5090","#ffa600")
col4 <- c("#FB6542","#3F681C","#4D585B", "#375E97")
col5 <- c("#BD3E85","#3F681C","#FFBB00", "#FB6542", "#375E97")
col6 <- c("#0072BB", "#FF4C3B","#FFD034","#51A39D","#06425C", "#C6C8CA")
col7 <- c("#0072BB", "#FF4C3B","#FFD034","#51A39D","#06425C", "#C6C8CA","#814374")

The outcome (MICHD)

The characteristics of the sample were examined by calculating frequencies of categorical variables and the mean value of a continuous variable (sleep duration is only a numeric attribute in the data).

The outcome of interest, MICHD, was defined as a “Yes” response to having been “ever diagnosed with” at least one of the following: myocardial infarction/heart attack, angina or coronary heart disease. Of the 279572 individuals included in this analysis, 24594 respondents had MICHD

BRFSS %>% 
  group_by(MICHD) %>% 
  dplyr::count() %>% 
  pander()
MICHD n
No 254978
Yes 24594
df_MICHD <- BRFSS %>% 
  group_by(MICHD) %>% 
  dplyr::count() %>% 
  ungroup()%>% 
  arrange(desc(MICHD)) %>%
  mutate(Percentage = round(n/sum(n),4)*100,
         lab.pos = cumsum(Percentage)-0.5*Percentage)

ggplot(data = df_MICHD, 
       aes(x = "", 
           y = Percentage, 
           fill = MICHD))+
  geom_bar(stat = "identity")+
  coord_polar("y") +
  geom_text(aes(y = lab.pos, 
                label = paste(Percentage,"%", sep = "")), 
            col = "blue",
            size = 6) +
  theme( legend.box = "horizontal",
         legend.text = element_text(color = "black", size = 12))+
  theme_void() +
  scale_fill_manual(values = col2)

Region

library(treemapify)
BRFSS %>% 
  group_by(Region) %>%
  dplyr::count() %>% 
  pander()
Region n
Midwest 76268
Northeast 57288
South 83856
West 62160
df_region <- BRFSS %>% 
  group_by(Region) %>% 
  dplyr::count() %>% 
  ungroup()%>% 
  arrange(desc(Region)) %>%
  mutate(percentage = round(n/sum(n),4)*100,
         lab.pos = cumsum(percentage)-.5*percentage)

ggplot(data = df_region, 
       aes(x = "", 
           y = percentage, 
           fill = Region))+
  geom_bar(stat = "identity")+
  coord_polar("y") +
  geom_text(aes(y = lab.pos, 
                label = paste(percentage,"%", sep = "")), col = "white", size = 5) +
  scale_fill_manual(values = col4) +
  theme_void() +
  theme(legend.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 14))

Demographics (B) B1. AGE B2. GENDER B3. X_RACE B4. RELPT B5. EDU B6. EMPL B7. INCOME1 B8. RENTHOME

Defining theme and geomtext for all bar charts

theme_bar <- theme(axis.text = element_text(size = 12),
                   axis.title = element_text(size = 12, face = "bold"),
                   plot.caption = element_text(color = "grey44",
                                               size = 12,
                                               face = "italic"),
                   legend.text = element_text(colour="black", size = 12))

geomtext_bar <- geom_text(aes(label = paste0(Percentage,"%")), 
                               vjust = -0.3,
                               size = 4,
                               fontface = "bold") 


theme_point <- theme(axis.text = element_text(size = 12, color = "black"),
                    axis.title = element_text(size = 12, face = "bold"),
                    plot.caption = element_text(color = "grey", size = 12, face = "italic"),
                    legend.text = element_text(colour="black", size = 12),
                    legend.position="right")


themebar_stacked <-  theme(axis.text = element_text(size = 12, color = "black"),
                           axis.title = element_text(size = 12, face = "bold"),
                           plot.caption = element_text(color = "grey", size = 12, face = "italic"),
                           legend.text = element_text(colour="black", size = 12),
                           legend.position="right")

themebar_facet <-   theme(axis.text = element_text(size = 12, color = "black"),
        axis.title = element_text(size = 12, face= "bold"),
        plot.title = element_text(color = "black", size = 12, face = "bold.italic"),
        plot.caption = element_text(color = "grey", size = 12, face = "italic"),
        strip.text = element_text(size = 12, color = "black", face = "bold.italic"),
        strip.background = element_rect(fill = "slategray1"),
        legend.text = element_text(colour="black", size = 20),
        legend.position = "none")

B1.AGE

ggplot(BRFSS, aes(x = AGE)) +
  geom_bar(aes(y = (..count..)/sum(..count..)),
           fill = "royalblue4", 
           width = 0.75) +
  scale_y_continuous(labels = percent)+
  geom_text(size = 4.5,
            color= "black",
            aes(y = ((..count..)/sum(..count..)),
                label = scales::percent((..count..)/sum(..count..))),
            stat = "count",
            vjust = -0.3) +
  labs(x = "Age group",
       y = "Percent of respondents",
       caption = "Data source: BRFSS2018") + 
  theme_bar

Other way to make the same graph

BRFSS %>% 
  group_by(AGE) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(Agegroup = factor(AGE),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100)) %>% 
  ggplot(aes(x = AGE, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           fill = "royalblue4",
           width = 0.80) +
  geomtext_bar  +
  labs(x = "Age group",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

More than 60% of participants were older than 50 years

library(ggimage)
library(extrafont)
## Registering fonts with R
loadfonts(device = "win")

df1_age <- data.frame(grp = c("18-29","30-39","40-49","50-59","60-69","70-79","80+"),
                  y = 7:1, 
                  img = rep("./man.png",7), 
                  freq = c(10.52,12.32,13.71,19.05,22.61,15.30,6.48))

df2_age <- df1_age[rep(seq_len(nrow(df1_age)), df1_age$freq), 1:3]

df2_age <- df2_age %>% group_by(grp) %>% mutate(x = row_number())

ggplot(df2_age) + 
  geom_image(aes(x = x, 
                 y = y, 
                 image = img), 
             size = 0.045) + 
  theme_void() +
  geom_text(data = df1_age, 
            aes(y = y + 0.1, 
                label = paste(freq,"%", sep = "")), 
            x = 24,
            fontface = 2, 
            size = 4.5, 
            color = "blue") +
  geom_text(data = df1_age,
            aes(y = y + 0.2,
                label = grp), 
            x = -0.3, 
            fontface = 2, 
            size = 4.5, 
            color = "black") +
  xlim(-1,max(df1_age$freq)+2) + 
  ylim(0,max(df1_age$y)+1)

B2. GENDER

tbl_gender <- table(BRFSS$GENDER)
tbl_gender
## 
## Female   Male 
## 145904 133668
prop.table(tbl_gender)
## 
##    Female      Male 
## 0.5218835 0.4781165
library(png)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
genderselection <- read.table(text="
  Gender Freq
     F   52
     M   48
", header=T)
pcts <- round(prop.table(genderselection$Freq)*100)

# Load png file with man and woman
img <- readPNG("manfemale.png")
h <- dim(img)[1]
w <- dim(img)[2]

# Find the rows where feet starts and head ends
pos1 <- which(apply(img[,,1], 1, function(y) any(y==1)))
mn1 <- min(pos1)
mx1 <- max(pos1)
pospctM <- round((mx1-mn1)*pcts[2]/100+mn1)
pospctF <- round((mx1-mn1)*pcts[1]/100+mn1)

# Fill bodies with a different color according to percentages
imgmtx <- img[h:1,,1]
whitemtx <- (imgmtx==1)
colmtx <- matrix(rep(FALSE,h*w),nrow=h)
midpt <- round(w/2)-10
colmtx[mx1:pospctM,1:midpt] <- TRUE
colmtx[mx1:pospctF,(midpt+1):w] <- TRUE
imgmtx[whitemtx & colmtx] <- 0.5

# Plot matrix using heatmap and print text
labs <- c(paste0(pcts[2], "% Males"),paste0(pcts[1], "% Females"))
ax <- list(ticks = '', 
           showticklabels = FALSE, 
           showgrid = FALSE, 
           zeroline = FALSE)
p <- plot_ly(z = imgmtx, 
             showscale = FALSE, 
             type = 'heatmap', 
             width = 500,  
             height = 500) %>%
     add_text(x = c(100,250), y = c(20,20), type = 'heatmap', mode = "text",
        text = labs, showlegend = FALSE, textfont = list(size = 20, color = "#FFFFFF"), inherit = FALSE) %>%
     layout(xaxis = ax,  yaxis = ax)  
p

B3. RACE

BRFSS$RACE <- factor(BRFSS$RACE,
                      levels = c("AIAN",
                                 "Asian",
                                 "Black",
                                 "Hispanic",
                                 "White",
                                 "Others"), ordered = TRUE)
BRFSS %>% 
  group_by(RACE) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(Agegroup = factor(RACE),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100)) %>% 
  ggplot(aes(x = RACE, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "blue", 
           fill = "royalblue4",
           width = 0.75) +
  geomtext_bar +
  labs(x = "Race",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

The majority of participants are white, followed by black, with very few Asian or American Indians

Alternative graphs

library(treemapify)
library(treemap)
## Warning: package 'treemap' was built under R version 3.6.3
plotdata2 <- BRFSS %>%
  dplyr::count(RACE)

ggplot(plotdata2, 
       aes(fill = RACE, 
           area = n,
           label = RACE)) +
  
  geom_treemap() + 
  geom_treemap_text(colour = "dark blue",
                    place = "centre",
                    fontface = "italic") +
  labs(title = "Race of respondents") +
  
  scale_fill_brewer(palette = "RdYlBu")+
  theme(legend.position = "none")

Regroup AIAN, Asian, Multiracial and other into a group OTHERs

df2_race <- data.frame(
  RACE = c("White", "Black","Hispanic", "Others"),
  Percentage = c(78.96, 7.73, 6.69, 6.62))

df3_race <- df2_race %>%
  arrange(desc(RACE)) %>%
  mutate(lab.pos = cumsum(Percentage),
         centre =  lab.pos - 0.5*Percentage)

ggplot(data = df3_race, 
       aes(x = "", 
           y = Percentage, 
           fill = RACE))+
  geom_bar(stat = "identity")+
  coord_polar("y") +
  geom_text(aes(y = centre, 
                label = paste(Percentage,"%", sep = "")), 
            col = "white") +
  theme_void() +
  scale_fill_manual(values = col4)

library(ggforce) # for 'geom_arc_bar'
## 
## Attaching package: 'ggforce'
## The following objects are masked from 'package:ggpol':
## 
##     geom_circle, GeomCircle, stat_circle, StatCircle
BRFSS %>% 
  group_by(RACE) %>% 
  dplyr::count()
## # A tibble: 6 x 2
## # Groups:   RACE [6]
##   RACE          n
##   <ord>     <int>
## 1 AIAN       4652
## 2 Asian      5670
## 3 Black     21602
## 4 Hispanic  18717
## 5 White    220761
## 6 Others     8170
df3_race <- data.frame(
  RACE = c("White", "Black","Hispanic", "Others"),
  Value = c(220761, 21602   , 18717, 18492))

df3_race <- df3_race %>% 
  mutate(end = 2 * pi * cumsum(Value)/sum(Value),
         start = lag(end, default = 0),
         middle = 0.5 * (start + end),
         hjust = ifelse(middle > pi, 1, 0),
         vjust = ifelse(middle < pi/2 | middle > 3 * pi/2, 0, 1))

df3_race$Label <- paste(round(((df3_race$Value/sum(df3_race$Value))*100),2),"%")

ggplot(df3_race) + 
  geom_arc_bar(aes(x0 = 0, 
                   y0 = 0, 
                   r0 = 0, 
                   r = 1,
                   start = start, 
                   end = end, 
                   fill = RACE)) +
  scale_fill_manual(values = col4) +
  geom_text(aes(x = 1.05 * sin(middle), 
                y = 1.05 * cos(middle), 
                label = Label,
                hjust = hjust, 
                vjust = vjust)) +
  coord_fixed() +
  scale_x_continuous(limits = c(-1.5, 1.5),  # Adjust so labels are not cut off
                     name = "", breaks = NULL, labels = NULL) +
  scale_y_continuous(limits = c(-1.25,1.25),      # Adjust so labels are not cut off
                     name = "", breaks = NULL, labels = NULL)

xRace <- c(78.96, 7.72, 6.69, 6.63)
labels <- c("White", "Black","Hispanic", "Others")
pct <- xRace/sum(xRace)*100
lbls <- paste(labels, pct) # add percents to labels
lbls <- paste(lbls,"%",sep="") # ad % to labels
pie(xRace,labels = lbls, col= col4)

AIAN 1.66 Asian 2.03 Black 7.73 Hispanic 6.69 White 78.96 Others 2.92

xRace2 <- c(78.96, 7.73, 6.69,1.66, 2.03,  2.92)
labels2 <- c("White", "Black", "Hispanic","AIAN", "Asian",  "Others")
pct2 <- round(xRace2/sum(xRace2)*100,2)
lbls2 <- paste(labels2, pct2) # add percents to labels
lbls2 <- paste(lbls2,"%",sep="") # ad % to labels
pie(xRace,labels = lbls2, col= col6)

B5. EDUCATION(EDU)

BRFSS$EDU <- factor(BRFSS$EDU,
                      levels = c("<8",
                                 "9-12",
                                 "12-15",
                                 "16+"), ordered = TRUE)    
BRFSS %>% 
  group_by(EDU) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(EDUgroup = factor(EDU),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100)) %>% 
  ggplot(aes(x = EDU , 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "blue", 
           fill = "royalblue4",
           width = 0.5) +
  geomtext_bar +
  labs(x = "Years of education",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

library(echarts4r)
## Warning: package 'echarts4r' was built under R version 3.6.3
## Welcome to echarts4r
## 
## Docs: echarts4r.john-coene.com
df2_edu <- data.frame(
  x = c("<8","9-12", "12-15", "16+"),
  y = c(1.64, 29.41, 27.99, 40.95))

df2_edu %>% 
  e_charts(x) %>% 
  e_pictorial(y, symbol = paste0("image://","https://1.bp.blogspot.com/-klwxpFekdEQ/XOubIhkalyI/AAAAAAAAHlE/25psl9x4oNkbJoLc2CKTXgV2pEj6tAvigCLcBGAs/s1600/pencil.png", size = 0.001)) %>%
  e_theme("westeros") %>% 
  e_labels(show = TRUE, postion = "top", fontSize = 20, color = "blue")%>%
  e_legend(show = FALSE) %>%
  e_x_axis(splitLine=list(show = FALSE,fontSize = 30)) %>%
  e_y_axis(show=FALSE, splitLine=list(show = FALSE,fontSize = 20))
## Warning in if (deparse(substitute(symbol)) %in% colnames(e$x$data[[1]])) symbol
## <- deparse(substitute(symbol)): the condition has length > 1 and only the first
## element will be used

B6. EMPLOYMENT

BRFSS$EMPL <- factor(BRFSS$EMPL,
                      levels = c("Employed",
                                 "Unemployed",
                                 "Occupations",
                                 "Retired"), ordered = TRUE)
tbl_empl <- table(BRFSS$EMPL)
pro_tbl_empl <- prop.table(tbl_empl)
pro_tbl_empl 
## 
##    Employed  Unemployed Occupations     Retired 
##  0.55142861  0.09831099  0.06583993  0.28442047
df_empl <- BRFSS %>% 
  group_by(EMPL) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(EMPLgroup = factor(EMPL),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

df_empl %>% 
  pander()
EMPL Count EMPLgroup Percentage label
Employed 154164 Employed 55.14 55.1%
Unemployed 27485 Unemployed 9.83 9.8%
Occupations 18407 Occupations 6.58 6.6%
Retired 79516 Retired 28.44 28.4%
ggplot(df_empl, 
       aes(x = EMPL, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "blue", 
           fill = "royalblue4",
           width = 0.5) +
  geomtext_bar +
  labs(x = "Employment status",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

B4. RELP (Relationship STATUS)

BRFSS %>% 
  group_by(RELP) %>% 
  dplyr::count()
## # A tibble: 4 x 2
## # Groups:   RELP [4]
##   RELP                 n
##   <fct>            <int>
## 1 In relationship 161013
## 2 Living apart     44731
## 3 Never married    45149
## 4 Widowed          28679
BRFSS$RELP <- factor(BRFSS$RELP,
                      levels = c("In relationship",
                                 "Living apart",
                                 "Never married",
                                 "Widowed"), ordered = TRUE)

The table is maybe better

df_RELP <- BRFSS %>% 
  group_by(RELP) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(RELPgroup = factor(RELP),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

df_RELP %>% 
  pander()
RELP Count RELPgroup Percentage label
In relationship 161013 In relationship 57.59 57.59%
Living apart 44731 Living apart 16 16.00%
Never married 45149 Never married 16.15 16.15%
Widowed 28679 Widowed 10.26 10.26%
ggplot(df_RELP, 
       aes(x = RELP, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "blue", 
           fill = "royalblue4",
           width = 0.5) +
  geomtext_bar +
  labs(x = "Relationship status",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

B5. INCOME (LEVELS of INCOME)

BRFSS$INCOME <- factor(BRFSS$INCOME,
                      levels = c("<$20K",
                                 "$20K-<$35K",
                                 "$35K-<$50K",
                                 "$50K-<$75K",
                                 "$75K+"), ordered = TRUE)
df_income <- BRFSS %>% 
  group_by(INCOME) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(Income_range = factor(INCOME),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

df_income %>% 
  pander()
INCOME Count Income_range Percentage label
<$20K 40024 <$20K 14.32 14.32%
$20K-<$35K 51546 $20K-<$35K 18.44 18.44%
$35K-<$50K 38677 $35K-<$50K 13.83 13.83%
$50K-<$75K 47127 $50K-<$75K 16.86 16.86%
$75K+ 102198 $75K+ 36.56 36.56%
ggplot(df_income, 
       aes(x = INCOME, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "blue", 
           fill = "royalblue4",
           width = 0.5) +
  geomtext_bar +
  labs(x = "Income range",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

For presentation

library(ggimage)
library(extrafont)
loadfonts(device = "win")

df1Income <- data.frame(grp = c("<$20K","$20K-<$35K","$35K-<$50K","$50K-<$75K","$75K+"),
                  y = 5:1, 
                  img = rep("./coin.jpg",5), 
                  freq=c(14.32,18.44,13.83,16.86,36.56))

df2Income<- df1Income[rep(seq_len(nrow(df1Income)), df1Income$freq), 1:3]

df2Income <- df2Income %>% group_by(grp) %>% mutate(x = row_number())

ggplot(df2Income) + 
  geom_image(aes(x = x, 
                 y = y, 
                 image = img), size = 0.045) + 
  theme_void() +
  geom_text(data = df1Income, aes(y = y + 0.1, label=paste(freq,"%", sep = "")),  x= -0.5,fontface = 2, size = 4, color="blue") +
  geom_text(data = df1Income, aes(y = y-.3, label = grp), x = 0.5, fontface = 2, size = 4, color="black") +
  xlim(-1,max(df1Income$freq)+2) + ylim(0,max(df1Income$y)+1)

Examining the association between income and education level The question we’d like to answer is: is edcuaiton level related to revenue?

ggplot(BRFSS, aes(x= EDU , group = INCOME)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="INCOME") +
    facet_grid(~ INCOME) +
    scale_y_continuous(labels = scales::percent)+
  labs(x = "Year of education",
       y = "Percent of respondents",
       caption = "Data source: BRFSS2018") +
  themebar_facet

df_EduIncome <- BRFSS %>% 
  group_by(EDU, INCOME) %>% 
  dplyr::summarise(Count =  n()) 

df_EduIncome 
## # A tibble: 20 x 3
## # Groups:   EDU [4]
##    EDU   INCOME     Count
##    <ord> <ord>      <int>
##  1 <8    <$20K       2373
##  2 <8    $20K-<$35K  1322
##  3 <8    $35K-<$50K   439
##  4 <8    $50K-<$75K   216
##  5 <8    $75K+        244
##  6 9-12  <$20K      21070
##  7 9-12  $20K-<$35K 22708
##  8 9-12  $35K-<$50K 12608
##  9 9-12  $50K-<$75K 11564
## 10 9-12  $75K+      14283
## 11 12-15 <$20K      11238
## 12 12-15 $20K-<$35K 16782
## 13 12-15 $35K-<$50K 12896
## 14 12-15 $50K-<$75K 14518
## 15 12-15 $75K+      22825
## 16 16+   <$20K       5343
## 17 16+   $20K-<$35K 10734
## 18 16+   $35K-<$50K 12734
## 19 16+   $50K-<$75K 20829
## 20 16+   $75K+      64846
library(tidyverse)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:plotly':
## 
##     arrange, mutate, rename, summarise
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
df2_EduIncome <- ddply(df_EduIncome,.(INCOME),transform, Percentage = Count*100/sum(Count))
df2_EduIncome 
##      EDU     INCOME Count Percentage
## 1     <8      <$20K  2373  5.9289426
## 2   9-12      <$20K 21070 52.6434140
## 3  12-15      <$20K 11238 28.0781531
## 4    16+      <$20K  5343 13.3494903
## 5     <8 $20K-<$35K  1322  2.5646995
## 6   9-12 $20K-<$35K 22708 44.0538548
## 7  12-15 $20K-<$35K 16782 32.5573274
## 8    16+ $20K-<$35K 10734 20.8241183
## 9     <8 $35K-<$50K   439  1.1350415
## 10  9-12 $35K-<$50K 12608 32.5981850
## 11 12-15 $35K-<$50K 12896 33.3428136
## 12   16+ $35K-<$50K 12734 32.9239600
## 13    <8 $50K-<$75K   216  0.4583360
## 14  9-12 $50K-<$75K 11564 24.5379506
## 15 12-15 $50K-<$75K 14518 30.8061196
## 16   16+ $50K-<$75K 20829 44.1975937
## 17    <8      $75K+   244  0.2387522
## 18  9-12      $75K+ 14283 13.9758117
## 19 12-15      $75K+ 22825 22.3340966
## 20   16+      $75K+ 64846 63.4513396
ggplot( )+
  geom_bar(data=df2_EduIncome, 
           aes(x = INCOME, 
               y = Percentage,
               fill = EDU),
               stat = "identity")+
   labs(y = "Percent of respondents (%)", 
        x = "Salary Bracket") +
  scale_fill_manual(values = col4) +
  scale_y_continuous(breaks = seq(0, 100, 20)) +
  themebar_stacked

ggplot(data=df2_EduIncome, aes(x = INCOME, 
                           y = Percentage, 
                           group = EDU)) +
  geom_line(linetype = "dashed", 
            aes(color = EDU), 
            size = 0.5) +
  geom_point(aes(color = EDU), 
             size = 6) +
  scale_color_manual(values = col4) +
   labs(y = "Percent of respondents (%)", x = "Salary Bracket") +
  theme_point +
  guides(color=guide_legend("Year of education")) 

ggplot(BRFSS, aes(x= INCOME , group = EDU)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col5) +
    labs(y = "Percent of respondents", fill = "INCOME") +
    facet_grid(~ EDU) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

df3_EduIncome <- ddply(df_EduIncome,.(EDU),transform, Percentage = Count*100/sum(Count))
df3_EduIncome 
##      EDU     INCOME Count Percentage
## 1     <8      <$20K  2373  51.654332
## 2     <8 $20K-<$35K  1322  28.776665
## 3     <8 $35K-<$50K   439   9.555943
## 4     <8 $50K-<$75K   216   4.701785
## 5     <8      $75K+   244   5.311276
## 6   9-12      <$20K 21070  25.622317
## 7   9-12 $20K-<$35K 22708  27.614218
## 8   9-12 $35K-<$50K 12608  15.332044
## 9   9-12 $50K-<$75K 11564  14.062481
## 10  9-12      $75K+ 14283  17.368939
## 11 12-15      <$20K 11238  14.360010
## 12 12-15 $20K-<$35K 16782  21.444179
## 13 12-15 $35K-<$50K 12896  16.478616
## 14 12-15 $50K-<$75K 14518  18.551221
## 15 12-15      $75K+ 22825  29.165975
## 16   16+      <$20K  5343   4.666946
## 17   16+ $20K-<$35K 10734   9.375819
## 18   16+ $35K-<$50K 12734  11.122757
## 19   16+ $50K-<$75K 20829  18.193491
## 20   16+      $75K+ 64846  56.640987
ggplot( )+
  geom_bar(data=df3_EduIncome, 
           aes(x = EDU, 
               y = Percentage,
               fill = INCOME),
               stat = "identity")+
   labs(y = "Percent of respondents (%)", 
        x = "Years of education") +
  scale_fill_manual(values = col5) +
  themebar_stacked 

ggplot(data=df3_EduIncome, aes(x = EDU, 
                           y = Percentage, 
                           group = INCOME)) +
  geom_line(linetype = "dashed", 
            aes(color = INCOME), 
            size = 0.5) +
  geom_point(aes(color = INCOME), 
             size = 6) +
  scale_color_manual(values = col5) +
  labs(y = "Percent of respondents (%)", x = "Year of education") +
  theme_point +
  guides(color=guide_legend("Salary Bracket")) 

GENDER and INCOME

df_GenderIncome <- BRFSS %>% 
  group_by(GENDER, INCOME) %>% 
  dplyr::summarise(Count =  n()) 

df_GenderIncome %>% 
  pander()
GENDER INCOME Count
Female <$20K 24445
Female $20K-<$35K 29608
Female $35K-<$50K 20421
Female $50K-<$75K 23800
Female $75K+ 47630
Male <$20K 15579
Male $20K-<$35K 21938
Male $35K-<$50K 18256
Male $50K-<$75K 23327
Male $75K+ 54568
df2_GenderIncome <- ddply(df_GenderIncome,.(INCOME),transform, Percentage = Count*100/sum(Count))
                      
df2_GenderIncome <- df2_GenderIncome %>% 
  mutate(pos = cumsum(Percentage) - (0.5 * Percentage))

df2_GenderIncome %>% 
  pander()
GENDER INCOME Count Percentage pos
Female <$20K 24445 61.08 30.54
Male <$20K 15579 38.92 80.54
Female $20K-<$35K 29608 57.44 128.7
Male $20K-<$35K 21938 42.56 178.7
Female $35K-<$50K 20421 52.8 226.4
Male $35K-<$50K 18256 47.2 276.4
Female $50K-<$75K 23800 50.5 325.3
Male $50K-<$75K 23327 49.5 375.3
Female $75K+ 47630 46.61 423.3
Male $75K+ 54568 53.39 473.3
ggplot( )+
  geom_bar(data = df2_GenderIncome, 
           aes(x = INCOME, 
               y = Percentage,
               fill = GENDER),
               stat = "identity") +
   labs(y = "Percent of respondents (%)", 
        x = "Salary Bracket") +
  scale_fill_manual(values = col2) +
  scale_y_continuous(breaks = seq(0, 100, 20)) +
  themebar_stacked

It is hard to see the difference in this graph

ggplot(BRFSS, aes(x= GENDER , 
                  group = INCOME)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.75) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col2) +
    labs(y = "Percent of respondents", fill="GENDER") +
    facet_grid(~ INCOME) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(data = df2_GenderIncome, 
       aes(x = INCOME,
           y = Percentage,
           group = GENDER)) +
  geom_line(linetype = "dashed", 
            aes(color = GENDER), 
            size = 0.5) +
  geom_point(aes(color = GENDER), 
             size = 6) +
  scale_color_manual(values = c("red","blue") ) +
  labs(x = "Salary Bracket",
       y = "Percent of respondents (%)") +
  theme_point +
  guides(color=guide_legend("GENDER")) 

ggplot(BRFSS, aes(x= INCOME , group = GENDER)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.6) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col5) +
    labs(y = "Percent of respondents", fill = "INCOME") +
    facet_grid(~  GENDER) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

library(DescTools)

Strong association : AGE, EMPL Moderate to strong : RELP,INCOME Weak to moderate : GENDER, EDU, RACE

Community

df_COMMUN <- BRFSS %>% 
  group_by(COMMUN) %>% 
  dplyr::summarise(Count = n()) %>% 
  ungroup()%>% 
  arrange(desc(COMMUN)) 

df_COMMUN
## # A tibble: 3 x 2
##   COMMUN    Count
##   <fct>     <int>
## 1 Urban    192173
## 2 Suburban  45058
## 3 Rural     42341
df2_COMMUN <- df_COMMUN %>% 
  mutate(Percentage = round(Count/sum(Count),4)*100,
         lab.pos = cumsum(Percentage)-0.5*Percentage)

df2_COMMUN
## # A tibble: 3 x 4
##   COMMUN    Count Percentage lab.pos
##   <fct>     <int>      <dbl>   <dbl>
## 1 Urban    192173       68.7    34.4
## 2 Suburban  45058       16.1    76.8
## 3 Rural     42341       15.1    92.4
geomtext_pie <- geom_text(aes(y = lab.pos,
                              label = paste(Percentage,"%", sep = "")),
                          col = "blue",
                          size = 5)
ggplot(data = df2_COMMUN, 
       aes(x = "", 
           y = Percentage, 
           fill = COMMUN)) +
  geom_bar(stat = "identity") +
  coord_polar("y") +
  geomtext_pie +
  theme_void() +
    theme(legend.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 14)) +
  scale_fill_manual(values = col3)

Xcommun <- c(68.74, 16.12, 15.14)
labels2 <- c("Urban", "Suburban","Rural")
pct2 <- Xcommun/sum(Xcommun)*100
lbls2 <- paste(labels2, pct2) # add percents to labels
lbls2 <- paste(lbls2,"%",sep="") # ad % to labels
pie(Xcommun,labels = lbls2, col= col3)

RENT/OWN

library(dplyr)
library(plyr)
library(pander)
Xownerchip <- c(72.68, 23.55, 3.78)
labels3 <- c("Own", "Rent","Other situation")
pct3 <-Xownerchip /sum(Xownerchip )*100
lbls3 <- paste(labels3, round(pct3,2) )# add percents to labels
lbls3 <- paste(lbls3,"%" ,sep="") # ad % to labels
pie(Xownerchip ,labels = lbls3, col= col3)

Lifestyle (C) Health behaviors

BRFSS %>% 
  group_by(SMOKESTAT) %>% 
  dplyr::count() %>% 
  pander()
SMOKESTAT n
Daily smoker 29247
Former smoker 80542
Never smoker 158075
Someday smoker 11708
BRFSS$SMOKESTAT <- factor(BRFSS$SMOKESTAT,
                      levels = c("Daily smoker",
                                 "Someday smoker",
                                 "Former smoker",
                                 "Never smoker"), ordered = TRUE)
df_SMOKESTAT <- BRFSS %>% 
  group_by(SMOKESTAT) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(SMOKESTATgroup = factor(SMOKESTAT),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

ggplot(df_SMOKESTAT, 
       aes(x = SMOKESTAT, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "olivedrab4", 
           fill = "olivedrab4",
           width = 0.5) +
  scale_y_continuous(breaks = seq(0,60,10)) +
  geomtext_bar +
  labs(x = "Smoker status",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

C4.EXERCISE

df_EXERpa30 <- BRFSS %>% 
  group_by(EXERpa30) %>% 
  dplyr::summarise(Count = n()) %>% 
  ungroup()%>% 
  arrange(desc(EXERpa30)) 

df2_EXERpa30 <- df_EXERpa30 %>% 
  mutate(Percentage = round(Count/sum(Count),4)*100,
         lab.pos = cumsum(Percentage)-0.5*Percentage)

df2_EXERpa30 %>% 
  pander()
EXERpa30 Count Percentage lab.pos
Yes 217106 77.66 38.83
No 62466 22.34 88.83
ggplot(data = df2_EXERpa30, 
       aes(x = "", 
           y = Percentage, 
           fill = EXERpa30))+
  geom_bar(stat = "identity")+
  coord_polar("y") +
  geomtext_pie +
  theme_void() +
  theme(legend.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 14)) +
  scale_fill_manual(values = c("steelblue","olivedrab4"))

C2.ALCpa30

df_ALCpa30 <- BRFSS %>% 
  group_by(ALCpa30) %>% 
  dplyr::summarise(Count = n())

df_ALCpa30 %>% 
  pander()
ALCpa30 Count
0 123079
1-9 98975
10-30 57518
ggplot(df_ALCpa30, 
       aes(x = ALCpa30, 
           y = Count)) +
  geom_bar(stat = "identity",
           color = "olivedrab4", 
           fill = "olivedrab4",
           width = 0.5) +
  geom_text(aes(label=paste0(Count)), 
             vjust = -0.3,
             size = 4,
             color= "black",
             fontface = "bold") +
  labs(x = "# of days where people have at least one drink",
       y = "Frequency",
       caption = "Data source: BRFSS2018") +
  theme_bar

df_ALCpa30 <- BRFSS %>% 
  group_by(ALCpa30) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(ALCpa30group = factor(ALCpa30),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

ggplot(df_ALCpa30, 
       aes(x = ALCpa30, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "olivedrab4", 
           fill = "olivedrab4",
           width = 0.45) +
  geomtext_bar +
  labs(x = "Number of days where people have at least one drink",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

SMOKESTAT : strong assocation with the outcome Maybe we should remove all the data that were collected during the past 30 days

C4. SLEPTIM1

library(ggforce)

Numerical Data Descriptive Statistics

ggplot(data = BRFSS, aes(SLEPTIM1)) + 
  geom_bar(stat= "count", 
                 fill= "olivedrab4") +  
  coord_cartesian(clip = "off") +
  labs( x = "Sleep duration (hours)",
        y = "Frequency",
        subtitle = "On average, how many hours of sleep do you get in a 24-hour period?",
        caption = "Data source: BRFSS2018") + 
  scale_y_continuous(limits = c(0, 100000), expand = c(0, 0)) +
  scale_x_continuous(limits = c(0, 15), expand = c(0, 0)) +
  theme_bar

summary(BRFSS$SLEPTIM1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   6.000   7.000   7.018   8.000  12.000
ggplot(BRFSS, 
       aes(x = PULMOND, 
           y = SLEPTIM1)) +
  geom_violin(fill = "cornflowerblue") +
  geom_boxplot(width = .2, 
               fill = "orange",
               outlier.color = "orange",
               outlier.size = 2) +
  labs(title = "Sleep duration distribution by PULMOND status")

ggplot(BRFSS, 
       aes(x = MICHD, 
           y = SLEPTIM1, fill = MICHD)) +
  geom_violin(trim = FALSE,fill = "cornflowerblue") +
  geom_boxplot(width = 0.1, 
               fill = "grey",
               outlier.color = "grey",
               outlier.size = 2) +
  labs(title = "Sleep duration distribution by MICHD status") 

D1.GENHLTH

BRFSS$GENHLTH <- factor(BRFSS$GENHLTH,
                                levels =  c("Poor",
                                            "Fair",
                                            "Good",
                                            "Very Good",
                                            "Excellent"), ordered = TRUE)
df_GENHLTH <- BRFSS %>% 
  group_by(GENHLTH) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(SMOKESTATgroup = factor(GENHLTH),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

ggplot(df_GENHLTH, 
       aes(x = GENHLTH, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "purple4", 
           fill = "purple4",
           width = 0.6) +
  scale_y_continuous(breaks = seq(0,60,10)) +
  geomtext_bar +
  labs(x = "In general, your health is:",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

ggplot(BRFSS, aes(x = GENHLTH, group = INCOME)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col5) +
    labs(y = "Percent of respondents", fill = "INCOME") +
    facet_grid(~ INCOME) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

ggplot(BRFSS, aes(x = INCOME , group = GENHLTH)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col5) +
    labs(y = "Percent of respondents", fill = "GENHLTH") +
    facet_grid(~  GENHLTH) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

Healthy Days (E)— Health Related Quality of Life E1. PHYSHLTH (Number of Days Physical Health Not Good) 3 level not good physical health status: 0 days, 1-9 days, 10-30 days E2. MENTHLTH (Number of Days Mental Health Not Good) 3 level not good mental health status: 0 days, 1-9 days, 10-30 days

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(grid)
p_PHYSHLTH <- ggplot(BRFSS, aes(x = PHYSHLTHpa30)) +
  geom_bar(aes(y = (..count..)/sum(..count..)),
           fill = "purple4",
           width = 0.6) +
  scale_y_continuous(labels = percent,
                     limits = c(0, 0.7))+
  geom_text(size = 5,
            color= "black",
            aes(y = ((..count..)/sum(..count..)),
                label = scales::percent((..count..)/sum(..count..))),
            stat = "count", 
            vjust = -0.5) +
  labs(x = "Physical Health Not Good",
       y = "Percent of Respondents") +
  theme_bar


p_MENTHLTH <- ggplot(BRFSS, aes(x = MENTHLTHpa30)) +
  geom_bar(aes(y = (..count..)/sum(..count..)),
           fill = "purple4",
           width = 0.6) +
  scale_y_continuous(labels = percent,
                     limits = c(0, 0.7))+
  geom_text(size = 5,
            color= "black",
            aes(y = ((..count..)/sum(..count..)),
                label = scales::percent((..count..)/sum(..count..))),
            stat = "count", 
            vjust = -0.5) +
  labs(x = "Mental Health Not Good",
       y = "Percent of Respondents") +
  theme_bar


grid.arrange(p_PHYSHLTH, p_MENTHLTH, ncol = 2)

HRQOL <- data.frame(
  Health = c("Physical","Physical","Physical",
          "Mental","Mental","Mental"),
  NumberofDays = rep(c("0", "1-9", "10-30"),2),
  Percentage = c(63.5, 21.4, 15.1,
                 66.3, 19.7, 14.0 ))
HRQOL %>% 
  pander()
Health NumberofDays Percentage
Physical 0 63.5
Physical 1-9 21.4
Physical 10-30 15.1
Mental 0 66.3
Mental 1-9 19.7
Mental 10-30 14
ggplot(data = HRQOL,
       aes(x = Health,
       y = Percentage,
       fill = NumberofDays)) + 
  geom_bar(stat = "identity",
           position = "dodge",
           size = 0.25)+
  scale_y_continuous(limits = c(0, 70))+
  scale_fill_manual(values= col3)+
  geom_text(aes(label = paste0(Percentage,"%")), 
            position = position_dodge(width = 0.9), 
            vjust= -0.25,
            size = 5,
            color = "black") +
  labs(x = "Health related quality of life",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
 theme_bar

ggplot(data = HRQOL,
       aes(x = NumberofDays,
       y = Percentage,
       fill = Health)) + 
  geom_bar(stat = "identity",
           position = "dodge")+
  scale_y_continuous(limits = c(0, 70))+
  scale_fill_manual(values= col2)+
  geom_text(aes(label = paste0(Percentage,"%")), 
            position = position_dodge(width = 0.9), 
            vjust = -0.25,
            size = 4.5,
            color = "black") +
  labs(x = "Number of days where health is not good",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

ggplot(BRFSS, aes(x = GENHLTH, group = EMPL)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 5, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col5) +
    labs(y = "Percent of respondents", fill = "EMPL") +
    facet_grid(~  EMPL) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

ggplot(BRFSS, aes(x = MENTHLTHpa30, group = EMPL )) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.75) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col3) +
    labs(y = "Percent of respondents", fill = "EMPL") +
    facet_grid(~  EMPL) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

ggplot(BRFSS, aes(x = PHYSHLTHpa30, group = EMPL )) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.75) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col3) +
    labs(y = "Percent of respondents", fill = "EMPL") +
    facet_grid(~  EMPL) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

ggplot(BRFSS, aes(x = GENHLTH, group = EXERpa30)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.75) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col5) +
    labs(y = "Percent of respondents", fill = "EMPL") +
    facet_grid(~ EXERpa30) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

ggplot(BRFSS, aes(x = MENTHLTHpa30, group = EXERpa30)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.6) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col3) +
    labs(y = "Percent of respondents", fill = "EMPL") +
    facet_grid(~ EXERpa30) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

ggplot(BRFSS, aes(x = PHYSHLTHpa30, group = EXERpa30)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.6) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col3) +
    labs(y = "Percent of respondents", fill = "EMPL") +
    facet_grid(~ EXERpa30) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

Health Care Access (F) F1. HPLN1 (Have any health care coverage) ==> HPLN F2. MEDCOST (Could Not See Doctor Because of Cost) F3. CHECKUP1

df_HLTHPLN <- BRFSS %>% 
  group_by(HLTHPLN) %>% 
  dplyr::summarise(Count = n()) %>% 
  ungroup()%>% 
  arrange(desc(HLTHPLN)) 

df2_HLTHPLN <- df_HLTHPLN %>% 
  mutate(Percentage = round(Count/sum(Count),4)*100,
         lab.pos = cumsum(Percentage)-0.5*Percentage)

df2_HLTHPLN %>% 
  pander()
HLTHPLN Count Percentage lab.pos
Yes 259509 92.82 46.41
No 20063 7.18 96.41
ggplot(data = df2_HLTHPLN, 
       aes(x = "", 
           y = Percentage, 
           fill = HLTHPLN))+
  geom_bar(stat = "identity")+
  coord_polar("y") +
  geom_text(aes(y = lab.pos, 
                label = paste(Percentage,"%", sep = "")), col = "white", size = 4) +
  theme_void() +
  scale_fill_manual(values = c("steelblue","darkgreen")) +
  theme(legend.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 14))

df_MEDICOST <- BRFSS %>% 
  group_by(MEDICOST) %>% 
  dplyr::summarise(Count = n()) %>% 
  ungroup()%>% 
  arrange(desc(MEDICOST)) 

df2_MEDICOST <- df_MEDICOST %>% 
  mutate(percentage = round(Count/sum(Count),4)*100,
         lab.pos = cumsum(percentage)-0.5*percentage)

df2_MEDICOST  
## # A tibble: 2 x 4
##   MEDICOST  Count percentage lab.pos
##   <fct>     <int>      <dbl>   <dbl>
## 1 Yes       27236       9.74    4.87
## 2 No       252336      90.3    54.9
ggplot(data = df2_MEDICOST, 
       aes(x = "", 
           y = percentage, 
           fill = MEDICOST))+
  geom_bar(stat = "identity")+
  coord_polar("y") +
  geom_text(aes(y = lab.pos, 
                label = paste(percentage,"%", sep = "")), col = "white", size = 4) +
  theme_void() +
  scale_fill_manual(values = c("steelblue","darkgreen"))+
   theme(legend.title = element_text(color = "black", size = 14),
        legend.text = element_text(color = "black", size = 14))

df_CHECKUP <- BRFSS %>% 
  group_by(CHECKUP) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(CHECKUPgroup = factor(CHECKUP),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

ggplot(df_CHECKUP, 
       aes(x = CHECKUP, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "darkgreen", 
           fill = "darkgreen",
           width = 0.6) +
  scale_y_continuous(breaks = seq(0,100,20)) +
  geomtext_bar +
  labs(x = "Last visited a doctor for a routine checkup (Year)",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

BMICAT

BRFSS$BMICAT <- factor(BRFSS$BMICAT,
                            levels = c("Underweight",
                                       "Normal",
                                       "Overweight",
                                       "Obese"), ordered = TRUE)
df_BMICAT <- BRFSS %>% 
  group_by(BMICAT) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(BMICATgroup = factor(BMICAT),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

ggplot(df_BMICAT, 
       aes(x = BMICAT, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "firebrick4", 
           fill = "firebrick4",
           width = 0.6) +
  scale_y_continuous(breaks = seq(0,50,10)) +
  geomtext_bar +
  labs(x = "BMI categories",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

BMICAT and EDUCATION

ggplot(BRFSS, aes(x= BMICAT , group = EDU)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill = "BMICATE") +
    facet_grid(~ EDU) +
    scale_y_continuous(labels = scales::percent) +
  theme_bar

df_EduBMI <- BRFSS %>% 
  group_by(EDU, BMICAT) %>% 
  dplyr::summarise(Count =  n()) 

df2_EduBMI <- ddply(df_EduBMI,.(EDU),transform, Percentage = Count*100/sum(Count))

df2_EduBMI
##      EDU      BMICAT Count Percentage
## 1     <8 Underweight    75   1.632564
## 2     <8      Normal  1065  23.182412
## 3     <8  Overweight  1711  37.244232
## 4     <8       Obese  1743  37.940792
## 5   9-12 Underweight  1449   1.762066
## 6   9-12      Normal 21689  26.375056
## 7   9-12  Overweight 29002  35.268080
## 8   9-12       Obese 30093  36.594798
## 9  12-15 Underweight  1086   1.387700
## 10 12-15      Normal 21402  27.347653
## 11 12-15  Overweight 27732  35.436180
## 12 12-15       Obese 28039  35.828467
## 13   16+ Underweight  1400   1.222857
## 14   16+      Normal 38916  33.991929
## 15   16+  Overweight 42820  37.401953
## 16   16+       Obese 31350  27.383261
ggplot(data = df2_EduBMI, 
       aes(x = EDU, 
           y = Percentage,
           group = BMICAT)) +
  geom_line(linetype = "dashed", 
            aes(color = BMICAT), 
            size = 0.5) +
  geom_point(aes(color = BMICAT), 
             size = 6) +
  scale_color_manual(values = col4) +
  labs(x = "Education year",
       y = "Percent of respondents (%)") +
  theme_point +
  guides(color=guide_legend("Education year"))

ggplot(BRFSS, aes(x =  AGE, group =  BMICAT)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col7) +
    labs(y = "Percent of respondents", fill = "EDU") +
    facet_grid(~ BMICAT ) +
    scale_y_continuous(labels = scales::percent)+
  theme_bar

df_BMIage <- BRFSS %>% 
  group_by(AGE, BMICAT) %>% 
  dplyr::summarise(Count =  n()) 

df_BMIage
## # A tibble: 28 x 3
## # Groups:   AGE [7]
##    AGE   BMICAT      Count
##    <fct> <ord>       <int>
##  1 18-29 Underweight   892
##  2 18-29 Normal      12996
##  3 18-29 Overweight   8765
##  4 18-29 Obese        6762
##  5 30-39 Underweight   431
##  6 30-39 Normal      10903
##  7 30-39 Overweight  11740
##  8 30-39 Obese       11376
##  9 40-49 Underweight   350
## 10 40-49 Normal      10107
## # ... with 18 more rows
ggplot(data=df_BMIage, aes(x=AGE, 
                           y = Count, 
                           group=BMICAT)) +
  geom_line(linetype="dashed", 
            aes(color = BMICAT), 
            size=1) +
  geom_point(aes(color = BMICAT), 
             size= 6) +
  scale_color_manual(values= col4) +
  scale_y_continuous(breaks = seq(0,50000,10000)) +
  labs(x = "Age group",
       y = "Frequency",
       caption = "Data source: BRFSS2018") +
  theme_point

How to calculate proportion by age group?

df2_BMIage <- ddply(df_BMIage,.(AGE),transform, Percentage = Count*100/sum(Count))
df2_BMIage 
##      AGE      BMICAT Count Percentage
## 1  18-29 Underweight   892  3.0324664
## 2  18-29      Normal 12996 44.1815400
## 3  18-29  Overweight  8765 29.7977223
## 4  18-29       Obese  6762 22.9882713
## 5  30-39 Underweight   431  1.2510885
## 6  30-39      Normal 10903 31.6487663
## 7  30-39  Overweight 11740 34.0783745
## 8  30-39       Obese 11376 33.0217707
## 9  40-49 Underweight   350  0.9128847
## 10 40-49      Normal 10107 26.3615023
## 11 40-49  Overweight 13354 34.8304643
## 12 40-49       Obese 14529 37.8951487
## 13 50-59 Underweight   533  1.0008826
## 14 50-59      Normal 13463 25.2812048
## 15 50-59  Overweight 19398 36.4261168
## 16 50-59       Obese 19859 37.2917958
## 17 60-69 Underweight   734  1.1610985
## 18 60-69      Normal 16629 26.3050494
## 19 60-69  Overweight 23827 37.6914072
## 20 60-69       Obese 22026 34.8424450
## 21 70-79 Underweight   603  1.4092736
## 22 70-79      Normal 11924 27.8676264
## 23 70-79  Overweight 17015 39.7658222
## 24 70-79       Obese 13246 30.9572777
## 25   80+ Underweight   467  2.5786858
## 26   80+      Normal  7050 38.9287686
## 27   80+  Overweight  7166 39.5692987
## 28   80+       Obese  3427 18.9232468
ggplot(data=df2_BMIage, aes(x=AGE, 
                           y = Percentage, 
                           group=BMICAT)) +
  geom_line(linetype  = "dashed", 
            aes(color = BMICAT), 
            size = 0.5) +
  geom_point(aes(color = BMICAT), 
             size= 6) +
  scale_color_manual(values= col4) +
   labs(y = "Percent of respondents (%)", x = "Age range") +
        theme_point

**INCOME and BMI

df_BMIincome <- BRFSS %>% 
  group_by(INCOME, BMICAT) %>% 
  dplyr::summarise(Count =  n())

df2_BMIincome <- ddply(df_BMIincome,.(INCOME),transform, Percentage = Count*100/sum(Count))
df2_BMIincome
##        INCOME      BMICAT Count Percentage
## 1       <$20K Underweight  1048  2.6184289
## 2       <$20K      Normal 11416 28.5228863
## 3       <$20K  Overweight 12299 30.7290626
## 4       <$20K       Obese 15261 38.1296222
## 5  $20K-<$35K Underweight   955  1.8527141
## 6  $20K-<$35K      Normal 14914 28.9333799
## 7  $20K-<$35K  Overweight 17685 34.3091607
## 8  $20K-<$35K       Obese 17992 34.9047453
## 9  $35K-<$50K Underweight   524  1.3548104
## 10 $35K-<$50K      Normal 10930 28.2596892
## 11 $35K-<$50K  Overweight 14057 36.3445976
## 12 $35K-<$50K       Obese 13166 34.0409029
## 13 $50K-<$75K Underweight   498  1.0567191
## 14 $50K-<$75K      Normal 13355 28.3383199
## 15 $50K-<$75K  Overweight 17550 37.2397988
## 16 $50K-<$75K       Obese 15724 33.3651622
## 17      $75K+ Underweight   985  0.9638153
## 18      $75K+      Normal 32457 31.7589385
## 19      $75K+  Overweight 39674 38.8207206
## 20      $75K+       Obese 29082 28.4565256
ggplot(data=df2_BMIincome, aes(x= INCOME, 
                           y = Percentage, 
                           group=BMICAT)) +
  geom_line(linetype="dashed", 
            aes(color = BMICAT), 
            size = 0.5) +
  geom_point(aes(color =BMICAT), 
             size= 6) +
  scale_color_manual(values=  col4) +
   labs(y = "Percent of respondents (%)", x = "Income bracket") +
  theme_point

Overweight increase with income, from 30% for 20K to 38% for 75K+ Obsese decreses with income, from 36.9 % for 20 K to 28.9 for 75K+ The normal weight is similar for all The underweight decreases from 2.5% to 1 %.

ggplot(data=df2_BMIincome, aes(x= BMICAT, 
                           y = Percentage, 
                           group = INCOME)) +
  geom_line(linetype="dashed", 
            aes(color = INCOME), 
            size = 0.5) +
  geom_point(aes(color = INCOME), 
             size= 6) +
  scale_color_manual(values=  col5) +
   labs(y = "Percent of respondents (%)", x = "BMI categories") +
  theme_point

df3_BMIincome <- ddply(df_BMIincome,.(BMICAT),transform, Percentage = Count*100/sum(Count))
df3_BMIincome
##        INCOME      BMICAT Count Percentage
## 1       <$20K Underweight  1048   26.13466
## 2  $20K-<$35K Underweight   955   23.81546
## 3  $35K-<$50K Underweight   524   13.06733
## 4  $50K-<$75K Underweight   498   12.41895
## 5       $75K+ Underweight   985   24.56359
## 6       <$20K      Normal 11416   13.74230
## 7  $20K-<$35K      Normal 14914   17.95310
## 8  $35K-<$50K      Normal 10930   13.15726
## 9  $50K-<$75K      Normal 13355   16.07642
## 10      $75K+      Normal 32457   39.07093
## 11      <$20K  Overweight 12299   12.14536
## 12 $20K-<$35K  Overweight 17685   17.46408
## 13 $35K-<$50K  Overweight 14057   13.88140
## 14 $50K-<$75K  Overweight 17550   17.33077
## 15      $75K+  Overweight 39674   39.17839
## 16      <$20K       Obese 15261   16.72897
## 17 $20K-<$35K       Obese 17992   19.72266
## 18 $35K-<$50K       Obese 13166   14.43245
## 19 $50K-<$75K       Obese 15724   17.23650
## 20      $75K+       Obese 29082   31.87942
ggplot(data=df3_BMIincome, 
       aes(x= BMICAT, 
           y = Percentage,
           group = INCOME)) +
  geom_line(linetype = "dashed", 
            aes(color = INCOME), 
            size = 0.5) +
  geom_point(aes(color = INCOME), 
             size= 6) +
  scale_color_manual(values= col5) +
   labs(x = "BMI categories",
        y = "Percent of respondents (%)") +
     theme_point

ggplot(data = df3_BMIincome, 
       aes(x = INCOME, 
           y = Percentage,
           group = BMICAT)) +
  geom_line(linetype = "dashed", 
            aes(color = BMICAT), 
            size = 0.5) +
  geom_point(aes(color = BMICAT), 
             size= 6) +
  scale_color_manual(values= col5) +
   labs(x = "Income range",
        y = "Percent of respondents (%)") +
     theme_point

df_INCOMEBMI <- BRFSS %>% 
  group_by(INCOME, BMICAT) %>% 
  dplyr::summarise(Count =  n()) 

df_INCOMEBMI 
## # A tibble: 20 x 3
## # Groups:   INCOME [5]
##    INCOME     BMICAT      Count
##    <ord>      <ord>       <int>
##  1 <$20K      Underweight  1048
##  2 <$20K      Normal      11416
##  3 <$20K      Overweight  12299
##  4 <$20K      Obese       15261
##  5 $20K-<$35K Underweight   955
##  6 $20K-<$35K Normal      14914
##  7 $20K-<$35K Overweight  17685
##  8 $20K-<$35K Obese       17992
##  9 $35K-<$50K Underweight   524
## 10 $35K-<$50K Normal      10930
## 11 $35K-<$50K Overweight  14057
## 12 $35K-<$50K Obese       13166
## 13 $50K-<$75K Underweight   498
## 14 $50K-<$75K Normal      13355
## 15 $50K-<$75K Overweight  17550
## 16 $50K-<$75K Obese       15724
## 17 $75K+      Underweight   985
## 18 $75K+      Normal      32457
## 19 $75K+      Overweight  39674
## 20 $75K+      Obese       29082
ggplot(data = df_INCOMEBMI , aes(x = INCOME, 
                           y = Count, 
                           group = BMICAT)) +
  geom_line(linetype = "dashed", 
            aes(color = BMICAT), 
            size = 0.5) +
  geom_point(aes(color = BMICAT), 
             size= 6) +
  scale_color_manual(values = col4) +
   scale_y_continuous(breaks = seq(0,20000,5000)) +
  labs(x = "Income braket",
       y = "Frequency",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12, color = "black"),
        axis.title = element_text(size = 12, face="bold"),
        legend.position = "none",
        plot.caption = element_text(color = "grey", size = 12, face = "italic")) +
  theme(legend.position="top")

by using face_grid (margin = true), we will obtain an grid of total

ggplot(BRFSS, aes(x= BMICAT, group = RACE)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat = "count",
             width = 0.6) +
    geom_text(size = 4.5, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat = "count",
              color = "black",
              vjust = -0.5) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", 
         x = "BMI categories") +
    facet_grid(~ RACE) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

Table is maybe the best option here

df_BMIrace <- BRFSS %>% 
  group_by(RACE, BMICAT) %>% 
  dplyr::summarise(Count =  n())

df2_BMIrace <- ddply(df_BMIrace,.(RACE),transform, Percentage = Count*100/sum(Count))

df2_BMIrace %>% 
  pander()
RACE BMICAT Count Percentage
AIAN Underweight 76 1.634
AIAN Normal 1045 22.46
AIAN Overweight 1567 33.68
AIAN Obese 1964 42.22
Asian Underweight 187 3.298
Asian Normal 2737 48.27
Asian Overweight 1962 34.6
Asian Obese 784 13.83
Black Underweight 253 1.171
Black Normal 4577 21.19
Black Overweight 7269 33.65
Black Obese 9503 43.99
Hispanic Underweight 248 1.325
Hispanic Normal 4871 26.02
Hispanic Overweight 7073 37.79
Hispanic Obese 6525 34.86
White Underweight 3121 1.414
White Normal 67495 30.57
White Overweight 80612 36.52
White Obese 69533 31.5
Others Underweight 125 1.53
Others Normal 2347 28.73
Others Overweight 2782 34.05
Others Obese 2916 35.69

Obese : AIAN, Black, Multiracial Obese + Overweight : AsiaN IS THE LEASt UNDERWEIGHT : ASIAN

ggplot(BRFSS, aes(x= GENDER , group = BMICAT)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.75) +
    geom_text(size = 4.5, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col2) +
    labs(y = "Percent of respondents", fill="GENDER") +
    facet_grid(~ BMICAT) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

BRFSS %>% 
  filter(BMICAT == "Obese") %>% 
  group_by(GENDER) %>% 
  dplyr::count()
## # A tibble: 2 x 2
## # Groups:   GENDER [2]
##   GENDER     n
##   <fct>  <int>
## 1 Female 47853
## 2 Male   43372
BRFSS %>% 
  filter(BMICAT == "Underweight") %>% 
  group_by(GENDER) %>% 
  dplyr::count()
## # A tibble: 2 x 2
## # Groups:   GENDER [2]
##   GENDER     n
##   <fct>  <int>
## 1 Female  2780
## 2 Male    1230
df_BMIgender<- BRFSS %>% 
  group_by(GENDER, BMICAT) %>% 
  dplyr::summarise(Count =  n())

df2_BMIgender <- ddply(df_BMIgender,.(GENDER),transform, Percentage = Count*100/sum(Count))

df2_BMIgender  %>% 
  pander()
GENDER BMICAT Count Percentage
Female Underweight 2780 1.905
Female Normal 50141 34.37
Female Overweight 45130 30.93
Female Obese 47853 32.8
Male Underweight 1230 0.9202
Male Normal 32931 24.64
Male Overweight 56135 42
Male Obese 43372 32.45
#This is the best graph EVER
ggplot(df2_BMIgender, aes(BMICAT, Percentage, fill = GENDER)) + 
  geom_bar(position="dodge",
           stat="identity")+
   scale_fill_manual(values= col2) +
      labs(x = "BMI categories", 
           y = "Percent of Respondents (%)",
        caption = "Data source: BRFSS2018") +
   geom_text(size = 4.5,
            color= "blue",
            aes(y = Percentage,
                label = paste0(round(Percentage,1),"%")),
            stat = "identity", 
            position = position_dodge2(width = 0.9, preserve = "single"), 
            vjust= -0.5, 
            hjust= 0.5) +
  themebar_facet

ggplot(BRFSS, aes(x = BMICAT, group = GENDER)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.75) +
    geom_text(size = 5, 
              aes(label = scales::percent(..prop..),
                  y = ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = 0.5,
              hjust = -0.1) +
  scale_fill_manual(values= col4) +
    labs(x= "Categories of BMI",
      y = "Percent of respondents") +
    facet_grid(~ GENDER) +
  coord_flip() +
    scale_y_continuous(labels = scales::percent,limits = c(0,0.5))+
  themebar_facet

Amomg male group, 41% of them are overweight while those of female are only 31% 2 % of female are underweight, this number decrease to 1 % for male.

ggplot(BRFSS, aes(x = BMICAT, group = GENHLTH)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.75) +
    geom_text(size = 4.5, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="GENDER") +
    facet_grid(~ GENHLTH) +
    scale_y_continuous(labels = scales::percent)+
  theme(axis.text = element_text(size = 12, color = "black", angle = 90),
        axis.title = element_text(size = 12, face= "bold"),
        plot.title = element_text(color = "black", size = 12, face = "bold.italic"),
        plot.caption = element_text(color = "grey", size = 12, face = "italic"),
        strip.text = element_text(size = 12, color = "black", face = "bold.italic"),
        strip.background = element_rect(fill = "slategray1"),
        legend.text = element_text(colour="black", size = 20),
        legend.position = "none")

ggplot(BRFSS, aes(x =  BMICAT, group = EXERpa30  )) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.75) +
    geom_text(size = 4.5, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="GENDER") +
    facet_grid(~ EXERpa30) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ORAL HEALTH H1. RMVTETH

BRFSS$RMVTETH <- factor(BRFSS$RMVTETH,
  levels= c("0",
            "1-5",
            ">= 6",
            "All"), ordered = TRUE)

df_RMVTETH <- BRFSS %>% 
  group_by(RMVTETH) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(CHECKUPgroup = factor(RMVTETH),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

ggplot(df_RMVTETH, 
       aes(x = RMVTETH, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "darkgreen", 
           fill = "darkgreen",
           width = 0.6) +
  scale_y_continuous(breaks = seq(0,50,10)) +
  geomtext_bar +
  labs(x = "Number of teeth that have been removed",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

ggplot(BRFSS, aes(x= RMVTETH , group = AGE)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="IRMVTETH") +
    facet_grid(~ AGE) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

df_RMVTETH <- BRFSS %>%
  group_by(RMVTETH) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(Rmvteth = factor(RMVTETH),
         Ratio = Count/sum(Count),
         label = percent(Ratio %>% round(4)))

df_RMVTETH %>% 
  pander()
RMVTETH Count Rmvteth Ratio label
0 146320 0 0.5234 52.3%
1-5 83126 1-5 0.2973 29.7%
>= 6 32768 >= 6 0.1172 11.7%
All 17358 All 0.06209 6.2%

Age in function of of teeth removed

df_RmvtethAge <- BRFSS %>% 
  group_by(AGE, RMVTETH) %>% 
  dplyr::summarise(Count =  n()) 

df2_RmvtethAge <- ddply(df_RmvtethAge, .(RMVTETH), transform, Percentage = Count*100/sum(Count))

df2_RmvtethAge
##      AGE RMVTETH Count Percentage
## 1  18-29       0 25312 17.2990705
## 2  30-39       0 24595 16.8090487
## 3  40-49       0 24450 16.7099508
## 4  50-59       0 27550 18.8285949
## 5  60-69       0 26068 17.8157463
## 6  70-79       0 13598  9.2933297
## 7    80+       0  4747  3.2442592
## 8  18-29     1-5  3667  4.4113755
## 9  30-39     1-5  8150  9.8043933
## 10 40-49     1-5 10597 12.7481173
## 11 50-59     1-5 16824 20.2391550
## 12 60-69     1-5 22288 26.8123090
## 13 70-79     1-5 15391 18.5152660
## 14   80+     1-5  6209  7.4693838
## 15 18-29    >= 6   358  1.0925293
## 16 30-39    >= 6  1322  4.0344238
## 17 40-49    >= 6  2492  7.6049805
## 18 50-59    >= 6  6100 18.6157227
## 19 60-69    >= 6  9718 29.6569824
## 20 70-79    >= 6  8527 26.0223389
## 21   80+    >= 6  4251 12.9730225
## 22 18-29     All    78  0.4493605
## 23 30-39     All   383  2.2064754
## 24 40-49     All   801  4.6145869
## 25 50-59     All  2779 16.0099090
## 26 60-69     All  5142 29.6232285
## 27 70-79     All  5272 30.3721627
## 28   80+     All  2903 16.7242770
col7 <-  c("#0072BB", "#FF4C3B","#FFD034","#51A39D","#06425C", "#C6C8CA","#814374")
ggplot( )+
  geom_bar(data = df2_RmvtethAge, 
           aes(x = RMVTETH, 
               y = Percentage,
               fill = AGE),
               stat = "identity")+
   labs(y = "Percent of respondents (%)", 
        x = "Number of teeth removed") +
  scale_fill_manual(values = col7) +
  scale_y_continuous(breaks = seq(0, 100, 20)) +
  themebar_stacked

df2_AgeRmvteth <- ddply(df_RmvtethAge ,.(AGE),transform, Percentage = Count*100/sum(Count))
df2_AgeRmvteth
##      AGE RMVTETH Count Percentage
## 1  18-29       0 25312 86.0513344
## 2  18-29     1-5  3667 12.4664287
## 3  18-29    >= 6   358  1.2170661
## 4  18-29     All    78  0.2651708
## 5  30-39       0 24595 71.3933237
## 6  30-39     1-5  8150 23.6574746
## 7  30-39    >= 6  1322  3.8374456
## 8  30-39     All   383  1.1117562
## 9  40-49       0 24450 63.7715180
## 10 40-49     1-5 10597 27.6395409
## 11 40-49    >= 6  2492  6.4997392
## 12 40-49     All   801  2.0892019
## 13 50-59       0 27550 51.7341746
## 14 50-59     1-5 16824 31.5925863
## 15 50-59    >= 6  6100 11.4547537
## 16 50-59     All  2779  5.2184853
## 17 60-69       0 26068 41.2363958
## 18 60-69     1-5 22288 35.2568970
## 19 60-69    >= 6  9718 15.3726905
## 20 60-69     All  5142  8.1340167
## 21 70-79       0 13598 31.7799383
## 22 70-79     1-5 15391 35.9703655
## 23 70-79    >= 6  8527 19.9284846
## 24 70-79     All  5272 12.3212116
## 25   80+       0  4747 26.2120375
## 26   80+     1-5  6209 34.2849255
## 27   80+    >= 6  4251 23.4732192
## 28   80+     All  2903 16.0298178

RMVTETH and AGE

ggplot( )+
  geom_bar(data = df2_AgeRmvteth, 
           aes(x = AGE, 
               y = Percentage,
               fill =  RMVTETH),
               stat = "identity")+
   labs(y = "Percent of respondents (%)", 
        x = "Age group") +
  scale_fill_manual(values = col4) +
  scale_y_continuous(breaks = seq(0, 100, 20)) +
  themebar_stacked

RMVTETH and EDU

ggplot(BRFSS, aes(x= RMVTETH , group = EDU)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="IRMVTETH") +
    facet_grid(~ EDU) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(BRFSS, aes(x= RMVTETH , group = EMPL)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="IRMVTETH") +
    facet_grid(~ EMPL) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(BRFSS, aes(x= RMVTETH , group = INCOME)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="IRMVTETH") +
    facet_grid(~ INCOME) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(BRFSS, aes(x= RMVTETH , group = EXERpa30)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="RMVTETH") +
    facet_grid(~ EXERpa30) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(BRFSS, aes(x= RMVTETH , group = SMOKESTAT)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="IRMVTETH") +
    facet_grid(~ SMOKESTAT) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(data = df2_AgeRmvteth, aes(x = AGE, 
                           y = Percentage, 
                           group = RMVTETH)) +
  geom_line(linetype = "dashed", 
            aes(color = RMVTETH), 
            size = 0.5) +
  geom_point(aes(color = RMVTETH), 
             size = 6) +
  scale_color_manual(values = col4) +
  labs(y = "Percent of respondents (%)", 
       x = "Age group") +
  theme_point +
  guides(color=guide_legend("Nb of removed teeth")) 

H1.LASTDENTV

df_LASTDENTV <- BRFSS %>% 
  group_by(LASTDENTV) %>% 
  dplyr::summarise(Count =  n()) %>% 
  mutate(LASTDENTVgroup = factor(LASTDENTV),
         Percentage = round(Count*100/sum(Count),2),
         label =  percent(Percentage/100))

ggplot(df_LASTDENTV, 
       aes(x = LASTDENTV, 
           y = Percentage)) +
  geom_bar(stat = "identity",
           color = "darkgreen", 
           fill = "darkgreen",
           width = 0.6) +
  scale_y_continuous(breaks = seq(0,70,10)) +
  geomtext_bar +
  labs(x = "Last time visited dentist",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

df_RmvtethLastdentv <- BRFSS %>% 
  group_by(LASTDENTV, RMVTETH) %>% 
  dplyr::summarise(Count =  n()) 

df_RmvtethLastdentv 
## # A tibble: 16 x 3
## # Groups:   LASTDENTV [4]
##    LASTDENTV RMVTETH  Count
##    <fct>     <ord>    <int>
##  1 <1        0       110367
##  2 <1        1-5      60837
##  3 <1        >= 6     19637
##  4 <1        All       4618
##  5 1-<2      0        13766
##  6 1-<2      1-5       9053
##  7 1-<2      >= 6      4292
##  8 1-<2      All       2104
##  9 2-<5      0        10687
## 10 2-<5      1-5       7089
## 11 2-<5      >= 6      3942
## 12 2-<5      All       2771
## 13 5+        0        11500
## 14 5+        1-5       6147
## 15 5+        >= 6      4897
## 16 5+        All       7865
df2_RmvtethLastdentv <- ddply(df_RmvtethLastdentv,.(LASTDENTV),transform, Percentage = Count*100/sum(Count))
df2_RmvtethLastdentv 
##    LASTDENTV RMVTETH  Count Percentage
## 1         <1       0 110367  56.465550
## 2         <1     1-5  60837  31.125198
## 3         <1    >= 6  19637  10.046608
## 4         <1     All   4618   2.362644
## 5       1-<2       0  13766  47.119630
## 6       1-<2     1-5   9053  30.987506
## 7       1-<2    >= 6   4292  14.691083
## 8       1-<2     All   2104   7.201780
## 9       2-<5       0  10687  43.640002
## 10      2-<5     1-5   7089  28.947691
## 11      2-<5    >= 6   3942  16.097023
## 12      2-<5     All   2771  11.315284
## 13        5+       0  11500  37.817751
## 14        5+     1-5   6147  20.214410
## 15        5+    >= 6   4897  16.103785
## 16        5+     All   7865  25.864053
ggplot( )+
  geom_bar(data = df2_RmvtethLastdentv, 
           aes(x = LASTDENTV, 
               y = Percentage,
               fill =  RMVTETH),
               stat = "identity")+
   labs(y = "Percent of respondents (%)", 
        x = "Last time visited dentist") +
  scale_fill_manual(values = col4) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  themebar_stacked

ggplot(data = df2_RmvtethLastdentv, aes(x = LASTDENTV, 
                           y = Percentage, 
                           group = RMVTETH)) +
  geom_line(linetype = "dashed", 
            aes(color = RMVTETH), 
            size = 0.5) +
  geom_point(aes(color = RMVTETH), 
             size = 6) +
  scale_color_manual(values = col4) +
  labs(y = "Percent of respondents (%)", x = "Last time visited dental clinic") +
  theme_point +
  guides(color=guide_legend("Nb of removed teeth")) 

Visiting dentist and income

ggplot(BRFSS, aes(x= LASTDENTV , group = INCOME)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="IRMVTETH") +
    facet_grid(~ INCOME) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(BRFSS, aes(x= LASTDENTV, group = EXERpa30)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="IRMVTETH") +
    facet_grid(~ EXERpa30) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(BRFSS, aes(x= LASTDENTV, group = HLTHPLN)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="RMVTETH") +
    facet_grid(~ HLTHPLN) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(BRFSS, aes(x= LASTDENTV, group = MEDICOST)) + # teeth removed by age
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill="RMVTETH") +
    facet_grid(~ MEDICOST) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

df_IncomeLastdentv <- BRFSS %>% 
  group_by(LASTDENTV, INCOME) %>% 
  dplyr::summarise(Count =  n()) 

df2_IncomeLastdentv <- ddply(df_IncomeLastdentv,.(LASTDENTV),transform, Percentage = Count*100/sum(Count))

df2_IncomeLastdentv
##    LASTDENTV     INCOME Count Percentage
## 1         <1      <$20K 18372   9.399414
## 2         <1 $20K-<$35K 29558  15.122353
## 3         <1 $35K-<$50K 26273  13.441694
## 4         <1 $50K-<$75K 35474  18.149075
## 5         <1      $75K+ 85782  43.887465
## 6       1-<2      <$20K  5811  19.890467
## 7       1-<2 $20K-<$35K  6872  23.522163
## 8       1-<2 $35K-<$50K  4488  15.361972
## 9       1-<2 $50K-<$75K  4558  15.601575
## 10      1-<2      $75K+  7486  25.623823
## 11      2-<5      <$20K  5836  23.831108
## 12      2-<5 $20K-<$35K  6277  25.631916
## 13      2-<5 $35K-<$50K  3705  15.129242
## 14      2-<5 $50K-<$75K  3621  14.786231
## 15      2-<5      $75K+  5050  20.621504
## 16        5+      <$20K 10005  32.901444
## 17        5+ $20K-<$35K  8839  29.067053
## 18        5+ $35K-<$50K  4211  13.847874
## 19        5+ $50K-<$75K  3474  11.424249
## 20        5+      $75K+  3880  12.759380
ggplot( )+
  geom_bar(data = df2_IncomeLastdentv, 
           aes(x = LASTDENTV, 
               y = Percentage,
               fill =  INCOME),
               stat = "identity")+
   labs(y = "Percent of respondents (%)", 
        x = "Last time visited dentist") +
  scale_fill_manual(values = col5) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  themebar_stacked

df3_IncomeLastdentv <- ddply(df_IncomeLastdentv,.(INCOME),transform, Percentage = Count*100/sum(Count))
df3_IncomeLastdentv
##    LASTDENTV     INCOME Count Percentage
## 1         <1      <$20K 18372  45.902459
## 2       1-<2      <$20K  5811  14.518789
## 3       2-<5      <$20K  5836  14.581251
## 4         5+      <$20K 10005  24.997501
## 5         <1 $20K-<$35K 29558  57.342956
## 6       1-<2 $20K-<$35K  6872  13.331781
## 7       2-<5 $20K-<$35K  6277  12.177473
## 8         5+ $20K-<$35K  8839  17.147790
## 9         <1 $35K-<$50K 26273  67.929260
## 10      1-<2 $35K-<$50K  4488  11.603796
## 11      2-<5 $35K-<$50K  3705   9.579337
## 12        5+ $35K-<$50K  4211  10.887608
## 13        <1 $50K-<$75K 35474  75.273198
## 14      1-<2 $50K-<$75K  4558   9.671738
## 15      2-<5 $50K-<$75K  3621   7.683494
## 16        5+ $50K-<$75K  3474   7.371570
## 17        <1      $75K+ 85782  83.937063
## 18      1-<2      $75K+  7486   7.324997
## 19      2-<5      $75K+  5050   4.941388
## 20        5+      $75K+  3880   3.796552
ggplot( )+
  geom_bar(data = df3_IncomeLastdentv, 
           aes(x = INCOME , 
               y = Percentage,
               fill =  LASTDENTV),
               stat = "identity")+
   labs(y = "Percent of respondents (%)", 
        x = "Income bracket") +
  scale_fill_manual(values = col4) +
  scale_y_continuous(breaks = seq(0, 100, 10)) +
  themebar_stacked

ggplot(data = df3_IncomeLastdentv, 
       aes(x = INCOME, 
           y = Percentage,
           group = LASTDENTV)) +
  geom_line(linetype = "dashed", 
            aes(color = LASTDENTV), 
            size = 0.5) +
  geom_point(aes(color = LASTDENTV), 
             size = 6) +
  scale_color_manual(values = col5) +
  labs(x = "Income range",
       y = "Percent of respondents (%)") +
  theme_point +
  guides(color=guide_legend("Last time visited dentist"))

Last than 1 year — more frequent than from 1 to 2 year higher income - visit dentist more

df_LASTDENTVRACE <- BRFSS %>% 
  select(LASTDENTV, RACE) %>% 
  filter(RACE == "White" | RACE == "Black"| RACE == "Hispanic" | RACE == "Asian") %>% 
  group_by(LASTDENTV, RACE) %>% 
  dplyr::summarise(Count =  n()) 


df2_LASTDENTVRACE <- ddply(df_LASTDENTVRACE,.(RACE),transform, Percentage = round(Count*100/sum(Count),2))

df2_LASTDENTVRACE 
##    LASTDENTV     RACE  Count Percentage
## 1         <1    Asian   4114      72.56
## 2       1-<2    Asian    661      11.66
## 3       2-<5    Asian    468       8.25
## 4         5+    Asian    427       7.53
## 5         <1    Black  13113      60.70
## 6       1-<2    Black   3242      15.01
## 7       2-<5    Black   2509      11.61
## 8         5+    Black   2738      12.67
## 9         <1 Hispanic  11553      61.72
## 10      1-<2 Hispanic   2741      14.64
## 11      2-<5 Hispanic   2176      11.63
## 12        5+ Hispanic   2247      12.01
## 13        <1    White 158845      71.95
## 14      1-<2    White  20871       9.45
## 15      2-<5    White  17920       8.12
## 16        5+    White  23125      10.48
ggplot(df2_LASTDENTVRACE, 
       aes(x = LASTDENTV, 
           y = Percentage,
           fill = LASTDENTV)) +
  geom_bar(stat = "identity",
           width = 0.75) +
  scale_fill_manual(values= c("blue", "cyan4", "cyan3","cyan2", "red")) +
  scale_y_continuous(breaks = seq(0,100,20)) +
  geomtext_bar +
  facet_wrap(~ RACE, nrow = 2) +
  labs(x = "Last time visited dentist (Year)",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") +
  theme_bar

G. CHRONIC HEALTH CONDITIONS

geombar_CHC <- geom_bar(aes(y = (..count..)/sum(..count..)),
           fill = "firebrick4",
           alpha = 0.9,
           width = 0.5) 
  
geomtext_CHC <- geom_text(size = 4,
            color= "blue",
            aes(y = ((..count..)/sum(..count..)),
                label = scales::percent((..count..)/sum(..count..))),
            stat = "count", 
            vjust = -0.5)

G1.ARTHRITIS

ggplot(BRFSS, aes(x = ARTHRITIS)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent) +
  labs(x = "Arthritis?",
       y = "Percent of Respondents",
      caption = "Data source: BRFSS2018") + 
  theme_bar

G2. PULMOND chronic obstructive pulmonary disease, C.O.P.D., emphysema or chronic bronchitis?

ggplot(BRFSS, aes(x = PULMOND)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent)+
  labs(x = "PULMOND?",
       y = "Percent of Respondents",
       caption = "Data source: BRFSS2018") +
  theme_bar

G3.ASTHMA

ggplot(BRFSS, aes(x = ASTHMA)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent)+
  labs(x = "ASTHMA?",
       y = "Percent of Respondents",
        caption = "Data source: BRFSS2018") +
  theme_bar

G4.DEPRESSION

ggplot(BRFSS, aes(x = DEPRESSION)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent)+
  labs(x = "DEPRESSION?",
       y = "Percent of Respondents",
        caption = "Data source: BRFSS2018") +
  theme_bar

G5.KIDNEY

ggplot(BRFSS, aes(x = KIDNEY)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent)+
  labs(x = "DEPRESSION?",
       y = "Percent of Respondents",
        caption = "Data source: BRFSS2018") +
  theme_bar

G6. DIABETE

ggplot(BRFSS, aes(x = DIABETE)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent)+
  labs(x = "DEPRESSION?",
       y = "Percent of Respondents",
        caption = "Data source: BRFSS2018") +
  theme_bar

DIABETE and BMI and EXERCISE

ggplot(BRFSS, aes(x = DIABETE,  group = BMICAT)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.6) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col2) +
    labs(y = "Percent of respondents", fill = "EDIABETE") +
    facet_grid(~ BMICAT) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

ggplot(BRFSS, aes(x = BMICAT,  group = DIABETE)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat = "count",
             width = 0.6) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat = "count",
              color = "black",
              vjust = -.25) +
  scale_fill_manual(values= col4) +
    labs(y = "Percent of respondents", fill = "BMICAT") +
    facet_grid(~ DIABETE) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

library(ggpubr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
## The following object is masked from 'package:ggimage':
## 
##     theme_transparent
theme_boxplot <-   theme(text = element_text(size = 12),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 12,face="bold"),
        plot.caption = element_text(color = "grey",size = 12, face = "italic"),
        strip.text = element_text(size = 12, color = "blue", face = "bold.italic"),
        strip.background = element_rect(fill = "slategray1"),
        legend.position = "none")
ggboxplot(BRFSS, 
          x = "DIABETE", 
          y = "SLEPTIM1",
          fill = "DIABETE",
          ylab = "Sleep duration (hours)", 
          xlab = "DIABETE status") +
  theme_boxplot

# Compute the analysis of variance
diabete.aov <- aov(SLEPTIM1 ~  DIABETE, data = BRFSS)
# Summary of the analysis
summary(diabete.aov)
##                 Df Sum Sq Mean Sq F value Pr(>F)
## DIABETE          1      1  0.5602   0.333  0.564
## Residuals   279570 469878  1.6807

As the p-value is higher than the significance level 0.05, we can conclude that there are NO significant differences between the groups

TukeyHSD(diabete.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = SLEPTIM1 ~ DIABETE, data = BRFSS)
## 
## $DIABETE
##               diff          lwr        upr     p adj
## Yes-No 0.004077576 -0.009764728 0.01791988 0.5637007
# 1. Homogeneity of variances
plot(diabete.aov, 1)

In the plot above, there is no evident relationships between residuals and fitted values (the mean of each groups), which is good. So, we can assume the homogeneity of variances.

plot(diabete.aov, 2)

G7. CANCER

ggplot(BRFSS, aes(x = CANCER)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent)+
  labs(x = "DEPRESSION?",
       y = "Percent of Respondents",
        caption = "Data source: BRFSS2018") +
  theme_bar

G7. STROKE

ggplot(BRFSS, aes(x = STROKE)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent)+
  labs(x = "STROKE?",
       y = "Percent of Respondents",
        caption = "Data source: BRFSS2018") +
  theme_bar

ChronicHC <- data.frame(
  CHC = c("ARTHRITIS",
         'ASTHMA',
          "CANCER",
          "DEPRESSION",
          "DIABETE",
          "KIDNEY",
         "PLUMOND", 
          "STROKE",
         "MICHD"),
  Percentage = c(33.40, 13.82, 16.97, 18.81, 14.02, 3.59, 7.95, 3.94, 8.80 ))

ChronicHC
##          CHC Percentage
## 1  ARTHRITIS      33.40
## 2     ASTHMA      13.82
## 3     CANCER      16.97
## 4 DEPRESSION      18.81
## 5    DIABETE      14.02
## 6     KIDNEY       3.59
## 7    PLUMOND       7.95
## 8     STROKE       3.94
## 9      MICHD       8.80
col8 <-  c("#0072BB", "#FF4C3B","#FFD034","#51A39D","#06425C", "#C6C8CA","#814374", "red", "salmon")
ggplot(data = ChronicHC, aes(x= reorder(CHC, -Percentage),
                             y = Percentage,
                             fill =  as.factor(Percentage))) + 
  geom_bar(stat = "identity",
           width = 0.60) +
  geom_text(aes(label = paste0(Percentage,"%")), 
              vjust = -0.25,
              size = 4.5,
              color = "black") +
   scale_fill_manual(values = col8) +
  theme(legend.position="none") +
    labs(x = "Chronic health condition",
       y = "Percent of Respondents (%)",
        caption = "Data source: BRFSS2018")+
  theme_bar

MICHD*

ggplot(BRFSS, aes(x = MICHD)) +
  geombar_CHC +
  geomtext_CHC +
  scale_y_continuous(labels = percent)+
  labs(x = "DEPRESSION?",
       y = "Percent of Respondents",
        caption = "Data source: BRFSS2018") +
  theme_bar

df_CHC <- data.frame(
  ChronicHLTHcond = c("CANCER","CANCER",
                      "DIABETE","DIABETE",
                      "KIDNEY","KIDNEY",
                      "DEPRESSION","DEPRESSION",
                      "ASTHMA","ASTHMA",
                      "PULMOND","PULMOND",
                      "ARTHRITIS","ARTHRITIS",
                      "STROKE","STROKE",
                      "MICHD","MICHD"),
  Status = rep(c("NO", "YES"),9),
  Percentage = c(83, 17, 
                 86, 14, 
                 96, 4,
                 81, 19,
                 86, 14,
                 92, 8,
                 67, 33,
                 96, 4,
                 91, 9))
df_CHC %>% 
  pander()
ChronicHLTHcond Status Percentage
CANCER NO 83
CANCER YES 17
DIABETE NO 86
DIABETE YES 14
KIDNEY NO 96
KIDNEY YES 4
DEPRESSION NO 81
DEPRESSION YES 19
ASTHMA NO 86
ASTHMA YES 14
PULMOND NO 92
PULMOND YES 8
ARTHRITIS NO 67
ARTHRITIS YES 33
STROKE NO 96
STROKE YES 4
MICHD NO 91
MICHD YES 9
df2_CHC <- df_CHC %>% 
  mutate(lab_ypos = cumsum(Percentage/10) - 0.5* Percentage/10) 

df2_CHC
##    ChronicHLTHcond Status Percentage lab_ypos
## 1           CANCER     NO         83     4.15
## 2           CANCER    YES         17     9.15
## 3          DIABETE     NO         86    14.30
## 4          DIABETE    YES         14    19.30
## 5           KIDNEY     NO         96    24.80
## 6           KIDNEY    YES          4    29.80
## 7       DEPRESSION     NO         81    34.05
## 8       DEPRESSION    YES         19    39.05
## 9           ASTHMA     NO         86    44.30
## 10          ASTHMA    YES         14    49.30
## 11         PULMOND     NO         92    54.60
## 12         PULMOND    YES          8    59.60
## 13       ARTHRITIS     NO         67    63.35
## 14       ARTHRITIS    YES         33    68.35
## 15          STROKE     NO         96    74.80
## 16          STROKE    YES          4    79.80
## 17           MICHD     NO         91    84.55
## 18           MICHD    YES          9    89.55
ggplot(df2_CHC, 
       aes(x = ChronicHLTHcond,
           y = Percentage,
           fill = Status)) + 
    geom_bar(position = "dodge", 
             stat = "identity")+
    geom_text(aes(label = paste0(Percentage,"%")), 
              position = position_dodge(width=0.9), 
              vjust = -0.25,
              size = 5,
              color = "black") +
  scale_fill_manual(values=c("#5F9EA0", "#E1B378")) +
  labs(x = "Chronic Healthe Condition",
       y = "Percent of Respondents (%)",
        caption = "Data source: BRFSS2018") +
theme_bar

BRFSS %>% 
  group_by(CANCER, KIDNEY, ASTHMA, ARTHRITIS, PULMOND, DEPRESSION, DIABETE, STROKE, MICHD) %>% 
  dplyr::count() %>% 
  head(20)
## # A tibble: 20 x 10
## # Groups:   CANCER, KIDNEY, ASTHMA, ARTHRITIS, PULMOND, DEPRESSION, DIABETE,
## #   STROKE, MICHD [20]
##    CANCER KIDNEY ASTHMA ARTHRITIS PULMOND DEPRESSION DIABETE STROKE MICHD      n
##    <fct>  <fct>  <fct>  <fct>     <fct>   <fct>      <fct>   <fct>  <fct>  <int>
##  1 No     No     No     No        No      No         No      No     No    105809
##  2 No     No     No     No        No      No         No      No     Yes     2988
##  3 No     No     No     No        No      No         No      Yes    No      1080
##  4 No     No     No     No        No      No         No      Yes    Yes      340
##  5 No     No     No     No        No      No         Yes     No     No      8738
##  6 No     No     No     No        No      No         Yes     No     Yes     1000
##  7 No     No     No     No        No      No         Yes     Yes    No       350
##  8 No     No     No     No        No      No         Yes     Yes    Yes      161
##  9 No     No     No     No        No      Yes        No      No     No     15038
## 10 No     No     No     No        No      Yes        No      No     Yes      445
## 11 No     No     No     No        No      Yes        No      Yes    No       247
## 12 No     No     No     No        No      Yes        No      Yes    Yes       64
## 13 No     No     No     No        No      Yes        Yes     No     No      1490
## 14 No     No     No     No        No      Yes        Yes     No     Yes      170
## 15 No     No     No     No        No      Yes        Yes     Yes    No        84
## 16 No     No     No     No        No      Yes        Yes     Yes    Yes       51
## 17 No     No     No     No        Yes     No         No      No     No      1887
## 18 No     No     No     No        Yes     No         No      No     Yes      383
## 19 No     No     No     No        Yes     No         No      Yes    No        71
## 20 No     No     No     No        Yes     No         No      Yes    Yes       55

106927 people are CHC free 279966 - 106927 = 173039 (61.8%) people have at least one chronic health condition 43 people have all the CHCs (8)

df3_CHC <- BRFSS %>% 
  mutate(Cancer =  case_when(CANCER == "Yes" ~ 1,
                             CANCER == "No" ~ 0),
         Kidney =  case_when(KIDNEY == "Yes" ~ 1,
                             KIDNEY  == "No" ~ 0),
         Asthma =  case_when(ASTHMA == "Yes" ~ 1,
                             ASTHMA == "No" ~ 0),
         Arthritis = case_when(ARTHRITIS == "Yes" ~ 1,
                                ARTHRITIS == "No" ~ 0),
         Pulmond =  case_when(PULMOND == "Yes" ~ 1,
                              PULMOND == "No" ~ 0),
         Depression =  case_when(DEPRESSION == "Yes" ~ 1,
                                 DEPRESSION == "No" ~ 0),
         Diabete =  case_when(DIABETE == "Yes" ~ 1,
                              DIABETE == "No" ~ 0),
         Stroke = case_when(STROKE == "Yes" ~ 1,
                            STROKE == "No" ~ 0),
         Michd =  case_when(MICHD == "Yes" ~ 1,
                            MICHD == "No" ~ 0),
         )

Take only column with numeric values

df4_CHC <- df3_CHC %>% 
  select(Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete, Stroke,Michd)
df5_CHC <-  df4_CHC %>% 
  group_by(Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete, Stroke, Michd) %>% 
  dplyr::count(name = "subtotal_case")

head(df5_CHC,30)
## # A tibble: 30 x 10
## # Groups:   Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete,
## #   Stroke, Michd [30]
##    Cancer Kidney Asthma Arthritis Pulmond Depression Diabete Stroke Michd
##     <dbl>  <dbl>  <dbl>     <dbl>   <dbl>      <dbl>   <dbl>  <dbl> <dbl>
##  1      0      0      0         0       0          0       0      0     0
##  2      0      0      0         0       0          0       0      0     1
##  3      0      0      0         0       0          0       0      1     0
##  4      0      0      0         0       0          0       0      1     1
##  5      0      0      0         0       0          0       1      0     0
##  6      0      0      0         0       0          0       1      0     1
##  7      0      0      0         0       0          0       1      1     0
##  8      0      0      0         0       0          0       1      1     1
##  9      0      0      0         0       0          1       0      0     0
## 10      0      0      0         0       0          1       0      0     1
## # ... with 20 more rows, and 1 more variable: subtotal_case <int>
tail(df5_CHC)
## # A tibble: 6 x 10
## # Groups:   Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete,
## #   Stroke, Michd [6]
##   Cancer Kidney Asthma Arthritis Pulmond Depression Diabete Stroke Michd
##    <dbl>  <dbl>  <dbl>     <dbl>   <dbl>      <dbl>   <dbl>  <dbl> <dbl>
## 1      1      1      1         1       1          1       0      1     0
## 2      1      1      1         1       1          1       0      1     1
## 3      1      1      1         1       1          1       1      0     0
## 4      1      1      1         1       1          1       1      0     1
## 5      1      1      1         1       1          1       1      1     0
## 6      1      1      1         1       1          1       1      1     1
## # ... with 1 more variable: subtotal_case <int>
df5_CHC$NbofCHC <- rowSums(df5_CHC[,c(1:9)])
df6_CHC <- df5_CHC
df6_CHC
## # A tibble: 507 x 11
## # Groups:   Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete,
## #   Stroke, Michd [507]
##    Cancer Kidney Asthma Arthritis Pulmond Depression Diabete Stroke Michd
##     <dbl>  <dbl>  <dbl>     <dbl>   <dbl>      <dbl>   <dbl>  <dbl> <dbl>
##  1      0      0      0         0       0          0       0      0     0
##  2      0      0      0         0       0          0       0      0     1
##  3      0      0      0         0       0          0       0      1     0
##  4      0      0      0         0       0          0       0      1     1
##  5      0      0      0         0       0          0       1      0     0
##  6      0      0      0         0       0          0       1      0     1
##  7      0      0      0         0       0          0       1      1     0
##  8      0      0      0         0       0          0       1      1     1
##  9      0      0      0         0       0          1       0      0     0
## 10      0      0      0         0       0          1       0      0     1
## # ... with 497 more rows, and 2 more variables: subtotal_case <int>,
## #   NbofCHC <dbl>

All the people living with at one chronic health condition

df6_CHC %>% 
  filter(NbofCHC == 1)
## # A tibble: 9 x 11
## # Groups:   Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete,
## #   Stroke, Michd [9]
##   Cancer Kidney Asthma Arthritis Pulmond Depression Diabete Stroke Michd
##    <dbl>  <dbl>  <dbl>     <dbl>   <dbl>      <dbl>   <dbl>  <dbl> <dbl>
## 1      0      0      0         0       0          0       0      0     1
## 2      0      0      0         0       0          0       0      1     0
## 3      0      0      0         0       0          0       1      0     0
## 4      0      0      0         0       0          1       0      0     0
## 5      0      0      0         0       1          0       0      0     0
## 6      0      0      0         1       0          0       0      0     0
## 7      0      0      1         0       0          0       0      0     0
## 8      0      1      0         0       0          0       0      0     0
## 9      1      0      0         0       0          0       0      0     0
## # ... with 2 more variables: subtotal_case <int>, NbofCHC <dbl>
df6_CHC %>% 
  group_by(NbofCHC) %>% 
  dplyr::summarise(totalcase = sum(subtotal_case))
## # A tibble: 10 x 2
##    NbofCHC totalcase
##      <dbl>     <int>
##  1       0    105809
##  2       1     81744
##  3       2     48701
##  4       3     24441
##  5       4     11351
##  6       5      4861
##  7       6      1886
##  8       7       617
##  9       8       135
## 10       9        27
df7_CHC <- df6_CHC %>% 
  group_by(NbofCHC) %>% 
  dplyr::summarise(Total_case = sum(subtotal_case))

df7_CHC
## # A tibble: 10 x 2
##    NbofCHC Total_case
##      <dbl>      <int>
##  1       0     105809
##  2       1      81744
##  3       2      48701
##  4       3      24441
##  5       4      11351
##  6       5       4861
##  7       6       1886
##  8       7        617
##  9       8        135
## 10       9         27
ggplot(data = df7_CHC,
           aes(x = NbofCHC, 
           y  = Total_case,
           fill =  Total_case)) +
  geom_bar(stat = "identity") +
  scale_fill_gradient(low = "darkred", high = "yellow")+
  geom_text(aes(label = Total_case), 
            vjust = -0.5, 
            color = "darkblue", 
            size = 4) +
  theme_minimal() +
  scale_y_continuous(limits = c(0,120000),
                     breaks = seq(0,120000, by = 20000))+
  scale_x_continuous(breaks = seq(0,9,by = 1))+
  labs(x = "Number of chronic health conditions",
       y = "Frequency",
       caption = "Data source: BRFSS2018") +
  theme_bar

df8_CHC <- df7_CHC %>% 
  mutate(Percentage = round(Total_case*100/sum(Total_case),2))

df8_CHC
## # A tibble: 10 x 3
##    NbofCHC Total_case Percentage
##      <dbl>      <int>      <dbl>
##  1       0     105809      37.8 
##  2       1      81744      29.2 
##  3       2      48701      17.4 
##  4       3      24441       8.74
##  5       4      11351       4.06
##  6       5       4861       1.74
##  7       6       1886       0.67
##  8       7        617       0.22
##  9       8        135       0.05
## 10       9         27       0.01
ggplot(data = df8_CHC,
           aes(x = NbofCHC, 
               y  = Percentage,
               fill = Percentage)) +
  scale_fill_gradient(low = "darkred", high = "yellow")+
  geom_bar(stat = "identity")+
  geom_text(aes(label = paste0(Percentage,"%", "\n", Total_case)), 
            vjust = -0.1, 
            color = "darkblue", 
            size = 4.5) +
  scale_x_continuous(breaks = seq(0,9,by = 1))+
  scale_y_continuous(limits = c(0,40)) +
  theme_minimal() +
  labs(x = "Number of chronic health conditions",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size=12),
        axis.title = element_text(size=12,face="bold"),
        plot.caption = element_text(color = "grey44",size = 12, face = "italic"),
        legend.title = element_text(color = "blue", size = 12),
        legend.text = element_text(size=12),
        legend.position = "non")

Among American aged 18 and more, almost 1 out of every 3 (~ 33%) adults were overweight or/and obese while 1.6% were underweight, based on their measured BMI.

SLEEPTIM (numeric attribute and chronic health condtion) A. Sleep duration and ASTHMA

ggplot(BRFSS, aes(x= SLEPTIM1 , group = ASTHMA)) + 
    geom_bar(aes(y = ..prop.., fill = factor(..x..)), 
         stat="count",width = 1) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~CANCER) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

sleptime_asthma <- ggplot(BRFSS, 
       aes(x = ASTHMA, 
           y = SLEPTIM1,
           fill = ASTHMA)) +
  geom_boxplot() +
  scale_fill_hue(l = 40, c = 35)+
  labs( x = "Cancer status",
        y = "Sleep duration (hours)",
        caption = "Data source: BRFSS2018")

sleptime_asthma + theme_boxplot

Assumption 3. Do the two populations have the same variances? We’ll use F-test to test for homogeneity in variances. This can be performed with the function var.test() as follow:

res.ftest_ASTHMA <- var.test(SLEPTIM1 ~ ASTHMA, data = BRFSS)
res.ftest_ASTHMA
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by ASTHMA
## F = 0.73867, num df = 240911, denom df = 38659, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7275073 0.7499429
## sample estimates:
## ratio of variances 
##          0.7386712
res.ftest_ARTHRITIS <- var.test(SLEPTIM1 ~ ARTHRITIS, data = BRFSS, alternative = "two.sided")
res.ftest_ARTHRITIS
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by ARTHRITIS
## F = 0.6884, num df = 186212, denom df = 93358, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.6807799 0.6960839
## sample estimates:
## ratio of variances 
##          0.6883965
res.ftest_CANCER <- var.test(SLEPTIM1 ~ CANCER, data = BRFSS, alternative = "two.sided")
res.ftest_CANCER
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by CANCER
## F = 0.90813, num df = 232122, denom df = 47448, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8955097 0.9208742
## sample estimates:
## ratio of variances 
##           0.908133
res.ftest_DEPRESSION <- var.test(SLEPTIM1 ~ DEPRESSION, data = BRFSS, alternative = "two.sided")
res.ftest_DEPRESSION
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by DEPRESSION
## F = 0.57769, num df = 226976, denom df = 52594, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5699777 0.5854759
## sample estimates:
## ratio of variances 
##          0.5776913
res.ftest_DIABETE <- var.test(SLEPTIM1 ~ DIABETE, data = BRFSS, alternative = "two.sided")
res.ftest_DIABETE
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by DIABETE
## F = 0.6913, num df = 240382, denom df = 39188, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.6809148 0.7017925
## sample estimates:
## ratio of variances 
##          0.6913035
res.ftest_KIDNEY <- var.test(SLEPTIM1 ~ KIDNEY, data = BRFSS, alternative = "two.sided")
res.ftest_KIDNEY
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by KIDNEY
## F = 0.62827, num df = 269521, denom df = 10049, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.6107080 0.6460933
## sample estimates:
## ratio of variances 
##          0.6282687
res.ftest_PULMOND <- var.test(SLEPTIM1 ~ PULMOND, data = BRFSS, alternative = "two.sided")
res.ftest_PULMOND
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by PULMOND
## F = 0.54672, num df = 257356, denom df = 22214, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5361798 0.5573743
## sample estimates:
## ratio of variances 
##          0.5467181
res.ftest_STROKE <- var.test(SLEPTIM1 ~ STROKE, data = BRFSS,  alternative = "two.sided")
res.ftest_STROKE
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by STROKE
## F = 0.55674, num df = 268546, denom df = 11024, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.5418525 0.5718449
## sample estimates:
## ratio of variances 
##           0.556741
res.ftest_MICHD <- var.test(SLEPTIM1 ~ MICHD, data = BRFSS)
res.ftest_MICHD
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by MICHD
## F = 0.65222, num df = 254977, denom df = 24593, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.6402132 0.6643555
## sample estimates:
## ratio of variances 
##          0.6522193
t.test(SLEPTIM1 ~ ASTHMA, data = BRFSS)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by ASTHMA
## t = 25.728, df = 48258, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1880149 0.2190244
## sample estimates:
##  mean in group No mean in group Yes 
##          7.046199          6.842680

All the F-test have p < 0.01 ==> reject the Null Hypotheses == accept the alternative hypothesis == There are difference between two variances of two groups. var.equal will set to be FALSE

t.test(SLEPTIM1 ~ ARTHRITIS, data = BRFSS, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by ARTHRITIS
## t = 12.489, df = 159401, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.05818593 0.07984851
## sample estimates:
##  mean in group No mean in group Yes 
##          7.041103          6.972086
t.test(SLEPTIM1 ~ CANCER, data = BRFSS, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by CANCER
## t = -25.076, df = 66232, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1820817 -0.1556816
## sample estimates:
##  mean in group No mean in group Yes 
##          6.989394          7.158275
t.test(SLEPTIM1 ~ DEPRESSION, data = BRFSS, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by DEPRESSION
## t = 25.771, df = 67338, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1765140 0.2055738
## sample estimates:
##  mean in group No mean in group Yes 
##          7.053997          6.862953
t.test(SLEPTIM1 ~ DIABETE, data = BRFSS, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by DIABETE
## t = -0.5059, df = 48419, p-value = 0.6129
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01987534  0.01172019
## sample estimates:
##  mean in group No mean in group Yes 
##          7.017485          7.021562
t.test(SLEPTIM1 ~ KIDNEY, alternative = "two.sided", var.equal = FALSE, data = BRFSS)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by KIDNEY
## t = 0.46426, df = 10525, p-value = 0.6425
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02443215  0.03959710
## sample estimates:
##  mean in group No mean in group Yes 
##          7.018329          7.010746
t.test(SLEPTIM1 ~ PULMOND, alternative = "two.sided", var.equal = FALSE, data = BRFSS)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by PULMOND
## t = 14.65, df = 24355, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1478766 0.1935567
## sample estimates:
##  mean in group No mean in group Yes 
##          7.031621          6.860905
t.test(SLEPTIM1 ~ STROKE, alternative = "two.sided", var.equal = FALSE, data = BRFSS)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by STROKE
## t = -3.1169, df = 11533, p-value = 0.001832
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08366195 -0.01906121
## sample estimates:
##  mean in group No mean in group Yes 
##          7.016031          7.067392
t.test(SLEPTIM1 ~ MICHD, alternative = "two.sided", var.equal = FALSE, data = BRFSS)
## 
##  Welch Two Sample t-test
## 
## data:  SLEPTIM1 by MICHD
## t = -2.5889, df = 27774, p-value = 0.009634
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.046917246 -0.006485714
## sample estimates:
##  mean in group No mean in group Yes 
##          7.015707          7.042409
res.ftest <- var.test(SLEPTIM1 ~ CANCER, data = BRFSS)
res.ftest
## 
##  F test to compare two variances
## 
## data:  SLEPTIM1 by CANCER
## F = 0.90813, num df = 232122, denom df = 47448, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8955097 0.9208742
## sample estimates:
## ratio of variances 
##           0.908133

There are significant difference between the variance of two sets of dsata. Therefore, we will use t-test with unequal variance.

ggplot(BRFSS, 
       aes(x = PULMOND, 
           y = SLEPTIM1)) +
  geom_violin(fill = "cornflowerblue") +
  geom_boxplot(width = .2, 
               fill = "orange",
               outlier.color = "orange",
               outlier.size = 2) +
  labs(title = "Sleep duration distribution by PULMOND status")

ggplot(BRFSS, 
       aes(x = MICHD, 
           y = SLEPTIM1, fill = MICHD)) +
  geom_violin(trim = FALSE,fill = "cornflowerblue") +
  geom_boxplot(width = 0.1, 
               fill = "grey",
               outlier.color = "grey",
               outlier.size = 2) +
  labs(title = "Sleep duration distribution by MICHD status") 

library(ggforce)

Numerical Data Descriptive Statistics

ggplot(data = BRFSS, aes(SLEPTIM1)) + 
  geom_bar(stat= "count", 
                 fill= "olivedrab4") +  
  coord_cartesian(clip = "off") +
  labs( x = "Sleep duration (hours)",
        y = "Frequency",
        subtitle = "On average, how many hours of sleep do you get in a 24-hour period?",
        caption = "Data source: BRFSS2018") + 
  scale_y_continuous(limits = c(0, 100000), expand = c(0, 0)) +
  scale_x_continuous(limits = c(0, 15), expand = c(0, 0)) +
  theme_bar

****BIVARIATE analysis 1****

ggplot(BRFSS, aes(x= SLEPTIM1 , group = CANCER)) + 
    geom_bar(aes(y = ..prop.., fill = factor(..x..)), 
         stat="count",width = 1) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~CANCER) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

Boxplot

library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
## The following object is masked from 'package:ggimage':
## 
##     theme_nothing
library(ggplot2)
theme_boxplot <-   theme(text = element_text(size = 12),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 12,face="bold"),
        plot.caption = element_text(color = "grey",size = 12, face = "italic"),
        strip.text = element_text(size = 12, color = "blue", face = "bold.italic"),
        strip.background = element_rect(fill = "slategray1"),
        legend.position = "none")
sleptime_cancer <- ggplot(BRFSS, 
       aes(x = CANCER, 
           y = SLEPTIM1,
           fill = CANCER)) +
  geom_boxplot() +
  scale_fill_hue(l = 40, c = 35)+
  labs( x = "Cancer status",
        y = "Sleep duration (hours)",
        caption = "Data source: BRFSS2018")

sleptime_cancer + theme_boxplot

BRFSS %>% 
  summarise(mean = mean(SLEPTIM1),
            std = sd(SLEPTIM1),
            median = median(SLEPTIM1))
##       mean      std median
## 1 7.018056 1.296423      7

Compare two means sample size

BRFSS %>% 
  group_by(CANCER) %>% 
  dplyr::count()
## # A tibble: 2 x 2
## # Groups:   CANCER [2]
##   CANCER      n
##   <fct>   <int>
## 1 No     232123
## 2 Yes     47449
library(purrr)
BRFSS %>% 
  split(.$CANCER) %>% 
  map(summary) %>% 
  pander()

The confident interval does not contain zero == > reject H0 ==> The difference statistically significant (Estimate for difference = 7.14- 6.98 = 0.16 hours= 9.6 minutes)

sleptime_Arthritis <- ggplot(BRFSS, 
                          aes(x = ARTHRITIS, 
                              y = SLEPTIM1,
                              fill = ARTHRITIS)) +
  geom_boxplot() +
  scale_fill_hue(l = 40, c = 35)+
  labs(x = "Arthritis status",
      y = "Sleep duration (hours)",
      caption = "Data source: BRFSS2018") 
 
sleptime_Arthritis +  theme_boxplot

sleptime_pulmond <- ggplot(BRFSS, 
                          aes(x = PULMOND, 
                              y = SLEPTIM1,
                              fill = PULMOND)) +
  geom_boxplot() +
  scale_fill_hue(l = 40, c = 35)+
  labs(x = "Pulmonary disease status",
      y = "Sleep duration (hours)",
      caption = "Data source: BRFSS2018") 
 

sleptime_pulmond +  theme_boxplot

sleptime_ASTHMA <- ggplot(BRFSS, 
                          aes(x = ASTHMA, 
                              y = SLEPTIM1,
                              fill = ASTHMA)) +
  geom_boxplot() +
  scale_fill_hue(l = 40, c = 35)+
  labs(x = "Asthma status",
      y = "Sleep duration (hours)",
      caption = "Data source: BRFSS2018") 
 
sleptime_ASTHMA +  theme_boxplot

sleptime_DEPRESSION <- ggplot(BRFSS, 
                          aes(x = DEPRESSION, 
                              y = SLEPTIM1,
                              fill = DEPRESSION)) +
  geom_boxplot() +
  scale_fill_hue(l = 40, c = 35)+
  labs(x = "Depressive disorder status",
      y = "Sleep duration (hours)",
      caption = "Data source: BRFSS2018") 
 
sleptime_DEPRESSION + theme_boxplot

SLEPTIME and other risk factors (multiple levels)

BRFSS %>% 
  group_by(AGE) %>% 
  dplyr::summarise(count = n(),
              meanslpt =  mean(SLEPTIM1),
            stdslpt = sd(SLEPTIM1))
## # A tibble: 7 x 4
##   AGE   count meanslpt stdslpt
##   <fct> <int>    <dbl>   <dbl>
## 1 18-29 29415     6.92    1.27
## 2 30-39 34450     6.79    1.24
## 3 40-49 38340     6.82    1.25
## 4 50-59 53253     6.87    1.29
## 5 60-69 63216     7.09    1.28
## 6 70-79 42788     7.32    1.28
## 7 80+   18110     7.50    1.40
ggboxplot(BRFSS, 
          x = "AGE", 
          y = "SLEPTIM1",
          fill = "AGE",
          ylab = "Sleep duration (hours)", 
          xlab = "Age group") +
  theme_boxplot

# Compute the analysis of variance
age.aov <- aov(SLEPTIM1 ~ AGE, data = BRFSS)
# Summary of the analysis
summary(age.aov)
##                 Df Sum Sq Mean Sq F value Pr(>F)    
## AGE              6  13090  2181.7    1335 <2e-16 ***
## Residuals   279565 456789     1.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As the p-value is less than the significance level 0.05, we can conclude that there are significant differences between the groups

TukeyHSD(age.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = SLEPTIM1 ~ AGE, data = BRFSS)
## 
## $AGE
##                    diff          lwr         upr     p adj
## 30-39-18-29 -0.13357706 -0.163495798 -0.10365832 0.0000000
## 40-49-18-29 -0.09810146 -0.127312774 -0.06889015 0.0000000
## 50-59-18-29 -0.05324172 -0.080619810 -0.02586364 0.0000002
## 60-69-18-29  0.16682431  0.140224952  0.19342366 0.0000000
## 70-79-18-29  0.39362814  0.365083602  0.42217268 0.0000000
## 80+-18-29    0.58132152  0.545724945  0.61691810 0.0000000
## 40-49-30-39  0.03547560  0.007498282  0.06345291 0.0035015
## 50-59-30-39  0.08033534  0.054277917  0.10639276 0.0000000
## 60-69-30-39  0.30040137  0.275163396  0.32563934 0.0000000
## 70-79-30-39  0.52720520  0.499924801  0.55448560 0.0000000
## 80+-30-39    0.71489858  0.680307459  0.74948971 0.0000000
## 50-59-40-49  0.04485974  0.019617732  0.07010175 0.0000033
## 60-69-40-49  0.26492577  0.240530586  0.28932095 0.0000000
## 70-79-40-49  0.49172961  0.465226962  0.51823225 0.0000000
## 80+-40-49    0.67942299  0.645441878  0.71340410 0.0000000
## 60-69-50-59  0.22006603  0.197898848  0.24223321 0.0000000
## 70-79-50-59  0.44686987  0.422402559  0.47133717 0.0000000
## 80+-50-59    0.63456325  0.602144507  0.66698199 0.0000000
## 70-79-60-69  0.22680384  0.203211145  0.25039653 0.0000000
## 80+-60-69    0.41449722  0.382733390  0.44626104 0.0000000
## 80+-70-79    0.18769338  0.154283715  0.22110305 0.0000000
# 1. Homogeneity of variances
plot(age.aov, 1)

In the plot above, there is no evident relationships between residuals and fitted values (the mean of each groups), which is good. So, we can assume the homogeneity of variances.

plot(age.aov, 2)

SLEPTIM and GENDER

ggboxplot(BRFSS, 
          x = "GENDER", 
          y = "SLEPTIM1",
          fill = "GENDER",
          ylab = "Sleep duration (hours)", 
          xlab = "Gender") +
 theme_boxplot

t.test(BRFSS$SLEPTIM1~BRFSS$GENDER)
## 
##  Welch Two Sample t-test
## 
## data:  BRFSS$SLEPTIM1 by BRFSS$GENDER
## t = 7.4379, df = 278554, p-value = 1.026e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.02685357 0.04606973
## sample estimates:
## mean in group Female   mean in group Male 
##             7.035489             6.999027

SLEPTIME and INCOME

ggboxplot(BRFSS, 
          x = "INCOME", 
          y = "SLEPTIM1",
          fill = "INCOME",
          ylab = "Sleep duration (hours)", 
          xlab = "Income range") +
  theme_boxplot

BRFSS %>% 
  group_by(INCOME) %>% 
  dplyr::summarise(count = n(),
              meanslpt =  mean(SLEPTIM1),
            stdslpt = sd(SLEPTIM1)) %>% 
  pander()
INCOME count meanslpt stdslpt
<$20K 40024 6.934 1.678
$20K-<$35K 51546 7.023 1.435
$35K-<$50K 38677 7.042 1.289
$50K-<$75K 47127 7.04 1.193
$75K+ 102198 7.03 1.081
# Compute the analysis of variance
income.aov <- aov(SLEPTIM1 ~ INCOME, data = BRFSS)
# Summary of the analysis
summary(income.aov)
##                 Df Sum Sq Mean Sq F value Pr(>F)    
## INCOME           4    342   85.50   50.91 <2e-16 ***
## Residuals   279567 469537    1.68                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

H0 : There is no difference between group Ha : A least one of them is different p < 0.01 ==> reject the null hypothesis = accept the Ha

TukeyHSD(income.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = SLEPTIM1 ~ INCOME, data = BRFSS)
## 
## $INCOME
##                               diff          lwr         upr     p adj
## $20K-<$35K-<$20K       0.088680936  0.065129401 0.112232470 0.0000000
## $35K-<$50K-<$20K       0.107635460  0.082429467 0.132841454 0.0000000
## $50K-<$75K-<$20K       0.105910131  0.081880817 0.129939446 0.0000000
## $75K+-<$20K            0.095806543  0.074961525 0.116651561 0.0000000
## $35K-<$50K-$20K-<$35K  0.018954524 -0.004826748 0.042735797 0.1894985
## $50K-<$75K-$20K-<$35K  0.017229196 -0.005301112 0.039759503 0.2260634
## $75K+-$20K-<$35K       0.007125607 -0.011972078 0.026223293 0.8473075
## $50K-<$75K-$35K-<$50K -0.001725329 -0.025979856 0.022529198 0.9996844
## $75K+-$35K-<$50K      -0.011828917 -0.032933157 0.009275322 0.5435353
## $75K+-$50K-<$75K      -0.010103589 -0.029787459 0.009580282 0.6275639

***RACE and SLEPTIME

BRFSS %>% 
  group_by(RACE) %>% 
  dplyr::summarise(count = n(),
              meanslpt =  mean(SLEPTIM1),
            stdslpt = sd(SLEPTIM1)) %>% 
  pander()
RACE count meanslpt stdslpt
AIAN 4652 6.922 1.545
Asian 5670 6.768 1.175
Black 21602 6.836 1.548
Hispanic 18717 6.92 1.33
White 220761 7.062 1.252
Others 8170 6.779 1.47
ggboxplot(BRFSS, 
          x = "RACE", 
          y = "SLEPTIM1",
          fill = "RACE",
          ylab = "Sleep duration (hours)", 
          xlab = "Race") +
  theme_boxplot

# Compute the analysis of variance
race.aov <- aov(SLEPTIM1 ~ RACE, data = BRFSS)
# Summary of the analysis
summary(race.aov)
##                 Df Sum Sq Mean Sq F value Pr(>F)    
## RACE             5   2179   435.8   260.5 <2e-16 ***
## Residuals   279566 467700     1.7                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(race.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = SLEPTIM1 ~ RACE, data = BRFSS)
## 
## $RACE
##                         diff         lwr          upr     p adj
## Asian-AIAN      -0.153500116 -0.22641429 -0.080585946 0.0000000
## Black-AIAN      -0.086137012 -0.14571314 -0.026560881 0.0005409
## Hispanic-AIAN   -0.001895133 -0.06227936  0.058489090 0.9999992
## White-AIAN       0.139760404  0.08515321  0.194367596 0.0000000
## Others-AIAN     -0.142439519 -0.21013948 -0.074739561 0.0000000
## Black-Asian      0.067363104  0.01236327  0.122362938 0.0064160
## Hispanic-Asian   0.151604984  0.09573083  0.207479134 0.0000000
## White-Asian      0.293260520  0.24368622  0.342834824 0.0000000
## Others-Asian     0.011060597 -0.05264930  0.074770491 0.9963733
## Hispanic-Black   0.084241879  0.04743482  0.121048942 0.0000000
## White-Black      0.225897416  0.19962098  0.252173848 0.0000000
## Others-Black    -0.056302507 -0.10417515 -0.008429866 0.0104282
## White-Hispanic   0.141655537  0.11359507  0.169716004 0.0000000
## Others-Hispanic -0.140544386 -0.18941901 -0.091669764 0.0000000
## Others-White    -0.282199923 -0.32372606 -0.240673790 0.0000000

****Attribute and the outcome (MICHD)****

geombar_facet <- geom_bar(aes(y = ..prop.., 
                              fill = factor(..x..)), 
                          stat="count",width = 1)
geomtext_facet <- geom_text(size = 4, 
                            aes(label = scales::percent(..prop..),
                                y= ..prop.. ), 
                            stat= "count", vjust = -.25)

A1 REGION

ggplot(BRFSS, aes(x = MICHD , group = Region)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "peachpuff4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~Region) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

Demographic B1.AGE and MICHD

ggplot(BRFSS, aes(x = MICHD , group = AGE)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "royalblue4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ AGE) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

MICHDage <- data.frame(
  age_group = c("18-29","30-39","40-49", "50-59", "60-69","70-79","80+"),
  Percentage = c(0.64, 1.44, 3.10, 6.78, 11.54, 17.64, 23.59))
MICHDage
##   age_group Percentage
## 1     18-29       0.64
## 2     30-39       1.44
## 3     40-49       3.10
## 4     50-59       6.78
## 5     60-69      11.54
## 6     70-79      17.64
## 7       80+      23.59
ggplot(data = MICHDage,
           aes(x = age_group, 
               y  = Percentage)) +
  geom_bar(stat = "identity",
           fill = "royalblue4",
           width =  0.75)+
  geom_text(aes(label = paste0(Percentage,"%")), 
            vjust = -0.5, 
            color = "black", 
            size = 4.5) +
  scale_y_continuous(limits = c(0,25)) +
  theme_minimal() +
  labs(x = "Age group",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(size = 12),
        legend.position = "non")

ggplot(data = MICHDage,
           aes(x = age_group, 
               y  = Percentage)) +
  geom_point(stat = "identity",
             position = "identity",
             shape = 21, 
             colour = "blue", 
             fill = "blue", size = 3) + 
  geom_text(aes(label = paste0(Percentage,"%")), 
            vjust = -0.5, 
            color = "black", 
            size = 4.5) +
  scale_y_continuous(limits = c(0,25)) +
  theme_minimal() +
  labs(x = "Age group",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(size = 12),
        legend.position = "non")

B2.GENDER and MICHD

ggplot(BRFSS, aes(x= MICHD , group = GENDER)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "royalblue4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~GENDER) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

table(BRFSS$MICHD,BRFSS$GENDER)
##      
##       Female   Male
##   No  136133 118845
##   Yes   9771  14823
prop.test(table(BRFSS$MICHD, BRFSS$GENDER), correct= FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$MICHD, BRFSS$GENDER)
## X-squared = 1677.6, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.1301941 0.1430238
## sample estimates:
##   prop 1   prop 2 
## 0.533901 0.397292

RACE and MICHD

ggplot(BRFSS, aes(x= MICHD , group = RACE)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "royalblue4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~RACE) +
    scale_y_continuous(labels = scales::percent) + 
  themebar_facet

ggplot(BRFSS, aes(x= MICHD , group = EDU)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "royalblue4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ EDU) +
    scale_y_continuous(labels = scales::percent) +
 themebar_facet

MICHDedu <- data.frame(
  edu_group = c("<8","9-12","12-15", "16+"),
  Percentage = c(16.2, 11.2, 9.2, 6.5))
MICHDedu
##   edu_group Percentage
## 1        <8       16.2
## 2      9-12       11.2
## 3     12-15        9.2
## 4       16+        6.5
positions <- c("<8","9-12","12-15", "16+")

ggplot(data = MICHDedu ,
           aes(x = positions, 
               y  = Percentage)) +
  geom_bar(stat = "identity",
           fill = "royalblue4",
           width = 0.6)+
  geom_text(aes(label = paste0(Percentage,"%")), 
            vjust = -0.5, 
            color = "black", 
            size = 4.5) +
  scale_x_discrete(limits = positions) +
  scale_y_continuous(limits = c(0,20)) +
  theme_minimal() +
  labs(x = "Years of education categories",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(size = 12),
        legend.position = "non")

ggplot(BRFSS, aes(x= MICHD , group = RELP)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "royalblue4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ RELP) +
    scale_y_continuous(labels = scales::percent) +
 themebar_facet

AGE and widowed

BRFSS %>% 
  group_by(AGE,RELP) %>% 
  dplyr::count()
## # A tibble: 28 x 3
## # Groups:   AGE, RELP [28]
##    AGE   RELP                n
##    <fct> <ord>           <int>
##  1 18-29 In relationship  9521
##  2 18-29 Living apart      991
##  3 18-29 Never married   18861
##  4 18-29 Widowed            42
##  5 30-39 In relationship 21906
##  6 30-39 Living apart     4101
##  7 30-39 Never married    8267
##  8 30-39 Widowed           176
##  9 40-49 In relationship 26044
## 10 40-49 Living apart     6995
## # ... with 18 more rows
ggplot(BRFSS, aes(x= AGE, group = RELP)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat="count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col7) +
    labs(y = "Percent of respondents", fill = "Age group") +
    facet_grid(~ RELP) +
    scale_y_continuous(labels = scales::percent) +
  theme_bar

B6. EMPL

ggplot(BRFSS, aes(x = MICHD , group = EMPL)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "royalblue4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ EMPL) +
  themebar_facet

ggplot(BRFSS, aes(x= AGE, group = EMPL)) + 
    geom_bar(aes(y = ..prop.., 
                 fill = factor(..x..)), 
             stat = "count",
             width = 0.8) +
    geom_text(size = 4, 
              aes(label = scales::percent(..prop..),
                  y= ..prop.. ), 
              stat= "count",
              color = "blue",
              vjust = -.25) +
  scale_fill_manual(values= col7) +
    labs(y = "Percent of respondents", fill = "Age group") +
    facet_grid(~EMPL) +
    scale_y_continuous(labels = scales::percent) +
  theme_bar

B7. Income levels (INCOME)

ggplot(BRFSS, aes(x= MICHD , group = INCOME)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "royalblue4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ INCOME) +
    scale_y_continuous(labels = scales::percent) + 
  themebar_facet

MICHDincome <- data.frame(
  edu_group = c("<$20K","$20K-<$35K","$35K-<$50K", "$50K-<$75K","$75K+"),
  Percentage = c(14.9, 11.5,9.5, 7.8, 5.3))
MICHDincome
##    edu_group Percentage
## 1      <$20K       14.9
## 2 $20K-<$35K       11.5
## 3 $35K-<$50K        9.5
## 4 $50K-<$75K        7.8
## 5      $75K+        5.3
position_edu <- c("<$20K","$20K-<$35K","$35K-<$50K", "$50K-<$75K","$75K+")

ggplot(data = MICHDincome,
           aes(x = position_edu, 
               y = Percentage)) +
  geom_bar(stat = "identity",
           fill = "royalblue4",
           width = 0.6)+
  geom_text(aes(label = paste0(Percentage,"%")), 
            vjust = -0.5, 
            color = "black", 
            size = 4.5) +
  scale_x_discrete(limits = position_edu) +
  scale_y_continuous(limits = c(0,18)) +
  theme_minimal() +
  labs(x = "Income levels",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(size = 12),
        legend.position = "non")

B8.Geographic locale

ggplot(BRFSS, aes(x= MICHD , group = COMMUN)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "royalblue4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ COMMUN) +
    scale_y_continuous(labels = scales::percent) + 
  themebar_facet

Lifestyle (C) C1. X_SMOKER3 Four-level smoker status —> SMOKESTAT C2. ALCDAY5 Days in past 30 had alcoholic beverage —> ALCpa30 C3. SLEPTIM1 How Much Time Do You Sleep C4. EXERANY2 Exercise in Past 30 Days

C1. X_SMOKER3 Four-level smoker status —> SMOKESTAT

ggplot(BRFSS, aes(x = MICHD , group = SMOKESTAT)) + 
    geombar_facet +
    geomtext_facet  +
  scale_fill_manual(values = c("gainsboro", "olivedrab4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ SMOKESTAT) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

prop.test(table(BRFSS$SMOKESTAT, BRFSS$MICHD), correct=FALSE) 
## 
##  4-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$SMOKESTAT, BRFSS$MICHD)
## X-squared = 3786.7, df = 3, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4 
## 0.8933224 0.9036556 0.8658091 0.9396616

C2. ALCDAY5 Days in past 30 had alcoholic beverage —> ALCpa30

ggplot(BRFSS, aes(x = MICHD , group = ALCpa30)) + 
    geombar_facet +
    geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "olivedrab4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ ALCpa30) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

prop.test(table(BRFSS$ALCpa30, BRFSS$MICHD), correct = FALSE) 
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$ALCpa30, BRFSS$MICHD)
## X-squared = 2257.3, df = 2, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3 
## 0.8833757 0.9360242 0.9320561

C3. SLEPTIM1 How Much Time Do You Sleep

ggplot(BRFSS, aes(x = MICHD , group = SLEPTIM1)) + 
   geombar_facet +
  geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "olivedrab4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ SLEPTIM1) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

prop.test(table(BRFSS$SLEPTIM1, BRFSS$MICHD), correct = FALSE) 
## 
##  10-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$SLEPTIM1, BRFSS$MICHD)
## X-squared = 2095.6, df = 9, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4    prop 5    prop 6    prop 7    prop 8 
## 0.8150538 0.8381890 0.8964931 0.9178640 0.9332331 0.9088374 0.8906505 0.8343991 
##    prop 9   prop 10 
## 0.8532609 0.8106769
Sleep <- data.frame(
  Duration =  c(3:12),
  Percentage = c(18.49, 16.18, 10.35, 8.21, 6.68, 9.12, 10.93, 16.56, 14.67, 18.93)
)

Sleep
##    Duration Percentage
## 1         3      18.49
## 2         4      16.18
## 3         5      10.35
## 4         6       8.21
## 5         7       6.68
## 6         8       9.12
## 7         9      10.93
## 8        10      16.56
## 9        11      14.67
## 10       12      18.93
ggplot(data = Sleep, aes(x = Duration,
                        y = Percentage)) +
  geom_line(linetype = "dashed", 
            size = 0.5) +
  geom_point(size = 6, color = "blue") +
  
  labs(y = "Percent of respondents (%)", 
       x = "Duration (h)") +
  theme_point

ggplot(data = Sleep,
           aes(x = Duration, 
               y  = Percentage,
               fill = Percentage)) +
  scale_fill_gradient(low = "olivedrab4", high = "darkred")+
  geom_bar(stat = "identity")+
  geom_text(aes(label = paste0(Percentage,"%")), 
            vjust = -0.5, 
            color = "black", 
            size = 4.5) +
  scale_x_continuous(breaks = seq(0,14,by = 1))+
  scale_y_continuous(limits = c(0,20)) +
  theme_minimal() +
  labs(x = "Sleep duration (h)",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(size = 12),
        legend.position = "non")

C4. EXERANY2 Exercise in Past 30 Days

ggplot(BRFSS, aes(x= MICHD , group = EXERpa30)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "olivedrab4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ EXERpa30) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

prop.test(table(BRFSS$EXERpa30, BRFSS$MICHD), correct=FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$EXERpa30, BRFSS$MICHD)
## X-squared = 2775.8, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.07069454 -0.06482085
## sample estimates:
##    prop 1    prop 2 
## 0.8594115 0.9271692

Health Status (D) D1. GENHLTH

ggplot(BRFSS, aes(x= MICHD , group = GENHLTH)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "orangered4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ GENHLTH) +
    scale_y_continuous(labels = scales::percent)+ 
  themebar_facet

prop.test(table(BRFSS$GENHLTH, BRFSS$MICHD), correct = FALSE) 
## 
##  5-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$GENHLTH, BRFSS$MICHD)
## X-squared = 18845, df = 4, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4    prop 5 
## 0.6677332 0.8065433 0.9026237 0.9550222 0.9779830
Sleep <- data.frame(
  Duration =  c(3:12),
  Percentage = c(18.49, 16.18, 10.35, 8.21, 6.68, 9.12, 10.93, 16.56, 14.67, 18.93)
)

Sleep
##    Duration Percentage
## 1         3      18.49
## 2         4      16.18
## 3         5      10.35
## 4         6       8.21
## 5         7       6.68
## 6         8       9.12
## 7         9      10.93
## 8        10      16.56
## 9        11      14.67
## 10       12      18.93
MICHD_GENHLTH <- data.frame(
  GENHLTH_group = c("Poor","Fair","Good", "Very good","Excellent"),
  Percentage = c(33.2,19.3,9.7, 4.5, 2.2))


position_GENHLTH <- c("Poor","Fair","Good", "Very good","Excellent")

ggplot(data = MICHD_GENHLTH,
           aes(x = position_GENHLTH, 
               y = Percentage)) +
  geom_bar(stat = "identity",
           fill = "purple4",
           width = 0.6)+
  geom_text(aes(label = paste0(Percentage,"%")), 
            vjust = -0.5, 
            color = "black", 
            size = 4.5) +
  scale_x_discrete(limits = position_GENHLTH) +
  scale_y_continuous(limits = c(0,35)) +
  theme_minimal() +
  labs(x = "Would you say that in general your health is:...",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(size = 12),
        legend.position = "non")

Healthy Days (E)— Health Related Quality of Life E1.PHYSHLTH (Number of Days Physical Health Not Good) E2.MENTHLTH (Number of Days Mental Health Not Good)

ggplot(BRFSS, aes(x= MICHD , group = PHYSHLTHpa30)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "purple4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ PHYSHLTHpa30) +
    scale_y_continuous(labels = scales::percent)+
 themebar_facet

prop.test(table(BRFSS$PHYSHLTHpa30, BRFSS$MICHD), correct = FALSE) 
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$PHYSHLTHpa30, BRFSS$MICHD)
## X-squared = 8082.2, df = 2, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3 
## 0.9381334 0.9134742 0.8002322
ggplot(BRFSS, aes(x= MICHD , group = MENTHLTHpa30)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "purple4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ MENTHLTHpa30) +
    scale_y_continuous(labels = scales::percent)+
 themebar_facet

prop.test(table(BRFSS$MENTHLTHpa30, BRFSS$MICHD), correct = FALSE) 
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$MENTHLTHpa30, BRFSS$MICHD)
## X-squared = 661.24, df = 2, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3 
## 0.9128522 0.9303403 0.8823198

Health Care Access (F) F1. HPLN1 (Have any health care coverage) ==> HLTHPLN F2. MEDCOST (Could Not See Doctor Because of Cost) ==> MEDICOST F3. CHECKUP1 ==> CHECKUP

ggplot(BRFSS, aes(x= MICHD , group = HLTHPLN)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ HLTHPLN) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

ggplot(BRFSS, aes(x= MICHD , group = MEDICOST)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ MEDICOST) +
    scale_y_continuous(labels = scales::percent)+
themebar_facet

ggplot(BRFSS, aes(x= MICHD , group = CHECKUP)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ CHECKUP) +
    scale_y_continuous(labels = scales::percent)+
themebar_facet

BRFSS %>% 
  group_by(CHECKUP, MICHD) %>% 
  dplyr::count()
## # A tibble: 8 x 3
## # Groups:   CHECKUP, MICHD [8]
##   CHECKUP MICHD      n
##   <fct>   <fct>  <int>
## 1 <1      No    200580
## 2 <1      Yes    22738
## 3 1-<2    No     26573
## 4 1-<2    Yes     1002
## 5 2-<5    No     13765
## 6 2-<5    Yes      410
## 7 5+      No     14060
## 8 5+      Yes      444

Chronic Health Conditions (G) G1. ASTHMA3 ==> ASTHMA G2. CHCCOPD1 ==> PULMOND G3. HAVARTH3 ==> ARTHRITIS G4. ADDEPEV2 ==> DEPRESSION G5. CHCKDNY1 ==> KIDNEY G6. DIABETE3 ==> DIABETE G7. CHCSCNCR,CHCOCNCR ==> CANCER

BRFSS %>% 
  select(ASTHMA, PULMOND,ARTHRITIS,DEPRESSION, KIDNEY,DIABETE,CANCER, MICHD) %>% 
  group_by (MICHD) %>% 
  count()
##     ASTHMA PULMOND ARTHRITIS DEPRESSION KIDNEY DIABETE CANCER MICHD   freq
## 1       No      No        No         No     No      No     No    No 106889
## 2       No      No        No         No     No      No     No   Yes   3328
## 3       No      No        No         No     No      No    Yes    No  12899
## 4       No      No        No         No     No      No    Yes   Yes   1226
## 5       No      No        No         No     No     Yes     No    No   9088
## 6       No      No        No         No     No     Yes     No   Yes   1161
## 7       No      No        No         No     No     Yes    Yes    No   1689
## 8       No      No        No         No     No     Yes    Yes   Yes    378
## 9       No      No        No         No    Yes      No     No    No    904
## 10      No      No        No         No    Yes      No     No   Yes    128
## 11      No      No        No         No    Yes      No    Yes    No    442
## 12      No      No        No         No    Yes      No    Yes   Yes    100
## 13      No      No        No         No    Yes     Yes     No    No    401
## 14      No      No        No         No    Yes     Yes     No   Yes    165
## 15      No      No        No         No    Yes     Yes    Yes    No    155
## 16      No      No        No         No    Yes     Yes    Yes   Yes     73
## 17      No      No        No        Yes     No      No     No    No  15285
## 18      No      No        No        Yes     No      No     No   Yes    509
## 19      No      No        No        Yes     No      No    Yes    No   1812
## 20      No      No        No        Yes     No      No    Yes   Yes    166
## 21      No      No        No        Yes     No     Yes     No    No   1574
## 22      No      No        No        Yes     No     Yes     No   Yes    221
## 23      No      No        No        Yes     No     Yes    Yes    No    297
## 24      No      No        No        Yes     No     Yes    Yes   Yes     55
## 25      No      No        No        Yes    Yes      No     No    No    217
## 26      No      No        No        Yes    Yes      No     No   Yes     36
## 27      No      No        No        Yes    Yes      No    Yes    No     85
## 28      No      No        No        Yes    Yes      No    Yes   Yes     15
## 29      No      No        No        Yes    Yes     Yes     No    No     97
## 30      No      No        No        Yes    Yes     Yes     No   Yes     40
## 31      No      No        No        Yes    Yes     Yes    Yes    No     34
## 32      No      No        No        Yes    Yes     Yes    Yes   Yes     13
## 33      No      No       Yes         No     No      No     No    No  28413
## 34      No      No       Yes         No     No      No     No   Yes   2730
## 35      No      No       Yes         No     No      No    Yes    No   8670
## 36      No      No       Yes         No     No      No    Yes   Yes   1318
## 37      No      No       Yes         No     No     Yes     No    No   5295
## 38      No      No       Yes         No     No     Yes     No   Yes   1207
## 39      No      No       Yes         No     No     Yes    Yes    No   1748
## 40      No      No       Yes         No     No     Yes    Yes   Yes    503
## 41      No      No       Yes         No    Yes      No     No    No    770
## 42      No      No       Yes         No    Yes      No     No   Yes    201
## 43      No      No       Yes         No    Yes      No    Yes    No    475
## 44      No      No       Yes         No    Yes      No    Yes   Yes    182
## 45      No      No       Yes         No    Yes     Yes     No    No    488
## 46      No      No       Yes         No    Yes     Yes     No   Yes    282
## 47      No      No       Yes         No    Yes     Yes    Yes    No    266
## 48      No      No       Yes         No    Yes     Yes    Yes   Yes    167
## 49      No      No       Yes        Yes     No      No     No    No   8089
## 50      No      No       Yes        Yes     No      No     No   Yes    762
## 51      No      No       Yes        Yes     No      No    Yes    No   2162
## 52      No      No       Yes        Yes     No      No    Yes   Yes    333
## 53      No      No       Yes        Yes     No     Yes     No    No   1826
## 54      No      No       Yes        Yes     No     Yes     No   Yes    458
## 55      No      No       Yes        Yes     No     Yes    Yes    No    523
## 56      No      No       Yes        Yes     No     Yes    Yes   Yes    170
## 57      No      No       Yes        Yes    Yes      No     No    No    320
## 58      No      No       Yes        Yes    Yes      No     No   Yes     81
## 59      No      No       Yes        Yes    Yes      No    Yes    No    165
## 60      No      No       Yes        Yes    Yes      No    Yes   Yes     60
## 61      No      No       Yes        Yes    Yes     Yes     No    No    210
## 62      No      No       Yes        Yes    Yes     Yes     No   Yes    100
## 63      No      No       Yes        Yes    Yes     Yes    Yes    No     85
## 64      No      No       Yes        Yes    Yes     Yes    Yes   Yes     52
## 65      No     Yes        No         No     No      No     No    No   1958
## 66      No     Yes        No         No     No      No     No   Yes    438
## 67      No     Yes        No         No     No      No    Yes    No    612
## 68      No     Yes        No         No     No      No    Yes   Yes    165
## 69      No     Yes        No         No     No     Yes     No    No    346
## 70      No     Yes        No         No     No     Yes     No   Yes    157
## 71      No     Yes        No         No     No     Yes    Yes    No    127
## 72      No     Yes        No         No     No     Yes    Yes   Yes     59
## 73      No     Yes        No         No    Yes      No     No    No     33
## 74      No     Yes        No         No    Yes      No     No   Yes     23
## 75      No     Yes        No         No    Yes      No    Yes    No     41
## 76      No     Yes        No         No    Yes      No    Yes   Yes     29
## 77      No     Yes        No         No    Yes     Yes     No    No     24
## 78      No     Yes        No         No    Yes     Yes     No   Yes     40
## 79      No     Yes        No         No    Yes     Yes    Yes    No     13
## 80      No     Yes        No         No    Yes     Yes    Yes   Yes     13
## 81      No     Yes        No        Yes     No      No     No    No    615
## 82      No     Yes        No        Yes     No      No     No   Yes    117
## 83      No     Yes        No        Yes     No      No    Yes    No    157
## 84      No     Yes        No        Yes     No      No    Yes   Yes     28
## 85      No     Yes        No        Yes     No     Yes     No    No    116
## 86      No     Yes        No        Yes     No     Yes     No   Yes     57
## 87      No     Yes        No        Yes     No     Yes    Yes    No     35
## 88      No     Yes        No        Yes     No     Yes    Yes   Yes     26
## 89      No     Yes        No        Yes    Yes      No     No    No     25
## 90      No     Yes        No        Yes    Yes      No     No   Yes     10
## 91      No     Yes        No        Yes    Yes      No    Yes    No     11
## 92      No     Yes        No        Yes    Yes      No    Yes   Yes      8
## 93      No     Yes        No        Yes    Yes     Yes     No    No     10
## 94      No     Yes        No        Yes    Yes     Yes     No   Yes     14
## 95      No     Yes        No        Yes    Yes     Yes    Yes    No      1
## 96      No     Yes        No        Yes    Yes     Yes    Yes   Yes      5
## 97      No     Yes       Yes         No     No      No     No    No   1926
## 98      No     Yes       Yes         No     No      No     No   Yes    529
## 99      No     Yes       Yes         No     No      No    Yes    No    742
## 100     No     Yes       Yes         No     No      No    Yes   Yes    321
## 101     No     Yes       Yes         No     No     Yes     No    No    487
## 102     No     Yes       Yes         No     No     Yes     No   Yes    278
## 103     No     Yes       Yes         No     No     Yes    Yes    No    186
## 104     No     Yes       Yes         No     No     Yes    Yes   Yes    140
## 105     No     Yes       Yes         No    Yes      No     No    No     76
## 106     No     Yes       Yes         No    Yes      No     No   Yes     55
## 107     No     Yes       Yes         No    Yes      No    Yes    No     67
## 108     No     Yes       Yes         No    Yes      No    Yes   Yes     52
## 109     No     Yes       Yes         No    Yes     Yes     No    No     53
## 110     No     Yes       Yes         No    Yes     Yes     No   Yes     89
## 111     No     Yes       Yes         No    Yes     Yes    Yes    No     33
## 112     No     Yes       Yes         No    Yes     Yes    Yes   Yes     56
## 113     No     Yes       Yes        Yes     No      No     No    No   1044
## 114     No     Yes       Yes        Yes     No      No     No   Yes    326
## 115     No     Yes       Yes        Yes     No      No    Yes    No    374
## 116     No     Yes       Yes        Yes     No      No    Yes   Yes    142
## 117     No     Yes       Yes        Yes     No     Yes     No    No    310
## 118     No     Yes       Yes        Yes     No     Yes     No   Yes    190
## 119     No     Yes       Yes        Yes     No     Yes    Yes    No    100
## 120     No     Yes       Yes        Yes     No     Yes    Yes   Yes     82
## 121     No     Yes       Yes        Yes    Yes      No     No    No     72
## 122     No     Yes       Yes        Yes    Yes      No     No   Yes     40
## 123     No     Yes       Yes        Yes    Yes      No    Yes    No     37
## 124     No     Yes       Yes        Yes    Yes      No    Yes   Yes     37
## 125     No     Yes       Yes        Yes    Yes     Yes     No    No     44
## 126     No     Yes       Yes        Yes    Yes     Yes     No   Yes     63
## 127     No     Yes       Yes        Yes    Yes     Yes    Yes    No     19
## 128     No     Yes       Yes        Yes    Yes     Yes    Yes   Yes     36
## 129    Yes      No        No         No     No      No     No    No  11164
## 130    Yes      No        No         No     No      No     No   Yes    287
## 131    Yes      No        No         No     No      No    Yes    No   1200
## 132    Yes      No        No         No     No      No    Yes   Yes    104
## 133    Yes      No        No         No     No     Yes     No    No    879
## 134    Yes      No        No         No     No     Yes     No   Yes    113
## 135    Yes      No        No         No     No     Yes    Yes    No    185
## 136    Yes      No        No         No     No     Yes    Yes   Yes     46
## 137    Yes      No        No         No    Yes      No     No    No     94
## 138    Yes      No        No         No    Yes      No     No   Yes     15
## 139    Yes      No        No         No    Yes      No    Yes    No     48
## 140    Yes      No        No         No    Yes      No    Yes   Yes      8
## 141    Yes      No        No         No    Yes     Yes     No    No     35
## 142    Yes      No        No         No    Yes     Yes     No   Yes     15
## 143    Yes      No        No         No    Yes     Yes    Yes    No     13
## 144    Yes      No        No         No    Yes     Yes    Yes   Yes      7
## 145    Yes      No        No        Yes     No      No     No    No   3372
## 146    Yes      No        No        Yes     No      No     No   Yes    111
## 147    Yes      No        No        Yes     No      No    Yes    No    345
## 148    Yes      No        No        Yes     No      No    Yes   Yes     28
## 149    Yes      No        No        Yes     No     Yes     No    No    358
## 150    Yes      No        No        Yes     No     Yes     No   Yes     43
## 151    Yes      No        No        Yes     No     Yes    Yes    No     58
## 152    Yes      No        No        Yes     No     Yes    Yes   Yes     15
## 153    Yes      No        No        Yes    Yes      No     No    No     52
## 154    Yes      No        No        Yes    Yes      No     No   Yes      4
## 155    Yes      No        No        Yes    Yes      No    Yes    No     16
## 156    Yes      No        No        Yes    Yes      No    Yes   Yes      7
## 157    Yes      No        No        Yes    Yes     Yes     No    No     19
## 158    Yes      No        No        Yes    Yes     Yes     No   Yes      8
## 159    Yes      No        No        Yes    Yes     Yes    Yes    No     11
## 160    Yes      No        No        Yes    Yes     Yes    Yes   Yes      4
## 161    Yes      No       Yes         No     No      No     No    No   3562
## 162    Yes      No       Yes         No     No      No     No   Yes    341
## 163    Yes      No       Yes         No     No      No    Yes    No   1135
## 164    Yes      No       Yes         No     No      No    Yes   Yes    169
## 165    Yes      No       Yes         No     No     Yes     No    No    784
## 166    Yes      No       Yes         No     No     Yes     No   Yes    192
## 167    Yes      No       Yes         No     No     Yes    Yes    No    280
## 168    Yes      No       Yes         No     No     Yes    Yes   Yes     86
## 169    Yes      No       Yes         No    Yes      No     No    No    120
## 170    Yes      No       Yes         No    Yes      No     No   Yes     28
## 171    Yes      No       Yes         No    Yes      No    Yes    No     70
## 172    Yes      No       Yes         No    Yes      No    Yes   Yes     24
## 173    Yes      No       Yes         No    Yes     Yes     No    No     79
## 174    Yes      No       Yes         No    Yes     Yes     No   Yes     36
## 175    Yes      No       Yes         No    Yes     Yes    Yes    No     35
## 176    Yes      No       Yes         No    Yes     Yes    Yes   Yes     24
## 177    Yes      No       Yes        Yes     No      No     No    No   2102
## 178    Yes      No       Yes        Yes     No      No     No   Yes    189
## 179    Yes      No       Yes        Yes     No      No    Yes    No    511
## 180    Yes      No       Yes        Yes     No      No    Yes   Yes     94
## 181    Yes      No       Yes        Yes     No     Yes     No    No    566
## 182    Yes      No       Yes        Yes     No     Yes     No   Yes    135
## 183    Yes      No       Yes        Yes     No     Yes    Yes    No    145
## 184    Yes      No       Yes        Yes     No     Yes    Yes   Yes     60
## 185    Yes      No       Yes        Yes    Yes      No     No    No     81
## 186    Yes      No       Yes        Yes    Yes      No     No   Yes     22
## 187    Yes      No       Yes        Yes    Yes      No    Yes    No     47
## 188    Yes      No       Yes        Yes    Yes      No    Yes   Yes     20
## 189    Yes      No       Yes        Yes    Yes     Yes     No    No     78
## 190    Yes      No       Yes        Yes    Yes     Yes     No   Yes     39
## 191    Yes      No       Yes        Yes    Yes     Yes    Yes    No     26
## 192    Yes      No       Yes        Yes    Yes     Yes    Yes   Yes     20
## 193    Yes     Yes        No         No     No      No     No    No    921
## 194    Yes     Yes        No         No     No      No     No   Yes    131
## 195    Yes     Yes        No         No     No      No    Yes    No    236
## 196    Yes     Yes        No         No     No      No    Yes   Yes     73
## 197    Yes     Yes        No         No     No     Yes     No    No    159
## 198    Yes     Yes        No         No     No     Yes     No   Yes     73
## 199    Yes     Yes        No         No     No     Yes    Yes    No     68
## 200    Yes     Yes        No         No     No     Yes    Yes   Yes     32
## 201    Yes     Yes        No         No    Yes      No     No    No     33
## 202    Yes     Yes        No         No    Yes      No     No   Yes     12
## 203    Yes     Yes        No         No    Yes      No    Yes    No     12
## 204    Yes     Yes        No         No    Yes      No    Yes   Yes      8
## 205    Yes     Yes        No         No    Yes     Yes     No    No      8
## 206    Yes     Yes        No         No    Yes     Yes     No   Yes     14
## 207    Yes     Yes        No         No    Yes     Yes    Yes    No      4
## 208    Yes     Yes        No         No    Yes     Yes    Yes   Yes      9
## 209    Yes     Yes        No        Yes     No      No     No    No    462
## 210    Yes     Yes        No        Yes     No      No     No   Yes     89
## 211    Yes     Yes        No        Yes     No      No    Yes    No     93
## 212    Yes     Yes        No        Yes     No      No    Yes   Yes     29
## 213    Yes     Yes        No        Yes     No     Yes     No    No    123
## 214    Yes     Yes        No        Yes     No     Yes     No   Yes     44
## 215    Yes     Yes        No        Yes     No     Yes    Yes    No     29
## 216    Yes     Yes        No        Yes     No     Yes    Yes   Yes     16
## 217    Yes     Yes        No        Yes    Yes      No     No    No     16
## 218    Yes     Yes        No        Yes    Yes      No     No   Yes      9
## 219    Yes     Yes        No        Yes    Yes      No    Yes    No     11
## 220    Yes     Yes        No        Yes    Yes      No    Yes   Yes      3
## 221    Yes     Yes        No        Yes    Yes     Yes     No    No      9
## 222    Yes     Yes        No        Yes    Yes     Yes     No   Yes     10
## 223    Yes     Yes        No        Yes    Yes     Yes    Yes    No     11
## 224    Yes     Yes        No        Yes    Yes     Yes    Yes   Yes      7
## 225    Yes     Yes       Yes         No     No      No     No    No   1017
## 226    Yes     Yes       Yes         No     No      No     No   Yes    275
## 227    Yes     Yes       Yes         No     No      No    Yes    No    445
## 228    Yes     Yes       Yes         No     No      No    Yes   Yes    154
## 229    Yes     Yes       Yes         No     No     Yes     No    No    374
## 230    Yes     Yes       Yes         No     No     Yes     No   Yes    192
## 231    Yes     Yes       Yes         No     No     Yes    Yes    No    153
## 232    Yes     Yes       Yes         No     No     Yes    Yes   Yes    112
## 233    Yes     Yes       Yes         No    Yes      No     No    No     62
## 234    Yes     Yes       Yes         No    Yes      No     No   Yes     38
## 235    Yes     Yes       Yes         No    Yes      No    Yes    No     45
## 236    Yes     Yes       Yes         No    Yes      No    Yes   Yes     32
## 237    Yes     Yes       Yes         No    Yes     Yes     No    No     54
## 238    Yes     Yes       Yes         No    Yes     Yes     No   Yes     64
## 239    Yes     Yes       Yes         No    Yes     Yes    Yes    No     35
## 240    Yes     Yes       Yes         No    Yes     Yes    Yes   Yes     45
## 241    Yes     Yes       Yes        Yes     No      No     No    No    976
## 242    Yes     Yes       Yes        Yes     No      No     No   Yes    280
## 243    Yes     Yes       Yes        Yes     No      No    Yes    No    306
## 244    Yes     Yes       Yes        Yes     No      No    Yes   Yes    149
## 245    Yes     Yes       Yes        Yes     No     Yes     No    No    403
## 246    Yes     Yes       Yes        Yes     No     Yes     No   Yes    230
## 247    Yes     Yes       Yes        Yes     No     Yes    Yes    No    143
## 248    Yes     Yes       Yes        Yes     No     Yes    Yes   Yes    102
## 249    Yes     Yes       Yes        Yes    Yes      No     No    No     78
## 250    Yes     Yes       Yes        Yes    Yes      No     No   Yes     49
## 251    Yes     Yes       Yes        Yes    Yes      No    Yes    No     44
## 252    Yes     Yes       Yes        Yes    Yes      No    Yes   Yes     43
## 253    Yes     Yes       Yes        Yes    Yes     Yes     No    No     76
## 254    Yes     Yes       Yes        Yes    Yes     Yes     No   Yes     79
## 255    Yes     Yes       Yes        Yes    Yes     Yes    Yes    No     35
## 256    Yes     Yes       Yes        Yes    Yes     Yes    Yes   Yes     52

G1. ASTHMA

table1 <- table(BRFSS$ASTHMA, BRFSS$MICHD)
table1
##      
##           No    Yes
##   No  221067  19845
##   Yes  33911   4749
ggplot(BRFSS, aes(x = MICHD , group = ASTHMA)) + 
  geombar_facet +
  geomtext_facet  +
  scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ ASTHMA) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

prop.test(table(BRFSS$ASTHMA, BRFSS$MICHD), correct = FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$ASTHMA, BRFSS$MICHD)
## X-squared = 679.92, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.03701430 0.04391704
## sample estimates:
##    prop 1    prop 2 
## 0.9176255 0.8771599

G2. PULMOND

ggplot(BRFSS, aes(x= MICHD , group = PULMOND)) + 
  geombar_facet +
  geomtext_facet  +
  scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ PULMOND) +
  themebar_facet

prop.test(table(BRFSS$PULMOND, BRFSS$MICHD), correct = FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$PULMOND, BRFSS$MICHD)
## X-squared = 10375, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.1958027 0.2076973
## sample estimates:
##    prop 1    prop 2 
## 0.9280610 0.7263111

G3. ARTHRITIS

ggplot(BRFSS, aes(x = MICHD , group = ARTHRITIS)) + 
  geombar_facet +
  geomtext_facet  +
  scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ ARTHRITIS) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

prop.test(table(BRFSS$ARTHRITIS, BRFSS$MICHD), correct=FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$ARTHRITIS, BRFSS$MICHD)
## X-squared = 7715.3, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.09723545 0.10231039
## sample estimates:
##    prop 1    prop 2 
## 0.9453475 0.8455746

G4. DEPRESSION

ggplot(BRFSS, aes(x = MICHD, group = DEPRESSION)) + 
  geombar_facet +
  geomtext_facet  +
  scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ DEPRESSION) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

prop.test(table(BRFSS$DEPRESSION, BRFSS$MICHD), correct = FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$DEPRESSION, BRFSS$MICHD)
## X-squared = 761.52, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.03484365 0.04080930
## sample estimates:
##    prop 1    prop 2 
## 0.9191460 0.8813195

G5.KIDNEY

ggplot(BRFSS, aes(x = MICHD , group = KIDNEY)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ KIDNEY) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

prop.test(table(BRFSS$KIDNEY, BRFSS$MICHD), correct=FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$KIDNEY, BRFSS$MICHD)
## X-squared = 5868.8, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.2114302 0.2294740
## sample estimates:
##    prop 1    prop 2 
## 0.9199546 0.6995025

G6. DIABETE3 ==> DIABETE

ggplot(BRFSS, aes(x = MICHD , group = DIABETE)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ DIABETE) +
  themebar_facet

prop.test(table(BRFSS$DIABETE, BRFSS$MICHD), correct = FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$DIABETE, BRFSS$MICHD)
## X-squared = 8616.9, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.1390752 0.1474029
## sample estimates:
##    prop 1    prop 2 
## 0.9321083 0.7888693

G7. CHCSCNCR,CHCOCNCR ==> CANCER

ggplot(BRFSS, aes(x = MICHD , group = CANCER)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ CANCER) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

prop.test(table(BRFSS$CANCER, BRFSS$MICHD), correct = FALSE) 
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$CANCER, BRFSS$MICHD)
## X-squared = 3695.9, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.08329296 0.09022248
## sample estimates:
##    prop 1    prop 2 
## 0.9267544 0.8399966
ggplot(BRFSS, aes(x = MICHD , group = STROKE)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ STROKE) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

Among pp who have MICHD

CHC <- c("ARTHRITIS", "ASHMA", " CANCER","DEPRESSION", "DIABETE","KIDNEY", "PULMON", "STROKE")
CHC_NO <- c(7.0,9.6,8.7,9.4,8.0,9.0,8.0,8.0)
CHC_YES <- c(17,14.9,17.9,14.3,23,32,30,36.0)
dataCHC <- cbind.data.frame(CHC, CHC_NO, CHC_YES)
dataCHC
##          CHC CHC_NO CHC_YES
## 1  ARTHRITIS    7.0    17.0
## 2      ASHMA    9.6    14.9
## 3     CANCER    8.7    17.9
## 4 DEPRESSION    9.4    14.3
## 5    DIABETE    8.0    23.0
## 6     KIDNEY    9.0    32.0
## 7     PULMON    8.0    30.0
## 8     STROKE    8.0    36.0
library(ggthemes)
## 
## Attaching package: 'ggthemes'
## The following object is masked from 'package:cowplot':
## 
##     theme_map
CHC_MICHD <- read.csv(file="C:/Users/user/Desktop/Capstone Project/Submission/R codes/Chronic health.csv", header=TRUE, sep=",")
CHC_MICHD
##    ï..ChronicHLTH CHC.status MICHD.NO MICHD.YES
## 1          ASTHMA         NO     90.4       9.6
## 2          ASTHMA        YES     85.1      14.9
## 3         PULMOND         NO     92.0       8.0
## 4         PULMOND        YES     70.0      30.0
## 5       ARTHRITIS         NO     93.0       7.0
## 6       ARTHRITIS        YES     83.0      17.0
## 7      DEPRESSION         NO     90.6       9.4
## 8      DEPRESSION        YES     85.7      14.3
## 9          KIDNEY         NO     91.0       9.0
## 10         KIDNEY        YES     68.0      32.0
## 11        DIABETE         NO     92.0       8.0
## 12        DIABETE        YES     77.0      23.0
## 13         CANCER         NO     91.3       8.7
## 14         CANCER        YES     82.1      17.9
## 15         STROKE         NO     92.0       8.0
## 16         STROKE        YES     64.0      36.0
ggplot(CHC_MICHD, aes(CHC.status, MICHD.YES)) +
  geom_bar(stat = "identity") +
  facet_wrap(~ï..ChronicHLTH, nrow = 2) +
  geom_text(aes(label = paste0(MICHD.YES, "%"), 
                y = MICHD.YES),
            vjust = 1.4, 
            size = 5, 
            color = "white") +
  theme(axis.text.x = element_text(angle = 90,  hjust = 1),
        axis.ticks = element_blank(),
        axis.text.y = element_blank()) +
  themebar_facet +
   aes(fill = as.factor(CHC.status)) +
  scale_fill_manual(values = c("#00AFBB", "goldenrod4")) +
  labs(x = "Status (Yes/No) of chronic health condition",
       y = "Percent of respondents (%)",
       caption = "Data source: BRFSS2018") 

Among people having MICHD

ggplot(CHC_MICHD, aes(CHC.status,ï..ChronicHLTH, 
                      fill = MICHD.YES)) +
  geom_tile(show.legend = TRUE) +
  geom_text(aes(label = paste0(MICHD.YES, "%")), color = "white") +
  labs(x = "Chronic Health Conditions Status",
    y = "Chronic Health Conditions",
    color = "Having MICHD") +
  theme(axis.text = element_text(size = 12, color = "black"),
        axis.title = element_text(size = 12, face="bold"),
        plot.caption = element_text(color = "grey", size = 12, face = "italic"))

Oral Health (H) H1. RMVTETH4 ==> RMVTETH H2. LASTDEN4 ===> LASTDEN

H1. RMVTETH4 ==> RMVTETH

ggplot(BRFSS, aes(x = MICHD , group = RMVTETH)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values = c("gainsboro", "seagreen4")) +
    labs(y = "Percent of respondents", fill = "MICHD") +
    facet_grid(~ RMVTETH) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

prop.test(table(BRFSS$RMVTETH, BRFSS$MICHD), correct = FALSE)
## 
##  4-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$RMVTETH, BRFSS$MICHD)
## X-squared = 11997, df = 3, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4 
## 0.9553718 0.9040012 0.8193665 0.7600530

H2. LASTDEN4 ===> LASTDENTV

ggplot(BRFSS, aes(x= MICHD , group = LASTDENTV)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "seagreen4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ LASTDENTV) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

prop.test(table(BRFSS$LASTDENTV, BRFSS$MICHD), correct=FALSE)
## 
##  4-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$LASTDENTV, BRFSS$MICHD)
## X-squared = 2054.9, df = 3, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4 
## 0.9248640 0.9072394 0.8948507 0.8479726
MICHDrvmteeth <- data.frame(
  teeth_group = c("0","1-5",">= 6", "All"),
  Percentage = c(4.5, 9.6, 18.1, 24))
MICHDrvmteeth
##   teeth_group Percentage
## 1           0        4.5
## 2         1-5        9.6
## 3        >= 6       18.1
## 4         All       24.0
positions <- c("0","1-5",">= 6", "All")

ggplot(data = MICHDrvmteeth ,
           aes(x = positions, 
               y  = Percentage)) +
  geom_bar(stat = "identity",
           fill = "seagreen4",
           width = 0.6)+
  geom_text(aes(label = paste0(Percentage,"%")), 
            vjust = -0.5, 
            color = "black", 
            size = 4.5) +
  scale_x_discrete(limits = positions) +
  scale_y_continuous(limits = c(0,25)) +
  theme_minimal() +
  labs(x = "Number of teeth were removed",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(size = 12),
        legend.position = "non")

MICHD_lastvdent <- data.frame(
  teeth_group = c("<1","1-<2","2-<5", "5+"),
  Percentage = c(7.5, 9.3, 10.5, 15.2))

positions <- c("<1","1-<2","2-<5", "5+")

ggplot(data = MICHD_lastvdent,
           aes(x = positions, 
               y  = Percentage)) +
  geom_bar(stat = "identity",
           fill = "seagreen4",
           width = 0.6) +
  geom_text(aes(label = paste0(Percentage,"%")), 
            vjust = -0.5, 
            color = "black", 
            size = 4.5) +
  scale_x_discrete(limits = positions) +
  scale_y_continuous(limits = c(0,20)) +
  theme_minimal() +
  labs(x = "Last time you visited dental clinic ",
       y = "Percentage (%)",
       caption = "Data source: BRFSS2018") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 12, face = "bold"),
        plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
        legend.title = element_text(color = "black", size = 12),
        legend.text = element_text(size = 12),
        legend.position = "non")

MICAT (I) I1. X_BMI5CAT ===> BMICAT

ggplot(BRFSS, aes(x= MICHD , group = BMICAT)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "firebrick4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ BMICAT) +
    scale_y_continuous(labels = scales::percent) +
  themebar_facet

prop.test(table(BRFSS$BMICAT, BRFSS$MICHD), correct = FALSE)
## 
##  4-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$BMICAT, BRFSS$MICHD)
## X-squared = 1051.4, df = 3, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4 
## 0.9099751 0.9359832 0.9104626 0.8920471

COMMUNITY (J)

ggplot(BRFSS, aes(x= MICHD , group = COMMUN)) + 
  geombar_facet +
  geomtext_facet +
  scale_fill_manual(values= c("gainsboro", "cyan4")) +
    labs(y = "Percent of respondents", fill="MICHD") +
    facet_grid(~ COMMUN) +
    scale_y_continuous(labels = scales::percent)+
  themebar_facet

prop.test(table(BRFSS$COMMUN,BRFSS$MICHD), correct = FALSE) 
## 
##  3-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(BRFSS$COMMUN, BRFSS$MICHD)
## X-squared = 335.84, df = 2, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3 
## 0.8954441 0.8995739 0.9186046