library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## -- Conflicts -------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(ggpol)
library(DataExplorer)
library(knitr)
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.6.3
## corrplot 0.84 loaded
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
BRFSS <- read.csv(file="C:/Users/user/Desktop/Capstone Project/Submission/R codes/BRFSS2018-V3.csv", header = TRUE, sep = ",")
plot_intro(BRFSS)
str(BRFSS)
## 'data.frame': 279572 obs. of 34 variables:
## $ IDAY : int 5 12 3 10 9 10 10 3 11 12 ...
## $ MONTH : Factor w/ 12 levels "Apr","Aug","Dec",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ Region : Factor w/ 4 levels "Midwest ","Northeast",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ AGE : Factor w/ 7 levels "18-29","30-39",..: 7 2 5 6 4 3 7 4 3 6 ...
## $ GENDER : Factor w/ 2 levels "Female","Male": 1 1 2 1 2 1 2 1 1 1 ...
## $ RACE : Factor w/ 6 levels "AIAN","Asian",..: 6 3 6 6 6 3 6 6 6 6 ...
## $ RELP : Factor w/ 4 levels "In relationship",..: 4 3 2 1 1 2 1 1 1 4 ...
## $ EDU : Factor w/ 4 levels "<8","12-15","16+",..: 3 3 4 3 4 3 4 2 2 4 ...
## $ EMPL : Factor w/ 4 levels "Employed","Occupations",..: 1 1 3 2 1 1 3 1 4 3 ...
## $ INCOME : Factor w/ 5 levels "$20K-<$35K","$35K-<$50K",..: 2 1 5 4 4 4 1 2 1 1 ...
## $ RENTHOME : Factor w/ 3 levels "Other","Own",..: 2 3 2 2 2 3 2 2 2 2 ...
## $ COMMUN : Factor w/ 3 levels "Rural","Suburban",..: 1 3 3 3 1 3 3 2 2 3 ...
## $ ALCpa30 : Factor w/ 3 levels "0","1-9","10-30": 1 2 1 1 2 1 1 3 2 1 ...
## $ SMOKESTAT : Factor w/ 4 levels "Daily smoker",..: 3 1 3 3 2 3 2 3 2 3 ...
## $ EXERpa30 : Factor w/ 2 levels "No","Yes": 1 2 2 2 1 2 1 2 2 1 ...
## $ SLEPTIM1 : int 7 5 6 8 6 7 10 6 6 6 ...
## $ GENHLTH : Factor w/ 5 levels "Excellent","Fair",..: 5 3 1 1 5 3 1 3 5 2 ...
## $ MENTHLTHpa30: Factor w/ 3 levels "0","1-9","10-30": 1 1 1 1 1 1 1 2 2 1 ...
## $ PHYSHLTHpa30: Factor w/ 3 levels "0","1-9","10-30": 3 1 1 1 1 2 1 2 2 1 ...
## $ HLTHPLN : Factor w/ 2 levels "No","Yes": 2 1 2 2 2 2 2 2 1 2 ...
## $ CHECKUP : Factor w/ 4 levels "<1","1-<2","2-<5",..: 1 2 1 1 1 1 1 1 1 1 ...
## $ MEDICOST : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 2 1 ...
## $ ARTHRITIS : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 1 2 2 ...
## $ ASTHMA : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 2 1 1 ...
## $ CANCER : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 2 1 1 ...
## $ DEPRESSION : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 1 ...
## $ DIABETE : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 2 ...
## $ KIDNEY : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ PULMOND : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 2 1 1 1 1 ...
## $ STROKE : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ LASTDENTV : Factor w/ 4 levels "<1","1-<2","2-<5",..: 1 2 3 1 1 1 1 1 1 2 ...
## $ RMVTETH : Factor w/ 4 levels ">= 6","0","1-5",..: 3 2 3 2 2 2 1 2 1 3 ...
## $ BMICAT : Factor w/ 4 levels "Normal","Obese",..: 1 2 3 1 3 3 3 2 3 2 ...
## $ MICHD : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
library(ggridges)
library(ggplot2)
1.2. Building Proportion Tables Example
MICHFreq <- table(BRFSS$MICHD)
MICHFreq
##
## No Yes
## 254978 24594
prop.table(MICHFreq)
##
## No Yes
## 0.91202982 0.08797018
library(scales)
library(tidyverse)
library(pander)
Defining some sets of color
col2 <- c("#A9A9A9", "#FF8C00")
col3 <- c("#003f5c", "#bc5090","#ffa600")
col4 <- c("#FB6542","#3F681C","#4D585B", "#375E97")
col5 <- c("#BD3E85","#3F681C","#FFBB00", "#FB6542", "#375E97")
col6 <- c("#0072BB", "#FF4C3B","#FFD034","#51A39D","#06425C", "#C6C8CA")
col7 <- c("#0072BB", "#FF4C3B","#FFD034","#51A39D","#06425C", "#C6C8CA","#814374")
The outcome (MICHD)
The characteristics of the sample were examined by calculating frequencies of categorical variables and the mean value of a continuous variable (sleep duration is only a numeric attribute in the data).
The outcome of interest, MICHD, was defined as a “Yes” response to having been “ever diagnosed with” at least one of the following: myocardial infarction/heart attack, angina or coronary heart disease. Of the 279572 individuals included in this analysis, 24594 respondents had MICHD
BRFSS %>%
group_by(MICHD) %>%
dplyr::count() %>%
pander()
| MICHD | n |
|---|---|
| No | 254978 |
| Yes | 24594 |
df_MICHD <- BRFSS %>%
group_by(MICHD) %>%
dplyr::count() %>%
ungroup()%>%
arrange(desc(MICHD)) %>%
mutate(Percentage = round(n/sum(n),4)*100,
lab.pos = cumsum(Percentage)-0.5*Percentage)
ggplot(data = df_MICHD,
aes(x = "",
y = Percentage,
fill = MICHD))+
geom_bar(stat = "identity")+
coord_polar("y") +
geom_text(aes(y = lab.pos,
label = paste(Percentage,"%", sep = "")),
col = "blue",
size = 6) +
theme( legend.box = "horizontal",
legend.text = element_text(color = "black", size = 12))+
theme_void() +
scale_fill_manual(values = col2)
Region
library(treemapify)
BRFSS %>%
group_by(Region) %>%
dplyr::count() %>%
pander()
| Region | n |
|---|---|
| Midwest | 76268 |
| Northeast | 57288 |
| South | 83856 |
| West | 62160 |
df_region <- BRFSS %>%
group_by(Region) %>%
dplyr::count() %>%
ungroup()%>%
arrange(desc(Region)) %>%
mutate(percentage = round(n/sum(n),4)*100,
lab.pos = cumsum(percentage)-.5*percentage)
ggplot(data = df_region,
aes(x = "",
y = percentage,
fill = Region))+
geom_bar(stat = "identity")+
coord_polar("y") +
geom_text(aes(y = lab.pos,
label = paste(percentage,"%", sep = "")), col = "white", size = 5) +
scale_fill_manual(values = col4) +
theme_void() +
theme(legend.title = element_text(color = "black", size = 14),
legend.text = element_text(color = "black", size = 14))
Demographics (B) B1. AGE B2. GENDER B3. X_RACE B4. RELPT B5. EDU B6. EMPL B7. INCOME1 B8. RENTHOME
Defining theme and geomtext for all bar charts
theme_bar <- theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44",
size = 12,
face = "italic"),
legend.text = element_text(colour="black", size = 12))
geomtext_bar <- geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.3,
size = 4,
fontface = "bold")
theme_point <- theme(axis.text = element_text(size = 12, color = "black"),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey", size = 12, face = "italic"),
legend.text = element_text(colour="black", size = 12),
legend.position="right")
themebar_stacked <- theme(axis.text = element_text(size = 12, color = "black"),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey", size = 12, face = "italic"),
legend.text = element_text(colour="black", size = 12),
legend.position="right")
themebar_facet <- theme(axis.text = element_text(size = 12, color = "black"),
axis.title = element_text(size = 12, face= "bold"),
plot.title = element_text(color = "black", size = 12, face = "bold.italic"),
plot.caption = element_text(color = "grey", size = 12, face = "italic"),
strip.text = element_text(size = 12, color = "black", face = "bold.italic"),
strip.background = element_rect(fill = "slategray1"),
legend.text = element_text(colour="black", size = 20),
legend.position = "none")
B1.AGE
ggplot(BRFSS, aes(x = AGE)) +
geom_bar(aes(y = (..count..)/sum(..count..)),
fill = "royalblue4",
width = 0.75) +
scale_y_continuous(labels = percent)+
geom_text(size = 4.5,
color= "black",
aes(y = ((..count..)/sum(..count..)),
label = scales::percent((..count..)/sum(..count..))),
stat = "count",
vjust = -0.3) +
labs(x = "Age group",
y = "Percent of respondents",
caption = "Data source: BRFSS2018") +
theme_bar
Other way to make the same graph
BRFSS %>%
group_by(AGE) %>%
dplyr::summarise(Count = n()) %>%
mutate(Agegroup = factor(AGE),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100)) %>%
ggplot(aes(x = AGE,
y = Percentage)) +
geom_bar(stat = "identity",
fill = "royalblue4",
width = 0.80) +
geomtext_bar +
labs(x = "Age group",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
More than 60% of participants were older than 50 years
library(ggimage)
library(extrafont)
## Registering fonts with R
loadfonts(device = "win")
df1_age <- data.frame(grp = c("18-29","30-39","40-49","50-59","60-69","70-79","80+"),
y = 7:1,
img = rep("./man.png",7),
freq = c(10.52,12.32,13.71,19.05,22.61,15.30,6.48))
df2_age <- df1_age[rep(seq_len(nrow(df1_age)), df1_age$freq), 1:3]
df2_age <- df2_age %>% group_by(grp) %>% mutate(x = row_number())
ggplot(df2_age) +
geom_image(aes(x = x,
y = y,
image = img),
size = 0.045) +
theme_void() +
geom_text(data = df1_age,
aes(y = y + 0.1,
label = paste(freq,"%", sep = "")),
x = 24,
fontface = 2,
size = 4.5,
color = "blue") +
geom_text(data = df1_age,
aes(y = y + 0.2,
label = grp),
x = -0.3,
fontface = 2,
size = 4.5,
color = "black") +
xlim(-1,max(df1_age$freq)+2) +
ylim(0,max(df1_age$y)+1)
B2. GENDER
tbl_gender <- table(BRFSS$GENDER)
tbl_gender
##
## Female Male
## 145904 133668
prop.table(tbl_gender)
##
## Female Male
## 0.5218835 0.4781165
library(png)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
genderselection <- read.table(text="
Gender Freq
F 52
M 48
", header=T)
pcts <- round(prop.table(genderselection$Freq)*100)
# Load png file with man and woman
img <- readPNG("manfemale.png")
h <- dim(img)[1]
w <- dim(img)[2]
# Find the rows where feet starts and head ends
pos1 <- which(apply(img[,,1], 1, function(y) any(y==1)))
mn1 <- min(pos1)
mx1 <- max(pos1)
pospctM <- round((mx1-mn1)*pcts[2]/100+mn1)
pospctF <- round((mx1-mn1)*pcts[1]/100+mn1)
# Fill bodies with a different color according to percentages
imgmtx <- img[h:1,,1]
whitemtx <- (imgmtx==1)
colmtx <- matrix(rep(FALSE,h*w),nrow=h)
midpt <- round(w/2)-10
colmtx[mx1:pospctM,1:midpt] <- TRUE
colmtx[mx1:pospctF,(midpt+1):w] <- TRUE
imgmtx[whitemtx & colmtx] <- 0.5
# Plot matrix using heatmap and print text
labs <- c(paste0(pcts[2], "% Males"),paste0(pcts[1], "% Females"))
ax <- list(ticks = '',
showticklabels = FALSE,
showgrid = FALSE,
zeroline = FALSE)
p <- plot_ly(z = imgmtx,
showscale = FALSE,
type = 'heatmap',
width = 500,
height = 500) %>%
add_text(x = c(100,250), y = c(20,20), type = 'heatmap', mode = "text",
text = labs, showlegend = FALSE, textfont = list(size = 20, color = "#FFFFFF"), inherit = FALSE) %>%
layout(xaxis = ax, yaxis = ax)
p
B3. RACE
BRFSS$RACE <- factor(BRFSS$RACE,
levels = c("AIAN",
"Asian",
"Black",
"Hispanic",
"White",
"Others"), ordered = TRUE)
BRFSS %>%
group_by(RACE) %>%
dplyr::summarise(Count = n()) %>%
mutate(Agegroup = factor(RACE),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100)) %>%
ggplot(aes(x = RACE,
y = Percentage)) +
geom_bar(stat = "identity",
color = "blue",
fill = "royalblue4",
width = 0.75) +
geomtext_bar +
labs(x = "Race",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
The majority of participants are white, followed by black, with very few Asian or American Indians
Alternative graphs
library(treemapify)
library(treemap)
## Warning: package 'treemap' was built under R version 3.6.3
plotdata2 <- BRFSS %>%
dplyr::count(RACE)
ggplot(plotdata2,
aes(fill = RACE,
area = n,
label = RACE)) +
geom_treemap() +
geom_treemap_text(colour = "dark blue",
place = "centre",
fontface = "italic") +
labs(title = "Race of respondents") +
scale_fill_brewer(palette = "RdYlBu")+
theme(legend.position = "none")
Regroup AIAN, Asian, Multiracial and other into a group OTHERs
df2_race <- data.frame(
RACE = c("White", "Black","Hispanic", "Others"),
Percentage = c(78.96, 7.73, 6.69, 6.62))
df3_race <- df2_race %>%
arrange(desc(RACE)) %>%
mutate(lab.pos = cumsum(Percentage),
centre = lab.pos - 0.5*Percentage)
ggplot(data = df3_race,
aes(x = "",
y = Percentage,
fill = RACE))+
geom_bar(stat = "identity")+
coord_polar("y") +
geom_text(aes(y = centre,
label = paste(Percentage,"%", sep = "")),
col = "white") +
theme_void() +
scale_fill_manual(values = col4)
library(ggforce) # for 'geom_arc_bar'
##
## Attaching package: 'ggforce'
## The following objects are masked from 'package:ggpol':
##
## geom_circle, GeomCircle, stat_circle, StatCircle
BRFSS %>%
group_by(RACE) %>%
dplyr::count()
## # A tibble: 6 x 2
## # Groups: RACE [6]
## RACE n
## <ord> <int>
## 1 AIAN 4652
## 2 Asian 5670
## 3 Black 21602
## 4 Hispanic 18717
## 5 White 220761
## 6 Others 8170
df3_race <- data.frame(
RACE = c("White", "Black","Hispanic", "Others"),
Value = c(220761, 21602 , 18717, 18492))
df3_race <- df3_race %>%
mutate(end = 2 * pi * cumsum(Value)/sum(Value),
start = lag(end, default = 0),
middle = 0.5 * (start + end),
hjust = ifelse(middle > pi, 1, 0),
vjust = ifelse(middle < pi/2 | middle > 3 * pi/2, 0, 1))
df3_race$Label <- paste(round(((df3_race$Value/sum(df3_race$Value))*100),2),"%")
ggplot(df3_race) +
geom_arc_bar(aes(x0 = 0,
y0 = 0,
r0 = 0,
r = 1,
start = start,
end = end,
fill = RACE)) +
scale_fill_manual(values = col4) +
geom_text(aes(x = 1.05 * sin(middle),
y = 1.05 * cos(middle),
label = Label,
hjust = hjust,
vjust = vjust)) +
coord_fixed() +
scale_x_continuous(limits = c(-1.5, 1.5), # Adjust so labels are not cut off
name = "", breaks = NULL, labels = NULL) +
scale_y_continuous(limits = c(-1.25,1.25), # Adjust so labels are not cut off
name = "", breaks = NULL, labels = NULL)
xRace <- c(78.96, 7.72, 6.69, 6.63)
labels <- c("White", "Black","Hispanic", "Others")
pct <- xRace/sum(xRace)*100
lbls <- paste(labels, pct) # add percents to labels
lbls <- paste(lbls,"%",sep="") # ad % to labels
pie(xRace,labels = lbls, col= col4)
AIAN 1.66 Asian 2.03 Black 7.73 Hispanic 6.69 White 78.96 Others 2.92
xRace2 <- c(78.96, 7.73, 6.69,1.66, 2.03, 2.92)
labels2 <- c("White", "Black", "Hispanic","AIAN", "Asian", "Others")
pct2 <- round(xRace2/sum(xRace2)*100,2)
lbls2 <- paste(labels2, pct2) # add percents to labels
lbls2 <- paste(lbls2,"%",sep="") # ad % to labels
pie(xRace,labels = lbls2, col= col6)
B5. EDUCATION(EDU)
BRFSS$EDU <- factor(BRFSS$EDU,
levels = c("<8",
"9-12",
"12-15",
"16+"), ordered = TRUE)
BRFSS %>%
group_by(EDU) %>%
dplyr::summarise(Count = n()) %>%
mutate(EDUgroup = factor(EDU),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100)) %>%
ggplot(aes(x = EDU ,
y = Percentage)) +
geom_bar(stat = "identity",
color = "blue",
fill = "royalblue4",
width = 0.5) +
geomtext_bar +
labs(x = "Years of education",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
library(echarts4r)
## Warning: package 'echarts4r' was built under R version 3.6.3
## Welcome to echarts4r
##
## Docs: echarts4r.john-coene.com
df2_edu <- data.frame(
x = c("<8","9-12", "12-15", "16+"),
y = c(1.64, 29.41, 27.99, 40.95))
df2_edu %>%
e_charts(x) %>%
e_pictorial(y, symbol = paste0("image://","https://1.bp.blogspot.com/-klwxpFekdEQ/XOubIhkalyI/AAAAAAAAHlE/25psl9x4oNkbJoLc2CKTXgV2pEj6tAvigCLcBGAs/s1600/pencil.png", size = 0.001)) %>%
e_theme("westeros") %>%
e_labels(show = TRUE, postion = "top", fontSize = 20, color = "blue")%>%
e_legend(show = FALSE) %>%
e_x_axis(splitLine=list(show = FALSE,fontSize = 30)) %>%
e_y_axis(show=FALSE, splitLine=list(show = FALSE,fontSize = 20))
## Warning in if (deparse(substitute(symbol)) %in% colnames(e$x$data[[1]])) symbol
## <- deparse(substitute(symbol)): the condition has length > 1 and only the first
## element will be used
B6. EMPLOYMENT
BRFSS$EMPL <- factor(BRFSS$EMPL,
levels = c("Employed",
"Unemployed",
"Occupations",
"Retired"), ordered = TRUE)
tbl_empl <- table(BRFSS$EMPL)
pro_tbl_empl <- prop.table(tbl_empl)
pro_tbl_empl
##
## Employed Unemployed Occupations Retired
## 0.55142861 0.09831099 0.06583993 0.28442047
df_empl <- BRFSS %>%
group_by(EMPL) %>%
dplyr::summarise(Count = n()) %>%
mutate(EMPLgroup = factor(EMPL),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
df_empl %>%
pander()
| EMPL | Count | EMPLgroup | Percentage | label |
|---|---|---|---|---|
| Employed | 154164 | Employed | 55.14 | 55.1% |
| Unemployed | 27485 | Unemployed | 9.83 | 9.8% |
| Occupations | 18407 | Occupations | 6.58 | 6.6% |
| Retired | 79516 | Retired | 28.44 | 28.4% |
ggplot(df_empl,
aes(x = EMPL,
y = Percentage)) +
geom_bar(stat = "identity",
color = "blue",
fill = "royalblue4",
width = 0.5) +
geomtext_bar +
labs(x = "Employment status",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
B4. RELP (Relationship STATUS)
BRFSS %>%
group_by(RELP) %>%
dplyr::count()
## # A tibble: 4 x 2
## # Groups: RELP [4]
## RELP n
## <fct> <int>
## 1 In relationship 161013
## 2 Living apart 44731
## 3 Never married 45149
## 4 Widowed 28679
BRFSS$RELP <- factor(BRFSS$RELP,
levels = c("In relationship",
"Living apart",
"Never married",
"Widowed"), ordered = TRUE)
The table is maybe better
df_RELP <- BRFSS %>%
group_by(RELP) %>%
dplyr::summarise(Count = n()) %>%
mutate(RELPgroup = factor(RELP),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
df_RELP %>%
pander()
| RELP | Count | RELPgroup | Percentage | label |
|---|---|---|---|---|
| In relationship | 161013 | In relationship | 57.59 | 57.59% |
| Living apart | 44731 | Living apart | 16 | 16.00% |
| Never married | 45149 | Never married | 16.15 | 16.15% |
| Widowed | 28679 | Widowed | 10.26 | 10.26% |
ggplot(df_RELP,
aes(x = RELP,
y = Percentage)) +
geom_bar(stat = "identity",
color = "blue",
fill = "royalblue4",
width = 0.5) +
geomtext_bar +
labs(x = "Relationship status",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
B5. INCOME (LEVELS of INCOME)
BRFSS$INCOME <- factor(BRFSS$INCOME,
levels = c("<$20K",
"$20K-<$35K",
"$35K-<$50K",
"$50K-<$75K",
"$75K+"), ordered = TRUE)
df_income <- BRFSS %>%
group_by(INCOME) %>%
dplyr::summarise(Count = n()) %>%
mutate(Income_range = factor(INCOME),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
df_income %>%
pander()
| INCOME | Count | Income_range | Percentage | label |
|---|---|---|---|---|
| <$20K | 40024 | <$20K | 14.32 | 14.32% |
| $20K-<$35K | 51546 | $20K-<$35K | 18.44 | 18.44% |
| $35K-<$50K | 38677 | $35K-<$50K | 13.83 | 13.83% |
| $50K-<$75K | 47127 | $50K-<$75K | 16.86 | 16.86% |
| $75K+ | 102198 | $75K+ | 36.56 | 36.56% |
ggplot(df_income,
aes(x = INCOME,
y = Percentage)) +
geom_bar(stat = "identity",
color = "blue",
fill = "royalblue4",
width = 0.5) +
geomtext_bar +
labs(x = "Income range",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
For presentation
library(ggimage)
library(extrafont)
loadfonts(device = "win")
df1Income <- data.frame(grp = c("<$20K","$20K-<$35K","$35K-<$50K","$50K-<$75K","$75K+"),
y = 5:1,
img = rep("./coin.jpg",5),
freq=c(14.32,18.44,13.83,16.86,36.56))
df2Income<- df1Income[rep(seq_len(nrow(df1Income)), df1Income$freq), 1:3]
df2Income <- df2Income %>% group_by(grp) %>% mutate(x = row_number())
ggplot(df2Income) +
geom_image(aes(x = x,
y = y,
image = img), size = 0.045) +
theme_void() +
geom_text(data = df1Income, aes(y = y + 0.1, label=paste(freq,"%", sep = "")), x= -0.5,fontface = 2, size = 4, color="blue") +
geom_text(data = df1Income, aes(y = y-.3, label = grp), x = 0.5, fontface = 2, size = 4, color="black") +
xlim(-1,max(df1Income$freq)+2) + ylim(0,max(df1Income$y)+1)
Examining the association between income and education level The question we’d like to answer is: is edcuaiton level related to revenue?
ggplot(BRFSS, aes(x= EDU , group = INCOME)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="INCOME") +
facet_grid(~ INCOME) +
scale_y_continuous(labels = scales::percent)+
labs(x = "Year of education",
y = "Percent of respondents",
caption = "Data source: BRFSS2018") +
themebar_facet
df_EduIncome <- BRFSS %>%
group_by(EDU, INCOME) %>%
dplyr::summarise(Count = n())
df_EduIncome
## # A tibble: 20 x 3
## # Groups: EDU [4]
## EDU INCOME Count
## <ord> <ord> <int>
## 1 <8 <$20K 2373
## 2 <8 $20K-<$35K 1322
## 3 <8 $35K-<$50K 439
## 4 <8 $50K-<$75K 216
## 5 <8 $75K+ 244
## 6 9-12 <$20K 21070
## 7 9-12 $20K-<$35K 22708
## 8 9-12 $35K-<$50K 12608
## 9 9-12 $50K-<$75K 11564
## 10 9-12 $75K+ 14283
## 11 12-15 <$20K 11238
## 12 12-15 $20K-<$35K 16782
## 13 12-15 $35K-<$50K 12896
## 14 12-15 $50K-<$75K 14518
## 15 12-15 $75K+ 22825
## 16 16+ <$20K 5343
## 17 16+ $20K-<$35K 10734
## 18 16+ $35K-<$50K 12734
## 19 16+ $50K-<$75K 20829
## 20 16+ $75K+ 64846
library(tidyverse)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
df2_EduIncome <- ddply(df_EduIncome,.(INCOME),transform, Percentage = Count*100/sum(Count))
df2_EduIncome
## EDU INCOME Count Percentage
## 1 <8 <$20K 2373 5.9289426
## 2 9-12 <$20K 21070 52.6434140
## 3 12-15 <$20K 11238 28.0781531
## 4 16+ <$20K 5343 13.3494903
## 5 <8 $20K-<$35K 1322 2.5646995
## 6 9-12 $20K-<$35K 22708 44.0538548
## 7 12-15 $20K-<$35K 16782 32.5573274
## 8 16+ $20K-<$35K 10734 20.8241183
## 9 <8 $35K-<$50K 439 1.1350415
## 10 9-12 $35K-<$50K 12608 32.5981850
## 11 12-15 $35K-<$50K 12896 33.3428136
## 12 16+ $35K-<$50K 12734 32.9239600
## 13 <8 $50K-<$75K 216 0.4583360
## 14 9-12 $50K-<$75K 11564 24.5379506
## 15 12-15 $50K-<$75K 14518 30.8061196
## 16 16+ $50K-<$75K 20829 44.1975937
## 17 <8 $75K+ 244 0.2387522
## 18 9-12 $75K+ 14283 13.9758117
## 19 12-15 $75K+ 22825 22.3340966
## 20 16+ $75K+ 64846 63.4513396
ggplot( )+
geom_bar(data=df2_EduIncome,
aes(x = INCOME,
y = Percentage,
fill = EDU),
stat = "identity")+
labs(y = "Percent of respondents (%)",
x = "Salary Bracket") +
scale_fill_manual(values = col4) +
scale_y_continuous(breaks = seq(0, 100, 20)) +
themebar_stacked
ggplot(data=df2_EduIncome, aes(x = INCOME,
y = Percentage,
group = EDU)) +
geom_line(linetype = "dashed",
aes(color = EDU),
size = 0.5) +
geom_point(aes(color = EDU),
size = 6) +
scale_color_manual(values = col4) +
labs(y = "Percent of respondents (%)", x = "Salary Bracket") +
theme_point +
guides(color=guide_legend("Year of education"))
ggplot(BRFSS, aes(x= INCOME , group = EDU)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col5) +
labs(y = "Percent of respondents", fill = "INCOME") +
facet_grid(~ EDU) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
df3_EduIncome <- ddply(df_EduIncome,.(EDU),transform, Percentage = Count*100/sum(Count))
df3_EduIncome
## EDU INCOME Count Percentage
## 1 <8 <$20K 2373 51.654332
## 2 <8 $20K-<$35K 1322 28.776665
## 3 <8 $35K-<$50K 439 9.555943
## 4 <8 $50K-<$75K 216 4.701785
## 5 <8 $75K+ 244 5.311276
## 6 9-12 <$20K 21070 25.622317
## 7 9-12 $20K-<$35K 22708 27.614218
## 8 9-12 $35K-<$50K 12608 15.332044
## 9 9-12 $50K-<$75K 11564 14.062481
## 10 9-12 $75K+ 14283 17.368939
## 11 12-15 <$20K 11238 14.360010
## 12 12-15 $20K-<$35K 16782 21.444179
## 13 12-15 $35K-<$50K 12896 16.478616
## 14 12-15 $50K-<$75K 14518 18.551221
## 15 12-15 $75K+ 22825 29.165975
## 16 16+ <$20K 5343 4.666946
## 17 16+ $20K-<$35K 10734 9.375819
## 18 16+ $35K-<$50K 12734 11.122757
## 19 16+ $50K-<$75K 20829 18.193491
## 20 16+ $75K+ 64846 56.640987
ggplot( )+
geom_bar(data=df3_EduIncome,
aes(x = EDU,
y = Percentage,
fill = INCOME),
stat = "identity")+
labs(y = "Percent of respondents (%)",
x = "Years of education") +
scale_fill_manual(values = col5) +
themebar_stacked
ggplot(data=df3_EduIncome, aes(x = EDU,
y = Percentage,
group = INCOME)) +
geom_line(linetype = "dashed",
aes(color = INCOME),
size = 0.5) +
geom_point(aes(color = INCOME),
size = 6) +
scale_color_manual(values = col5) +
labs(y = "Percent of respondents (%)", x = "Year of education") +
theme_point +
guides(color=guide_legend("Salary Bracket"))
GENDER and INCOME
df_GenderIncome <- BRFSS %>%
group_by(GENDER, INCOME) %>%
dplyr::summarise(Count = n())
df_GenderIncome %>%
pander()
| GENDER | INCOME | Count |
|---|---|---|
| Female | <$20K | 24445 |
| Female | $20K-<$35K | 29608 |
| Female | $35K-<$50K | 20421 |
| Female | $50K-<$75K | 23800 |
| Female | $75K+ | 47630 |
| Male | <$20K | 15579 |
| Male | $20K-<$35K | 21938 |
| Male | $35K-<$50K | 18256 |
| Male | $50K-<$75K | 23327 |
| Male | $75K+ | 54568 |
df2_GenderIncome <- ddply(df_GenderIncome,.(INCOME),transform, Percentage = Count*100/sum(Count))
df2_GenderIncome <- df2_GenderIncome %>%
mutate(pos = cumsum(Percentage) - (0.5 * Percentage))
df2_GenderIncome %>%
pander()
| GENDER | INCOME | Count | Percentage | pos |
|---|---|---|---|---|
| Female | <$20K | 24445 | 61.08 | 30.54 |
| Male | <$20K | 15579 | 38.92 | 80.54 |
| Female | $20K-<$35K | 29608 | 57.44 | 128.7 |
| Male | $20K-<$35K | 21938 | 42.56 | 178.7 |
| Female | $35K-<$50K | 20421 | 52.8 | 226.4 |
| Male | $35K-<$50K | 18256 | 47.2 | 276.4 |
| Female | $50K-<$75K | 23800 | 50.5 | 325.3 |
| Male | $50K-<$75K | 23327 | 49.5 | 375.3 |
| Female | $75K+ | 47630 | 46.61 | 423.3 |
| Male | $75K+ | 54568 | 53.39 | 473.3 |
ggplot( )+
geom_bar(data = df2_GenderIncome,
aes(x = INCOME,
y = Percentage,
fill = GENDER),
stat = "identity") +
labs(y = "Percent of respondents (%)",
x = "Salary Bracket") +
scale_fill_manual(values = col2) +
scale_y_continuous(breaks = seq(0, 100, 20)) +
themebar_stacked
It is hard to see the difference in this graph
ggplot(BRFSS, aes(x= GENDER ,
group = INCOME)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.75) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col2) +
labs(y = "Percent of respondents", fill="GENDER") +
facet_grid(~ INCOME) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(data = df2_GenderIncome,
aes(x = INCOME,
y = Percentage,
group = GENDER)) +
geom_line(linetype = "dashed",
aes(color = GENDER),
size = 0.5) +
geom_point(aes(color = GENDER),
size = 6) +
scale_color_manual(values = c("red","blue") ) +
labs(x = "Salary Bracket",
y = "Percent of respondents (%)") +
theme_point +
guides(color=guide_legend("GENDER"))
ggplot(BRFSS, aes(x= INCOME , group = GENDER)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.6) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col5) +
labs(y = "Percent of respondents", fill = "INCOME") +
facet_grid(~ GENDER) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
library(DescTools)
Strong association : AGE, EMPL Moderate to strong : RELP,INCOME Weak to moderate : GENDER, EDU, RACE
Community
df_COMMUN <- BRFSS %>%
group_by(COMMUN) %>%
dplyr::summarise(Count = n()) %>%
ungroup()%>%
arrange(desc(COMMUN))
df_COMMUN
## # A tibble: 3 x 2
## COMMUN Count
## <fct> <int>
## 1 Urban 192173
## 2 Suburban 45058
## 3 Rural 42341
df2_COMMUN <- df_COMMUN %>%
mutate(Percentage = round(Count/sum(Count),4)*100,
lab.pos = cumsum(Percentage)-0.5*Percentage)
df2_COMMUN
## # A tibble: 3 x 4
## COMMUN Count Percentage lab.pos
## <fct> <int> <dbl> <dbl>
## 1 Urban 192173 68.7 34.4
## 2 Suburban 45058 16.1 76.8
## 3 Rural 42341 15.1 92.4
geomtext_pie <- geom_text(aes(y = lab.pos,
label = paste(Percentage,"%", sep = "")),
col = "blue",
size = 5)
ggplot(data = df2_COMMUN,
aes(x = "",
y = Percentage,
fill = COMMUN)) +
geom_bar(stat = "identity") +
coord_polar("y") +
geomtext_pie +
theme_void() +
theme(legend.title = element_text(color = "black", size = 14),
legend.text = element_text(color = "black", size = 14)) +
scale_fill_manual(values = col3)
Xcommun <- c(68.74, 16.12, 15.14)
labels2 <- c("Urban", "Suburban","Rural")
pct2 <- Xcommun/sum(Xcommun)*100
lbls2 <- paste(labels2, pct2) # add percents to labels
lbls2 <- paste(lbls2,"%",sep="") # ad % to labels
pie(Xcommun,labels = lbls2, col= col3)
RENT/OWN
library(dplyr)
library(plyr)
library(pander)
Xownerchip <- c(72.68, 23.55, 3.78)
labels3 <- c("Own", "Rent","Other situation")
pct3 <-Xownerchip /sum(Xownerchip )*100
lbls3 <- paste(labels3, round(pct3,2) )# add percents to labels
lbls3 <- paste(lbls3,"%" ,sep="") # ad % to labels
pie(Xownerchip ,labels = lbls3, col= col3)
Lifestyle (C) Health behaviors
BRFSS %>%
group_by(SMOKESTAT) %>%
dplyr::count() %>%
pander()
| SMOKESTAT | n |
|---|---|
| Daily smoker | 29247 |
| Former smoker | 80542 |
| Never smoker | 158075 |
| Someday smoker | 11708 |
BRFSS$SMOKESTAT <- factor(BRFSS$SMOKESTAT,
levels = c("Daily smoker",
"Someday smoker",
"Former smoker",
"Never smoker"), ordered = TRUE)
df_SMOKESTAT <- BRFSS %>%
group_by(SMOKESTAT) %>%
dplyr::summarise(Count = n()) %>%
mutate(SMOKESTATgroup = factor(SMOKESTAT),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
ggplot(df_SMOKESTAT,
aes(x = SMOKESTAT,
y = Percentage)) +
geom_bar(stat = "identity",
color = "olivedrab4",
fill = "olivedrab4",
width = 0.5) +
scale_y_continuous(breaks = seq(0,60,10)) +
geomtext_bar +
labs(x = "Smoker status",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
C4.EXERCISE
df_EXERpa30 <- BRFSS %>%
group_by(EXERpa30) %>%
dplyr::summarise(Count = n()) %>%
ungroup()%>%
arrange(desc(EXERpa30))
df2_EXERpa30 <- df_EXERpa30 %>%
mutate(Percentage = round(Count/sum(Count),4)*100,
lab.pos = cumsum(Percentage)-0.5*Percentage)
df2_EXERpa30 %>%
pander()
| EXERpa30 | Count | Percentage | lab.pos |
|---|---|---|---|
| Yes | 217106 | 77.66 | 38.83 |
| No | 62466 | 22.34 | 88.83 |
ggplot(data = df2_EXERpa30,
aes(x = "",
y = Percentage,
fill = EXERpa30))+
geom_bar(stat = "identity")+
coord_polar("y") +
geomtext_pie +
theme_void() +
theme(legend.title = element_text(color = "black", size = 14),
legend.text = element_text(color = "black", size = 14)) +
scale_fill_manual(values = c("steelblue","olivedrab4"))
C2.ALCpa30
df_ALCpa30 <- BRFSS %>%
group_by(ALCpa30) %>%
dplyr::summarise(Count = n())
df_ALCpa30 %>%
pander()
| ALCpa30 | Count |
|---|---|
| 0 | 123079 |
| 1-9 | 98975 |
| 10-30 | 57518 |
ggplot(df_ALCpa30,
aes(x = ALCpa30,
y = Count)) +
geom_bar(stat = "identity",
color = "olivedrab4",
fill = "olivedrab4",
width = 0.5) +
geom_text(aes(label=paste0(Count)),
vjust = -0.3,
size = 4,
color= "black",
fontface = "bold") +
labs(x = "# of days where people have at least one drink",
y = "Frequency",
caption = "Data source: BRFSS2018") +
theme_bar
df_ALCpa30 <- BRFSS %>%
group_by(ALCpa30) %>%
dplyr::summarise(Count = n()) %>%
mutate(ALCpa30group = factor(ALCpa30),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
ggplot(df_ALCpa30,
aes(x = ALCpa30,
y = Percentage)) +
geom_bar(stat = "identity",
color = "olivedrab4",
fill = "olivedrab4",
width = 0.45) +
geomtext_bar +
labs(x = "Number of days where people have at least one drink",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
SMOKESTAT : strong assocation with the outcome Maybe we should remove all the data that were collected during the past 30 days
C4. SLEPTIM1
library(ggforce)
Numerical Data Descriptive Statistics
ggplot(data = BRFSS, aes(SLEPTIM1)) +
geom_bar(stat= "count",
fill= "olivedrab4") +
coord_cartesian(clip = "off") +
labs( x = "Sleep duration (hours)",
y = "Frequency",
subtitle = "On average, how many hours of sleep do you get in a 24-hour period?",
caption = "Data source: BRFSS2018") +
scale_y_continuous(limits = c(0, 100000), expand = c(0, 0)) +
scale_x_continuous(limits = c(0, 15), expand = c(0, 0)) +
theme_bar
summary(BRFSS$SLEPTIM1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 6.000 7.000 7.018 8.000 12.000
ggplot(BRFSS,
aes(x = PULMOND,
y = SLEPTIM1)) +
geom_violin(fill = "cornflowerblue") +
geom_boxplot(width = .2,
fill = "orange",
outlier.color = "orange",
outlier.size = 2) +
labs(title = "Sleep duration distribution by PULMOND status")
ggplot(BRFSS,
aes(x = MICHD,
y = SLEPTIM1, fill = MICHD)) +
geom_violin(trim = FALSE,fill = "cornflowerblue") +
geom_boxplot(width = 0.1,
fill = "grey",
outlier.color = "grey",
outlier.size = 2) +
labs(title = "Sleep duration distribution by MICHD status")
D1.GENHLTH
BRFSS$GENHLTH <- factor(BRFSS$GENHLTH,
levels = c("Poor",
"Fair",
"Good",
"Very Good",
"Excellent"), ordered = TRUE)
df_GENHLTH <- BRFSS %>%
group_by(GENHLTH) %>%
dplyr::summarise(Count = n()) %>%
mutate(SMOKESTATgroup = factor(GENHLTH),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
ggplot(df_GENHLTH,
aes(x = GENHLTH,
y = Percentage)) +
geom_bar(stat = "identity",
color = "purple4",
fill = "purple4",
width = 0.6) +
scale_y_continuous(breaks = seq(0,60,10)) +
geomtext_bar +
labs(x = "In general, your health is:",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
ggplot(BRFSS, aes(x = GENHLTH, group = INCOME)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col5) +
labs(y = "Percent of respondents", fill = "INCOME") +
facet_grid(~ INCOME) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
ggplot(BRFSS, aes(x = INCOME , group = GENHLTH)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col5) +
labs(y = "Percent of respondents", fill = "GENHLTH") +
facet_grid(~ GENHLTH) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
Healthy Days (E)— Health Related Quality of Life E1. PHYSHLTH (Number of Days Physical Health Not Good) 3 level not good physical health status: 0 days, 1-9 days, 10-30 days E2. MENTHLTH (Number of Days Mental Health Not Good) 3 level not good mental health status: 0 days, 1-9 days, 10-30 days
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(grid)
p_PHYSHLTH <- ggplot(BRFSS, aes(x = PHYSHLTHpa30)) +
geom_bar(aes(y = (..count..)/sum(..count..)),
fill = "purple4",
width = 0.6) +
scale_y_continuous(labels = percent,
limits = c(0, 0.7))+
geom_text(size = 5,
color= "black",
aes(y = ((..count..)/sum(..count..)),
label = scales::percent((..count..)/sum(..count..))),
stat = "count",
vjust = -0.5) +
labs(x = "Physical Health Not Good",
y = "Percent of Respondents") +
theme_bar
p_MENTHLTH <- ggplot(BRFSS, aes(x = MENTHLTHpa30)) +
geom_bar(aes(y = (..count..)/sum(..count..)),
fill = "purple4",
width = 0.6) +
scale_y_continuous(labels = percent,
limits = c(0, 0.7))+
geom_text(size = 5,
color= "black",
aes(y = ((..count..)/sum(..count..)),
label = scales::percent((..count..)/sum(..count..))),
stat = "count",
vjust = -0.5) +
labs(x = "Mental Health Not Good",
y = "Percent of Respondents") +
theme_bar
grid.arrange(p_PHYSHLTH, p_MENTHLTH, ncol = 2)
HRQOL <- data.frame(
Health = c("Physical","Physical","Physical",
"Mental","Mental","Mental"),
NumberofDays = rep(c("0", "1-9", "10-30"),2),
Percentage = c(63.5, 21.4, 15.1,
66.3, 19.7, 14.0 ))
HRQOL %>%
pander()
| Health | NumberofDays | Percentage |
|---|---|---|
| Physical | 0 | 63.5 |
| Physical | 1-9 | 21.4 |
| Physical | 10-30 | 15.1 |
| Mental | 0 | 66.3 |
| Mental | 1-9 | 19.7 |
| Mental | 10-30 | 14 |
ggplot(data = HRQOL,
aes(x = Health,
y = Percentage,
fill = NumberofDays)) +
geom_bar(stat = "identity",
position = "dodge",
size = 0.25)+
scale_y_continuous(limits = c(0, 70))+
scale_fill_manual(values= col3)+
geom_text(aes(label = paste0(Percentage,"%")),
position = position_dodge(width = 0.9),
vjust= -0.25,
size = 5,
color = "black") +
labs(x = "Health related quality of life",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
ggplot(data = HRQOL,
aes(x = NumberofDays,
y = Percentage,
fill = Health)) +
geom_bar(stat = "identity",
position = "dodge")+
scale_y_continuous(limits = c(0, 70))+
scale_fill_manual(values= col2)+
geom_text(aes(label = paste0(Percentage,"%")),
position = position_dodge(width = 0.9),
vjust = -0.25,
size = 4.5,
color = "black") +
labs(x = "Number of days where health is not good",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
ggplot(BRFSS, aes(x = GENHLTH, group = EMPL)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 5,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col5) +
labs(y = "Percent of respondents", fill = "EMPL") +
facet_grid(~ EMPL) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
ggplot(BRFSS, aes(x = MENTHLTHpa30, group = EMPL )) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.75) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col3) +
labs(y = "Percent of respondents", fill = "EMPL") +
facet_grid(~ EMPL) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
ggplot(BRFSS, aes(x = PHYSHLTHpa30, group = EMPL )) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.75) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col3) +
labs(y = "Percent of respondents", fill = "EMPL") +
facet_grid(~ EMPL) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
ggplot(BRFSS, aes(x = GENHLTH, group = EXERpa30)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.75) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col5) +
labs(y = "Percent of respondents", fill = "EMPL") +
facet_grid(~ EXERpa30) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
ggplot(BRFSS, aes(x = MENTHLTHpa30, group = EXERpa30)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.6) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col3) +
labs(y = "Percent of respondents", fill = "EMPL") +
facet_grid(~ EXERpa30) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
ggplot(BRFSS, aes(x = PHYSHLTHpa30, group = EXERpa30)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.6) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col3) +
labs(y = "Percent of respondents", fill = "EMPL") +
facet_grid(~ EXERpa30) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
Health Care Access (F) F1. HPLN1 (Have any health care coverage) ==> HPLN F2. MEDCOST (Could Not See Doctor Because of Cost) F3. CHECKUP1
df_HLTHPLN <- BRFSS %>%
group_by(HLTHPLN) %>%
dplyr::summarise(Count = n()) %>%
ungroup()%>%
arrange(desc(HLTHPLN))
df2_HLTHPLN <- df_HLTHPLN %>%
mutate(Percentage = round(Count/sum(Count),4)*100,
lab.pos = cumsum(Percentage)-0.5*Percentage)
df2_HLTHPLN %>%
pander()
| HLTHPLN | Count | Percentage | lab.pos |
|---|---|---|---|
| Yes | 259509 | 92.82 | 46.41 |
| No | 20063 | 7.18 | 96.41 |
ggplot(data = df2_HLTHPLN,
aes(x = "",
y = Percentage,
fill = HLTHPLN))+
geom_bar(stat = "identity")+
coord_polar("y") +
geom_text(aes(y = lab.pos,
label = paste(Percentage,"%", sep = "")), col = "white", size = 4) +
theme_void() +
scale_fill_manual(values = c("steelblue","darkgreen")) +
theme(legend.title = element_text(color = "black", size = 14),
legend.text = element_text(color = "black", size = 14))
df_MEDICOST <- BRFSS %>%
group_by(MEDICOST) %>%
dplyr::summarise(Count = n()) %>%
ungroup()%>%
arrange(desc(MEDICOST))
df2_MEDICOST <- df_MEDICOST %>%
mutate(percentage = round(Count/sum(Count),4)*100,
lab.pos = cumsum(percentage)-0.5*percentage)
df2_MEDICOST
## # A tibble: 2 x 4
## MEDICOST Count percentage lab.pos
## <fct> <int> <dbl> <dbl>
## 1 Yes 27236 9.74 4.87
## 2 No 252336 90.3 54.9
ggplot(data = df2_MEDICOST,
aes(x = "",
y = percentage,
fill = MEDICOST))+
geom_bar(stat = "identity")+
coord_polar("y") +
geom_text(aes(y = lab.pos,
label = paste(percentage,"%", sep = "")), col = "white", size = 4) +
theme_void() +
scale_fill_manual(values = c("steelblue","darkgreen"))+
theme(legend.title = element_text(color = "black", size = 14),
legend.text = element_text(color = "black", size = 14))
df_CHECKUP <- BRFSS %>%
group_by(CHECKUP) %>%
dplyr::summarise(Count = n()) %>%
mutate(CHECKUPgroup = factor(CHECKUP),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
ggplot(df_CHECKUP,
aes(x = CHECKUP,
y = Percentage)) +
geom_bar(stat = "identity",
color = "darkgreen",
fill = "darkgreen",
width = 0.6) +
scale_y_continuous(breaks = seq(0,100,20)) +
geomtext_bar +
labs(x = "Last visited a doctor for a routine checkup (Year)",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
BMICAT
BRFSS$BMICAT <- factor(BRFSS$BMICAT,
levels = c("Underweight",
"Normal",
"Overweight",
"Obese"), ordered = TRUE)
df_BMICAT <- BRFSS %>%
group_by(BMICAT) %>%
dplyr::summarise(Count = n()) %>%
mutate(BMICATgroup = factor(BMICAT),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
ggplot(df_BMICAT,
aes(x = BMICAT,
y = Percentage)) +
geom_bar(stat = "identity",
color = "firebrick4",
fill = "firebrick4",
width = 0.6) +
scale_y_continuous(breaks = seq(0,50,10)) +
geomtext_bar +
labs(x = "BMI categories",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
BMICAT and EDUCATION
ggplot(BRFSS, aes(x= BMICAT , group = EDU)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill = "BMICATE") +
facet_grid(~ EDU) +
scale_y_continuous(labels = scales::percent) +
theme_bar
df_EduBMI <- BRFSS %>%
group_by(EDU, BMICAT) %>%
dplyr::summarise(Count = n())
df2_EduBMI <- ddply(df_EduBMI,.(EDU),transform, Percentage = Count*100/sum(Count))
df2_EduBMI
## EDU BMICAT Count Percentage
## 1 <8 Underweight 75 1.632564
## 2 <8 Normal 1065 23.182412
## 3 <8 Overweight 1711 37.244232
## 4 <8 Obese 1743 37.940792
## 5 9-12 Underweight 1449 1.762066
## 6 9-12 Normal 21689 26.375056
## 7 9-12 Overweight 29002 35.268080
## 8 9-12 Obese 30093 36.594798
## 9 12-15 Underweight 1086 1.387700
## 10 12-15 Normal 21402 27.347653
## 11 12-15 Overweight 27732 35.436180
## 12 12-15 Obese 28039 35.828467
## 13 16+ Underweight 1400 1.222857
## 14 16+ Normal 38916 33.991929
## 15 16+ Overweight 42820 37.401953
## 16 16+ Obese 31350 27.383261
ggplot(data = df2_EduBMI,
aes(x = EDU,
y = Percentage,
group = BMICAT)) +
geom_line(linetype = "dashed",
aes(color = BMICAT),
size = 0.5) +
geom_point(aes(color = BMICAT),
size = 6) +
scale_color_manual(values = col4) +
labs(x = "Education year",
y = "Percent of respondents (%)") +
theme_point +
guides(color=guide_legend("Education year"))
ggplot(BRFSS, aes(x = AGE, group = BMICAT)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col7) +
labs(y = "Percent of respondents", fill = "EDU") +
facet_grid(~ BMICAT ) +
scale_y_continuous(labels = scales::percent)+
theme_bar
df_BMIage <- BRFSS %>%
group_by(AGE, BMICAT) %>%
dplyr::summarise(Count = n())
df_BMIage
## # A tibble: 28 x 3
## # Groups: AGE [7]
## AGE BMICAT Count
## <fct> <ord> <int>
## 1 18-29 Underweight 892
## 2 18-29 Normal 12996
## 3 18-29 Overweight 8765
## 4 18-29 Obese 6762
## 5 30-39 Underweight 431
## 6 30-39 Normal 10903
## 7 30-39 Overweight 11740
## 8 30-39 Obese 11376
## 9 40-49 Underweight 350
## 10 40-49 Normal 10107
## # ... with 18 more rows
ggplot(data=df_BMIage, aes(x=AGE,
y = Count,
group=BMICAT)) +
geom_line(linetype="dashed",
aes(color = BMICAT),
size=1) +
geom_point(aes(color = BMICAT),
size= 6) +
scale_color_manual(values= col4) +
scale_y_continuous(breaks = seq(0,50000,10000)) +
labs(x = "Age group",
y = "Frequency",
caption = "Data source: BRFSS2018") +
theme_point
How to calculate proportion by age group?
df2_BMIage <- ddply(df_BMIage,.(AGE),transform, Percentage = Count*100/sum(Count))
df2_BMIage
## AGE BMICAT Count Percentage
## 1 18-29 Underweight 892 3.0324664
## 2 18-29 Normal 12996 44.1815400
## 3 18-29 Overweight 8765 29.7977223
## 4 18-29 Obese 6762 22.9882713
## 5 30-39 Underweight 431 1.2510885
## 6 30-39 Normal 10903 31.6487663
## 7 30-39 Overweight 11740 34.0783745
## 8 30-39 Obese 11376 33.0217707
## 9 40-49 Underweight 350 0.9128847
## 10 40-49 Normal 10107 26.3615023
## 11 40-49 Overweight 13354 34.8304643
## 12 40-49 Obese 14529 37.8951487
## 13 50-59 Underweight 533 1.0008826
## 14 50-59 Normal 13463 25.2812048
## 15 50-59 Overweight 19398 36.4261168
## 16 50-59 Obese 19859 37.2917958
## 17 60-69 Underweight 734 1.1610985
## 18 60-69 Normal 16629 26.3050494
## 19 60-69 Overweight 23827 37.6914072
## 20 60-69 Obese 22026 34.8424450
## 21 70-79 Underweight 603 1.4092736
## 22 70-79 Normal 11924 27.8676264
## 23 70-79 Overweight 17015 39.7658222
## 24 70-79 Obese 13246 30.9572777
## 25 80+ Underweight 467 2.5786858
## 26 80+ Normal 7050 38.9287686
## 27 80+ Overweight 7166 39.5692987
## 28 80+ Obese 3427 18.9232468
ggplot(data=df2_BMIage, aes(x=AGE,
y = Percentage,
group=BMICAT)) +
geom_line(linetype = "dashed",
aes(color = BMICAT),
size = 0.5) +
geom_point(aes(color = BMICAT),
size= 6) +
scale_color_manual(values= col4) +
labs(y = "Percent of respondents (%)", x = "Age range") +
theme_point
**INCOME and BMI
df_BMIincome <- BRFSS %>%
group_by(INCOME, BMICAT) %>%
dplyr::summarise(Count = n())
df2_BMIincome <- ddply(df_BMIincome,.(INCOME),transform, Percentage = Count*100/sum(Count))
df2_BMIincome
## INCOME BMICAT Count Percentage
## 1 <$20K Underweight 1048 2.6184289
## 2 <$20K Normal 11416 28.5228863
## 3 <$20K Overweight 12299 30.7290626
## 4 <$20K Obese 15261 38.1296222
## 5 $20K-<$35K Underweight 955 1.8527141
## 6 $20K-<$35K Normal 14914 28.9333799
## 7 $20K-<$35K Overweight 17685 34.3091607
## 8 $20K-<$35K Obese 17992 34.9047453
## 9 $35K-<$50K Underweight 524 1.3548104
## 10 $35K-<$50K Normal 10930 28.2596892
## 11 $35K-<$50K Overweight 14057 36.3445976
## 12 $35K-<$50K Obese 13166 34.0409029
## 13 $50K-<$75K Underweight 498 1.0567191
## 14 $50K-<$75K Normal 13355 28.3383199
## 15 $50K-<$75K Overweight 17550 37.2397988
## 16 $50K-<$75K Obese 15724 33.3651622
## 17 $75K+ Underweight 985 0.9638153
## 18 $75K+ Normal 32457 31.7589385
## 19 $75K+ Overweight 39674 38.8207206
## 20 $75K+ Obese 29082 28.4565256
ggplot(data=df2_BMIincome, aes(x= INCOME,
y = Percentage,
group=BMICAT)) +
geom_line(linetype="dashed",
aes(color = BMICAT),
size = 0.5) +
geom_point(aes(color =BMICAT),
size= 6) +
scale_color_manual(values= col4) +
labs(y = "Percent of respondents (%)", x = "Income bracket") +
theme_point
Overweight increase with income, from 30% for 20K to 38% for 75K+ Obsese decreses with income, from 36.9 % for 20 K to 28.9 for 75K+ The normal weight is similar for all The underweight decreases from 2.5% to 1 %.
ggplot(data=df2_BMIincome, aes(x= BMICAT,
y = Percentage,
group = INCOME)) +
geom_line(linetype="dashed",
aes(color = INCOME),
size = 0.5) +
geom_point(aes(color = INCOME),
size= 6) +
scale_color_manual(values= col5) +
labs(y = "Percent of respondents (%)", x = "BMI categories") +
theme_point
df3_BMIincome <- ddply(df_BMIincome,.(BMICAT),transform, Percentage = Count*100/sum(Count))
df3_BMIincome
## INCOME BMICAT Count Percentage
## 1 <$20K Underweight 1048 26.13466
## 2 $20K-<$35K Underweight 955 23.81546
## 3 $35K-<$50K Underweight 524 13.06733
## 4 $50K-<$75K Underweight 498 12.41895
## 5 $75K+ Underweight 985 24.56359
## 6 <$20K Normal 11416 13.74230
## 7 $20K-<$35K Normal 14914 17.95310
## 8 $35K-<$50K Normal 10930 13.15726
## 9 $50K-<$75K Normal 13355 16.07642
## 10 $75K+ Normal 32457 39.07093
## 11 <$20K Overweight 12299 12.14536
## 12 $20K-<$35K Overweight 17685 17.46408
## 13 $35K-<$50K Overweight 14057 13.88140
## 14 $50K-<$75K Overweight 17550 17.33077
## 15 $75K+ Overweight 39674 39.17839
## 16 <$20K Obese 15261 16.72897
## 17 $20K-<$35K Obese 17992 19.72266
## 18 $35K-<$50K Obese 13166 14.43245
## 19 $50K-<$75K Obese 15724 17.23650
## 20 $75K+ Obese 29082 31.87942
ggplot(data=df3_BMIincome,
aes(x= BMICAT,
y = Percentage,
group = INCOME)) +
geom_line(linetype = "dashed",
aes(color = INCOME),
size = 0.5) +
geom_point(aes(color = INCOME),
size= 6) +
scale_color_manual(values= col5) +
labs(x = "BMI categories",
y = "Percent of respondents (%)") +
theme_point
ggplot(data = df3_BMIincome,
aes(x = INCOME,
y = Percentage,
group = BMICAT)) +
geom_line(linetype = "dashed",
aes(color = BMICAT),
size = 0.5) +
geom_point(aes(color = BMICAT),
size= 6) +
scale_color_manual(values= col5) +
labs(x = "Income range",
y = "Percent of respondents (%)") +
theme_point
df_INCOMEBMI <- BRFSS %>%
group_by(INCOME, BMICAT) %>%
dplyr::summarise(Count = n())
df_INCOMEBMI
## # A tibble: 20 x 3
## # Groups: INCOME [5]
## INCOME BMICAT Count
## <ord> <ord> <int>
## 1 <$20K Underweight 1048
## 2 <$20K Normal 11416
## 3 <$20K Overweight 12299
## 4 <$20K Obese 15261
## 5 $20K-<$35K Underweight 955
## 6 $20K-<$35K Normal 14914
## 7 $20K-<$35K Overweight 17685
## 8 $20K-<$35K Obese 17992
## 9 $35K-<$50K Underweight 524
## 10 $35K-<$50K Normal 10930
## 11 $35K-<$50K Overweight 14057
## 12 $35K-<$50K Obese 13166
## 13 $50K-<$75K Underweight 498
## 14 $50K-<$75K Normal 13355
## 15 $50K-<$75K Overweight 17550
## 16 $50K-<$75K Obese 15724
## 17 $75K+ Underweight 985
## 18 $75K+ Normal 32457
## 19 $75K+ Overweight 39674
## 20 $75K+ Obese 29082
ggplot(data = df_INCOMEBMI , aes(x = INCOME,
y = Count,
group = BMICAT)) +
geom_line(linetype = "dashed",
aes(color = BMICAT),
size = 0.5) +
geom_point(aes(color = BMICAT),
size= 6) +
scale_color_manual(values = col4) +
scale_y_continuous(breaks = seq(0,20000,5000)) +
labs(x = "Income braket",
y = "Frequency",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12, color = "black"),
axis.title = element_text(size = 12, face="bold"),
legend.position = "none",
plot.caption = element_text(color = "grey", size = 12, face = "italic")) +
theme(legend.position="top")
by using face_grid (margin = true), we will obtain an grid of total
ggplot(BRFSS, aes(x= BMICAT, group = RACE)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat = "count",
width = 0.6) +
geom_text(size = 4.5,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat = "count",
color = "black",
vjust = -0.5) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents",
x = "BMI categories") +
facet_grid(~ RACE) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
Table is maybe the best option here
df_BMIrace <- BRFSS %>%
group_by(RACE, BMICAT) %>%
dplyr::summarise(Count = n())
df2_BMIrace <- ddply(df_BMIrace,.(RACE),transform, Percentage = Count*100/sum(Count))
df2_BMIrace %>%
pander()
| RACE | BMICAT | Count | Percentage |
|---|---|---|---|
| AIAN | Underweight | 76 | 1.634 |
| AIAN | Normal | 1045 | 22.46 |
| AIAN | Overweight | 1567 | 33.68 |
| AIAN | Obese | 1964 | 42.22 |
| Asian | Underweight | 187 | 3.298 |
| Asian | Normal | 2737 | 48.27 |
| Asian | Overweight | 1962 | 34.6 |
| Asian | Obese | 784 | 13.83 |
| Black | Underweight | 253 | 1.171 |
| Black | Normal | 4577 | 21.19 |
| Black | Overweight | 7269 | 33.65 |
| Black | Obese | 9503 | 43.99 |
| Hispanic | Underweight | 248 | 1.325 |
| Hispanic | Normal | 4871 | 26.02 |
| Hispanic | Overweight | 7073 | 37.79 |
| Hispanic | Obese | 6525 | 34.86 |
| White | Underweight | 3121 | 1.414 |
| White | Normal | 67495 | 30.57 |
| White | Overweight | 80612 | 36.52 |
| White | Obese | 69533 | 31.5 |
| Others | Underweight | 125 | 1.53 |
| Others | Normal | 2347 | 28.73 |
| Others | Overweight | 2782 | 34.05 |
| Others | Obese | 2916 | 35.69 |
Obese : AIAN, Black, Multiracial Obese + Overweight : AsiaN IS THE LEASt UNDERWEIGHT : ASIAN
ggplot(BRFSS, aes(x= GENDER , group = BMICAT)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.75) +
geom_text(size = 4.5,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col2) +
labs(y = "Percent of respondents", fill="GENDER") +
facet_grid(~ BMICAT) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
BRFSS %>%
filter(BMICAT == "Obese") %>%
group_by(GENDER) %>%
dplyr::count()
## # A tibble: 2 x 2
## # Groups: GENDER [2]
## GENDER n
## <fct> <int>
## 1 Female 47853
## 2 Male 43372
BRFSS %>%
filter(BMICAT == "Underweight") %>%
group_by(GENDER) %>%
dplyr::count()
## # A tibble: 2 x 2
## # Groups: GENDER [2]
## GENDER n
## <fct> <int>
## 1 Female 2780
## 2 Male 1230
df_BMIgender<- BRFSS %>%
group_by(GENDER, BMICAT) %>%
dplyr::summarise(Count = n())
df2_BMIgender <- ddply(df_BMIgender,.(GENDER),transform, Percentage = Count*100/sum(Count))
df2_BMIgender %>%
pander()
| GENDER | BMICAT | Count | Percentage |
|---|---|---|---|
| Female | Underweight | 2780 | 1.905 |
| Female | Normal | 50141 | 34.37 |
| Female | Overweight | 45130 | 30.93 |
| Female | Obese | 47853 | 32.8 |
| Male | Underweight | 1230 | 0.9202 |
| Male | Normal | 32931 | 24.64 |
| Male | Overweight | 56135 | 42 |
| Male | Obese | 43372 | 32.45 |
#This is the best graph EVER
ggplot(df2_BMIgender, aes(BMICAT, Percentage, fill = GENDER)) +
geom_bar(position="dodge",
stat="identity")+
scale_fill_manual(values= col2) +
labs(x = "BMI categories",
y = "Percent of Respondents (%)",
caption = "Data source: BRFSS2018") +
geom_text(size = 4.5,
color= "blue",
aes(y = Percentage,
label = paste0(round(Percentage,1),"%")),
stat = "identity",
position = position_dodge2(width = 0.9, preserve = "single"),
vjust= -0.5,
hjust= 0.5) +
themebar_facet
ggplot(BRFSS, aes(x = BMICAT, group = GENDER)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.75) +
geom_text(size = 5,
aes(label = scales::percent(..prop..),
y = ..prop.. ),
stat= "count",
color = "blue",
vjust = 0.5,
hjust = -0.1) +
scale_fill_manual(values= col4) +
labs(x= "Categories of BMI",
y = "Percent of respondents") +
facet_grid(~ GENDER) +
coord_flip() +
scale_y_continuous(labels = scales::percent,limits = c(0,0.5))+
themebar_facet
Amomg male group, 41% of them are overweight while those of female are only 31% 2 % of female are underweight, this number decrease to 1 % for male.
ggplot(BRFSS, aes(x = BMICAT, group = GENHLTH)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.75) +
geom_text(size = 4.5,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="GENDER") +
facet_grid(~ GENHLTH) +
scale_y_continuous(labels = scales::percent)+
theme(axis.text = element_text(size = 12, color = "black", angle = 90),
axis.title = element_text(size = 12, face= "bold"),
plot.title = element_text(color = "black", size = 12, face = "bold.italic"),
plot.caption = element_text(color = "grey", size = 12, face = "italic"),
strip.text = element_text(size = 12, color = "black", face = "bold.italic"),
strip.background = element_rect(fill = "slategray1"),
legend.text = element_text(colour="black", size = 20),
legend.position = "none")
ggplot(BRFSS, aes(x = BMICAT, group = EXERpa30 )) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.75) +
geom_text(size = 4.5,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="GENDER") +
facet_grid(~ EXERpa30) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ORAL HEALTH H1. RMVTETH
BRFSS$RMVTETH <- factor(BRFSS$RMVTETH,
levels= c("0",
"1-5",
">= 6",
"All"), ordered = TRUE)
df_RMVTETH <- BRFSS %>%
group_by(RMVTETH) %>%
dplyr::summarise(Count = n()) %>%
mutate(CHECKUPgroup = factor(RMVTETH),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
ggplot(df_RMVTETH,
aes(x = RMVTETH,
y = Percentage)) +
geom_bar(stat = "identity",
color = "darkgreen",
fill = "darkgreen",
width = 0.6) +
scale_y_continuous(breaks = seq(0,50,10)) +
geomtext_bar +
labs(x = "Number of teeth that have been removed",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
ggplot(BRFSS, aes(x= RMVTETH , group = AGE)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="IRMVTETH") +
facet_grid(~ AGE) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
df_RMVTETH <- BRFSS %>%
group_by(RMVTETH) %>%
dplyr::summarise(Count = n()) %>%
mutate(Rmvteth = factor(RMVTETH),
Ratio = Count/sum(Count),
label = percent(Ratio %>% round(4)))
df_RMVTETH %>%
pander()
| RMVTETH | Count | Rmvteth | Ratio | label |
|---|---|---|---|---|
| 0 | 146320 | 0 | 0.5234 | 52.3% |
| 1-5 | 83126 | 1-5 | 0.2973 | 29.7% |
| >= 6 | 32768 | >= 6 | 0.1172 | 11.7% |
| All | 17358 | All | 0.06209 | 6.2% |
Age in function of of teeth removed
df_RmvtethAge <- BRFSS %>%
group_by(AGE, RMVTETH) %>%
dplyr::summarise(Count = n())
df2_RmvtethAge <- ddply(df_RmvtethAge, .(RMVTETH), transform, Percentage = Count*100/sum(Count))
df2_RmvtethAge
## AGE RMVTETH Count Percentage
## 1 18-29 0 25312 17.2990705
## 2 30-39 0 24595 16.8090487
## 3 40-49 0 24450 16.7099508
## 4 50-59 0 27550 18.8285949
## 5 60-69 0 26068 17.8157463
## 6 70-79 0 13598 9.2933297
## 7 80+ 0 4747 3.2442592
## 8 18-29 1-5 3667 4.4113755
## 9 30-39 1-5 8150 9.8043933
## 10 40-49 1-5 10597 12.7481173
## 11 50-59 1-5 16824 20.2391550
## 12 60-69 1-5 22288 26.8123090
## 13 70-79 1-5 15391 18.5152660
## 14 80+ 1-5 6209 7.4693838
## 15 18-29 >= 6 358 1.0925293
## 16 30-39 >= 6 1322 4.0344238
## 17 40-49 >= 6 2492 7.6049805
## 18 50-59 >= 6 6100 18.6157227
## 19 60-69 >= 6 9718 29.6569824
## 20 70-79 >= 6 8527 26.0223389
## 21 80+ >= 6 4251 12.9730225
## 22 18-29 All 78 0.4493605
## 23 30-39 All 383 2.2064754
## 24 40-49 All 801 4.6145869
## 25 50-59 All 2779 16.0099090
## 26 60-69 All 5142 29.6232285
## 27 70-79 All 5272 30.3721627
## 28 80+ All 2903 16.7242770
col7 <- c("#0072BB", "#FF4C3B","#FFD034","#51A39D","#06425C", "#C6C8CA","#814374")
ggplot( )+
geom_bar(data = df2_RmvtethAge,
aes(x = RMVTETH,
y = Percentage,
fill = AGE),
stat = "identity")+
labs(y = "Percent of respondents (%)",
x = "Number of teeth removed") +
scale_fill_manual(values = col7) +
scale_y_continuous(breaks = seq(0, 100, 20)) +
themebar_stacked
df2_AgeRmvteth <- ddply(df_RmvtethAge ,.(AGE),transform, Percentage = Count*100/sum(Count))
df2_AgeRmvteth
## AGE RMVTETH Count Percentage
## 1 18-29 0 25312 86.0513344
## 2 18-29 1-5 3667 12.4664287
## 3 18-29 >= 6 358 1.2170661
## 4 18-29 All 78 0.2651708
## 5 30-39 0 24595 71.3933237
## 6 30-39 1-5 8150 23.6574746
## 7 30-39 >= 6 1322 3.8374456
## 8 30-39 All 383 1.1117562
## 9 40-49 0 24450 63.7715180
## 10 40-49 1-5 10597 27.6395409
## 11 40-49 >= 6 2492 6.4997392
## 12 40-49 All 801 2.0892019
## 13 50-59 0 27550 51.7341746
## 14 50-59 1-5 16824 31.5925863
## 15 50-59 >= 6 6100 11.4547537
## 16 50-59 All 2779 5.2184853
## 17 60-69 0 26068 41.2363958
## 18 60-69 1-5 22288 35.2568970
## 19 60-69 >= 6 9718 15.3726905
## 20 60-69 All 5142 8.1340167
## 21 70-79 0 13598 31.7799383
## 22 70-79 1-5 15391 35.9703655
## 23 70-79 >= 6 8527 19.9284846
## 24 70-79 All 5272 12.3212116
## 25 80+ 0 4747 26.2120375
## 26 80+ 1-5 6209 34.2849255
## 27 80+ >= 6 4251 23.4732192
## 28 80+ All 2903 16.0298178
RMVTETH and AGE
ggplot( )+
geom_bar(data = df2_AgeRmvteth,
aes(x = AGE,
y = Percentage,
fill = RMVTETH),
stat = "identity")+
labs(y = "Percent of respondents (%)",
x = "Age group") +
scale_fill_manual(values = col4) +
scale_y_continuous(breaks = seq(0, 100, 20)) +
themebar_stacked
RMVTETH and EDU
ggplot(BRFSS, aes(x= RMVTETH , group = EDU)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="IRMVTETH") +
facet_grid(~ EDU) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= RMVTETH , group = EMPL)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="IRMVTETH") +
facet_grid(~ EMPL) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= RMVTETH , group = INCOME)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="IRMVTETH") +
facet_grid(~ INCOME) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= RMVTETH , group = EXERpa30)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="RMVTETH") +
facet_grid(~ EXERpa30) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= RMVTETH , group = SMOKESTAT)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="IRMVTETH") +
facet_grid(~ SMOKESTAT) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(data = df2_AgeRmvteth, aes(x = AGE,
y = Percentage,
group = RMVTETH)) +
geom_line(linetype = "dashed",
aes(color = RMVTETH),
size = 0.5) +
geom_point(aes(color = RMVTETH),
size = 6) +
scale_color_manual(values = col4) +
labs(y = "Percent of respondents (%)",
x = "Age group") +
theme_point +
guides(color=guide_legend("Nb of removed teeth"))
H1.LASTDENTV
df_LASTDENTV <- BRFSS %>%
group_by(LASTDENTV) %>%
dplyr::summarise(Count = n()) %>%
mutate(LASTDENTVgroup = factor(LASTDENTV),
Percentage = round(Count*100/sum(Count),2),
label = percent(Percentage/100))
ggplot(df_LASTDENTV,
aes(x = LASTDENTV,
y = Percentage)) +
geom_bar(stat = "identity",
color = "darkgreen",
fill = "darkgreen",
width = 0.6) +
scale_y_continuous(breaks = seq(0,70,10)) +
geomtext_bar +
labs(x = "Last time visited dentist",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
df_RmvtethLastdentv <- BRFSS %>%
group_by(LASTDENTV, RMVTETH) %>%
dplyr::summarise(Count = n())
df_RmvtethLastdentv
## # A tibble: 16 x 3
## # Groups: LASTDENTV [4]
## LASTDENTV RMVTETH Count
## <fct> <ord> <int>
## 1 <1 0 110367
## 2 <1 1-5 60837
## 3 <1 >= 6 19637
## 4 <1 All 4618
## 5 1-<2 0 13766
## 6 1-<2 1-5 9053
## 7 1-<2 >= 6 4292
## 8 1-<2 All 2104
## 9 2-<5 0 10687
## 10 2-<5 1-5 7089
## 11 2-<5 >= 6 3942
## 12 2-<5 All 2771
## 13 5+ 0 11500
## 14 5+ 1-5 6147
## 15 5+ >= 6 4897
## 16 5+ All 7865
df2_RmvtethLastdentv <- ddply(df_RmvtethLastdentv,.(LASTDENTV),transform, Percentage = Count*100/sum(Count))
df2_RmvtethLastdentv
## LASTDENTV RMVTETH Count Percentage
## 1 <1 0 110367 56.465550
## 2 <1 1-5 60837 31.125198
## 3 <1 >= 6 19637 10.046608
## 4 <1 All 4618 2.362644
## 5 1-<2 0 13766 47.119630
## 6 1-<2 1-5 9053 30.987506
## 7 1-<2 >= 6 4292 14.691083
## 8 1-<2 All 2104 7.201780
## 9 2-<5 0 10687 43.640002
## 10 2-<5 1-5 7089 28.947691
## 11 2-<5 >= 6 3942 16.097023
## 12 2-<5 All 2771 11.315284
## 13 5+ 0 11500 37.817751
## 14 5+ 1-5 6147 20.214410
## 15 5+ >= 6 4897 16.103785
## 16 5+ All 7865 25.864053
ggplot( )+
geom_bar(data = df2_RmvtethLastdentv,
aes(x = LASTDENTV,
y = Percentage,
fill = RMVTETH),
stat = "identity")+
labs(y = "Percent of respondents (%)",
x = "Last time visited dentist") +
scale_fill_manual(values = col4) +
scale_y_continuous(breaks = seq(0, 100, 10)) +
themebar_stacked
ggplot(data = df2_RmvtethLastdentv, aes(x = LASTDENTV,
y = Percentage,
group = RMVTETH)) +
geom_line(linetype = "dashed",
aes(color = RMVTETH),
size = 0.5) +
geom_point(aes(color = RMVTETH),
size = 6) +
scale_color_manual(values = col4) +
labs(y = "Percent of respondents (%)", x = "Last time visited dental clinic") +
theme_point +
guides(color=guide_legend("Nb of removed teeth"))
Visiting dentist and income
ggplot(BRFSS, aes(x= LASTDENTV , group = INCOME)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="IRMVTETH") +
facet_grid(~ INCOME) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= LASTDENTV, group = EXERpa30)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="IRMVTETH") +
facet_grid(~ EXERpa30) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= LASTDENTV, group = HLTHPLN)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="RMVTETH") +
facet_grid(~ HLTHPLN) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= LASTDENTV, group = MEDICOST)) + # teeth removed by age
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill="RMVTETH") +
facet_grid(~ MEDICOST) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
df_IncomeLastdentv <- BRFSS %>%
group_by(LASTDENTV, INCOME) %>%
dplyr::summarise(Count = n())
df2_IncomeLastdentv <- ddply(df_IncomeLastdentv,.(LASTDENTV),transform, Percentage = Count*100/sum(Count))
df2_IncomeLastdentv
## LASTDENTV INCOME Count Percentage
## 1 <1 <$20K 18372 9.399414
## 2 <1 $20K-<$35K 29558 15.122353
## 3 <1 $35K-<$50K 26273 13.441694
## 4 <1 $50K-<$75K 35474 18.149075
## 5 <1 $75K+ 85782 43.887465
## 6 1-<2 <$20K 5811 19.890467
## 7 1-<2 $20K-<$35K 6872 23.522163
## 8 1-<2 $35K-<$50K 4488 15.361972
## 9 1-<2 $50K-<$75K 4558 15.601575
## 10 1-<2 $75K+ 7486 25.623823
## 11 2-<5 <$20K 5836 23.831108
## 12 2-<5 $20K-<$35K 6277 25.631916
## 13 2-<5 $35K-<$50K 3705 15.129242
## 14 2-<5 $50K-<$75K 3621 14.786231
## 15 2-<5 $75K+ 5050 20.621504
## 16 5+ <$20K 10005 32.901444
## 17 5+ $20K-<$35K 8839 29.067053
## 18 5+ $35K-<$50K 4211 13.847874
## 19 5+ $50K-<$75K 3474 11.424249
## 20 5+ $75K+ 3880 12.759380
ggplot( )+
geom_bar(data = df2_IncomeLastdentv,
aes(x = LASTDENTV,
y = Percentage,
fill = INCOME),
stat = "identity")+
labs(y = "Percent of respondents (%)",
x = "Last time visited dentist") +
scale_fill_manual(values = col5) +
scale_y_continuous(breaks = seq(0, 100, 10)) +
themebar_stacked
df3_IncomeLastdentv <- ddply(df_IncomeLastdentv,.(INCOME),transform, Percentage = Count*100/sum(Count))
df3_IncomeLastdentv
## LASTDENTV INCOME Count Percentage
## 1 <1 <$20K 18372 45.902459
## 2 1-<2 <$20K 5811 14.518789
## 3 2-<5 <$20K 5836 14.581251
## 4 5+ <$20K 10005 24.997501
## 5 <1 $20K-<$35K 29558 57.342956
## 6 1-<2 $20K-<$35K 6872 13.331781
## 7 2-<5 $20K-<$35K 6277 12.177473
## 8 5+ $20K-<$35K 8839 17.147790
## 9 <1 $35K-<$50K 26273 67.929260
## 10 1-<2 $35K-<$50K 4488 11.603796
## 11 2-<5 $35K-<$50K 3705 9.579337
## 12 5+ $35K-<$50K 4211 10.887608
## 13 <1 $50K-<$75K 35474 75.273198
## 14 1-<2 $50K-<$75K 4558 9.671738
## 15 2-<5 $50K-<$75K 3621 7.683494
## 16 5+ $50K-<$75K 3474 7.371570
## 17 <1 $75K+ 85782 83.937063
## 18 1-<2 $75K+ 7486 7.324997
## 19 2-<5 $75K+ 5050 4.941388
## 20 5+ $75K+ 3880 3.796552
ggplot( )+
geom_bar(data = df3_IncomeLastdentv,
aes(x = INCOME ,
y = Percentage,
fill = LASTDENTV),
stat = "identity")+
labs(y = "Percent of respondents (%)",
x = "Income bracket") +
scale_fill_manual(values = col4) +
scale_y_continuous(breaks = seq(0, 100, 10)) +
themebar_stacked
ggplot(data = df3_IncomeLastdentv,
aes(x = INCOME,
y = Percentage,
group = LASTDENTV)) +
geom_line(linetype = "dashed",
aes(color = LASTDENTV),
size = 0.5) +
geom_point(aes(color = LASTDENTV),
size = 6) +
scale_color_manual(values = col5) +
labs(x = "Income range",
y = "Percent of respondents (%)") +
theme_point +
guides(color=guide_legend("Last time visited dentist"))
Last than 1 year — more frequent than from 1 to 2 year higher income - visit dentist more
df_LASTDENTVRACE <- BRFSS %>%
select(LASTDENTV, RACE) %>%
filter(RACE == "White" | RACE == "Black"| RACE == "Hispanic" | RACE == "Asian") %>%
group_by(LASTDENTV, RACE) %>%
dplyr::summarise(Count = n())
df2_LASTDENTVRACE <- ddply(df_LASTDENTVRACE,.(RACE),transform, Percentage = round(Count*100/sum(Count),2))
df2_LASTDENTVRACE
## LASTDENTV RACE Count Percentage
## 1 <1 Asian 4114 72.56
## 2 1-<2 Asian 661 11.66
## 3 2-<5 Asian 468 8.25
## 4 5+ Asian 427 7.53
## 5 <1 Black 13113 60.70
## 6 1-<2 Black 3242 15.01
## 7 2-<5 Black 2509 11.61
## 8 5+ Black 2738 12.67
## 9 <1 Hispanic 11553 61.72
## 10 1-<2 Hispanic 2741 14.64
## 11 2-<5 Hispanic 2176 11.63
## 12 5+ Hispanic 2247 12.01
## 13 <1 White 158845 71.95
## 14 1-<2 White 20871 9.45
## 15 2-<5 White 17920 8.12
## 16 5+ White 23125 10.48
ggplot(df2_LASTDENTVRACE,
aes(x = LASTDENTV,
y = Percentage,
fill = LASTDENTV)) +
geom_bar(stat = "identity",
width = 0.75) +
scale_fill_manual(values= c("blue", "cyan4", "cyan3","cyan2", "red")) +
scale_y_continuous(breaks = seq(0,100,20)) +
geomtext_bar +
facet_wrap(~ RACE, nrow = 2) +
labs(x = "Last time visited dentist (Year)",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
G. CHRONIC HEALTH CONDITIONS
geombar_CHC <- geom_bar(aes(y = (..count..)/sum(..count..)),
fill = "firebrick4",
alpha = 0.9,
width = 0.5)
geomtext_CHC <- geom_text(size = 4,
color= "blue",
aes(y = ((..count..)/sum(..count..)),
label = scales::percent((..count..)/sum(..count..))),
stat = "count",
vjust = -0.5)
G1.ARTHRITIS
ggplot(BRFSS, aes(x = ARTHRITIS)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent) +
labs(x = "Arthritis?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
G2. PULMOND chronic obstructive pulmonary disease, C.O.P.D., emphysema or chronic bronchitis?
ggplot(BRFSS, aes(x = PULMOND)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent)+
labs(x = "PULMOND?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
G3.ASTHMA
ggplot(BRFSS, aes(x = ASTHMA)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent)+
labs(x = "ASTHMA?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
G4.DEPRESSION
ggplot(BRFSS, aes(x = DEPRESSION)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent)+
labs(x = "DEPRESSION?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
G5.KIDNEY
ggplot(BRFSS, aes(x = KIDNEY)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent)+
labs(x = "DEPRESSION?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
G6. DIABETE
ggplot(BRFSS, aes(x = DIABETE)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent)+
labs(x = "DEPRESSION?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
DIABETE and BMI and EXERCISE
ggplot(BRFSS, aes(x = DIABETE, group = BMICAT)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.6) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col2) +
labs(y = "Percent of respondents", fill = "EDIABETE") +
facet_grid(~ BMICAT) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
ggplot(BRFSS, aes(x = BMICAT, group = DIABETE)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat = "count",
width = 0.6) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat = "count",
color = "black",
vjust = -.25) +
scale_fill_manual(values= col4) +
labs(y = "Percent of respondents", fill = "BMICAT") +
facet_grid(~ DIABETE) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
library(ggpubr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
## The following object is masked from 'package:ggimage':
##
## theme_transparent
theme_boxplot <- theme(text = element_text(size = 12),
axis.text = element_text(size = 12),
axis.title = element_text(size = 12,face="bold"),
plot.caption = element_text(color = "grey",size = 12, face = "italic"),
strip.text = element_text(size = 12, color = "blue", face = "bold.italic"),
strip.background = element_rect(fill = "slategray1"),
legend.position = "none")
ggboxplot(BRFSS,
x = "DIABETE",
y = "SLEPTIM1",
fill = "DIABETE",
ylab = "Sleep duration (hours)",
xlab = "DIABETE status") +
theme_boxplot
# Compute the analysis of variance
diabete.aov <- aov(SLEPTIM1 ~ DIABETE, data = BRFSS)
# Summary of the analysis
summary(diabete.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## DIABETE 1 1 0.5602 0.333 0.564
## Residuals 279570 469878 1.6807
As the p-value is higher than the significance level 0.05, we can conclude that there are NO significant differences between the groups
TukeyHSD(diabete.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SLEPTIM1 ~ DIABETE, data = BRFSS)
##
## $DIABETE
## diff lwr upr p adj
## Yes-No 0.004077576 -0.009764728 0.01791988 0.5637007
# 1. Homogeneity of variances
plot(diabete.aov, 1)
In the plot above, there is no evident relationships between residuals and fitted values (the mean of each groups), which is good. So, we can assume the homogeneity of variances.
plot(diabete.aov, 2)
G7. CANCER
ggplot(BRFSS, aes(x = CANCER)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent)+
labs(x = "DEPRESSION?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
G7. STROKE
ggplot(BRFSS, aes(x = STROKE)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent)+
labs(x = "STROKE?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
ChronicHC <- data.frame(
CHC = c("ARTHRITIS",
'ASTHMA',
"CANCER",
"DEPRESSION",
"DIABETE",
"KIDNEY",
"PLUMOND",
"STROKE",
"MICHD"),
Percentage = c(33.40, 13.82, 16.97, 18.81, 14.02, 3.59, 7.95, 3.94, 8.80 ))
ChronicHC
## CHC Percentage
## 1 ARTHRITIS 33.40
## 2 ASTHMA 13.82
## 3 CANCER 16.97
## 4 DEPRESSION 18.81
## 5 DIABETE 14.02
## 6 KIDNEY 3.59
## 7 PLUMOND 7.95
## 8 STROKE 3.94
## 9 MICHD 8.80
col8 <- c("#0072BB", "#FF4C3B","#FFD034","#51A39D","#06425C", "#C6C8CA","#814374", "red", "salmon")
ggplot(data = ChronicHC, aes(x= reorder(CHC, -Percentage),
y = Percentage,
fill = as.factor(Percentage))) +
geom_bar(stat = "identity",
width = 0.60) +
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.25,
size = 4.5,
color = "black") +
scale_fill_manual(values = col8) +
theme(legend.position="none") +
labs(x = "Chronic health condition",
y = "Percent of Respondents (%)",
caption = "Data source: BRFSS2018")+
theme_bar
MICHD*
ggplot(BRFSS, aes(x = MICHD)) +
geombar_CHC +
geomtext_CHC +
scale_y_continuous(labels = percent)+
labs(x = "DEPRESSION?",
y = "Percent of Respondents",
caption = "Data source: BRFSS2018") +
theme_bar
df_CHC <- data.frame(
ChronicHLTHcond = c("CANCER","CANCER",
"DIABETE","DIABETE",
"KIDNEY","KIDNEY",
"DEPRESSION","DEPRESSION",
"ASTHMA","ASTHMA",
"PULMOND","PULMOND",
"ARTHRITIS","ARTHRITIS",
"STROKE","STROKE",
"MICHD","MICHD"),
Status = rep(c("NO", "YES"),9),
Percentage = c(83, 17,
86, 14,
96, 4,
81, 19,
86, 14,
92, 8,
67, 33,
96, 4,
91, 9))
df_CHC %>%
pander()
| ChronicHLTHcond | Status | Percentage |
|---|---|---|
| CANCER | NO | 83 |
| CANCER | YES | 17 |
| DIABETE | NO | 86 |
| DIABETE | YES | 14 |
| KIDNEY | NO | 96 |
| KIDNEY | YES | 4 |
| DEPRESSION | NO | 81 |
| DEPRESSION | YES | 19 |
| ASTHMA | NO | 86 |
| ASTHMA | YES | 14 |
| PULMOND | NO | 92 |
| PULMOND | YES | 8 |
| ARTHRITIS | NO | 67 |
| ARTHRITIS | YES | 33 |
| STROKE | NO | 96 |
| STROKE | YES | 4 |
| MICHD | NO | 91 |
| MICHD | YES | 9 |
df2_CHC <- df_CHC %>%
mutate(lab_ypos = cumsum(Percentage/10) - 0.5* Percentage/10)
df2_CHC
## ChronicHLTHcond Status Percentage lab_ypos
## 1 CANCER NO 83 4.15
## 2 CANCER YES 17 9.15
## 3 DIABETE NO 86 14.30
## 4 DIABETE YES 14 19.30
## 5 KIDNEY NO 96 24.80
## 6 KIDNEY YES 4 29.80
## 7 DEPRESSION NO 81 34.05
## 8 DEPRESSION YES 19 39.05
## 9 ASTHMA NO 86 44.30
## 10 ASTHMA YES 14 49.30
## 11 PULMOND NO 92 54.60
## 12 PULMOND YES 8 59.60
## 13 ARTHRITIS NO 67 63.35
## 14 ARTHRITIS YES 33 68.35
## 15 STROKE NO 96 74.80
## 16 STROKE YES 4 79.80
## 17 MICHD NO 91 84.55
## 18 MICHD YES 9 89.55
ggplot(df2_CHC,
aes(x = ChronicHLTHcond,
y = Percentage,
fill = Status)) +
geom_bar(position = "dodge",
stat = "identity")+
geom_text(aes(label = paste0(Percentage,"%")),
position = position_dodge(width=0.9),
vjust = -0.25,
size = 5,
color = "black") +
scale_fill_manual(values=c("#5F9EA0", "#E1B378")) +
labs(x = "Chronic Healthe Condition",
y = "Percent of Respondents (%)",
caption = "Data source: BRFSS2018") +
theme_bar
BRFSS %>%
group_by(CANCER, KIDNEY, ASTHMA, ARTHRITIS, PULMOND, DEPRESSION, DIABETE, STROKE, MICHD) %>%
dplyr::count() %>%
head(20)
## # A tibble: 20 x 10
## # Groups: CANCER, KIDNEY, ASTHMA, ARTHRITIS, PULMOND, DEPRESSION, DIABETE,
## # STROKE, MICHD [20]
## CANCER KIDNEY ASTHMA ARTHRITIS PULMOND DEPRESSION DIABETE STROKE MICHD n
## <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <int>
## 1 No No No No No No No No No 105809
## 2 No No No No No No No No Yes 2988
## 3 No No No No No No No Yes No 1080
## 4 No No No No No No No Yes Yes 340
## 5 No No No No No No Yes No No 8738
## 6 No No No No No No Yes No Yes 1000
## 7 No No No No No No Yes Yes No 350
## 8 No No No No No No Yes Yes Yes 161
## 9 No No No No No Yes No No No 15038
## 10 No No No No No Yes No No Yes 445
## 11 No No No No No Yes No Yes No 247
## 12 No No No No No Yes No Yes Yes 64
## 13 No No No No No Yes Yes No No 1490
## 14 No No No No No Yes Yes No Yes 170
## 15 No No No No No Yes Yes Yes No 84
## 16 No No No No No Yes Yes Yes Yes 51
## 17 No No No No Yes No No No No 1887
## 18 No No No No Yes No No No Yes 383
## 19 No No No No Yes No No Yes No 71
## 20 No No No No Yes No No Yes Yes 55
106927 people are CHC free 279966 - 106927 = 173039 (61.8%) people have at least one chronic health condition 43 people have all the CHCs (8)
df3_CHC <- BRFSS %>%
mutate(Cancer = case_when(CANCER == "Yes" ~ 1,
CANCER == "No" ~ 0),
Kidney = case_when(KIDNEY == "Yes" ~ 1,
KIDNEY == "No" ~ 0),
Asthma = case_when(ASTHMA == "Yes" ~ 1,
ASTHMA == "No" ~ 0),
Arthritis = case_when(ARTHRITIS == "Yes" ~ 1,
ARTHRITIS == "No" ~ 0),
Pulmond = case_when(PULMOND == "Yes" ~ 1,
PULMOND == "No" ~ 0),
Depression = case_when(DEPRESSION == "Yes" ~ 1,
DEPRESSION == "No" ~ 0),
Diabete = case_when(DIABETE == "Yes" ~ 1,
DIABETE == "No" ~ 0),
Stroke = case_when(STROKE == "Yes" ~ 1,
STROKE == "No" ~ 0),
Michd = case_when(MICHD == "Yes" ~ 1,
MICHD == "No" ~ 0),
)
Take only column with numeric values
df4_CHC <- df3_CHC %>%
select(Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete, Stroke,Michd)
df5_CHC <- df4_CHC %>%
group_by(Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete, Stroke, Michd) %>%
dplyr::count(name = "subtotal_case")
head(df5_CHC,30)
## # A tibble: 30 x 10
## # Groups: Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete,
## # Stroke, Michd [30]
## Cancer Kidney Asthma Arthritis Pulmond Depression Diabete Stroke Michd
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 1
## 3 0 0 0 0 0 0 0 1 0
## 4 0 0 0 0 0 0 0 1 1
## 5 0 0 0 0 0 0 1 0 0
## 6 0 0 0 0 0 0 1 0 1
## 7 0 0 0 0 0 0 1 1 0
## 8 0 0 0 0 0 0 1 1 1
## 9 0 0 0 0 0 1 0 0 0
## 10 0 0 0 0 0 1 0 0 1
## # ... with 20 more rows, and 1 more variable: subtotal_case <int>
tail(df5_CHC)
## # A tibble: 6 x 10
## # Groups: Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete,
## # Stroke, Michd [6]
## Cancer Kidney Asthma Arthritis Pulmond Depression Diabete Stroke Michd
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 1 1 1 1 0 1 0
## 2 1 1 1 1 1 1 0 1 1
## 3 1 1 1 1 1 1 1 0 0
## 4 1 1 1 1 1 1 1 0 1
## 5 1 1 1 1 1 1 1 1 0
## 6 1 1 1 1 1 1 1 1 1
## # ... with 1 more variable: subtotal_case <int>
df5_CHC$NbofCHC <- rowSums(df5_CHC[,c(1:9)])
df6_CHC <- df5_CHC
df6_CHC
## # A tibble: 507 x 11
## # Groups: Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete,
## # Stroke, Michd [507]
## Cancer Kidney Asthma Arthritis Pulmond Depression Diabete Stroke Michd
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 1
## 3 0 0 0 0 0 0 0 1 0
## 4 0 0 0 0 0 0 0 1 1
## 5 0 0 0 0 0 0 1 0 0
## 6 0 0 0 0 0 0 1 0 1
## 7 0 0 0 0 0 0 1 1 0
## 8 0 0 0 0 0 0 1 1 1
## 9 0 0 0 0 0 1 0 0 0
## 10 0 0 0 0 0 1 0 0 1
## # ... with 497 more rows, and 2 more variables: subtotal_case <int>,
## # NbofCHC <dbl>
All the people living with at one chronic health condition
df6_CHC %>%
filter(NbofCHC == 1)
## # A tibble: 9 x 11
## # Groups: Cancer, Kidney, Asthma, Arthritis, Pulmond, Depression, Diabete,
## # Stroke, Michd [9]
## Cancer Kidney Asthma Arthritis Pulmond Depression Diabete Stroke Michd
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0 0 0 0 0 1
## 2 0 0 0 0 0 0 0 1 0
## 3 0 0 0 0 0 0 1 0 0
## 4 0 0 0 0 0 1 0 0 0
## 5 0 0 0 0 1 0 0 0 0
## 6 0 0 0 1 0 0 0 0 0
## 7 0 0 1 0 0 0 0 0 0
## 8 0 1 0 0 0 0 0 0 0
## 9 1 0 0 0 0 0 0 0 0
## # ... with 2 more variables: subtotal_case <int>, NbofCHC <dbl>
df6_CHC %>%
group_by(NbofCHC) %>%
dplyr::summarise(totalcase = sum(subtotal_case))
## # A tibble: 10 x 2
## NbofCHC totalcase
## <dbl> <int>
## 1 0 105809
## 2 1 81744
## 3 2 48701
## 4 3 24441
## 5 4 11351
## 6 5 4861
## 7 6 1886
## 8 7 617
## 9 8 135
## 10 9 27
df7_CHC <- df6_CHC %>%
group_by(NbofCHC) %>%
dplyr::summarise(Total_case = sum(subtotal_case))
df7_CHC
## # A tibble: 10 x 2
## NbofCHC Total_case
## <dbl> <int>
## 1 0 105809
## 2 1 81744
## 3 2 48701
## 4 3 24441
## 5 4 11351
## 6 5 4861
## 7 6 1886
## 8 7 617
## 9 8 135
## 10 9 27
ggplot(data = df7_CHC,
aes(x = NbofCHC,
y = Total_case,
fill = Total_case)) +
geom_bar(stat = "identity") +
scale_fill_gradient(low = "darkred", high = "yellow")+
geom_text(aes(label = Total_case),
vjust = -0.5,
color = "darkblue",
size = 4) +
theme_minimal() +
scale_y_continuous(limits = c(0,120000),
breaks = seq(0,120000, by = 20000))+
scale_x_continuous(breaks = seq(0,9,by = 1))+
labs(x = "Number of chronic health conditions",
y = "Frequency",
caption = "Data source: BRFSS2018") +
theme_bar
df8_CHC <- df7_CHC %>%
mutate(Percentage = round(Total_case*100/sum(Total_case),2))
df8_CHC
## # A tibble: 10 x 3
## NbofCHC Total_case Percentage
## <dbl> <int> <dbl>
## 1 0 105809 37.8
## 2 1 81744 29.2
## 3 2 48701 17.4
## 4 3 24441 8.74
## 5 4 11351 4.06
## 6 5 4861 1.74
## 7 6 1886 0.67
## 8 7 617 0.22
## 9 8 135 0.05
## 10 9 27 0.01
ggplot(data = df8_CHC,
aes(x = NbofCHC,
y = Percentage,
fill = Percentage)) +
scale_fill_gradient(low = "darkred", high = "yellow")+
geom_bar(stat = "identity")+
geom_text(aes(label = paste0(Percentage,"%", "\n", Total_case)),
vjust = -0.1,
color = "darkblue",
size = 4.5) +
scale_x_continuous(breaks = seq(0,9,by = 1))+
scale_y_continuous(limits = c(0,40)) +
theme_minimal() +
labs(x = "Number of chronic health conditions",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size=12),
axis.title = element_text(size=12,face="bold"),
plot.caption = element_text(color = "grey44",size = 12, face = "italic"),
legend.title = element_text(color = "blue", size = 12),
legend.text = element_text(size=12),
legend.position = "non")
Among American aged 18 and more, almost 1 out of every 3 (~ 33%) adults were overweight or/and obese while 1.6% were underweight, based on their measured BMI.
SLEEPTIM (numeric attribute and chronic health condtion) A. Sleep duration and ASTHMA
ggplot(BRFSS, aes(x= SLEPTIM1 , group = ASTHMA)) +
geom_bar(aes(y = ..prop.., fill = factor(..x..)),
stat="count",width = 1) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~CANCER) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
sleptime_asthma <- ggplot(BRFSS,
aes(x = ASTHMA,
y = SLEPTIM1,
fill = ASTHMA)) +
geom_boxplot() +
scale_fill_hue(l = 40, c = 35)+
labs( x = "Cancer status",
y = "Sleep duration (hours)",
caption = "Data source: BRFSS2018")
sleptime_asthma + theme_boxplot
Assumption 3. Do the two populations have the same variances? We’ll use F-test to test for homogeneity in variances. This can be performed with the function var.test() as follow:
res.ftest_ASTHMA <- var.test(SLEPTIM1 ~ ASTHMA, data = BRFSS)
res.ftest_ASTHMA
##
## F test to compare two variances
##
## data: SLEPTIM1 by ASTHMA
## F = 0.73867, num df = 240911, denom df = 38659, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7275073 0.7499429
## sample estimates:
## ratio of variances
## 0.7386712
res.ftest_ARTHRITIS <- var.test(SLEPTIM1 ~ ARTHRITIS, data = BRFSS, alternative = "two.sided")
res.ftest_ARTHRITIS
##
## F test to compare two variances
##
## data: SLEPTIM1 by ARTHRITIS
## F = 0.6884, num df = 186212, denom df = 93358, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6807799 0.6960839
## sample estimates:
## ratio of variances
## 0.6883965
res.ftest_CANCER <- var.test(SLEPTIM1 ~ CANCER, data = BRFSS, alternative = "two.sided")
res.ftest_CANCER
##
## F test to compare two variances
##
## data: SLEPTIM1 by CANCER
## F = 0.90813, num df = 232122, denom df = 47448, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8955097 0.9208742
## sample estimates:
## ratio of variances
## 0.908133
res.ftest_DEPRESSION <- var.test(SLEPTIM1 ~ DEPRESSION, data = BRFSS, alternative = "two.sided")
res.ftest_DEPRESSION
##
## F test to compare two variances
##
## data: SLEPTIM1 by DEPRESSION
## F = 0.57769, num df = 226976, denom df = 52594, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5699777 0.5854759
## sample estimates:
## ratio of variances
## 0.5776913
res.ftest_DIABETE <- var.test(SLEPTIM1 ~ DIABETE, data = BRFSS, alternative = "two.sided")
res.ftest_DIABETE
##
## F test to compare two variances
##
## data: SLEPTIM1 by DIABETE
## F = 0.6913, num df = 240382, denom df = 39188, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6809148 0.7017925
## sample estimates:
## ratio of variances
## 0.6913035
res.ftest_KIDNEY <- var.test(SLEPTIM1 ~ KIDNEY, data = BRFSS, alternative = "two.sided")
res.ftest_KIDNEY
##
## F test to compare two variances
##
## data: SLEPTIM1 by KIDNEY
## F = 0.62827, num df = 269521, denom df = 10049, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6107080 0.6460933
## sample estimates:
## ratio of variances
## 0.6282687
res.ftest_PULMOND <- var.test(SLEPTIM1 ~ PULMOND, data = BRFSS, alternative = "two.sided")
res.ftest_PULMOND
##
## F test to compare two variances
##
## data: SLEPTIM1 by PULMOND
## F = 0.54672, num df = 257356, denom df = 22214, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5361798 0.5573743
## sample estimates:
## ratio of variances
## 0.5467181
res.ftest_STROKE <- var.test(SLEPTIM1 ~ STROKE, data = BRFSS, alternative = "two.sided")
res.ftest_STROKE
##
## F test to compare two variances
##
## data: SLEPTIM1 by STROKE
## F = 0.55674, num df = 268546, denom df = 11024, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5418525 0.5718449
## sample estimates:
## ratio of variances
## 0.556741
res.ftest_MICHD <- var.test(SLEPTIM1 ~ MICHD, data = BRFSS)
res.ftest_MICHD
##
## F test to compare two variances
##
## data: SLEPTIM1 by MICHD
## F = 0.65222, num df = 254977, denom df = 24593, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.6402132 0.6643555
## sample estimates:
## ratio of variances
## 0.6522193
t.test(SLEPTIM1 ~ ASTHMA, data = BRFSS)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by ASTHMA
## t = 25.728, df = 48258, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1880149 0.2190244
## sample estimates:
## mean in group No mean in group Yes
## 7.046199 6.842680
All the F-test have p < 0.01 ==> reject the Null Hypotheses == accept the alternative hypothesis == There are difference between two variances of two groups. var.equal will set to be FALSE
t.test(SLEPTIM1 ~ ARTHRITIS, data = BRFSS, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by ARTHRITIS
## t = 12.489, df = 159401, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.05818593 0.07984851
## sample estimates:
## mean in group No mean in group Yes
## 7.041103 6.972086
t.test(SLEPTIM1 ~ CANCER, data = BRFSS, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by CANCER
## t = -25.076, df = 66232, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1820817 -0.1556816
## sample estimates:
## mean in group No mean in group Yes
## 6.989394 7.158275
t.test(SLEPTIM1 ~ DEPRESSION, data = BRFSS, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by DEPRESSION
## t = 25.771, df = 67338, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1765140 0.2055738
## sample estimates:
## mean in group No mean in group Yes
## 7.053997 6.862953
t.test(SLEPTIM1 ~ DIABETE, data = BRFSS, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by DIABETE
## t = -0.5059, df = 48419, p-value = 0.6129
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01987534 0.01172019
## sample estimates:
## mean in group No mean in group Yes
## 7.017485 7.021562
t.test(SLEPTIM1 ~ KIDNEY, alternative = "two.sided", var.equal = FALSE, data = BRFSS)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by KIDNEY
## t = 0.46426, df = 10525, p-value = 0.6425
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02443215 0.03959710
## sample estimates:
## mean in group No mean in group Yes
## 7.018329 7.010746
t.test(SLEPTIM1 ~ PULMOND, alternative = "two.sided", var.equal = FALSE, data = BRFSS)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by PULMOND
## t = 14.65, df = 24355, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1478766 0.1935567
## sample estimates:
## mean in group No mean in group Yes
## 7.031621 6.860905
t.test(SLEPTIM1 ~ STROKE, alternative = "two.sided", var.equal = FALSE, data = BRFSS)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by STROKE
## t = -3.1169, df = 11533, p-value = 0.001832
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.08366195 -0.01906121
## sample estimates:
## mean in group No mean in group Yes
## 7.016031 7.067392
t.test(SLEPTIM1 ~ MICHD, alternative = "two.sided", var.equal = FALSE, data = BRFSS)
##
## Welch Two Sample t-test
##
## data: SLEPTIM1 by MICHD
## t = -2.5889, df = 27774, p-value = 0.009634
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.046917246 -0.006485714
## sample estimates:
## mean in group No mean in group Yes
## 7.015707 7.042409
res.ftest <- var.test(SLEPTIM1 ~ CANCER, data = BRFSS)
res.ftest
##
## F test to compare two variances
##
## data: SLEPTIM1 by CANCER
## F = 0.90813, num df = 232122, denom df = 47448, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8955097 0.9208742
## sample estimates:
## ratio of variances
## 0.908133
There are significant difference between the variance of two sets of dsata. Therefore, we will use t-test with unequal variance.
ggplot(BRFSS,
aes(x = PULMOND,
y = SLEPTIM1)) +
geom_violin(fill = "cornflowerblue") +
geom_boxplot(width = .2,
fill = "orange",
outlier.color = "orange",
outlier.size = 2) +
labs(title = "Sleep duration distribution by PULMOND status")
ggplot(BRFSS,
aes(x = MICHD,
y = SLEPTIM1, fill = MICHD)) +
geom_violin(trim = FALSE,fill = "cornflowerblue") +
geom_boxplot(width = 0.1,
fill = "grey",
outlier.color = "grey",
outlier.size = 2) +
labs(title = "Sleep duration distribution by MICHD status")
library(ggforce)
Numerical Data Descriptive Statistics
ggplot(data = BRFSS, aes(SLEPTIM1)) +
geom_bar(stat= "count",
fill= "olivedrab4") +
coord_cartesian(clip = "off") +
labs( x = "Sleep duration (hours)",
y = "Frequency",
subtitle = "On average, how many hours of sleep do you get in a 24-hour period?",
caption = "Data source: BRFSS2018") +
scale_y_continuous(limits = c(0, 100000), expand = c(0, 0)) +
scale_x_continuous(limits = c(0, 15), expand = c(0, 0)) +
theme_bar
****BIVARIATE analysis 1****
ggplot(BRFSS, aes(x= SLEPTIM1 , group = CANCER)) +
geom_bar(aes(y = ..prop.., fill = factor(..x..)),
stat="count",width = 1) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~CANCER) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
Boxplot
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
## The following object is masked from 'package:ggimage':
##
## theme_nothing
library(ggplot2)
theme_boxplot <- theme(text = element_text(size = 12),
axis.text = element_text(size = 12),
axis.title = element_text(size = 12,face="bold"),
plot.caption = element_text(color = "grey",size = 12, face = "italic"),
strip.text = element_text(size = 12, color = "blue", face = "bold.italic"),
strip.background = element_rect(fill = "slategray1"),
legend.position = "none")
sleptime_cancer <- ggplot(BRFSS,
aes(x = CANCER,
y = SLEPTIM1,
fill = CANCER)) +
geom_boxplot() +
scale_fill_hue(l = 40, c = 35)+
labs( x = "Cancer status",
y = "Sleep duration (hours)",
caption = "Data source: BRFSS2018")
sleptime_cancer + theme_boxplot
BRFSS %>%
summarise(mean = mean(SLEPTIM1),
std = sd(SLEPTIM1),
median = median(SLEPTIM1))
## mean std median
## 1 7.018056 1.296423 7
Compare two means sample size
BRFSS %>%
group_by(CANCER) %>%
dplyr::count()
## # A tibble: 2 x 2
## # Groups: CANCER [2]
## CANCER n
## <fct> <int>
## 1 No 232123
## 2 Yes 47449
library(purrr)
BRFSS %>%
split(.$CANCER) %>%
map(summary) %>%
pander()
No:
| IDAY | MONTH | Region | AGE | GENDER |
|---|---|---|---|---|
| Min. : 1.00 | Oct : 22689 | Midwest :63979 | 18-29:28992 | Female:119985 |
| 1st Qu.: 8.00 | Mar : 21969 | Northeast:47722 | 30-39:33224 | Male :112138 |
| Median :15.00 | Nov : 20695 | South :68624 | 40-49:35697 | NA |
| Mean :14.97 | Fev : 20539 | West :51798 | 50-59:46174 | NA |
| 3rd Qu.:22.00 | July : 20391 | NA | 60-69:49234 | NA |
| Max. :31.00 | Sept : 19978 | NA | 70-79:28095 | NA |
| NA | (Other):105862 | NA | 80+ :10707 | NA |
| RACE | RELP | EDU | EMPL |
|---|---|---|---|
| AIAN : 4106 | In relationship:133394 | <8 : 3961 | Employed :138489 |
| Asian : 5407 | Living apart : 36733 | 9-12 :69263 | Unemployed : 22780 |
| Black : 19836 | Never married : 41958 | 12-15:65214 | Occupations: 16620 |
| Hispanic: 17608 | Widowed : 20038 | 16+ :93685 | Retired : 54234 |
| White :177976 | NA | NA | NA |
| Others : 7190 | NA | NA | NA |
| NA | NA | NA | NA |
| INCOME | RENTHOME | COMMUN | ALCpa30 |
|---|---|---|---|
| <$20K :33493 | Other: 9500 | Rural : 34605 | 0 :100320 |
| $20K-<$35K:42371 | Own :163460 | Suburban: 37185 | 1-9 : 84939 |
| $35K-<$50K:31515 | Rent : 59163 | Urban :160333 | 10-30: 46864 |
| $50K-<$75K:38889 | NA | NA | NA |
| $75K+ :85855 | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| SMOKESTAT | EXERpa30 | SLEPTIM1 | GENHLTH |
|---|---|---|---|
| Daily smoker : 25319 | No : 50220 | Min. : 3.000 | Poor : 8402 |
| Someday smoker: 10315 | Yes:181903 | 1st Qu.: 6.000 | Fair :26484 |
| Former smoker : 62258 | NA | Median : 7.000 | Good :70948 |
| Never smoker :134231 | NA | Mean : 6.989 | Very Good:83001 |
| NA | NA | 3rd Qu.: 8.000 | Excellent:43288 |
| NA | NA | Max. :12.000 | NA |
| NA | NA | NA | NA |
| MENTHLTHpa30 | PHYSHLTHpa30 | HLTHPLN | CHECKUP | MEDICOST |
|---|---|---|---|---|
| 0 :152199 | 0 :150607 | No : 18816 | <1 :180709 | No :208350 |
| 1-9 : 47119 | 1-9 : 49961 | Yes:213307 | 1-<2: 24684 | Yes: 23773 |
| 10-30: 32805 | 10-30: 31555 | NA | 2-<5: 13149 | NA |
| NA | NA | NA | 5+ : 13581 | NA |
| NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA |
| ARTHRITIS | ASTHMA | CANCER | DEPRESSION | DIABETE | KIDNEY |
|---|---|---|---|---|---|
| No :162708 | No :200840 | No :232123 | No :189133 | No :201941 | No :225676 |
| Yes: 69415 | Yes: 31283 | Yes: 0 | Yes: 42990 | Yes: 30182 | Yes: 6447 |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| PULMOND | STROKE | LASTDENTV | RMVTETH | BMICAT |
|---|---|---|---|---|
| No :216198 | No :224444 | <1 :160426 | 0 :127220 | Underweight: 3294 |
| Yes: 15925 | Yes: 7679 | 1-<2: 25272 | 1-5 : 67111 | Normal :69003 |
| NA | NA | 2-<5: 21039 | >= 6: 24800 | Overweight :83349 |
| NA | NA | 5+ : 25386 | All : 12992 | Obese :76477 |
| NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA |
| MICHD |
|---|
| No :215121 |
| Yes: 17002 |
| NA |
| NA |
| NA |
| NA |
| NA |
Yes:
| IDAY | MONTH | Region | AGE | GENDER |
|---|---|---|---|---|
| Min. : 1.0 | Oct : 4588 | Midwest :12289 | 18-29: 423 | Female:25919 |
| 1st Qu.: 7.0 | July : 4370 | Northeast: 9566 | 30-39: 1226 | Male :21530 |
| Median :14.0 | Mar : 4245 | South :15232 | 40-49: 2643 | NA |
| Mean :14.5 | Nov : 4238 | West :10362 | 50-59: 7079 | NA |
| 3rd Qu.:21.0 | Aug : 4226 | NA | 60-69:13982 | NA |
| Max. :31.0 | Sept : 4144 | NA | 70-79:14693 | NA |
| NA | (Other):21638 | NA | 80+ : 7403 | NA |
| RACE | RELP | EDU | EMPL |
|---|---|---|---|
| AIAN : 546 | In relationship:27619 | <8 : 633 | Employed :15675 |
| Asian : 263 | Living apart : 7998 | 9-12 :12970 | Unemployed : 4705 |
| Black : 1766 | Never married : 3191 | 12-15:13045 | Occupations: 1787 |
| Hispanic: 1109 | Widowed : 8641 | 16+ :20801 | Retired :25282 |
| White :42785 | NA | NA | NA |
| Others : 980 | NA | NA | NA |
| NA | NA | NA | NA |
| INCOME | RENTHOME | COMMUN | ALCpa30 |
|---|---|---|---|
| <$20K : 6531 | Other: 1055 | Rural : 7736 | 0 :22759 |
| $20K-<$35K: 9175 | Own :39722 | Suburban: 7873 | 1-9 :14036 |
| $35K-<$50K: 7162 | Rent : 6672 | Urban :31840 | 10-30:10654 |
| $50K-<$75K: 8238 | NA | NA | NA |
| $75K+ :16343 | NA | NA | NA |
| NA | NA | NA | NA |
| NA | NA | NA | NA |
| SMOKESTAT | EXERpa30 | SLEPTIM1 | GENHLTH |
|---|---|---|---|
| Daily smoker : 3928 | No :12246 | Min. : 3.000 | Poor : 3787 |
| Someday smoker: 1393 | Yes:35203 | 1st Qu.: 6.000 | Fair : 7994 |
| Former smoker :18284 | NA | Median : 7.000 | Good :15305 |
| Never smoker :23844 | NA | Mean : 7.158 | Very Good:14825 |
| NA | NA | 3rd Qu.: 8.000 | Excellent: 5538 |
| NA | NA | Max. :12.000 | NA |
| NA | NA | NA | NA |
| MENTHLTHpa30 | PHYSHLTHpa30 | HLTHPLN | CHECKUP | MEDICOST | ARTHRITIS |
|---|---|---|---|---|---|
| 0 :33279 | 0 :26807 | No : 1247 | <1 :42609 | No :43986 | No :23505 |
| 1-9 : 7920 | 1-9 : 9998 | Yes:46202 | 1-<2: 2891 | Yes: 3463 | Yes:23944 |
| 10-30: 6250 | 10-30:10644 | NA | 2-<5: 1026 | NA | NA |
| NA | NA | NA | 5+ : 923 | NA | NA |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| ASTHMA | CANCER | DEPRESSION | DIABETE | KIDNEY | PULMOND |
|---|---|---|---|---|---|
| No :40072 | No : 0 | No :37844 | No :38442 | No :43846 | No :41159 |
| Yes: 7377 | Yes:47449 | Yes: 9605 | Yes: 9007 | Yes: 3603 | Yes: 6290 |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA | NA |
| STROKE | LASTDENTV | RMVTETH | BMICAT | MICHD |
|---|---|---|---|---|
| No :44103 | <1 :35033 | 0 :19100 | Underweight: 716 | No :39857 |
| Yes: 3346 | 1-<2: 3943 | 1-5 :16015 | Normal :14069 | Yes: 7592 |
| NA | 2-<5: 3450 | >= 6: 7968 | Overweight :17916 | NA |
| NA | 5+ : 5023 | All : 4366 | Obese :14748 | NA |
| NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA |
| NA | NA | NA | NA | NA |
The confident interval does not contain zero == > reject H0 ==> The difference statistically significant (Estimate for difference = 7.14- 6.98 = 0.16 hours= 9.6 minutes)
sleptime_Arthritis <- ggplot(BRFSS,
aes(x = ARTHRITIS,
y = SLEPTIM1,
fill = ARTHRITIS)) +
geom_boxplot() +
scale_fill_hue(l = 40, c = 35)+
labs(x = "Arthritis status",
y = "Sleep duration (hours)",
caption = "Data source: BRFSS2018")
sleptime_Arthritis + theme_boxplot
sleptime_pulmond <- ggplot(BRFSS,
aes(x = PULMOND,
y = SLEPTIM1,
fill = PULMOND)) +
geom_boxplot() +
scale_fill_hue(l = 40, c = 35)+
labs(x = "Pulmonary disease status",
y = "Sleep duration (hours)",
caption = "Data source: BRFSS2018")
sleptime_pulmond + theme_boxplot
sleptime_ASTHMA <- ggplot(BRFSS,
aes(x = ASTHMA,
y = SLEPTIM1,
fill = ASTHMA)) +
geom_boxplot() +
scale_fill_hue(l = 40, c = 35)+
labs(x = "Asthma status",
y = "Sleep duration (hours)",
caption = "Data source: BRFSS2018")
sleptime_ASTHMA + theme_boxplot
sleptime_DEPRESSION <- ggplot(BRFSS,
aes(x = DEPRESSION,
y = SLEPTIM1,
fill = DEPRESSION)) +
geom_boxplot() +
scale_fill_hue(l = 40, c = 35)+
labs(x = "Depressive disorder status",
y = "Sleep duration (hours)",
caption = "Data source: BRFSS2018")
sleptime_DEPRESSION + theme_boxplot
SLEPTIME and other risk factors (multiple levels)
BRFSS %>%
group_by(AGE) %>%
dplyr::summarise(count = n(),
meanslpt = mean(SLEPTIM1),
stdslpt = sd(SLEPTIM1))
## # A tibble: 7 x 4
## AGE count meanslpt stdslpt
## <fct> <int> <dbl> <dbl>
## 1 18-29 29415 6.92 1.27
## 2 30-39 34450 6.79 1.24
## 3 40-49 38340 6.82 1.25
## 4 50-59 53253 6.87 1.29
## 5 60-69 63216 7.09 1.28
## 6 70-79 42788 7.32 1.28
## 7 80+ 18110 7.50 1.40
ggboxplot(BRFSS,
x = "AGE",
y = "SLEPTIM1",
fill = "AGE",
ylab = "Sleep duration (hours)",
xlab = "Age group") +
theme_boxplot
# Compute the analysis of variance
age.aov <- aov(SLEPTIM1 ~ AGE, data = BRFSS)
# Summary of the analysis
summary(age.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## AGE 6 13090 2181.7 1335 <2e-16 ***
## Residuals 279565 456789 1.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As the p-value is less than the significance level 0.05, we can conclude that there are significant differences between the groups
TukeyHSD(age.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SLEPTIM1 ~ AGE, data = BRFSS)
##
## $AGE
## diff lwr upr p adj
## 30-39-18-29 -0.13357706 -0.163495798 -0.10365832 0.0000000
## 40-49-18-29 -0.09810146 -0.127312774 -0.06889015 0.0000000
## 50-59-18-29 -0.05324172 -0.080619810 -0.02586364 0.0000002
## 60-69-18-29 0.16682431 0.140224952 0.19342366 0.0000000
## 70-79-18-29 0.39362814 0.365083602 0.42217268 0.0000000
## 80+-18-29 0.58132152 0.545724945 0.61691810 0.0000000
## 40-49-30-39 0.03547560 0.007498282 0.06345291 0.0035015
## 50-59-30-39 0.08033534 0.054277917 0.10639276 0.0000000
## 60-69-30-39 0.30040137 0.275163396 0.32563934 0.0000000
## 70-79-30-39 0.52720520 0.499924801 0.55448560 0.0000000
## 80+-30-39 0.71489858 0.680307459 0.74948971 0.0000000
## 50-59-40-49 0.04485974 0.019617732 0.07010175 0.0000033
## 60-69-40-49 0.26492577 0.240530586 0.28932095 0.0000000
## 70-79-40-49 0.49172961 0.465226962 0.51823225 0.0000000
## 80+-40-49 0.67942299 0.645441878 0.71340410 0.0000000
## 60-69-50-59 0.22006603 0.197898848 0.24223321 0.0000000
## 70-79-50-59 0.44686987 0.422402559 0.47133717 0.0000000
## 80+-50-59 0.63456325 0.602144507 0.66698199 0.0000000
## 70-79-60-69 0.22680384 0.203211145 0.25039653 0.0000000
## 80+-60-69 0.41449722 0.382733390 0.44626104 0.0000000
## 80+-70-79 0.18769338 0.154283715 0.22110305 0.0000000
# 1. Homogeneity of variances
plot(age.aov, 1)
In the plot above, there is no evident relationships between residuals and fitted values (the mean of each groups), which is good. So, we can assume the homogeneity of variances.
plot(age.aov, 2)
SLEPTIM and GENDER
ggboxplot(BRFSS,
x = "GENDER",
y = "SLEPTIM1",
fill = "GENDER",
ylab = "Sleep duration (hours)",
xlab = "Gender") +
theme_boxplot
t.test(BRFSS$SLEPTIM1~BRFSS$GENDER)
##
## Welch Two Sample t-test
##
## data: BRFSS$SLEPTIM1 by BRFSS$GENDER
## t = 7.4379, df = 278554, p-value = 1.026e-13
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.02685357 0.04606973
## sample estimates:
## mean in group Female mean in group Male
## 7.035489 6.999027
SLEPTIME and INCOME
ggboxplot(BRFSS,
x = "INCOME",
y = "SLEPTIM1",
fill = "INCOME",
ylab = "Sleep duration (hours)",
xlab = "Income range") +
theme_boxplot
BRFSS %>%
group_by(INCOME) %>%
dplyr::summarise(count = n(),
meanslpt = mean(SLEPTIM1),
stdslpt = sd(SLEPTIM1)) %>%
pander()
| INCOME | count | meanslpt | stdslpt |
|---|---|---|---|
| <$20K | 40024 | 6.934 | 1.678 |
| $20K-<$35K | 51546 | 7.023 | 1.435 |
| $35K-<$50K | 38677 | 7.042 | 1.289 |
| $50K-<$75K | 47127 | 7.04 | 1.193 |
| $75K+ | 102198 | 7.03 | 1.081 |
# Compute the analysis of variance
income.aov <- aov(SLEPTIM1 ~ INCOME, data = BRFSS)
# Summary of the analysis
summary(income.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## INCOME 4 342 85.50 50.91 <2e-16 ***
## Residuals 279567 469537 1.68
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
H0 : There is no difference between group Ha : A least one of them is different p < 0.01 ==> reject the null hypothesis = accept the Ha
TukeyHSD(income.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SLEPTIM1 ~ INCOME, data = BRFSS)
##
## $INCOME
## diff lwr upr p adj
## $20K-<$35K-<$20K 0.088680936 0.065129401 0.112232470 0.0000000
## $35K-<$50K-<$20K 0.107635460 0.082429467 0.132841454 0.0000000
## $50K-<$75K-<$20K 0.105910131 0.081880817 0.129939446 0.0000000
## $75K+-<$20K 0.095806543 0.074961525 0.116651561 0.0000000
## $35K-<$50K-$20K-<$35K 0.018954524 -0.004826748 0.042735797 0.1894985
## $50K-<$75K-$20K-<$35K 0.017229196 -0.005301112 0.039759503 0.2260634
## $75K+-$20K-<$35K 0.007125607 -0.011972078 0.026223293 0.8473075
## $50K-<$75K-$35K-<$50K -0.001725329 -0.025979856 0.022529198 0.9996844
## $75K+-$35K-<$50K -0.011828917 -0.032933157 0.009275322 0.5435353
## $75K+-$50K-<$75K -0.010103589 -0.029787459 0.009580282 0.6275639
***RACE and SLEPTIME
BRFSS %>%
group_by(RACE) %>%
dplyr::summarise(count = n(),
meanslpt = mean(SLEPTIM1),
stdslpt = sd(SLEPTIM1)) %>%
pander()
| RACE | count | meanslpt | stdslpt |
|---|---|---|---|
| AIAN | 4652 | 6.922 | 1.545 |
| Asian | 5670 | 6.768 | 1.175 |
| Black | 21602 | 6.836 | 1.548 |
| Hispanic | 18717 | 6.92 | 1.33 |
| White | 220761 | 7.062 | 1.252 |
| Others | 8170 | 6.779 | 1.47 |
ggboxplot(BRFSS,
x = "RACE",
y = "SLEPTIM1",
fill = "RACE",
ylab = "Sleep duration (hours)",
xlab = "Race") +
theme_boxplot
# Compute the analysis of variance
race.aov <- aov(SLEPTIM1 ~ RACE, data = BRFSS)
# Summary of the analysis
summary(race.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## RACE 5 2179 435.8 260.5 <2e-16 ***
## Residuals 279566 467700 1.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(race.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SLEPTIM1 ~ RACE, data = BRFSS)
##
## $RACE
## diff lwr upr p adj
## Asian-AIAN -0.153500116 -0.22641429 -0.080585946 0.0000000
## Black-AIAN -0.086137012 -0.14571314 -0.026560881 0.0005409
## Hispanic-AIAN -0.001895133 -0.06227936 0.058489090 0.9999992
## White-AIAN 0.139760404 0.08515321 0.194367596 0.0000000
## Others-AIAN -0.142439519 -0.21013948 -0.074739561 0.0000000
## Black-Asian 0.067363104 0.01236327 0.122362938 0.0064160
## Hispanic-Asian 0.151604984 0.09573083 0.207479134 0.0000000
## White-Asian 0.293260520 0.24368622 0.342834824 0.0000000
## Others-Asian 0.011060597 -0.05264930 0.074770491 0.9963733
## Hispanic-Black 0.084241879 0.04743482 0.121048942 0.0000000
## White-Black 0.225897416 0.19962098 0.252173848 0.0000000
## Others-Black -0.056302507 -0.10417515 -0.008429866 0.0104282
## White-Hispanic 0.141655537 0.11359507 0.169716004 0.0000000
## Others-Hispanic -0.140544386 -0.18941901 -0.091669764 0.0000000
## Others-White -0.282199923 -0.32372606 -0.240673790 0.0000000
****Attribute and the outcome (MICHD)****
geombar_facet <- geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",width = 1)
geomtext_facet <- geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count", vjust = -.25)
A1 REGION
ggplot(BRFSS, aes(x = MICHD , group = Region)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "peachpuff4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~Region) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
Demographic B1.AGE and MICHD
ggplot(BRFSS, aes(x = MICHD , group = AGE)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "royalblue4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ AGE) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
MICHDage <- data.frame(
age_group = c("18-29","30-39","40-49", "50-59", "60-69","70-79","80+"),
Percentage = c(0.64, 1.44, 3.10, 6.78, 11.54, 17.64, 23.59))
MICHDage
## age_group Percentage
## 1 18-29 0.64
## 2 30-39 1.44
## 3 40-49 3.10
## 4 50-59 6.78
## 5 60-69 11.54
## 6 70-79 17.64
## 7 80+ 23.59
ggplot(data = MICHDage,
aes(x = age_group,
y = Percentage)) +
geom_bar(stat = "identity",
fill = "royalblue4",
width = 0.75)+
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.5,
color = "black",
size = 4.5) +
scale_y_continuous(limits = c(0,25)) +
theme_minimal() +
labs(x = "Age group",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(size = 12),
legend.position = "non")
ggplot(data = MICHDage,
aes(x = age_group,
y = Percentage)) +
geom_point(stat = "identity",
position = "identity",
shape = 21,
colour = "blue",
fill = "blue", size = 3) +
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.5,
color = "black",
size = 4.5) +
scale_y_continuous(limits = c(0,25)) +
theme_minimal() +
labs(x = "Age group",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(size = 12),
legend.position = "non")
B2.GENDER and MICHD
ggplot(BRFSS, aes(x= MICHD , group = GENDER)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "royalblue4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~GENDER) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
table(BRFSS$MICHD,BRFSS$GENDER)
##
## Female Male
## No 136133 118845
## Yes 9771 14823
prop.test(table(BRFSS$MICHD, BRFSS$GENDER), correct= FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$MICHD, BRFSS$GENDER)
## X-squared = 1677.6, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.1301941 0.1430238
## sample estimates:
## prop 1 prop 2
## 0.533901 0.397292
RACE and MICHD
ggplot(BRFSS, aes(x= MICHD , group = RACE)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "royalblue4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~RACE) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
ggplot(BRFSS, aes(x= MICHD , group = EDU)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "royalblue4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ EDU) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
MICHDedu <- data.frame(
edu_group = c("<8","9-12","12-15", "16+"),
Percentage = c(16.2, 11.2, 9.2, 6.5))
MICHDedu
## edu_group Percentage
## 1 <8 16.2
## 2 9-12 11.2
## 3 12-15 9.2
## 4 16+ 6.5
positions <- c("<8","9-12","12-15", "16+")
ggplot(data = MICHDedu ,
aes(x = positions,
y = Percentage)) +
geom_bar(stat = "identity",
fill = "royalblue4",
width = 0.6)+
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.5,
color = "black",
size = 4.5) +
scale_x_discrete(limits = positions) +
scale_y_continuous(limits = c(0,20)) +
theme_minimal() +
labs(x = "Years of education categories",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(size = 12),
legend.position = "non")
ggplot(BRFSS, aes(x= MICHD , group = RELP)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "royalblue4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ RELP) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
AGE and widowed
BRFSS %>%
group_by(AGE,RELP) %>%
dplyr::count()
## # A tibble: 28 x 3
## # Groups: AGE, RELP [28]
## AGE RELP n
## <fct> <ord> <int>
## 1 18-29 In relationship 9521
## 2 18-29 Living apart 991
## 3 18-29 Never married 18861
## 4 18-29 Widowed 42
## 5 30-39 In relationship 21906
## 6 30-39 Living apart 4101
## 7 30-39 Never married 8267
## 8 30-39 Widowed 176
## 9 40-49 In relationship 26044
## 10 40-49 Living apart 6995
## # ... with 18 more rows
ggplot(BRFSS, aes(x= AGE, group = RELP)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat="count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col7) +
labs(y = "Percent of respondents", fill = "Age group") +
facet_grid(~ RELP) +
scale_y_continuous(labels = scales::percent) +
theme_bar
B6. EMPL
ggplot(BRFSS, aes(x = MICHD , group = EMPL)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "royalblue4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ EMPL) +
themebar_facet
ggplot(BRFSS, aes(x= AGE, group = EMPL)) +
geom_bar(aes(y = ..prop..,
fill = factor(..x..)),
stat = "count",
width = 0.8) +
geom_text(size = 4,
aes(label = scales::percent(..prop..),
y= ..prop.. ),
stat= "count",
color = "blue",
vjust = -.25) +
scale_fill_manual(values= col7) +
labs(y = "Percent of respondents", fill = "Age group") +
facet_grid(~EMPL) +
scale_y_continuous(labels = scales::percent) +
theme_bar
B7. Income levels (INCOME)
ggplot(BRFSS, aes(x= MICHD , group = INCOME)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "royalblue4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ INCOME) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
MICHDincome <- data.frame(
edu_group = c("<$20K","$20K-<$35K","$35K-<$50K", "$50K-<$75K","$75K+"),
Percentage = c(14.9, 11.5,9.5, 7.8, 5.3))
MICHDincome
## edu_group Percentage
## 1 <$20K 14.9
## 2 $20K-<$35K 11.5
## 3 $35K-<$50K 9.5
## 4 $50K-<$75K 7.8
## 5 $75K+ 5.3
position_edu <- c("<$20K","$20K-<$35K","$35K-<$50K", "$50K-<$75K","$75K+")
ggplot(data = MICHDincome,
aes(x = position_edu,
y = Percentage)) +
geom_bar(stat = "identity",
fill = "royalblue4",
width = 0.6)+
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.5,
color = "black",
size = 4.5) +
scale_x_discrete(limits = position_edu) +
scale_y_continuous(limits = c(0,18)) +
theme_minimal() +
labs(x = "Income levels",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(size = 12),
legend.position = "non")
B8.Geographic locale
ggplot(BRFSS, aes(x= MICHD , group = COMMUN)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "royalblue4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ COMMUN) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
Lifestyle (C) C1. X_SMOKER3 Four-level smoker status —> SMOKESTAT C2. ALCDAY5 Days in past 30 had alcoholic beverage —> ALCpa30 C3. SLEPTIM1 How Much Time Do You Sleep C4. EXERANY2 Exercise in Past 30 Days
C1. X_SMOKER3 Four-level smoker status —> SMOKESTAT
ggplot(BRFSS, aes(x = MICHD , group = SMOKESTAT)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "olivedrab4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ SMOKESTAT) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
prop.test(table(BRFSS$SMOKESTAT, BRFSS$MICHD), correct=FALSE)
##
## 4-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$SMOKESTAT, BRFSS$MICHD)
## X-squared = 3786.7, df = 3, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4
## 0.8933224 0.9036556 0.8658091 0.9396616
C2. ALCDAY5 Days in past 30 had alcoholic beverage —> ALCpa30
ggplot(BRFSS, aes(x = MICHD , group = ALCpa30)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "olivedrab4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ ALCpa30) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
prop.test(table(BRFSS$ALCpa30, BRFSS$MICHD), correct = FALSE)
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$ALCpa30, BRFSS$MICHD)
## X-squared = 2257.3, df = 2, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.8833757 0.9360242 0.9320561
C3. SLEPTIM1 How Much Time Do You Sleep
ggplot(BRFSS, aes(x = MICHD , group = SLEPTIM1)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "olivedrab4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ SLEPTIM1) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$SLEPTIM1, BRFSS$MICHD), correct = FALSE)
##
## 10-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$SLEPTIM1, BRFSS$MICHD)
## X-squared = 2095.6, df = 9, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4 prop 5 prop 6 prop 7 prop 8
## 0.8150538 0.8381890 0.8964931 0.9178640 0.9332331 0.9088374 0.8906505 0.8343991
## prop 9 prop 10
## 0.8532609 0.8106769
Sleep <- data.frame(
Duration = c(3:12),
Percentage = c(18.49, 16.18, 10.35, 8.21, 6.68, 9.12, 10.93, 16.56, 14.67, 18.93)
)
Sleep
## Duration Percentage
## 1 3 18.49
## 2 4 16.18
## 3 5 10.35
## 4 6 8.21
## 5 7 6.68
## 6 8 9.12
## 7 9 10.93
## 8 10 16.56
## 9 11 14.67
## 10 12 18.93
ggplot(data = Sleep, aes(x = Duration,
y = Percentage)) +
geom_line(linetype = "dashed",
size = 0.5) +
geom_point(size = 6, color = "blue") +
labs(y = "Percent of respondents (%)",
x = "Duration (h)") +
theme_point
ggplot(data = Sleep,
aes(x = Duration,
y = Percentage,
fill = Percentage)) +
scale_fill_gradient(low = "olivedrab4", high = "darkred")+
geom_bar(stat = "identity")+
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.5,
color = "black",
size = 4.5) +
scale_x_continuous(breaks = seq(0,14,by = 1))+
scale_y_continuous(limits = c(0,20)) +
theme_minimal() +
labs(x = "Sleep duration (h)",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(size = 12),
legend.position = "non")
C4. EXERANY2 Exercise in Past 30 Days
ggplot(BRFSS, aes(x= MICHD , group = EXERpa30)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "olivedrab4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ EXERpa30) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$EXERpa30, BRFSS$MICHD), correct=FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$EXERpa30, BRFSS$MICHD)
## X-squared = 2775.8, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.07069454 -0.06482085
## sample estimates:
## prop 1 prop 2
## 0.8594115 0.9271692
Health Status (D) D1. GENHLTH
ggplot(BRFSS, aes(x= MICHD , group = GENHLTH)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "orangered4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ GENHLTH) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$GENHLTH, BRFSS$MICHD), correct = FALSE)
##
## 5-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$GENHLTH, BRFSS$MICHD)
## X-squared = 18845, df = 4, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4 prop 5
## 0.6677332 0.8065433 0.9026237 0.9550222 0.9779830
Sleep <- data.frame(
Duration = c(3:12),
Percentage = c(18.49, 16.18, 10.35, 8.21, 6.68, 9.12, 10.93, 16.56, 14.67, 18.93)
)
Sleep
## Duration Percentage
## 1 3 18.49
## 2 4 16.18
## 3 5 10.35
## 4 6 8.21
## 5 7 6.68
## 6 8 9.12
## 7 9 10.93
## 8 10 16.56
## 9 11 14.67
## 10 12 18.93
MICHD_GENHLTH <- data.frame(
GENHLTH_group = c("Poor","Fair","Good", "Very good","Excellent"),
Percentage = c(33.2,19.3,9.7, 4.5, 2.2))
position_GENHLTH <- c("Poor","Fair","Good", "Very good","Excellent")
ggplot(data = MICHD_GENHLTH,
aes(x = position_GENHLTH,
y = Percentage)) +
geom_bar(stat = "identity",
fill = "purple4",
width = 0.6)+
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.5,
color = "black",
size = 4.5) +
scale_x_discrete(limits = position_GENHLTH) +
scale_y_continuous(limits = c(0,35)) +
theme_minimal() +
labs(x = "Would you say that in general your health is:...",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(size = 12),
legend.position = "non")
Healthy Days (E)— Health Related Quality of Life E1.PHYSHLTH (Number of Days Physical Health Not Good) E2.MENTHLTH (Number of Days Mental Health Not Good)
ggplot(BRFSS, aes(x= MICHD , group = PHYSHLTHpa30)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "purple4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ PHYSHLTHpa30) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$PHYSHLTHpa30, BRFSS$MICHD), correct = FALSE)
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$PHYSHLTHpa30, BRFSS$MICHD)
## X-squared = 8082.2, df = 2, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.9381334 0.9134742 0.8002322
ggplot(BRFSS, aes(x= MICHD , group = MENTHLTHpa30)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "purple4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ MENTHLTHpa30) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$MENTHLTHpa30, BRFSS$MICHD), correct = FALSE)
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$MENTHLTHpa30, BRFSS$MICHD)
## X-squared = 661.24, df = 2, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.9128522 0.9303403 0.8823198
Health Care Access (F) F1. HPLN1 (Have any health care coverage) ==> HLTHPLN F2. MEDCOST (Could Not See Doctor Because of Cost) ==> MEDICOST F3. CHECKUP1 ==> CHECKUP
ggplot(BRFSS, aes(x= MICHD , group = HLTHPLN)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ HLTHPLN) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= MICHD , group = MEDICOST)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ MEDICOST) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
ggplot(BRFSS, aes(x= MICHD , group = CHECKUP)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ CHECKUP) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
BRFSS %>%
group_by(CHECKUP, MICHD) %>%
dplyr::count()
## # A tibble: 8 x 3
## # Groups: CHECKUP, MICHD [8]
## CHECKUP MICHD n
## <fct> <fct> <int>
## 1 <1 No 200580
## 2 <1 Yes 22738
## 3 1-<2 No 26573
## 4 1-<2 Yes 1002
## 5 2-<5 No 13765
## 6 2-<5 Yes 410
## 7 5+ No 14060
## 8 5+ Yes 444
Chronic Health Conditions (G) G1. ASTHMA3 ==> ASTHMA G2. CHCCOPD1 ==> PULMOND G3. HAVARTH3 ==> ARTHRITIS G4. ADDEPEV2 ==> DEPRESSION G5. CHCKDNY1 ==> KIDNEY G6. DIABETE3 ==> DIABETE G7. CHCSCNCR,CHCOCNCR ==> CANCER
BRFSS %>%
select(ASTHMA, PULMOND,ARTHRITIS,DEPRESSION, KIDNEY,DIABETE,CANCER, MICHD) %>%
group_by (MICHD) %>%
count()
## ASTHMA PULMOND ARTHRITIS DEPRESSION KIDNEY DIABETE CANCER MICHD freq
## 1 No No No No No No No No 106889
## 2 No No No No No No No Yes 3328
## 3 No No No No No No Yes No 12899
## 4 No No No No No No Yes Yes 1226
## 5 No No No No No Yes No No 9088
## 6 No No No No No Yes No Yes 1161
## 7 No No No No No Yes Yes No 1689
## 8 No No No No No Yes Yes Yes 378
## 9 No No No No Yes No No No 904
## 10 No No No No Yes No No Yes 128
## 11 No No No No Yes No Yes No 442
## 12 No No No No Yes No Yes Yes 100
## 13 No No No No Yes Yes No No 401
## 14 No No No No Yes Yes No Yes 165
## 15 No No No No Yes Yes Yes No 155
## 16 No No No No Yes Yes Yes Yes 73
## 17 No No No Yes No No No No 15285
## 18 No No No Yes No No No Yes 509
## 19 No No No Yes No No Yes No 1812
## 20 No No No Yes No No Yes Yes 166
## 21 No No No Yes No Yes No No 1574
## 22 No No No Yes No Yes No Yes 221
## 23 No No No Yes No Yes Yes No 297
## 24 No No No Yes No Yes Yes Yes 55
## 25 No No No Yes Yes No No No 217
## 26 No No No Yes Yes No No Yes 36
## 27 No No No Yes Yes No Yes No 85
## 28 No No No Yes Yes No Yes Yes 15
## 29 No No No Yes Yes Yes No No 97
## 30 No No No Yes Yes Yes No Yes 40
## 31 No No No Yes Yes Yes Yes No 34
## 32 No No No Yes Yes Yes Yes Yes 13
## 33 No No Yes No No No No No 28413
## 34 No No Yes No No No No Yes 2730
## 35 No No Yes No No No Yes No 8670
## 36 No No Yes No No No Yes Yes 1318
## 37 No No Yes No No Yes No No 5295
## 38 No No Yes No No Yes No Yes 1207
## 39 No No Yes No No Yes Yes No 1748
## 40 No No Yes No No Yes Yes Yes 503
## 41 No No Yes No Yes No No No 770
## 42 No No Yes No Yes No No Yes 201
## 43 No No Yes No Yes No Yes No 475
## 44 No No Yes No Yes No Yes Yes 182
## 45 No No Yes No Yes Yes No No 488
## 46 No No Yes No Yes Yes No Yes 282
## 47 No No Yes No Yes Yes Yes No 266
## 48 No No Yes No Yes Yes Yes Yes 167
## 49 No No Yes Yes No No No No 8089
## 50 No No Yes Yes No No No Yes 762
## 51 No No Yes Yes No No Yes No 2162
## 52 No No Yes Yes No No Yes Yes 333
## 53 No No Yes Yes No Yes No No 1826
## 54 No No Yes Yes No Yes No Yes 458
## 55 No No Yes Yes No Yes Yes No 523
## 56 No No Yes Yes No Yes Yes Yes 170
## 57 No No Yes Yes Yes No No No 320
## 58 No No Yes Yes Yes No No Yes 81
## 59 No No Yes Yes Yes No Yes No 165
## 60 No No Yes Yes Yes No Yes Yes 60
## 61 No No Yes Yes Yes Yes No No 210
## 62 No No Yes Yes Yes Yes No Yes 100
## 63 No No Yes Yes Yes Yes Yes No 85
## 64 No No Yes Yes Yes Yes Yes Yes 52
## 65 No Yes No No No No No No 1958
## 66 No Yes No No No No No Yes 438
## 67 No Yes No No No No Yes No 612
## 68 No Yes No No No No Yes Yes 165
## 69 No Yes No No No Yes No No 346
## 70 No Yes No No No Yes No Yes 157
## 71 No Yes No No No Yes Yes No 127
## 72 No Yes No No No Yes Yes Yes 59
## 73 No Yes No No Yes No No No 33
## 74 No Yes No No Yes No No Yes 23
## 75 No Yes No No Yes No Yes No 41
## 76 No Yes No No Yes No Yes Yes 29
## 77 No Yes No No Yes Yes No No 24
## 78 No Yes No No Yes Yes No Yes 40
## 79 No Yes No No Yes Yes Yes No 13
## 80 No Yes No No Yes Yes Yes Yes 13
## 81 No Yes No Yes No No No No 615
## 82 No Yes No Yes No No No Yes 117
## 83 No Yes No Yes No No Yes No 157
## 84 No Yes No Yes No No Yes Yes 28
## 85 No Yes No Yes No Yes No No 116
## 86 No Yes No Yes No Yes No Yes 57
## 87 No Yes No Yes No Yes Yes No 35
## 88 No Yes No Yes No Yes Yes Yes 26
## 89 No Yes No Yes Yes No No No 25
## 90 No Yes No Yes Yes No No Yes 10
## 91 No Yes No Yes Yes No Yes No 11
## 92 No Yes No Yes Yes No Yes Yes 8
## 93 No Yes No Yes Yes Yes No No 10
## 94 No Yes No Yes Yes Yes No Yes 14
## 95 No Yes No Yes Yes Yes Yes No 1
## 96 No Yes No Yes Yes Yes Yes Yes 5
## 97 No Yes Yes No No No No No 1926
## 98 No Yes Yes No No No No Yes 529
## 99 No Yes Yes No No No Yes No 742
## 100 No Yes Yes No No No Yes Yes 321
## 101 No Yes Yes No No Yes No No 487
## 102 No Yes Yes No No Yes No Yes 278
## 103 No Yes Yes No No Yes Yes No 186
## 104 No Yes Yes No No Yes Yes Yes 140
## 105 No Yes Yes No Yes No No No 76
## 106 No Yes Yes No Yes No No Yes 55
## 107 No Yes Yes No Yes No Yes No 67
## 108 No Yes Yes No Yes No Yes Yes 52
## 109 No Yes Yes No Yes Yes No No 53
## 110 No Yes Yes No Yes Yes No Yes 89
## 111 No Yes Yes No Yes Yes Yes No 33
## 112 No Yes Yes No Yes Yes Yes Yes 56
## 113 No Yes Yes Yes No No No No 1044
## 114 No Yes Yes Yes No No No Yes 326
## 115 No Yes Yes Yes No No Yes No 374
## 116 No Yes Yes Yes No No Yes Yes 142
## 117 No Yes Yes Yes No Yes No No 310
## 118 No Yes Yes Yes No Yes No Yes 190
## 119 No Yes Yes Yes No Yes Yes No 100
## 120 No Yes Yes Yes No Yes Yes Yes 82
## 121 No Yes Yes Yes Yes No No No 72
## 122 No Yes Yes Yes Yes No No Yes 40
## 123 No Yes Yes Yes Yes No Yes No 37
## 124 No Yes Yes Yes Yes No Yes Yes 37
## 125 No Yes Yes Yes Yes Yes No No 44
## 126 No Yes Yes Yes Yes Yes No Yes 63
## 127 No Yes Yes Yes Yes Yes Yes No 19
## 128 No Yes Yes Yes Yes Yes Yes Yes 36
## 129 Yes No No No No No No No 11164
## 130 Yes No No No No No No Yes 287
## 131 Yes No No No No No Yes No 1200
## 132 Yes No No No No No Yes Yes 104
## 133 Yes No No No No Yes No No 879
## 134 Yes No No No No Yes No Yes 113
## 135 Yes No No No No Yes Yes No 185
## 136 Yes No No No No Yes Yes Yes 46
## 137 Yes No No No Yes No No No 94
## 138 Yes No No No Yes No No Yes 15
## 139 Yes No No No Yes No Yes No 48
## 140 Yes No No No Yes No Yes Yes 8
## 141 Yes No No No Yes Yes No No 35
## 142 Yes No No No Yes Yes No Yes 15
## 143 Yes No No No Yes Yes Yes No 13
## 144 Yes No No No Yes Yes Yes Yes 7
## 145 Yes No No Yes No No No No 3372
## 146 Yes No No Yes No No No Yes 111
## 147 Yes No No Yes No No Yes No 345
## 148 Yes No No Yes No No Yes Yes 28
## 149 Yes No No Yes No Yes No No 358
## 150 Yes No No Yes No Yes No Yes 43
## 151 Yes No No Yes No Yes Yes No 58
## 152 Yes No No Yes No Yes Yes Yes 15
## 153 Yes No No Yes Yes No No No 52
## 154 Yes No No Yes Yes No No Yes 4
## 155 Yes No No Yes Yes No Yes No 16
## 156 Yes No No Yes Yes No Yes Yes 7
## 157 Yes No No Yes Yes Yes No No 19
## 158 Yes No No Yes Yes Yes No Yes 8
## 159 Yes No No Yes Yes Yes Yes No 11
## 160 Yes No No Yes Yes Yes Yes Yes 4
## 161 Yes No Yes No No No No No 3562
## 162 Yes No Yes No No No No Yes 341
## 163 Yes No Yes No No No Yes No 1135
## 164 Yes No Yes No No No Yes Yes 169
## 165 Yes No Yes No No Yes No No 784
## 166 Yes No Yes No No Yes No Yes 192
## 167 Yes No Yes No No Yes Yes No 280
## 168 Yes No Yes No No Yes Yes Yes 86
## 169 Yes No Yes No Yes No No No 120
## 170 Yes No Yes No Yes No No Yes 28
## 171 Yes No Yes No Yes No Yes No 70
## 172 Yes No Yes No Yes No Yes Yes 24
## 173 Yes No Yes No Yes Yes No No 79
## 174 Yes No Yes No Yes Yes No Yes 36
## 175 Yes No Yes No Yes Yes Yes No 35
## 176 Yes No Yes No Yes Yes Yes Yes 24
## 177 Yes No Yes Yes No No No No 2102
## 178 Yes No Yes Yes No No No Yes 189
## 179 Yes No Yes Yes No No Yes No 511
## 180 Yes No Yes Yes No No Yes Yes 94
## 181 Yes No Yes Yes No Yes No No 566
## 182 Yes No Yes Yes No Yes No Yes 135
## 183 Yes No Yes Yes No Yes Yes No 145
## 184 Yes No Yes Yes No Yes Yes Yes 60
## 185 Yes No Yes Yes Yes No No No 81
## 186 Yes No Yes Yes Yes No No Yes 22
## 187 Yes No Yes Yes Yes No Yes No 47
## 188 Yes No Yes Yes Yes No Yes Yes 20
## 189 Yes No Yes Yes Yes Yes No No 78
## 190 Yes No Yes Yes Yes Yes No Yes 39
## 191 Yes No Yes Yes Yes Yes Yes No 26
## 192 Yes No Yes Yes Yes Yes Yes Yes 20
## 193 Yes Yes No No No No No No 921
## 194 Yes Yes No No No No No Yes 131
## 195 Yes Yes No No No No Yes No 236
## 196 Yes Yes No No No No Yes Yes 73
## 197 Yes Yes No No No Yes No No 159
## 198 Yes Yes No No No Yes No Yes 73
## 199 Yes Yes No No No Yes Yes No 68
## 200 Yes Yes No No No Yes Yes Yes 32
## 201 Yes Yes No No Yes No No No 33
## 202 Yes Yes No No Yes No No Yes 12
## 203 Yes Yes No No Yes No Yes No 12
## 204 Yes Yes No No Yes No Yes Yes 8
## 205 Yes Yes No No Yes Yes No No 8
## 206 Yes Yes No No Yes Yes No Yes 14
## 207 Yes Yes No No Yes Yes Yes No 4
## 208 Yes Yes No No Yes Yes Yes Yes 9
## 209 Yes Yes No Yes No No No No 462
## 210 Yes Yes No Yes No No No Yes 89
## 211 Yes Yes No Yes No No Yes No 93
## 212 Yes Yes No Yes No No Yes Yes 29
## 213 Yes Yes No Yes No Yes No No 123
## 214 Yes Yes No Yes No Yes No Yes 44
## 215 Yes Yes No Yes No Yes Yes No 29
## 216 Yes Yes No Yes No Yes Yes Yes 16
## 217 Yes Yes No Yes Yes No No No 16
## 218 Yes Yes No Yes Yes No No Yes 9
## 219 Yes Yes No Yes Yes No Yes No 11
## 220 Yes Yes No Yes Yes No Yes Yes 3
## 221 Yes Yes No Yes Yes Yes No No 9
## 222 Yes Yes No Yes Yes Yes No Yes 10
## 223 Yes Yes No Yes Yes Yes Yes No 11
## 224 Yes Yes No Yes Yes Yes Yes Yes 7
## 225 Yes Yes Yes No No No No No 1017
## 226 Yes Yes Yes No No No No Yes 275
## 227 Yes Yes Yes No No No Yes No 445
## 228 Yes Yes Yes No No No Yes Yes 154
## 229 Yes Yes Yes No No Yes No No 374
## 230 Yes Yes Yes No No Yes No Yes 192
## 231 Yes Yes Yes No No Yes Yes No 153
## 232 Yes Yes Yes No No Yes Yes Yes 112
## 233 Yes Yes Yes No Yes No No No 62
## 234 Yes Yes Yes No Yes No No Yes 38
## 235 Yes Yes Yes No Yes No Yes No 45
## 236 Yes Yes Yes No Yes No Yes Yes 32
## 237 Yes Yes Yes No Yes Yes No No 54
## 238 Yes Yes Yes No Yes Yes No Yes 64
## 239 Yes Yes Yes No Yes Yes Yes No 35
## 240 Yes Yes Yes No Yes Yes Yes Yes 45
## 241 Yes Yes Yes Yes No No No No 976
## 242 Yes Yes Yes Yes No No No Yes 280
## 243 Yes Yes Yes Yes No No Yes No 306
## 244 Yes Yes Yes Yes No No Yes Yes 149
## 245 Yes Yes Yes Yes No Yes No No 403
## 246 Yes Yes Yes Yes No Yes No Yes 230
## 247 Yes Yes Yes Yes No Yes Yes No 143
## 248 Yes Yes Yes Yes No Yes Yes Yes 102
## 249 Yes Yes Yes Yes Yes No No No 78
## 250 Yes Yes Yes Yes Yes No No Yes 49
## 251 Yes Yes Yes Yes Yes No Yes No 44
## 252 Yes Yes Yes Yes Yes No Yes Yes 43
## 253 Yes Yes Yes Yes Yes Yes No No 76
## 254 Yes Yes Yes Yes Yes Yes No Yes 79
## 255 Yes Yes Yes Yes Yes Yes Yes No 35
## 256 Yes Yes Yes Yes Yes Yes Yes Yes 52
G1. ASTHMA
table1 <- table(BRFSS$ASTHMA, BRFSS$MICHD)
table1
##
## No Yes
## No 221067 19845
## Yes 33911 4749
ggplot(BRFSS, aes(x = MICHD , group = ASTHMA)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ ASTHMA) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
prop.test(table(BRFSS$ASTHMA, BRFSS$MICHD), correct = FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$ASTHMA, BRFSS$MICHD)
## X-squared = 679.92, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.03701430 0.04391704
## sample estimates:
## prop 1 prop 2
## 0.9176255 0.8771599
G2. PULMOND
ggplot(BRFSS, aes(x= MICHD , group = PULMOND)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ PULMOND) +
themebar_facet
prop.test(table(BRFSS$PULMOND, BRFSS$MICHD), correct = FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$PULMOND, BRFSS$MICHD)
## X-squared = 10375, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.1958027 0.2076973
## sample estimates:
## prop 1 prop 2
## 0.9280610 0.7263111
G3. ARTHRITIS
ggplot(BRFSS, aes(x = MICHD , group = ARTHRITIS)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ ARTHRITIS) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
prop.test(table(BRFSS$ARTHRITIS, BRFSS$MICHD), correct=FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$ARTHRITIS, BRFSS$MICHD)
## X-squared = 7715.3, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.09723545 0.10231039
## sample estimates:
## prop 1 prop 2
## 0.9453475 0.8455746
G4. DEPRESSION
ggplot(BRFSS, aes(x = MICHD, group = DEPRESSION)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ DEPRESSION) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$DEPRESSION, BRFSS$MICHD), correct = FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$DEPRESSION, BRFSS$MICHD)
## X-squared = 761.52, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.03484365 0.04080930
## sample estimates:
## prop 1 prop 2
## 0.9191460 0.8813195
G5.KIDNEY
ggplot(BRFSS, aes(x = MICHD , group = KIDNEY)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ KIDNEY) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
prop.test(table(BRFSS$KIDNEY, BRFSS$MICHD), correct=FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$KIDNEY, BRFSS$MICHD)
## X-squared = 5868.8, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.2114302 0.2294740
## sample estimates:
## prop 1 prop 2
## 0.9199546 0.6995025
G6. DIABETE3 ==> DIABETE
ggplot(BRFSS, aes(x = MICHD , group = DIABETE)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ DIABETE) +
themebar_facet
prop.test(table(BRFSS$DIABETE, BRFSS$MICHD), correct = FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$DIABETE, BRFSS$MICHD)
## X-squared = 8616.9, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.1390752 0.1474029
## sample estimates:
## prop 1 prop 2
## 0.9321083 0.7888693
G7. CHCSCNCR,CHCOCNCR ==> CANCER
ggplot(BRFSS, aes(x = MICHD , group = CANCER)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ CANCER) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$CANCER, BRFSS$MICHD), correct = FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$CANCER, BRFSS$MICHD)
## X-squared = 3695.9, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.08329296 0.09022248
## sample estimates:
## prop 1 prop 2
## 0.9267544 0.8399966
ggplot(BRFSS, aes(x = MICHD , group = STROKE)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "goldenrod4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ STROKE) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
Among pp who have MICHD
CHC <- c("ARTHRITIS", "ASHMA", " CANCER","DEPRESSION", "DIABETE","KIDNEY", "PULMON", "STROKE")
CHC_NO <- c(7.0,9.6,8.7,9.4,8.0,9.0,8.0,8.0)
CHC_YES <- c(17,14.9,17.9,14.3,23,32,30,36.0)
dataCHC <- cbind.data.frame(CHC, CHC_NO, CHC_YES)
dataCHC
## CHC CHC_NO CHC_YES
## 1 ARTHRITIS 7.0 17.0
## 2 ASHMA 9.6 14.9
## 3 CANCER 8.7 17.9
## 4 DEPRESSION 9.4 14.3
## 5 DIABETE 8.0 23.0
## 6 KIDNEY 9.0 32.0
## 7 PULMON 8.0 30.0
## 8 STROKE 8.0 36.0
library(ggthemes)
##
## Attaching package: 'ggthemes'
## The following object is masked from 'package:cowplot':
##
## theme_map
CHC_MICHD <- read.csv(file="C:/Users/user/Desktop/Capstone Project/Submission/R codes/Chronic health.csv", header=TRUE, sep=",")
CHC_MICHD
## ï..ChronicHLTH CHC.status MICHD.NO MICHD.YES
## 1 ASTHMA NO 90.4 9.6
## 2 ASTHMA YES 85.1 14.9
## 3 PULMOND NO 92.0 8.0
## 4 PULMOND YES 70.0 30.0
## 5 ARTHRITIS NO 93.0 7.0
## 6 ARTHRITIS YES 83.0 17.0
## 7 DEPRESSION NO 90.6 9.4
## 8 DEPRESSION YES 85.7 14.3
## 9 KIDNEY NO 91.0 9.0
## 10 KIDNEY YES 68.0 32.0
## 11 DIABETE NO 92.0 8.0
## 12 DIABETE YES 77.0 23.0
## 13 CANCER NO 91.3 8.7
## 14 CANCER YES 82.1 17.9
## 15 STROKE NO 92.0 8.0
## 16 STROKE YES 64.0 36.0
ggplot(CHC_MICHD, aes(CHC.status, MICHD.YES)) +
geom_bar(stat = "identity") +
facet_wrap(~ï..ChronicHLTH, nrow = 2) +
geom_text(aes(label = paste0(MICHD.YES, "%"),
y = MICHD.YES),
vjust = 1.4,
size = 5,
color = "white") +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
axis.ticks = element_blank(),
axis.text.y = element_blank()) +
themebar_facet +
aes(fill = as.factor(CHC.status)) +
scale_fill_manual(values = c("#00AFBB", "goldenrod4")) +
labs(x = "Status (Yes/No) of chronic health condition",
y = "Percent of respondents (%)",
caption = "Data source: BRFSS2018")
Among people having MICHD
ggplot(CHC_MICHD, aes(CHC.status,ï..ChronicHLTH,
fill = MICHD.YES)) +
geom_tile(show.legend = TRUE) +
geom_text(aes(label = paste0(MICHD.YES, "%")), color = "white") +
labs(x = "Chronic Health Conditions Status",
y = "Chronic Health Conditions",
color = "Having MICHD") +
theme(axis.text = element_text(size = 12, color = "black"),
axis.title = element_text(size = 12, face="bold"),
plot.caption = element_text(color = "grey", size = 12, face = "italic"))
Oral Health (H) H1. RMVTETH4 ==> RMVTETH H2. LASTDEN4 ===> LASTDEN
H1. RMVTETH4 ==> RMVTETH
ggplot(BRFSS, aes(x = MICHD , group = RMVTETH)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values = c("gainsboro", "seagreen4")) +
labs(y = "Percent of respondents", fill = "MICHD") +
facet_grid(~ RMVTETH) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$RMVTETH, BRFSS$MICHD), correct = FALSE)
##
## 4-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$RMVTETH, BRFSS$MICHD)
## X-squared = 11997, df = 3, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4
## 0.9553718 0.9040012 0.8193665 0.7600530
H2. LASTDEN4 ===> LASTDENTV
ggplot(BRFSS, aes(x= MICHD , group = LASTDENTV)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "seagreen4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ LASTDENTV) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
prop.test(table(BRFSS$LASTDENTV, BRFSS$MICHD), correct=FALSE)
##
## 4-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$LASTDENTV, BRFSS$MICHD)
## X-squared = 2054.9, df = 3, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4
## 0.9248640 0.9072394 0.8948507 0.8479726
MICHDrvmteeth <- data.frame(
teeth_group = c("0","1-5",">= 6", "All"),
Percentage = c(4.5, 9.6, 18.1, 24))
MICHDrvmteeth
## teeth_group Percentage
## 1 0 4.5
## 2 1-5 9.6
## 3 >= 6 18.1
## 4 All 24.0
positions <- c("0","1-5",">= 6", "All")
ggplot(data = MICHDrvmteeth ,
aes(x = positions,
y = Percentage)) +
geom_bar(stat = "identity",
fill = "seagreen4",
width = 0.6)+
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.5,
color = "black",
size = 4.5) +
scale_x_discrete(limits = positions) +
scale_y_continuous(limits = c(0,25)) +
theme_minimal() +
labs(x = "Number of teeth were removed",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(size = 12),
legend.position = "non")
MICHD_lastvdent <- data.frame(
teeth_group = c("<1","1-<2","2-<5", "5+"),
Percentage = c(7.5, 9.3, 10.5, 15.2))
positions <- c("<1","1-<2","2-<5", "5+")
ggplot(data = MICHD_lastvdent,
aes(x = positions,
y = Percentage)) +
geom_bar(stat = "identity",
fill = "seagreen4",
width = 0.6) +
geom_text(aes(label = paste0(Percentage,"%")),
vjust = -0.5,
color = "black",
size = 4.5) +
scale_x_discrete(limits = positions) +
scale_y_continuous(limits = c(0,20)) +
theme_minimal() +
labs(x = "Last time you visited dental clinic ",
y = "Percentage (%)",
caption = "Data source: BRFSS2018") +
theme(axis.text = element_text(size = 12),
axis.title = element_text(size = 12, face = "bold"),
plot.caption = element_text(color = "grey44", size = 12, face = "italic"),
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(size = 12),
legend.position = "non")
MICAT (I) I1. X_BMI5CAT ===> BMICAT
ggplot(BRFSS, aes(x= MICHD , group = BMICAT)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "firebrick4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ BMICAT) +
scale_y_continuous(labels = scales::percent) +
themebar_facet
prop.test(table(BRFSS$BMICAT, BRFSS$MICHD), correct = FALSE)
##
## 4-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$BMICAT, BRFSS$MICHD)
## X-squared = 1051.4, df = 3, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3 prop 4
## 0.9099751 0.9359832 0.9104626 0.8920471
COMMUNITY (J)
ggplot(BRFSS, aes(x= MICHD , group = COMMUN)) +
geombar_facet +
geomtext_facet +
scale_fill_manual(values= c("gainsboro", "cyan4")) +
labs(y = "Percent of respondents", fill="MICHD") +
facet_grid(~ COMMUN) +
scale_y_continuous(labels = scales::percent)+
themebar_facet
prop.test(table(BRFSS$COMMUN,BRFSS$MICHD), correct = FALSE)
##
## 3-sample test for equality of proportions without continuity
## correction
##
## data: table(BRFSS$COMMUN, BRFSS$MICHD)
## X-squared = 335.84, df = 2, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
## prop 1 prop 2 prop 3
## 0.8954441 0.8995739 0.9186046