## Loading required package: ggplot2
## SEQN RIAGENDR RIDRETH1 DIQ010 BMXBMI gender race diabetes BMI
## 1 51624 1 3 2 32.22 Males White No Overweight
## 2 51626 1 4 2 22.00 Males Black No Normal weight
## 3 51627 1 4 2 18.22 Males Black No Normal weight
## 4 51628 2 4 1 42.39 Females Black Yes Overweight
## 5 51629 1 1 2 32.61 Males Hispanic No Overweight
## 6 51630 2 3 2 30.57 Females White No Overweight
## 'data.frame': 8706 obs. of 9 variables:
## $ SEQN : int 51624 51626 51627 51628 51629 51630 51632 51633 51634 51635 ...
## $ RIAGENDR: int 1 1 1 2 1 2 1 1 1 1 ...
## $ RIDRETH1: int 3 4 4 4 1 3 2 3 1 3 ...
## $ DIQ010 : int 2 2 2 1 2 2 2 2 2 1 ...
## $ BMXBMI : num 32.2 22 18.2 42.4 32.6 ...
## $ gender : Factor w/ 2 levels "Females","Males": 2 2 2 1 2 1 2 2 2 2 ...
## $ race : Factor w/ 3 levels "Black","Hispanic",..: 3 1 1 1 2 3 2 3 2 3 ...
## $ diabetes: Factor w/ 2 levels "No","Yes": 1 1 1 2 1 1 1 1 1 2 ...
## $ BMI : Factor w/ 2 levels "Normal weight",..: 2 1 1 2 2 2 1 2 1 2 ...
## gender race diabetes BMI Freq
## 1 Females Black No Normal weight 347
## 2 Males Black No Normal weight 429
## 3 Females Hispanic No Normal weight 712
## 4 Males Hispanic No Normal weight 706
## 5 Females White No Normal weight 998
## 6 Males White No Normal weight 873
## Draw an alluvial diagram: race -> gender -> diabetes, with flows filled by
## BMI category and flow heights taken from the `Freq` column of `dta_v3`.
ggplot(dta_v3,
       aes(axis1 = race,
           axis2 = gender,
           axis3 = diabetes,
           y = Freq)) +
  # Name the three axes on the discrete x scale.
  scale_x_discrete(limits = c("race", "gender", "diabetes"),
                   expand = c(.1, .05)) +
  labs(y = "No. individuals") +
  geom_alluvium(aes(fill = BMI)) +
  geom_stratum() +
  # `infer.label = TRUE` is deprecated in ggalluvial (>= 0.12.0); mapping the
  # computed `stratum` variable to `label` is the supported replacement.
  geom_text(stat = "stratum",
            aes(label = after_stat(stratum))) +
  scale_fill_manual(values = c("skyblue", "hotpink")) +
  theme_minimal() +
  ggtitle("Diabetes in overall population in US 2009-2010") +
  theme(legend.position = "bottom")

## Same alluvial diagram as before, drawn with a different fill palette
## (orange / gray) for the BMI categories.
ggplot(dta_v3,
       aes(axis1 = race,
           axis2 = gender,
           axis3 = diabetes,
           y = Freq)) +
  # Name the three axes on the discrete x scale.
  scale_x_discrete(limits = c("race", "gender", "diabetes"),
                   expand = c(.1, .05)) +
  labs(y = "No. individuals") +
  geom_alluvium(aes(fill = BMI)) +
  geom_stratum() +
  # `infer.label = TRUE` is deprecated in ggalluvial (>= 0.12.0); mapping the
  # computed `stratum` variable to `label` is the supported replacement.
  geom_text(stat = "stratum",
            aes(label = after_stat(stratum))) +
  scale_fill_manual(values = c("orange", "gray")) +
  theme_minimal() +
  ggtitle("Diabetes in overall population in US 2009-2010") +
  theme(legend.position = "bottom")

## The order of the categories differs from the teacher's diagram, so the
## factor levels must be reordered before plotting.
# Reorder: set the level order of each axis variable explicitly.
dta_v3$race <- factor(dta_v3$race, levels = c("Hispanic", "White", "Black"))
dta_v3$gender <- factor(dta_v3$gender, levels = c("Males", "Females"))
dta_v3$diabetes <- factor(dta_v3$diabetes, levels = c("Yes", "No"))

# Draw the alluvial diagram again using the re-levelled factors.
ggplot(dta_v3,
       aes(axis1 = race,
           axis2 = gender,
           axis3 = diabetes,
           y = Freq)) +
  # Name the three axes on the discrete x scale.
  scale_x_discrete(limits = c("race", "gender", "diabetes"),
                   expand = c(.1, .05)) +
  labs(y = "No. individuals") +
  geom_alluvium(aes(fill = BMI)) +
  geom_stratum() +
  # `infer.label = TRUE` is deprecated in ggalluvial (>= 0.12.0); mapping the
  # computed `stratum` variable to `label` is the supported replacement.
  geom_text(stat = "stratum",
            aes(label = after_stat(stratum))) +
  scale_fill_manual(values = c("orange", "gray")) +
  theme_minimal() +
  ggtitle("Diabetes in overall population in US 2009-2010") +
  theme(legend.position = "bottom")
