# 讀取資料、產生交叉表,計算次數 (Read the data, build the cross-tabulation, count frequencies) ----
# Read the CSV and keep only columns 6 to 9.
# NOTE(review): columns are selected by position; the xtabs() formula below
# expects them to be named race, gender, diabetes and BMI -- confirm against
# the file. The path is absolute and machine-specific.
dta <- read.csv(
  "C:/Users/X510/Desktop/2020-04-20-In-class exercise-5/diabetes_mell.csv",
  header = TRUE
)[, 6:9]

# Cross-tabulate the four variables and flatten the table into a data frame
# with one row per combination and its count in the `Freq` column.
dat <- data.frame(xtabs(~ race + gender + diabetes + BMI, data = dta))

# Preview the first rows (console output kept below for reference).
head(dat)
## race gender diabetes BMI Freq
## 1 Black Females No Normal weight 347
## 2 Hispanic Females No Normal weight 712
## 3 White Females No Normal weight 998
## 4 Black Males No Normal weight 429
## 5 Hispanic Males No Normal weight 706
## 6 White Males No Normal weight 873
# 繪圖 (Plotting) ----
# ggalluvial supplies geom_alluvium(), geom_stratum() and the "stratum" stat
# used below. The message underneath is presumably what attaching it printed.
library(ggalluvial)
## Loading required package: ggplot2

# Alluvial diagram of the cross-tabulated counts in `dat`: three axes (race,
# gender, diabetes), flows coloured by BMI category.
p0 <- ggplot(
  dat,
  # `y = Freq` must sit inside aes(). Outside it, the argument is handed to
  # ggplot() itself and never mapped, so the flows are not weighted by the
  # tabulated counts.
  aes(axis1 = race, axis2 = gender, axis3 = diabetes, y = Freq)
) +
  # Labels shown along the x axis, one per alluvial axis.
  scale_x_discrete(limits = c("race", "gender", "diabetes")) +
  labs(y = "No. individuals") +
  # Fill each flow according to BMI.
  geom_alluvium(aes(fill = BMI)) +
  # Draw the box for each category on every axis.
  geom_stratum() +
  # Print the category name inside each box.
  # NOTE(review): `infer.label` is reported as deprecated in newer ggalluvial
  # releases in favour of aes(label = after_stat(stratum)); kept as in the
  # original -- confirm against the installed version.
  geom_text(stat = "stratum", infer.label = TRUE) +
  # Fill colours.
  # NOTE(review): two colours are supplied, so this assumes BMI has exactly
  # two levels -- confirm.
  scale_fill_manual(values = c("gray40", "orange")) +
  # Plain background.
  theme_minimal() +
  ggtitle(
    "Diabetes in overall population in US 2009-2010",
    subtitle = "stratified by race, gender and diabetes mellitus"
  ) +
  # Put the legend under the plot.
  theme(legend.position = "bottom")

p0