讀取資料、產生交叉表,計算次數

# Read the diabetes data set and keep only columns 6-9, which are the ones
# used below as race, gender, diabetes and BMI.
# NOTE(review): the absolute, machine-specific path makes this script
# non-portable; consider a project-relative path.
dta <- read.csv(
  "C:/Users/X510/Desktop/2020-04-20-In-class exercise-5/diabetes_mell.csv",
  header = TRUE
)[, 6:9]

# Cross-tabulate the four variables and flatten the resulting table into a
# long data frame: one row per combination of levels, with the cell count
# stored in the `Freq` column.
dat <- data.frame(xtabs(~ race + gender + diabetes + BMI, data = dta))

# Preview the first rows of the frequency table.
head(dat)
##       race  gender diabetes           BMI Freq
## 1    Black Females       No Normal weight  347
## 2 Hispanic Females       No Normal weight  712
## 3    White Females       No Normal weight  998
## 4    Black   Males       No Normal weight  429
## 5 Hispanic   Males       No Normal weight  706
## 6    White   Males       No Normal weight  873

繪圖

library(ggalluvial)
## Loading required package: ggplot2
# Alluvial diagram of the cross-tabulated counts in `dat`: three axes
# (race -> gender -> diabetes), flows coloured by BMI category.
p0 <- ggplot(dat,
             aes(axis1 = race,
                 axis2 = gender,
                 axis3 = diabetes,
                 # Freq must be mapped inside aes(); when it was passed as a
                 # separate argument to ggplot() it was never used as the
                 # flow weight, so the plot did not reflect the cell counts.
                 y = Freq)) +
  # Set what is shown on the x axis: one label per alluvial axis.
  scale_x_discrete(limits = c("race", "gender", "diabetes")) +
  labs(y = "No. individuals") +
  # Fill each flow according to BMI.
  geom_alluvium(aes(fill = BMI)) +
  # Draw the boxes (strata) for the levels of each variable.
  geom_stratum() +
  # Write each stratum's level name inside its box.
  # NOTE(review): newer ggalluvial releases appear to deprecate `infer.label`
  # in favour of aes(label = after_stat(stratum)) - confirm against the
  # installed version before switching.
  geom_text(stat = "stratum", infer.label = TRUE) +
  # Manual fill colours; assumes BMI has exactly two levels - TODO confirm.
  scale_fill_manual(values = c("gray40", "orange")) +
  # Minimal background theme.
  theme_minimal() +
  ggtitle("Diabetes in overall population in US 2009-2010",
          subtitle = "stratified by race, gender and diabetes mellitus") +
  # Place the legend underneath the plot.
  theme(legend.position = "bottom")

# Render the plot.
p0