## Load and inspect the data (same code as in the lecture notes)
library(ggalluvial)
## Loading required package: ggplot2
library(ggplot2)
# Read the diabetes survey extract into a data frame.
# NOTE(review): the path is an absolute, machine-specific location; adjust it
# when running this script on another computer.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned. `stringsAsFactors = TRUE` keeps the text columns as factors
# (matching the recorded str() output) on R >= 4.0, where the default is FALSE.
data <- read.csv(
  "/Users/Tjlee/Desktop/weather/diabetes_mell.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

# Preview the first rows to confirm the file was parsed as expected.
head(data)
##    SEQN RIAGENDR RIDRETH1 DIQ010 BMXBMI  gender     race diabetes           BMI
## 1 51624        1        3      2  32.22   Males    White       No    Overweight
## 2 51626        1        4      2  22.00   Males    Black       No Normal weight
## 3 51627        1        4      2  18.22   Males    Black       No Normal weight
## 4 51628        2        4      1  42.39 Females    Black      Yes    Overweight
## 5 51629        1        1      2  32.61   Males Hispanic       No    Overweight
## 6 51630        2        3      2  30.57 Females    White       No    Overweight
# Show the type and first values of every column to check how it was parsed.
utils::str(data)
## 'data.frame':    8706 obs. of  9 variables:
##  $ SEQN    : int  51624 51626 51627 51628 51629 51630 51632 51633 51634 51635 ...
##  $ RIAGENDR: int  1 1 1 2 1 2 1 1 1 1 ...
##  $ RIDRETH1: int  3 4 4 4 1 3 2 3 1 3 ...
##  $ DIQ010  : int  2 2 2 1 2 2 2 2 2 1 ...
##  $ BMXBMI  : num  32.2 22 18.2 42.4 32.6 ...
##  $ gender  : Factor w/ 2 levels "Females","Males": 2 2 2 1 2 1 2 2 2 2 ...
##  $ race    : Factor w/ 3 levels "Black","Hispanic",..: 3 1 1 1 2 3 2 3 2 3 ...
##  $ diabetes: Factor w/ 2 levels "No","Yes": 1 1 1 2 1 1 1 1 1 2 ...
##  $ BMI     : Factor w/ 2 levels "Normal weight",..: 2 1 1 2 2 2 1 2 1 2 ...
# Cross-tabulate gender x race x diabetes x BMI and flatten the contingency
# table to long format: one row per combination, with its count in `Freq`.
dta_v3 <- as.data.frame(
  xtabs(~ gender + race + diabetes + BMI, data = data)
)

# Look at the first few combinations and their counts.
head(dta_v3)
##    gender     race diabetes           BMI Freq
## 1 Females    Black       No Normal weight  347
## 2   Males    Black       No Normal weight  429
## 3 Females Hispanic       No Normal weight  712
## 4   Males Hispanic       No Normal weight  706
## 5 Females    White       No Normal weight  998
## 6   Males    White       No Normal weight  873
##draw ggplot
# Alluvial diagram with three axes (race -> gender -> diabetes). Flow heights
# come from `Freq`, and the flows are filled by BMI category.
ggplot(
  dta_v3,
  aes(axis1 = race, axis2 = gender, axis3 = diabetes, y = Freq)
) +
  scale_x_discrete(
    limits = c("race", "gender", "diabetes"),
    expand = c(.1, .05)
  ) +
  labs(y = "No. individuals") +
  geom_alluvium(aes(fill = BMI)) +
  geom_stratum() +
  # Label each stratum with its category name. `infer.label = TRUE` is
  # deprecated in ggalluvial, so the computed `stratum` variable is mapped to
  # the label aesthetic explicitly (after_stat() needs ggplot2 >= 3.3.0).
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_fill_manual(values = c("skyblue", "hotpink")) +
  theme_minimal() +
  ggtitle("Diabetes in overall population in US 2009-2010") +
  theme(legend.position = "bottom")

##different color
# Same alluvial diagram (race -> gender -> diabetes, flow height = `Freq`,
# fill = BMI) drawn with an orange/gray fill palette.
ggplot(
  dta_v3,
  aes(axis1 = race, axis2 = gender, axis3 = diabetes, y = Freq)
) +
  scale_x_discrete(
    limits = c("race", "gender", "diabetes"),
    expand = c(.1, .05)
  ) +
  labs(y = "No. individuals") +
  geom_alluvium(aes(fill = BMI)) +
  geom_stratum() +
  # Label each stratum with its category name. `infer.label = TRUE` is
  # deprecated in ggalluvial, so the computed `stratum` variable is mapped to
  # the label aesthetic explicitly (after_stat() needs ggplot2 >= 3.3.0).
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_fill_manual(values = c("orange", "gray")) +
  theme_minimal() +
  ggtitle("Diabetes in overall population in US 2009-2010") +
  theme(legend.position = "bottom")

## The category order in the plot differs from the teacher's diagram, so the factor levels must be reordered.

# reorder
# Give the three axis variables an explicit level order instead of the
# default alphabetical one; the columns keep their values, only the level
# order changes.
dta_v3 <- transform(
  dta_v3,
  race = factor(race, levels = c("Hispanic", "White", "Black")),
  gender = factor(gender, levels = c("Males", "Females")),
  diabetes = factor(diabetes, levels = c("Yes", "No"))
)


#draw plot
# Alluvial diagram (race -> gender -> diabetes, flow height = `Freq`,
# fill = BMI) drawn from `dta_v3` as it stands at this point in the script.
ggplot(
  dta_v3,
  aes(axis1 = race, axis2 = gender, axis3 = diabetes, y = Freq)
) +
  scale_x_discrete(
    limits = c("race", "gender", "diabetes"),
    expand = c(.1, .05)
  ) +
  labs(y = "No. individuals") +
  geom_alluvium(aes(fill = BMI)) +
  geom_stratum() +
  # Label each stratum with its category name. `infer.label = TRUE` is
  # deprecated in ggalluvial, so the computed `stratum` variable is mapped to
  # the label aesthetic explicitly (after_stat() needs ggplot2 >= 3.3.0).
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_fill_manual(values = c("orange", "gray")) +
  theme_minimal() +
  ggtitle("Diabetes in overall population in US 2009-2010") +
  theme(legend.position = "bottom")

## Reference (how to change the order of factor levels):
# http://yhhuang1966.blogspot.com/2018/01/r.html