suppressPackageStartupMessages( require( 'oetteR' ) )
suppressPackageStartupMessages( require( 'tidyverse' ) )
suppressPackageStartupMessages( require( 'ggalluvial' ) )

Grouped Data

Sum of all frequencies is constant

# Flatten the UCBAdmissions contingency table into long format: one row per
# Admit x Gender x Dept cell, with the cell frequency in column `n`.
# `as_tibble()` is used instead of its deprecated alias `as.tibble()`.
data <- UCBAdmissions %>%
  as_tibble()

# Print the tibble.
data
## # A tibble: 24 x 4
##       Admit Gender  Dept     n
##       <chr>  <chr> <chr> <dbl>
##  1 Admitted   Male     A   512
##  2 Rejected   Male     A   313
##  3 Admitted Female     A    89
##  4 Rejected Female     A    19
##  5 Admitted   Male     B   353
##  6 Rejected   Male     B   207
##  7 Admitted Female     B    17
##  8 Rejected Female     B     8
##  9 Admitted   Male     C   120
## 10 Rejected   Male     C   205
## # ... with 14 more rows
# Alluvial plot with one axis per categorical variable (Gender, then Dept).
# The cell frequency `n` is the flow weight and flows are filled by `Admit`.
ggplot(
  data,
  aes(weight = n, axis1 = Gender, axis2 = Dept)
) +
  geom_alluvium(aes(fill = Admit), width = 1 / 12) +
  geom_stratum(width = 1 / 12, fill = "black", color = "grey") +
  geom_label(stat = "stratum", label.strata = TRUE) +
  # The axes sit at x = 1 and x = 2; give those positions readable names.
  scale_x_continuous(breaks = 1:2, labels = c("Gender", "Dept")) +
  # Fill palette comes from f_plot_col_vector74() (presumably provided by
  # oetteR -- confirm).
  scale_fill_manual(values = f_plot_col_vector74()) +
  ggtitle("UC Berkeley admissions and rejections, by sex and department")

Sum of all frequencies varies

# Load the Refugees data set from the alluvial package.
data(Refugees, package = "alluvial")

# Lookup table: country name -> region.
country_regions <- c(
  Afghanistan = "Middle East",
  Burundi = "Central Africa",
  `Congo DRC` = "Central Africa",
  Iraq = "Middle East",
  Myanmar = "Southeast Asia",
  Palestine = "Middle East",
  Somalia = "Horn of Africa",
  Sudan = "Central Africa",
  Syria = "Middle East",
  Vietnam = "Southeast Asia"
)

# Look the region up by country *name*. `Refugees$country` is a factor, and
# subsetting with a factor uses its integer level codes, which only matches
# the lookup table while its entries happen to be in factor-level order.
# Converting to character makes the lookup independent of that ordering.
Refugees$region <- country_regions[as.character(Refugees$country)]

# Convert the Refugees data frame (with the added `region` column) to a
# tibble. `as_tibble()` is used instead of its deprecated alias `as.tibble()`.
data <- Refugees %>%
  as_tibble()

# Print the tibble.
data
## # A tibble: 110 x 4
##        country  year refugees         region
##         <fctr> <int>    <int>          <chr>
##  1 Afghanistan  2003  2136043    Middle East
##  2     Burundi  2003   531637 Central Africa
##  3   Congo DRC  2003   453465 Central Africa
##  4        Iraq  2003   368580    Middle East
##  5     Myanmar  2003   151384 Southeast Asia
##  6   Palestine  2003   350568    Middle East
##  7     Somalia  2003   402336 Horn of Africa
##  8       Sudan  2003   606242 Central Africa
##  9       Syria  2003    20819    Middle East
## 10     Vietnam  2003   363179 Southeast Asia
## # ... with 100 more rows
# Alluvial plot over time for the "Middle East" rows only: one alluvium per
# country, weighted by `refugees`, with `year` on the x axis.
ggplot(
  filter(data, region == "Middle East"),
  aes(x = year, weight = refugees, alluvium = country)
) +
  geom_alluvium(
    aes(fill = country, colour = country),
    alpha = 0.75,
    decreasing = FALSE
  ) +
  # Tick every second year and slant the labels.
  scale_x_continuous(breaks = seq(2003, 2013, by = 2)) +
  theme(axis.text.x = element_text(angle = -30, hjust = 0))

Tidy Data (Ungrouped Data)

# Load the `majors` data set (presumably shipped with ggalluvial -- confirm).
data(majors)

# Convert to a tibble and make `curriculum` a factor.
data <- mutate(as_tibble(majors), curriculum = as.factor(curriculum))

# Print the tibble.
data
## # A tibble: 80 x 3
##    student semester  curriculum
##      <int>   <fctr>      <fctr>
##  1       1    CURR1    Painting
##  2       2    CURR1    Painting
##  3       6    CURR1    Sculpure
##  4       8    CURR1    Painting
##  5       9    CURR1    Sculpure
##  6      10    CURR1    Painting
##  7      11    CURR1 Digital Art
##  8      12    CURR1    Sculpure
##  9      14    CURR1     Ceramic
## 10      15    CURR1 Photography
## # ... with 70 more rows
# Alluvial plot from the long-format data: `semester` on the x axis,
# `curriculum` as stratum, fill and label, and one alluvium per `student`.
ggplot(
  data,
  aes(
    x = semester,
    stratum = curriculum,
    alluvium = student,
    fill = curriculum,
    label = curriculum
  )
) +
  geom_flow(
    stat = "alluvium",
    lode.guidance = "rightleft",
    color = "darkgray"
  ) +
  geom_stratum() +
  ggtitle("student curricula across several semesters") +
  theme(legend.position = "bottom")

Same plot from grouped data

# Rebuild the same information as weighted (grouped) data: one alluvium per
# distinct combination of curricula across semesters, carrying its frequency
# as `weight`. Intermediate results are kept inside `local()` so that only
# `data_gr` is created in the calling environment.
data_gr <- local({
  # Wide format: one row per student, one column per semester.
  per_student <- spread(data, key = semester, value = curriculum)

  # Drop the student id and count how often each combination occurs (`n`).
  path_counts <- per_student %>%
    select(-student) %>%
    group_by_all() %>%
    count() %>%
    ungroup()

  # Number the combinations, rename the count to `weight`, and return to long
  # format with the semester name in `x` and the curriculum in `stratum`.
  path_counts %>%
    mutate(alluvium = row_number()) %>%
    rename(weight = n) %>%
    gather(key = "x", value = "stratum", -weight, -alluvium) %>%
    mutate(x = forcats::as_factor(x))
})
# Same plot as for the ungrouped data, but drawn from `data_gr`: each row is
# one alluvium at one x position, and `weight` carries its frequency.
ggplot(
  data_gr,
  aes(
    x = x,
    stratum = stratum,
    alluvium = alluvium,
    weight = weight,
    fill = stratum,
    label = stratum
  )
) +
  geom_flow(
    stat = "alluvium",
    lode.guidance = "rightleft",
    color = "darkgray"
  ) +
  geom_stratum() +
  ggtitle("student curricula across several semesters") +
  theme(legend.position = "bottom")