# Attach the packages this script depends on. library() is used instead of
# require() so that a missing package stops the script here with a clear
# error, rather than returning FALSE and failing later at first use.
suppressPackageStartupMessages(library("oetteR"))
suppressPackageStartupMessages(library("tidyverse"))
suppressPackageStartupMessages(library("ggalluvial"))
Grouped Data
Sum of all frequencies is constant
# Flatten the UCBAdmissions contingency table into a tibble with one row per
# Admit x Gender x Dept cell and the cell frequency in column `n`.
# as_tibble() replaces the deprecated as.tibble() alias and matches the
# spelling already used further down in this file.
data <- UCBAdmissions %>%
  as_tibble()
data
## # A tibble: 24 x 4
## Admit Gender Dept n
## <chr> <chr> <chr> <dbl>
## 1 Admitted Male A 512
## 2 Rejected Male A 313
## 3 Admitted Female A 89
## 4 Rejected Female A 19
## 5 Admitted Male B 353
## 6 Rejected Male B 207
## 7 Admitted Female B 17
## 8 Rejected Female B 8
## 9 Admitted Male C 120
## 10 Rejected Male C 205
## # ... with 14 more rows
# Alluvial plot of the frequency table: Gender on the first axis, Dept on the
# second, flows weighted by `n` and filled by admission outcome.
# NOTE(review): `weight` and `label.strata` are the argument names this
# script was written against; confirm they are still accepted by the
# installed ggalluvial version.
ggplot(
  data,
  aes(weight = n, axis1 = Gender, axis2 = Dept)
) +
  geom_alluvium(aes(fill = Admit), width = 1 / 12) +
  geom_stratum(width = 1 / 12, fill = "black", color = "grey") +
  geom_label(stat = "stratum", label.strata = TRUE) +
  scale_x_continuous(breaks = 1:2, labels = c("Gender", "Dept")) +
  scale_fill_manual(values = f_plot_col_vector74()) +
  ggtitle("UC Berkeley admissions and rejections, by sex and department")

Sum of all frequencies varies
# Load the Refugees example data set from the 'alluvial' package.
data(Refugees, package = "alluvial")

# Lookup table: country name -> region label.
country_regions <- c(
  Afghanistan = "Middle East",
  Burundi = "Central Africa",
  `Congo DRC` = "Central Africa",
  Iraq = "Middle East",
  Myanmar = "Southeast Asia",
  Palestine = "Middle East",
  Somalia = "Horn of Africa",
  Sudan = "Central Africa",
  Syria = "Middle East",
  Vietnam = "Southeast Asia"
)

# Look each region up by country *name*. Refugees$country is a factor, and
# subsetting with a factor uses its integer codes rather than its labels, so
# the previous `country_regions[Refugees$country]` was only correct because
# the lookup table happens to be listed in the same order as the factor
# levels. Converting to character makes the match explicit and independent of
# ordering; unname() keeps the country names from being carried along as
# element names of the new column.
Refugees$region <- unname(country_regions[as.character(Refugees$country)])
# Convert the annotated Refugees data frame to a tibble and print it.
# as_tibble() replaces the deprecated as.tibble() alias and matches the
# spelling already used further down in this file.
data <- Refugees %>%
  as_tibble()
data
## # A tibble: 110 x 4
## country year refugees region
## <fctr> <int> <int> <chr>
## 1 Afghanistan 2003 2136043 Middle East
## 2 Burundi 2003 531637 Central Africa
## 3 Congo DRC 2003 453465 Central Africa
## 4 Iraq 2003 368580 Middle East
## 5 Myanmar 2003 151384 Southeast Asia
## 6 Palestine 2003 350568 Middle East
## 7 Somalia 2003 402336 Horn of Africa
## 8 Sudan 2003 606242 Central Africa
## 9 Syria 2003 20819 Middle East
## 10 Vietnam 2003 363179 Southeast Asia
## # ... with 100 more rows
# Alluvial plot over time for the "Middle East" rows only: one alluvium per
# country, weighted by refugee count, with fill and outline mapped to country.
data %>%
  filter(region == "Middle East") %>%
  ggplot(aes(x = year, weight = refugees, alluvium = country)) +
  geom_alluvium(
    aes(fill = country, colour = country),
    alpha = .75,
    decreasing = FALSE
  ) +
  # Tick every second year from 2003 to 2013, with slanted labels.
  scale_x_continuous(breaks = seq(2003, 2013, 2)) +
  theme(axis.text.x = element_text(angle = -30, hjust = 0))

Tidy Data (Ungrouped data)
# Load the `majors` example data, convert it to a tibble and store the
# curriculum column as a factor, then print the result.
data(majors)
data <- mutate(
  as_tibble(majors),
  curriculum = as.factor(curriculum)
)
data
## # A tibble: 80 x 3
## student semester curriculum
## <int> <fctr> <fctr>
## 1 1 CURR1 Painting
## 2 2 CURR1 Painting
## 3 6 CURR1 Sculpure
## 4 8 CURR1 Painting
## 5 9 CURR1 Sculpure
## 6 10 CURR1 Painting
## 7 11 CURR1 Digital Art
## 8 12 CURR1 Sculpure
## 9 14 CURR1 Ceramic
## 10 15 CURR1 Photography
## # ... with 70 more rows
# Plot straight from the long (one row per student and semester) data:
# semesters on the x axis, curricula as strata, one alluvium per student.
ggplot(
  data,
  aes(
    x = semester,
    stratum = curriculum,
    alluvium = student,
    fill = curriculum,
    label = curriculum
  )
) +
  geom_flow(
    stat = "alluvium",
    lode.guidance = "rightleft",
    color = "darkgray"
  ) +
  geom_stratum() +
  theme(legend.position = "bottom") +
  ggtitle("student curricula across several semesters")

Same plot from grouped data
# Collapse the per-student data into weighted groups:
#   1. widen to one column per semester holding the curriculum,
#   2. drop the student id,
#   3. count how many rows share each combination of curricula,
#   4. number each combination (`alluvium`) and call its count `weight`,
#   5. return to long form with `x` = semester column name and
#      `stratum` = curriculum, then turn `x` into a factor.
data_gr <- data %>%
  spread(key = semester, value = curriculum) %>%
  select(-student) %>%
  group_by_all() %>%
  count() %>%
  ungroup() %>%
  mutate(alluvium = row_number()) %>%
  rename(weight = n) %>%
  gather(key = "x", value = "stratum", -weight, -alluvium) %>%
  mutate(x = forcats::as_factor(x))
# Same flow plot as for the tidy data, but drawn from the grouped table:
# each row is one combination at one semester, weighted by its count.
ggplot(
  data_gr,
  aes(
    x = x,
    stratum = stratum,
    alluvium = alluvium,
    weight = weight,
    fill = stratum,
    label = stratum
  )
) +
  geom_flow(
    stat = "alluvium",
    lode.guidance = "rightleft",
    color = "darkgray"
  ) +
  geom_stratum() +
  theme(legend.position = "bottom") +
  ggtitle("student curricula across several semesters")
