Treemaps

Create a treemap which explores categories of views

library(treemap)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(RColorBrewer)
# Download the FlowingData post statistics and preview the first six rows.
data <- read.csv(file = "http://datasets.flowingdata.com/post-data.txt")
head(data, n = 6L)
##     id  views comments               category
## 1 5019 148896       28 Artistic Visualization
## 2 1416  81374       26          Visualization
## 3 1416  81374       26               Featured
## 4 3485  80819       37               Featured
## 5 3485  80819       37                Mapping
## 6 3485  80819       37           Data Sources

Heatmaps

This dataset appears to contain 2008 NBA player statistics.

# How to make a heatmap
# Read the per-player 2008 statistics and print the whole table for inspection.
nba <- read.csv(file = "http://datasets.flowingdata.com/ppg2008.csv")
print(nba)
##                  Name  G  MIN  PTS  FGM  FGA   FGP FTM  FTA   FTP X3PM X3PA
## 1        Dwyane Wade  79 38.6 30.2 10.8 22.0 0.491 7.5  9.8 0.765  1.1  3.5
## 2       LeBron James  81 37.7 28.4  9.7 19.9 0.489 7.3  9.4 0.780  1.6  4.7
## 3        Kobe Bryant  82 36.2 26.8  9.8 20.9 0.467 5.9  6.9 0.856  1.4  4.1
## 4      Dirk Nowitzki  81 37.7 25.9  9.6 20.0 0.479 6.0  6.7 0.890  0.8  2.1
## 5      Danny Granger  67 36.2 25.8  8.5 19.1 0.447 6.0  6.9 0.878  2.7  6.7
## 6       Kevin Durant  74 39.0 25.3  8.9 18.8 0.476 6.1  7.1 0.863  1.3  3.1
## 7       Kevin Martin  51 38.2 24.6  6.7 15.9 0.420 9.0 10.3 0.867  2.3  5.4
## 8       Al Jefferson  50 36.6 23.1  9.7 19.5 0.497 3.7  5.0 0.738  0.0  0.1
## 9         Chris Paul  78 38.5 22.8  8.1 16.1 0.503 5.8  6.7 0.868  0.8  2.3
## 10   Carmelo Anthony  66 34.5 22.8  8.1 18.3 0.443 5.6  7.1 0.793  1.0  2.6
## 11        Chris Bosh  77 38.1 22.7  8.0 16.4 0.487 6.5  8.0 0.817  0.2  0.6
## 12       Brandon Roy  78 37.2 22.6  8.1 16.9 0.480 5.3  6.5 0.824  1.1  2.8
## 13    Antawn Jamison  81 38.2 22.2  8.3 17.8 0.468 4.2  5.6 0.754  1.4  3.9
## 14       Tony Parker  72 34.1 22.0  8.9 17.5 0.506 3.9  5.0 0.782  0.3  0.9
## 15  Amare Stoudemire  53 36.8 21.4  7.6 14.1 0.539 6.1  7.3 0.835  0.1  0.1
## 16       Joe Johnson  79 39.5 21.4  7.8 18.0 0.437 3.8  4.6 0.826  1.9  5.2
## 17      Devin Harris  69 36.1 21.3  6.6 15.1 0.438 7.2  8.8 0.820  0.9  3.2
## 18      Michael Redd  33 36.4 21.2  7.5 16.6 0.455 4.0  4.9 0.814  2.1  5.8
## 19        David West  76 39.3 21.0  8.0 17.0 0.472 4.8  5.5 0.884  0.1  0.3
## 20  Zachary Randolph  50 35.1 20.8  8.3 17.5 0.475 3.6  4.9 0.734  0.6  1.9
## 21      Caron Butler  67 38.6 20.8  7.3 16.2 0.453 5.1  6.0 0.858  1.0  3.1
## 22      Vince Carter  80 36.8 20.8  7.4 16.8 0.437 4.2  5.1 0.817  1.9  4.9
## 23   Stephen Jackson  59 39.7 20.7  7.0 16.9 0.414 5.0  6.0 0.826  1.7  5.2
## 24        Ben Gordon  82 36.6 20.7  7.3 16.0 0.455 4.0  4.7 0.864  2.1  5.1
## 25     Dwight Howard  79 35.7 20.6  7.1 12.4 0.572 6.4 10.7 0.594  0.0  0.0
## 26       Paul Pierce  81 37.4 20.5  6.7 14.6 0.457 5.7  6.8 0.830  1.5  3.8
## 27     Al Harrington  73 34.9 20.1  7.3 16.6 0.439 3.2  4.0 0.793  2.3  6.4
## 28    Jamal Crawford  65 38.1 19.7  6.4 15.7 0.410 4.6  5.3 0.872  2.2  6.1
## 29          Yao Ming  77 33.6 19.7  7.4 13.4 0.548 4.9  5.7 0.866  0.0  0.0
## 30 Richard Jefferson  82 35.9 19.6  6.5 14.9 0.439 5.1  6.3 0.805  1.4  3.6
## 31       Jason Terry  74 33.6 19.6  7.3 15.8 0.463 2.7  3.0 0.880  2.3  6.2
## 32    Deron Williams  68 36.9 19.4  6.8 14.5 0.471 4.8  5.6 0.849  1.0  3.3
## 33        Tim Duncan  75 33.7 19.3  7.4 14.8 0.504 4.5  6.4 0.692  0.0  0.0
## 34       Monta Ellis  25 35.6 19.0  7.8 17.2 0.451 3.1  3.8 0.830  0.3  1.0
## 35          Rudy Gay  79 37.3 18.9  7.2 16.0 0.453 3.3  4.4 0.767  1.1  3.1
## 36         Pau Gasol  81 37.1 18.9  7.3 12.9 0.567 4.2  5.4 0.781  0.0  0.0
## 37    Andre Iguodala  82 39.8 18.8  6.6 14.0 0.473 4.6  6.4 0.724  1.0  3.2
## 38    Corey Maggette  51 31.1 18.6  5.7 12.4 0.461 6.7  8.1 0.824  0.5  1.9
## 39         O.J. Mayo  82 38.0 18.5  6.9 15.6 0.438 3.0  3.4 0.879  1.8  4.6
## 40      John Salmons  79 37.5 18.3  6.5 13.8 0.472 3.6  4.4 0.830  1.6  3.8
## 41  Richard Hamilton  67 34.0 18.3  7.0 15.6 0.447 3.3  3.9 0.848  1.0  2.8
## 42         Ray Allen  79 36.3 18.2  6.3 13.2 0.480 3.0  3.2 0.952  2.5  6.2
## 43 LaMarcus Aldridge  81 37.1 18.1  7.4 15.3 0.484 3.2  4.1 0.781  0.1  0.3
## 44       Josh Howard  52 31.9 18.0  6.8 15.1 0.451 3.3  4.2 0.782  1.1  3.2
## 45  Maurice Williams  81 35.0 17.8  6.5 13.9 0.467 2.6  2.8 0.912  2.3  5.2
## 46  Shaquille O'neal  75 30.1 17.8  6.8 11.2 0.609 4.1  6.9 0.595  0.0  0.0
## 47     Rashard Lewis  79 36.2 17.7  6.1 13.8 0.439 2.8  3.4 0.836  2.8  7.0
## 48  Chauncey Billups  79 35.3 17.7  5.2 12.4 0.418 5.3  5.8 0.913  2.1  5.0
## 49     Allen Iverson  57 36.7 17.5  6.1 14.6 0.417 4.8  6.1 0.781  0.5  1.7
## 50     Nate Robinson  74 29.9 17.2  6.1 13.9 0.437 3.4  4.0 0.841  1.7  5.2
##     X3PP ORB DRB  TRB  AST STL BLK  TO  PF
## 1  0.317 1.1 3.9  5.0  7.5 2.2 1.3 3.4 2.3
## 2  0.344 1.3 6.3  7.6  7.2 1.7 1.1 3.0 1.7
## 3  0.351 1.1 4.1  5.2  4.9 1.5 0.5 2.6 2.3
## 4  0.359 1.1 7.3  8.4  2.4 0.8 0.8 1.9 2.2
## 5  0.404 0.7 4.4  5.1  2.7 1.0 1.4 2.5 3.1
## 6  0.422 1.0 5.5  6.5  2.8 1.3 0.7 3.0 1.8
## 7  0.415 0.6 3.0  3.6  2.7 1.2 0.2 2.9 2.3
## 8  0.000 3.4 7.5 11.0  1.6 0.8 1.7 1.8 2.8
## 9  0.364 0.9 4.7  5.5 11.0 2.8 0.1 3.0 2.7
## 10 0.371 1.6 5.2  6.8  3.4 1.1 0.4 3.0 3.0
## 11 0.245 2.8 7.2 10.0  2.5 0.9 1.0 2.3 2.5
## 12 0.377 1.3 3.4  4.7  5.1 1.1 0.3 1.9 1.6
## 13 0.351 2.4 6.5  8.9  1.9 1.2 0.3 1.5 2.7
## 14 0.292 0.4 2.7  3.1  6.9 0.9 0.1 2.6 1.5
## 15 0.429 2.2 5.9  8.1  2.0 0.9 1.1 2.8 3.1
## 16 0.360 0.8 3.6  4.4  5.8 1.1 0.2 2.5 2.2
## 17 0.291 0.4 2.9  3.3  6.9 1.7 0.2 3.1 2.4
## 18 0.366 0.7 2.5  3.2  2.7 1.1 0.1 1.6 1.4
## 19 0.240 2.1 6.4  8.5  2.3 0.6 0.9 2.1 2.7
## 20 0.330 3.1 6.9 10.1  2.1 0.9 0.3 2.3 2.7
## 21 0.310 1.8 4.4  6.2  4.3 1.6 0.3 3.1 2.5
## 22 0.385 0.9 4.2  5.1  4.7 1.0 0.5 2.1 2.9
## 23 0.338 1.2 3.9  5.1  6.5 1.5 0.5 3.9 2.6
## 24 0.410 0.6 2.8  3.5  3.4 0.9 0.3 2.4 2.2
## 25 0.000 4.3 9.6 13.8  1.4 1.0 2.9 3.0 3.4
## 26 0.391 0.7 5.0  5.6  3.6 1.0 0.3 2.8 2.7
## 27 0.364 1.4 4.9  6.2  1.4 1.2 0.3 2.2 3.1
## 28 0.360 0.4 2.6  3.0  4.4 0.9 0.2 2.3 1.4
## 29 1.000 2.6 7.2  9.9  1.8 0.4 1.9 3.0 3.3
## 30 0.397 0.7 3.9  4.6  2.4 0.8 0.2 2.0 3.1
## 31 0.366 0.5 1.9  2.4  3.4 1.3 0.3 1.6 1.9
## 32 0.310 0.4 2.5  2.9 10.7 1.1 0.3 3.4 2.0
## 33 0.000 2.7 8.0 10.7  3.5 0.5 1.7 2.2 2.3
## 34 0.308 0.6 3.8  4.3  3.7 1.6 0.3 2.7 2.7
## 35 0.351 1.4 4.2  5.5  1.7 1.2 0.7 2.6 2.8
## 36 0.500 3.2 6.4  9.6  3.5 0.6 1.0 1.9 2.1
## 37 0.307 1.1 4.6  5.7  5.3 1.6 0.4 2.7 1.9
## 38 0.253 1.0 4.6  5.5  1.8 0.9 0.2 2.4 3.8
## 39 0.384 0.7 3.1  3.8  3.2 1.1 0.2 2.8 2.5
## 40 0.417 0.7 3.5  4.2  3.2 1.1 0.3 2.1 2.3
## 41 0.368 0.7 2.4  3.1  4.4 0.6 0.1 2.0 2.6
## 42 0.409 0.8 2.7  3.5  2.8 0.9 0.2 1.7 2.0
## 43 0.250 2.9 4.6  7.5  1.9 1.0 1.0 1.5 2.6
## 44 0.345 1.1 3.9  5.1  1.6 1.1 0.6 1.7 2.6
## 45 0.436 0.6 2.9  3.4  4.1 0.9 0.1 2.2 2.7
## 46 0.000 2.5 5.9  8.4  1.7 0.7 1.4 2.2 3.4
## 47 0.397 1.2 4.6  5.7  2.6 1.0 0.6 2.0 2.5
## 48 0.408 0.4 2.6  3.0  6.4 1.2 0.2 2.2 2.0
## 49 0.283 0.5 2.5  3.0  5.0 1.5 0.1 2.6 1.5
## 50 0.325 1.3 2.6  3.9  4.1 1.3 0.1 1.9 2.8

Create a cool-color heatmap of the nba 2008 data

# Sort players in ascending order of PTS and label each row with the player.
nba <- nba[order(nba[["PTS"]]), ]
rownames(nba) <- nba[["Name"]]

# Keep columns 2 through 19 (G ... BLK in the printed header). This drops the
# Name column and also the last two columns, TO and PF.
# NOTE(review): confirm that leaving TO and PF out of the heatmap is intended.
nba <- nba[, 2:19]
nba_matrix <- data.matrix(nba)

# Rowv = NA / Colv = NA keep the rows and columns in their current order
# (no dendrograms); scale = "column" standardises each statistic separately.
nba_heatmap <- heatmap(
  x = nba_matrix,
  Rowv = NA,
  Colv = NA,
  col = cm.colors(n = 256),
  scale = "column",
  margins = c(5, 10),
  xlab = "NBA Player Stats",
  ylab = "NBA Players",
  main = "NBA Player Stats in 2008"
)

Change the heatmap colors to heat colors

# Same heatmap as before, drawn with the 256-step heat.colors() palette.
nba_heatmap <- heatmap(
  x = nba_matrix,
  Rowv = NA,
  Colv = NA,
  col = heat.colors(n = 256),
  scale = "column",
  margins = c(5, 10),
  xlab = "NBA Player Stats",
  ylab = "NBA Players",
  main = "NBA Player Stats in 2008"
)

Use the viridis colors

Try using direction = -1 and then try removing it; you will see that it reverses the heatmap levels

library(viridis)
## Loading required package: viridisLite
# Same heatmap with a 25-step viridis palette; direction = -1 reverses the
# order of the colours.
nba_heatmap <- heatmap(
  x = nba_matrix,
  Rowv = NA,
  Colv = NA,
  col = viridis(n = 25, direction = -1),
  scale = "column",
  margins = c(5, 10),
  xlab = "NBA Player Stats",
  ylab = "NBA Players",
  main = "NBA Player Stats in 2008"
)

Code for creating a treemap

# One rectangle per category: area from `views`, colour from `comments`,
# using the "value" colour mapping with the RdYlBu palette.
treemap(
  dtf = data,
  index = "category",
  vSize = "views",
  vColor = "comments",
  type = "value",
  palette = "RdYlBu"
)

  • Notice how the treemap includes a legend for the number of comments

Use RColorBrewer to change the palette to RdYlBu

# Same treemap, but with the "manual" colour mapping for the RdYlBu palette.
treemap(
  dtf = data,
  index = "category",
  vSize = "views",
  vColor = "comments",
  type = "manual",
  palette = "RdYlBu"
)

Use the dataset NYCFlights13 to create a heatmap that explores Late Arrivals

library(nycflights13)
library(RColorBrewer)
# Copy the packaged flights table into the workspace so it can be inspected.
flights <- nycflights13::flights
# NOTE(review): lowercase view() presumably comes from the attached tidyverse
# (tibble::view) and opens a data viewer in interactive sessions - confirm.
view(flights)

Late Arrivals Affect the Usage Cost of Airports

This was modified from Raul Miranda’s work

# Keep only flights that have both a distance and an arrival delay recorded.
flights_nona <- flights %>%
  filter(!is.na(distance), !is.na(arr_delay))

# Summarise per destination: number of flights, mean distance, mean arrival
# delay, and the cost index count * delay / dist. Each operand is already a
# single value per destination, so the outer mean() returns it unchanged.
delays <- flights_nona %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    dist = mean(distance),
    delay = mean(arr_delay),
    delaycost = mean(count * delay / dist)
  )

# Highest cost index first.
delays <- delays %>%
  arrange(desc(delaycost))
head(delays)
## # A tibble: 6 x 5
##   dest  count  dist delay delaycost
##   <chr> <int> <dbl> <dbl>     <dbl>
## 1 DCA    9111  211.  9.07      391.
## 2 IAD    5383  225. 13.9       332.
## 3 ATL   16837  757. 11.3       251.
## 4 BOS   15022  191.  2.91      230.
## 5 CLT   13674  538.  7.36      187.
## 6 RDU    7770  427. 10.1       183.

This shows that Reagan National (DCA) has the highest delay cost. Now take the top 100 destinations and create the heatmap.

# Take the first 100 rows of `delays` (already sorted by descending delaycost)
# and re-sort them in ascending order of delaycost.
# The result is converted to a base data.frame before row names are assigned:
# setting row names on a tibble is deprecated and raises a warning.
top100 <- delays %>%
  head(100) %>%
  arrange(delaycost) %>%
  as.data.frame()
row.names(top100) <- top100$dest

Create a matrix from the dataframe

# Convert the table to a numeric matrix; data.matrix() turns the character
# `dest` column into integer codes.
delays_mat <- data.matrix(top100)
# Label the rows on the matrix itself rather than on the tibble, because
# setting row names on a tibble is deprecated and raises a warning.
rownames(delays_mat) <- top100$dest
# Keep columns 2 through 5 (count, dist, delay, delaycost), dropping `dest`.
delays_mat2 <- delays_mat[, 2:5]

Call heatmap using a ColorBrewer color set, margins=c(7,10) for the aspect ratio, a graph title, x and y labels, font sizes for the x and y labels, and a RowSideColors bar

# One colour per matrix row, interpolated from the YlGnBu palette and named
# by destination. brewer.pal() is asked for 9 colours, the maximum YlGnBu
# offers; requesting nrow(delays_mat2) colours only produced a
# "n too large" warning and returned the same 9-colour palette.
varcols <- setNames(
  colorRampPalette(brewer.pal(9, "YlGnBu"))(nrow(delays_mat2)),
  rownames(delays_mat2)
)

# Rowv = NA / Colv = NA keep rows and columns in their current order;
# scale = "column" standardises each column separately.
# The former `s = 0.6, v = 1` arguments were removed: heatmap() has no such
# parameters, so R's partial argument matching bound them to `symm` and
# `verbose`, which is what printed the layout matrix to the console.
heatmap(
  delays_mat2,
  Rowv = NA,
  Colv = NA,
  col = unname(varcols),
  scale = "column",
  margins = c(7, 10),
  main = "Cost of Late Arrivals",
  xlab = "Flight Characteristics",
  ylab = "Arrival Airport",
  labCol = c("Flights", "Distance", "Delay", "Cost Index"),
  cexCol = 1,
  cexRow = 1,
  RowSideColors = varcols
)

What did this heatmap show?

“Cost index” is defined as a measure of how arrival delays impact the cost of flying into each airport and is calculated as number of flights * mean delay / mean flight distance. For airlines it is a measure of how much the cost to fly to an airport increases due to frequent delays of arrival. Cost index is inversely proportional to distance because delays affect short flights more than long flights and because the profit per seat increases with distance due to the larger and more efficient planes used for longer distances.

The variance in delays across airports is mainly due to (a) airline traffic congestion relative to the airport size; and (b) regional climate and weather events. It is not strongly dependent upon airline carrier or tail number.

Therefore, airports such as ORD and BOS have high cost index because they are highly congested and are frequently delayed due to weather. Airports like IAD, PHL, DTW, etc., are very congested despite their large size and also show high cost index. Smaller airports such as HDN, SNA, HNL, LEX, etc., have null to slightly negative cost index because they are not congested and keep flights on time.

Streamgraph code

Load devtools and libraries to create the following streamgraphs

# Install streamgraph from GitHub only when it is not already available, so
# that re-running the document does not contact GitHub every time.
# Note: an already-installed copy is left as is; call
# devtools::install_github("hrbrmstr/streamgraph") manually to upgrade it.
if (!requireNamespace("streamgraph", quietly = TRUE)) {
  devtools::install_github("hrbrmstr/streamgraph")
}
# Record the R version and package versions used for this run.
devtools::session_info()
## - Session info ---------------------------------------------------------------
##  setting  value                       
##  version  R version 4.1.1 (2021-08-10)
##  os       Windows 10 x64              
##  system   x86_64, mingw32             
##  ui       RTerm                       
##  language (EN)                        
##  collate  English_United States.1252  
##  ctype    English_United States.1252  
##  tz       America/New_York            
##  date     2021-10-05                  
## 
## - Packages -------------------------------------------------------------------
##  package      * version date       lib source        
##  assertthat     0.2.1   2019-03-21 [1] CRAN (R 4.1.1)
##  backports      1.2.1   2020-12-09 [1] CRAN (R 4.1.0)
##  broom          0.7.9   2021-07-27 [1] CRAN (R 4.1.1)
##  bslib          0.3.0   2021-09-02 [1] CRAN (R 4.1.1)
##  cachem         1.0.6   2021-08-19 [1] CRAN (R 4.1.1)
##  callr          3.7.0   2021-04-20 [1] CRAN (R 4.1.1)
##  cellranger     1.1.0   2016-07-27 [1] CRAN (R 4.1.1)
##  cli            3.0.1   2021-07-17 [1] CRAN (R 4.1.1)
##  colorspace     2.0-2   2021-06-24 [1] CRAN (R 4.1.1)
##  crayon         1.4.1   2021-02-08 [1] CRAN (R 4.1.1)
##  curl           4.3.2   2021-06-23 [1] CRAN (R 4.1.1)
##  data.table     1.14.0  2021-02-21 [1] CRAN (R 4.1.1)
##  DBI            1.1.1   2021-01-15 [1] CRAN (R 4.1.1)
##  dbplyr         2.1.1   2021-04-06 [1] CRAN (R 4.1.1)
##  desc           1.4.0   2021-09-28 [1] CRAN (R 4.1.1)
##  devtools       2.4.2   2021-06-07 [1] CRAN (R 4.1.1)
##  digest         0.6.27  2020-10-24 [1] CRAN (R 4.1.1)
##  dplyr        * 1.0.7   2021-06-18 [1] CRAN (R 4.1.1)
##  ellipsis       0.3.2   2021-04-29 [1] CRAN (R 4.1.1)
##  evaluate       0.14    2019-05-28 [1] CRAN (R 4.1.1)
##  fansi          0.5.0   2021-05-25 [1] CRAN (R 4.1.1)
##  fastmap        1.1.0   2021-01-25 [1] CRAN (R 4.1.1)
##  forcats      * 0.5.1   2021-01-27 [1] CRAN (R 4.1.1)
##  fs             1.5.0   2020-07-31 [1] CRAN (R 4.1.1)
##  generics       0.1.0   2020-10-31 [1] CRAN (R 4.1.1)
##  ggplot2      * 3.3.5   2021-06-25 [1] CRAN (R 4.1.1)
##  glue           1.4.2   2020-08-27 [1] CRAN (R 4.1.1)
##  gridBase       0.4-7   2014-02-24 [1] CRAN (R 4.1.1)
##  gridExtra      2.3     2017-09-09 [1] CRAN (R 4.1.1)
##  gtable         0.3.0   2019-03-25 [1] CRAN (R 4.1.1)
##  haven          2.4.3   2021-08-04 [1] CRAN (R 4.1.1)
##  highr          0.9     2021-04-16 [1] CRAN (R 4.1.1)
##  hms            1.1.0   2021-05-17 [1] CRAN (R 4.1.1)
##  htmltools      0.5.2   2021-08-25 [1] CRAN (R 4.1.1)
##  httpuv         1.6.3   2021-09-09 [1] CRAN (R 4.1.1)
##  httr           1.4.2   2020-07-20 [1] CRAN (R 4.1.1)
##  igraph         1.2.6   2020-10-06 [1] CRAN (R 4.1.1)
##  jquerylib      0.1.4   2021-04-26 [1] CRAN (R 4.1.1)
##  jsonlite       1.7.2   2020-12-09 [1] CRAN (R 4.1.1)
##  knitr          1.34    2021-09-09 [1] CRAN (R 4.1.1)
##  later          1.3.0   2021-08-18 [1] CRAN (R 4.1.1)
##  lifecycle      1.0.1   2021-09-24 [1] CRAN (R 4.1.1)
##  lubridate      1.7.10  2021-02-26 [1] CRAN (R 4.1.1)
##  magrittr       2.0.1   2020-11-17 [1] CRAN (R 4.1.1)
##  memoise        2.0.0   2021-01-26 [1] CRAN (R 4.1.1)
##  mime           0.11    2021-06-23 [1] CRAN (R 4.1.0)
##  modelr         0.1.8   2020-05-19 [1] CRAN (R 4.1.1)
##  munsell        0.5.0   2018-06-12 [1] CRAN (R 4.1.1)
##  nycflights13 * 1.0.2   2021-04-12 [1] CRAN (R 4.1.1)
##  pillar         1.6.3   2021-09-26 [1] CRAN (R 4.1.1)
##  pkgbuild       1.2.0   2020-12-15 [1] CRAN (R 4.1.1)
##  pkgconfig      2.0.3   2019-09-22 [1] CRAN (R 4.1.1)
##  pkgload        1.2.2   2021-09-11 [1] CRAN (R 4.1.1)
##  prettyunits    1.1.1   2020-01-24 [1] CRAN (R 4.1.1)
##  processx       3.5.2   2021-04-30 [1] CRAN (R 4.1.1)
##  promises       1.2.0.1 2021-02-11 [1] CRAN (R 4.1.1)
##  ps             1.6.0   2021-02-28 [1] CRAN (R 4.1.1)
##  purrr        * 0.3.4   2020-04-17 [1] CRAN (R 4.1.1)
##  R6             2.5.1   2021-08-19 [1] CRAN (R 4.1.1)
##  RColorBrewer * 1.1-2   2014-12-07 [1] CRAN (R 4.1.0)
##  Rcpp           1.0.7   2021-07-07 [1] CRAN (R 4.1.1)
##  readr        * 2.0.2   2021-09-27 [1] CRAN (R 4.1.1)
##  readxl         1.3.1   2019-03-13 [1] CRAN (R 4.1.1)
##  remotes        2.4.1   2021-09-29 [1] CRAN (R 4.1.1)
##  reprex         2.0.1   2021-08-05 [1] CRAN (R 4.1.1)
##  rlang          0.4.11  2021-04-30 [1] CRAN (R 4.1.1)
##  rmarkdown      2.11    2021-09-14 [1] CRAN (R 4.1.1)
##  rprojroot      2.0.2   2020-11-15 [1] CRAN (R 4.1.1)
##  rstudioapi     0.13    2020-11-12 [1] CRAN (R 4.1.1)
##  rvest          1.0.1   2021-07-26 [1] CRAN (R 4.1.1)
##  sass           0.4.0   2021-05-12 [1] CRAN (R 4.1.1)
##  scales         1.1.1   2020-05-11 [1] CRAN (R 4.1.1)
##  sessioninfo    1.1.1   2018-11-05 [1] CRAN (R 4.1.1)
##  shiny          1.7.1   2021-10-02 [1] CRAN (R 4.1.1)
##  stringi        1.7.4   2021-08-25 [1] CRAN (R 4.1.1)
##  stringr      * 1.4.0   2019-02-10 [1] CRAN (R 4.1.1)
##  testthat       3.1.0   2021-10-04 [1] CRAN (R 4.1.1)
##  tibble       * 3.1.5   2021-09-30 [1] CRAN (R 4.1.1)
##  tidyr        * 1.1.4   2021-09-27 [1] CRAN (R 4.1.1)
##  tidyselect     1.1.1   2021-04-30 [1] CRAN (R 4.1.1)
##  tidyverse    * 1.3.1   2021-04-15 [1] CRAN (R 4.1.1)
##  treemap      * 2.4-3   2021-08-22 [1] CRAN (R 4.1.1)
##  tzdb           0.1.2   2021-07-20 [1] CRAN (R 4.1.1)
##  usethis        2.0.1   2021-02-10 [1] CRAN (R 4.1.1)
##  utf8           1.2.2   2021-07-24 [1] CRAN (R 4.1.1)
##  vctrs          0.3.8   2021-04-29 [1] CRAN (R 4.1.1)
##  viridis      * 0.6.1   2021-05-11 [1] CRAN (R 4.1.1)
##  viridisLite  * 0.4.0   2021-04-13 [1] CRAN (R 4.1.1)
##  withr          2.4.2   2021-04-18 [1] CRAN (R 4.1.1)
##  xfun           0.25    2021-08-06 [1] CRAN (R 4.1.1)
##  xml2           1.3.2   2020-04-23 [1] CRAN (R 4.1.1)
##  xtable         1.8-4   2019-04-21 [1] CRAN (R 4.1.1)
##  yaml           2.2.1   2020-02-01 [1] CRAN (R 4.1.0)
## 
## [1] C:/Users/Jerem/OneDrive/Documents/R/win-library/4.1
## [2] C:/Program Files/R/R-4.1.1/library
library(dplyr)
library(streamgraph)
library(babynames)

Here is a trivial streamgraph using simulated names over time

# Create data: ten series (letters a-j) with one random value per year
# from 1990 to 2016.
year <- rep(1990:2016, each = 10)
name <- rep(letters[1:10], times = 27)
# Draw one value per row, without replacement, from the grid 0, 0.0001, ..., 1.
value <- sample(seq(0, 1, by = 0.0001), size = length(year))
data <- data.frame(year = year, name = name, value = value)
streamgraph(data, key = "name", value = "value", date = "year")
## Warning in widget_html(name = class(x)[1], package = attr(x, "package"), :
## streamgraph_html returned an object of class `list` instead of a `shiny.tag`.

Now look at the babynames dataset

# Number of columns in the babynames table.
ncol(babynames)
## [1] 5
# Preview the first six rows (year, sex, name, n, prop).
head(babynames)
## # A tibble: 6 x 5
##    year sex   name          n   prop
##   <dbl> <chr> <chr>     <int>  <dbl>
## 1  1880 F     Mary       7065 0.0724
## 2  1880 F     Anna       2604 0.0267
## 3  1880 F     Emma       2003 0.0205
## 4  1880 F     Elizabeth  1939 0.0199
## 5  1880 F     Minnie     1746 0.0179
## 6  1880 F     Margaret   1578 0.0162
# Show the type and first few values of each column.
str(babynames)
## tibble [1,924,665 x 5] (S3: tbl_df/tbl/data.frame)
##  $ year: num [1:1924665] 1880 1880 1880 1880 1880 1880 1880 1880 1880 1880 ...
##  $ sex : chr [1:1924665] "F" "F" "F" "F" ...
##  $ name: chr [1:1924665] "Mary" "Anna" "Emma" "Elizabeth" ...
##  $ n   : int [1:1924665] 7065 2604 2003 1939 1746 1578 1472 1414 1320 1288 ...
##  $ prop: num [1:1924665] 0.0724 0.0267 0.0205 0.0199 0.0179 ...

Babynames streamgraph

Mouse over the colors and years to look at the pattern of various names

# Streamgraph of every name beginning with "Kr": total count per year and
# name (tally() sums the existing `n` column within each group).
babynames %>%
  filter(startsWith(name, "Kr")) %>%
  group_by(year, name) %>%
  tally(wt = n) %>%
  streamgraph(key = "name", value = "n", date = "year")
## Warning in widget_html(name = class(x)[1], package = attr(x, "package"), :
## streamgraph_html returned an object of class `list` instead of a `shiny.tag`.
# Streamgraphing Commercial Real Estate Transaction Volume by Asset Class Since 2001

# Download the transaction data.
dat <- read.csv(
  file = "http://asbcllc.com/blog/2015/february/cre_stream_graph_test/data/cre_transaction-data.csv"
)

# One stream per asset class, sized by volume_billions over year, with
# "cardinal" interpolation, a yearly x axis and the PuOr brewer palette.
dat %>%
  streamgraph(
    key = "asset_class",
    value = "volume_billions",
    date = "year",
    interpolate = "cardinal"
  ) %>%
  sg_axis_x(1, "year", "%Y") %>%
  sg_fill_brewer("PuOr")
## Warning in widget_html(name = class(x)[1], package = attr(x, "package"), :
## streamgraph_html returned an object of class `list` instead of a `shiny.tag`.

Alluvials

Load the alluvial package

#install.packages("alluvial")
library(alluvial)

Refugees is a prebuilt dataset in the alluvial package

If you want to save the prebuilt dataset to your folder, use the write_csv function

# Print the full Refugees data frame (country, year, refugees) from alluvial.
alluvial::Refugees
##         country year refugees
## 1   Afghanistan 2003  2136043
## 2       Burundi 2003   531637
## 3     Congo DRC 2003   453465
## 4          Iraq 2003   368580
## 5       Myanmar 2003   151384
## 6     Palestine 2003   350568
## 7       Somalia 2003   402336
## 8         Sudan 2003   606242
## 9         Syria 2003    20819
## 10      Vietnam 2003   363179
## 11  Afghanistan 2004  2084109
## 12      Burundi 2004   485454
## 13    Congo DRC 2004   461042
## 14         Iraq 2004   311905
## 15      Myanmar 2004   161013
## 16    Palestine 2004   350617
## 17      Somalia 2004   389304
## 18        Sudan 2004   730647
## 19        Syria 2004    21440
## 20      Vietnam 2004   349809
## 21  Afghanistan 2005  2166149
## 22      Burundi 2005   438706
## 23    Congo DRC 2005   430929
## 24         Iraq 2005   262299
## 25      Myanmar 2005   164864
## 26    Palestine 2005   349673
## 27      Somalia 2005   395553
## 28        Sudan 2005   693632
## 29        Syria 2005    16401
## 30      Vietnam 2005   358268
## 31  Afghanistan 2006  2107519
## 32      Burundi 2006   396541
## 33    Congo DRC 2006   401914
## 34         Iraq 2006  1450905
## 35      Myanmar 2006   202826
## 36    Palestine 2006   334142
## 37      Somalia 2006   464252
## 38        Sudan 2006   686311
## 39        Syria 2006    12338
## 40      Vietnam 2006   374279
## 41  Afghanistan 2007  1909911
## 42      Burundi 2007   375715
## 43    Congo DRC 2007   370386
## 44         Iraq 2007  2279245
## 45      Myanmar 2007   191256
## 46    Palestine 2007   335219
## 47      Somalia 2007   455356
## 48        Sudan 2007   523032
## 49        Syria 2007    13671
## 50      Vietnam 2007   327776
## 51  Afghanistan 2008  1817913
## 52      Burundi 2008   281592
## 53    Congo DRC 2008   367995
## 54         Iraq 2008  1873519
## 55      Myanmar 2008   184347
## 56    Palestine 2008   333990
## 57      Somalia 2008   559153
## 58        Sudan 2008   397013
## 59        Syria 2008    15186
## 60      Vietnam 2008   328183
## 61  Afghanistan 2009  1905804
## 62      Burundi 2009    94239
## 63    Congo DRC 2009   455852
## 64         Iraq 2009  1785212
## 65      Myanmar 2009   206650
## 66    Palestine 2009    95177
## 67      Somalia 2009   678308
## 68        Sudan 2009   348500
## 69        Syria 2009    17884
## 70      Vietnam 2009   339289
## 71  Afghanistan 2010  3054709
## 72      Burundi 2010    84064
## 73    Congo DRC 2010   476693
## 74         Iraq 2010  1683575
## 75      Myanmar 2010   215644
## 76    Palestine 2010    93299
## 77      Somalia 2010   770148
## 78        Sudan 2010   379067
## 79        Syria 2010    18428
## 80      Vietnam 2010   338698
## 81  Afghanistan 2011  2664436
## 82      Burundi 2011   101288
## 83    Congo DRC 2011   491481
## 84         Iraq 2011  1428308
## 85      Myanmar 2011   214594
## 86    Palestine 2011    94121
## 87      Somalia 2011  1075148
## 88        Sudan 2011   491013
## 89        Syria 2011    19900
## 90      Vietnam 2011   337829
## 91  Afghanistan 2012  2586034
## 92      Burundi 2012    73362
## 93    Congo DRC 2012   509082
## 94         Iraq 2012   746181
## 95      Myanmar 2012   215338
## 96    Palestine 2012    94820
## 97      Somalia 2012  1136713
## 98        Sudan 2012   558195
## 99        Syria 2012   728603
## 100     Vietnam 2012   336939
## 101 Afghanistan 2013  2556507
## 102     Burundi 2013    72652
## 103   Congo DRC 2013   499320
## 104        Iraq 2013   401384
## 105     Myanmar 2013   222053
## 106   Palestine 2013    96044
## 107     Somalia 2013  1121772
## 108       Sudan 2013   636400
## 109       Syria 2013  2457255
## 110     Vietnam 2013   314105
# Copy the packaged dataset into the workspace.
Refugees <- alluvial::Refugees

Create the alluvial to show UNHCR-recognised refugees in the top 10 countries from 2003-2013

Alluvials need the variables: category, time-variable, value

library("ggalluvial")
# Strongly penalise scientific notation so large refugee counts are printed
# in fixed notation.
options(scipen = 999)

# The Accent palette has only 8 colours but the data has more countries than
# that. scale_fill_brewer(palette = "Accent") therefore warned "n too large"
# and left the extra countries without a palette colour. Interpolate the 8
# Accent colours to one fill colour per country instead.
accent_fill <- colorRampPalette(RColorBrewer::brewer.pal(8, "Accent"))(
  length(unique(alluvial::Refugees$country))
)

# One alluvium per country: x = year, height = number of refugees.
ggalluv <- ggplot(
  alluvial::Refugees,
  aes(y = refugees, x = year, alluvium = country)
) +
  theme_bw() +
  geom_alluvium(
    aes(fill = country, color = country),
    width = .1, alpha = .5, decreasing = FALSE,
    curve_type = "sigmoid"
  ) +
  scale_fill_manual(values = accent_fill) +
  ggtitle("UNHCR-recognised refugees\nTop 10 countries (2003-2013)\n") +
  ylab("Number of Refugees")
ggalluv