load libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.3.0      ✔ stringr 1.5.0 
## ✔ readr   2.1.4      ✔ forcats 1.0.0 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(treemap)
library(RColorBrewer)

load the NBA data, preview the headings, and coerce it to a data frame (needed to avoid an error later)

# Download the 2008 NBA per-game statistics.
nba <- read.csv(file = "http://datasets.flowingdata.com/ppg2008.csv")
# Print the full table. read.csv() already returns a data.frame and the result
# of as.data.frame() is not assigned, so `nba` itself is left unchanged.
as.data.frame(x = nba)
##                  Name  G  MIN  PTS  FGM  FGA   FGP FTM  FTA   FTP X3PM X3PA
## 1        Dwyane Wade  79 38.6 30.2 10.8 22.0 0.491 7.5  9.8 0.765  1.1  3.5
## 2       LeBron James  81 37.7 28.4  9.7 19.9 0.489 7.3  9.4 0.780  1.6  4.7
## 3        Kobe Bryant  82 36.2 26.8  9.8 20.9 0.467 5.9  6.9 0.856  1.4  4.1
## 4      Dirk Nowitzki  81 37.7 25.9  9.6 20.0 0.479 6.0  6.7 0.890  0.8  2.1
## 5      Danny Granger  67 36.2 25.8  8.5 19.1 0.447 6.0  6.9 0.878  2.7  6.7
## 6       Kevin Durant  74 39.0 25.3  8.9 18.8 0.476 6.1  7.1 0.863  1.3  3.1
## 7       Kevin Martin  51 38.2 24.6  6.7 15.9 0.420 9.0 10.3 0.867  2.3  5.4
## 8       Al Jefferson  50 36.6 23.1  9.7 19.5 0.497 3.7  5.0 0.738  0.0  0.1
## 9         Chris Paul  78 38.5 22.8  8.1 16.1 0.503 5.8  6.7 0.868  0.8  2.3
## 10   Carmelo Anthony  66 34.5 22.8  8.1 18.3 0.443 5.6  7.1 0.793  1.0  2.6
## 11        Chris Bosh  77 38.1 22.7  8.0 16.4 0.487 6.5  8.0 0.817  0.2  0.6
## 12       Brandon Roy  78 37.2 22.6  8.1 16.9 0.480 5.3  6.5 0.824  1.1  2.8
## 13    Antawn Jamison  81 38.2 22.2  8.3 17.8 0.468 4.2  5.6 0.754  1.4  3.9
## 14       Tony Parker  72 34.1 22.0  8.9 17.5 0.506 3.9  5.0 0.782  0.3  0.9
## 15  Amare Stoudemire  53 36.8 21.4  7.6 14.1 0.539 6.1  7.3 0.835  0.1  0.1
## 16       Joe Johnson  79 39.5 21.4  7.8 18.0 0.437 3.8  4.6 0.826  1.9  5.2
## 17      Devin Harris  69 36.1 21.3  6.6 15.1 0.438 7.2  8.8 0.820  0.9  3.2
## 18      Michael Redd  33 36.4 21.2  7.5 16.6 0.455 4.0  4.9 0.814  2.1  5.8
## 19        David West  76 39.3 21.0  8.0 17.0 0.472 4.8  5.5 0.884  0.1  0.3
## 20  Zachary Randolph  50 35.1 20.8  8.3 17.5 0.475 3.6  4.9 0.734  0.6  1.9
## 21      Caron Butler  67 38.6 20.8  7.3 16.2 0.453 5.1  6.0 0.858  1.0  3.1
## 22      Vince Carter  80 36.8 20.8  7.4 16.8 0.437 4.2  5.1 0.817  1.9  4.9
## 23   Stephen Jackson  59 39.7 20.7  7.0 16.9 0.414 5.0  6.0 0.826  1.7  5.2
## 24        Ben Gordon  82 36.6 20.7  7.3 16.0 0.455 4.0  4.7 0.864  2.1  5.1
## 25     Dwight Howard  79 35.7 20.6  7.1 12.4 0.572 6.4 10.7 0.594  0.0  0.0
## 26       Paul Pierce  81 37.4 20.5  6.7 14.6 0.457 5.7  6.8 0.830  1.5  3.8
## 27     Al Harrington  73 34.9 20.1  7.3 16.6 0.439 3.2  4.0 0.793  2.3  6.4
## 28    Jamal Crawford  65 38.1 19.7  6.4 15.7 0.410 4.6  5.3 0.872  2.2  6.1
## 29          Yao Ming  77 33.6 19.7  7.4 13.4 0.548 4.9  5.7 0.866  0.0  0.0
## 30 Richard Jefferson  82 35.9 19.6  6.5 14.9 0.439 5.1  6.3 0.805  1.4  3.6
## 31       Jason Terry  74 33.6 19.6  7.3 15.8 0.463 2.7  3.0 0.880  2.3  6.2
## 32    Deron Williams  68 36.9 19.4  6.8 14.5 0.471 4.8  5.6 0.849  1.0  3.3
## 33        Tim Duncan  75 33.7 19.3  7.4 14.8 0.504 4.5  6.4 0.692  0.0  0.0
## 34       Monta Ellis  25 35.6 19.0  7.8 17.2 0.451 3.1  3.8 0.830  0.3  1.0
## 35          Rudy Gay  79 37.3 18.9  7.2 16.0 0.453 3.3  4.4 0.767  1.1  3.1
## 36         Pau Gasol  81 37.1 18.9  7.3 12.9 0.567 4.2  5.4 0.781  0.0  0.0
## 37    Andre Iguodala  82 39.8 18.8  6.6 14.0 0.473 4.6  6.4 0.724  1.0  3.2
## 38    Corey Maggette  51 31.1 18.6  5.7 12.4 0.461 6.7  8.1 0.824  0.5  1.9
## 39         O.J. Mayo  82 38.0 18.5  6.9 15.6 0.438 3.0  3.4 0.879  1.8  4.6
## 40      John Salmons  79 37.5 18.3  6.5 13.8 0.472 3.6  4.4 0.830  1.6  3.8
## 41  Richard Hamilton  67 34.0 18.3  7.0 15.6 0.447 3.3  3.9 0.848  1.0  2.8
## 42         Ray Allen  79 36.3 18.2  6.3 13.2 0.480 3.0  3.2 0.952  2.5  6.2
## 43 LaMarcus Aldridge  81 37.1 18.1  7.4 15.3 0.484 3.2  4.1 0.781  0.1  0.3
## 44       Josh Howard  52 31.9 18.0  6.8 15.1 0.451 3.3  4.2 0.782  1.1  3.2
## 45  Maurice Williams  81 35.0 17.8  6.5 13.9 0.467 2.6  2.8 0.912  2.3  5.2
## 46  Shaquille O'neal  75 30.1 17.8  6.8 11.2 0.609 4.1  6.9 0.595  0.0  0.0
## 47     Rashard Lewis  79 36.2 17.7  6.1 13.8 0.439 2.8  3.4 0.836  2.8  7.0
## 48  Chauncey Billups  79 35.3 17.7  5.2 12.4 0.418 5.3  5.8 0.913  2.1  5.0
## 49     Allen Iverson  57 36.7 17.5  6.1 14.6 0.417 4.8  6.1 0.781  0.5  1.7
## 50     Nate Robinson  74 29.9 17.2  6.1 13.9 0.437 3.4  4.0 0.841  1.7  5.2
##     X3PP ORB DRB  TRB  AST STL BLK  TO  PF
## 1  0.317 1.1 3.9  5.0  7.5 2.2 1.3 3.4 2.3
## 2  0.344 1.3 6.3  7.6  7.2 1.7 1.1 3.0 1.7
## 3  0.351 1.1 4.1  5.2  4.9 1.5 0.5 2.6 2.3
## 4  0.359 1.1 7.3  8.4  2.4 0.8 0.8 1.9 2.2
## 5  0.404 0.7 4.4  5.1  2.7 1.0 1.4 2.5 3.1
## 6  0.422 1.0 5.5  6.5  2.8 1.3 0.7 3.0 1.8
## 7  0.415 0.6 3.0  3.6  2.7 1.2 0.2 2.9 2.3
## 8  0.000 3.4 7.5 11.0  1.6 0.8 1.7 1.8 2.8
## 9  0.364 0.9 4.7  5.5 11.0 2.8 0.1 3.0 2.7
## 10 0.371 1.6 5.2  6.8  3.4 1.1 0.4 3.0 3.0
## 11 0.245 2.8 7.2 10.0  2.5 0.9 1.0 2.3 2.5
## 12 0.377 1.3 3.4  4.7  5.1 1.1 0.3 1.9 1.6
## 13 0.351 2.4 6.5  8.9  1.9 1.2 0.3 1.5 2.7
## 14 0.292 0.4 2.7  3.1  6.9 0.9 0.1 2.6 1.5
## 15 0.429 2.2 5.9  8.1  2.0 0.9 1.1 2.8 3.1
## 16 0.360 0.8 3.6  4.4  5.8 1.1 0.2 2.5 2.2
## 17 0.291 0.4 2.9  3.3  6.9 1.7 0.2 3.1 2.4
## 18 0.366 0.7 2.5  3.2  2.7 1.1 0.1 1.6 1.4
## 19 0.240 2.1 6.4  8.5  2.3 0.6 0.9 2.1 2.7
## 20 0.330 3.1 6.9 10.1  2.1 0.9 0.3 2.3 2.7
## 21 0.310 1.8 4.4  6.2  4.3 1.6 0.3 3.1 2.5
## 22 0.385 0.9 4.2  5.1  4.7 1.0 0.5 2.1 2.9
## 23 0.338 1.2 3.9  5.1  6.5 1.5 0.5 3.9 2.6
## 24 0.410 0.6 2.8  3.5  3.4 0.9 0.3 2.4 2.2
## 25 0.000 4.3 9.6 13.8  1.4 1.0 2.9 3.0 3.4
## 26 0.391 0.7 5.0  5.6  3.6 1.0 0.3 2.8 2.7
## 27 0.364 1.4 4.9  6.2  1.4 1.2 0.3 2.2 3.1
## 28 0.360 0.4 2.6  3.0  4.4 0.9 0.2 2.3 1.4
## 29 1.000 2.6 7.2  9.9  1.8 0.4 1.9 3.0 3.3
## 30 0.397 0.7 3.9  4.6  2.4 0.8 0.2 2.0 3.1
## 31 0.366 0.5 1.9  2.4  3.4 1.3 0.3 1.6 1.9
## 32 0.310 0.4 2.5  2.9 10.7 1.1 0.3 3.4 2.0
## 33 0.000 2.7 8.0 10.7  3.5 0.5 1.7 2.2 2.3
## 34 0.308 0.6 3.8  4.3  3.7 1.6 0.3 2.7 2.7
## 35 0.351 1.4 4.2  5.5  1.7 1.2 0.7 2.6 2.8
## 36 0.500 3.2 6.4  9.6  3.5 0.6 1.0 1.9 2.1
## 37 0.307 1.1 4.6  5.7  5.3 1.6 0.4 2.7 1.9
## 38 0.253 1.0 4.6  5.5  1.8 0.9 0.2 2.4 3.8
## 39 0.384 0.7 3.1  3.8  3.2 1.1 0.2 2.8 2.5
## 40 0.417 0.7 3.5  4.2  3.2 1.1 0.3 2.1 2.3
## 41 0.368 0.7 2.4  3.1  4.4 0.6 0.1 2.0 2.6
## 42 0.409 0.8 2.7  3.5  2.8 0.9 0.2 1.7 2.0
## 43 0.250 2.9 4.6  7.5  1.9 1.0 1.0 1.5 2.6
## 44 0.345 1.1 3.9  5.1  1.6 1.1 0.6 1.7 2.6
## 45 0.436 0.6 2.9  3.4  4.1 0.9 0.1 2.2 2.7
## 46 0.000 2.5 5.9  8.4  1.7 0.7 1.4 2.2 3.4
## 47 0.397 1.2 4.6  5.7  2.6 1.0 0.6 2.0 2.5
## 48 0.408 0.4 2.6  3.0  6.4 1.2 0.2 2.2 2.0
## 49 0.283 0.5 2.5  3.0  5.0 1.5 0.1 2.6 1.5
## 50 0.325 1.3 2.6  3.9  4.1 1.3 0.1 1.9 2.8
# Preview the first six rows to check the column headings.
head(nba)
##             Name  G  MIN  PTS  FGM  FGA   FGP FTM FTA   FTP X3PM X3PA  X3PP ORB
## 1   Dwyane Wade  79 38.6 30.2 10.8 22.0 0.491 7.5 9.8 0.765  1.1  3.5 0.317 1.1
## 2  LeBron James  81 37.7 28.4  9.7 19.9 0.489 7.3 9.4 0.780  1.6  4.7 0.344 1.3
## 3   Kobe Bryant  82 36.2 26.8  9.8 20.9 0.467 5.9 6.9 0.856  1.4  4.1 0.351 1.1
## 4 Dirk Nowitzki  81 37.7 25.9  9.6 20.0 0.479 6.0 6.7 0.890  0.8  2.1 0.359 1.1
## 5 Danny Granger  67 36.2 25.8  8.5 19.1 0.447 6.0 6.9 0.878  2.7  6.7 0.404 0.7
## 6  Kevin Durant  74 39.0 25.3  8.9 18.8 0.476 6.1 7.1 0.863  1.3  3.1 0.422 1.0
##   DRB TRB AST STL BLK  TO  PF
## 1 3.9 5.0 7.5 2.2 1.3 3.4 2.3
## 2 6.3 7.6 7.2 1.7 1.1 3.0 1.7
## 3 4.1 5.2 4.9 1.5 0.5 2.6 2.3
## 4 7.3 8.4 2.4 0.8 0.8 1.9 2.2
## 5 4.4 5.1 2.7 1.0 1.4 2.5 3.1
## 6 5.5 6.5 2.8 1.3 0.7 3.0 1.8

create a color heatmap

# Sort players by points per game (ascending) and label each row with the
# player's name.
nba <- nba[order(nba[["PTS"]]), ]
rownames(nba) <- nba$Name
# Keep columns 2 through 19, which drops the Name column.
# NOTE(review): the preview shows 21 columns, so this also leaves out the last
# two (TO and PF) -- confirm that is intended.
nba <- nba[, 2:19]
# heatmap() needs a numeric matrix rather than a data frame.
nba_matrix <- data.matrix(nba)
# Cool palette, no dendrograms, values scaled within each column.
nba_heatmap <- heatmap(
  nba_matrix,
  Rowv = NA,
  Colv = NA,
  scale = "column",
  col = cm.colors(256),
  margins = c(5, 10),
  main = "NBA Player Stats in 2008",
  xlab = "NBA Player Stats",
  ylab = "NBA Players"
)

change to warm color palette

# Same heatmap as before, drawn with the warm heat.colors() palette.
nba_heatmap <- heatmap(
  nba_matrix,
  Rowv = NA,
  Colv = NA,
  scale = "column",
  col = heat.colors(256),
  margins = c(5, 10),
  main = "NBA Player Stats in 2008",
  xlab = "NBA Player Stats",
  ylab = "NBA Players"
)

use the viridis color palette for heatmap

load viridis and create heatmap

library(viridis)
## Loading required package: viridisLite
# Heatmap of the NBA matrix using a 25-step viridis palette, with values scaled
# within each column and no row dendrogram.
# NOTE(review): Colv is not set here, so heatmap() applies its default column
# handling, unlike the earlier heatmaps that passed Colv = NA -- confirm that
# the difference is intended.
nba_heatmap <- heatmap(
  nba_matrix,
  Rowv = NA,
  col = viridis(25),
  scale = "column",
  margins = c(5, 10),
  xlab = "NBA Player Stats",
  ylab = "NBA Players",
  main = "NBA Player Stats in 2008" # title typo "Payer" corrected
)

create a treemap which explores categories of views

load and preview the data

# Read the post-level data (comma-separated despite the .txt extension) and
# show the first six rows.
data <- read.csv(file = "http://datasets.flowingdata.com/post-data.txt")
head(data, n = 6L)
##     id  views comments               category
## 1 5019 148896       28 Artistic Visualization
## 2 1416  81374       26          Visualization
## 3 1416  81374       26               Featured
## 4 3485  80819       37               Featured
## 5 3485  80819       37                Mapping
## 6 3485  80819       37           Data Sources

use RColorBrewer to change the palette to RdYlBu

# Treemap of post categories: tile size comes from the views column and tile
# colour from the comments column.
# type = "manual" maps the vColor values onto the palette supplied below, here
# the RColorBrewer "RdYlBu" (red-yellow-blue) palette.
treemap(
  dtf = data,
  index = "category",
  vSize = "views",
  vColor = "comments",
  type = "manual",
  palette = "RdYlBu"
)

use the dataset NYCFlights13 to create a heatmap that explores Late Arrivals

load data

library(nycflights13)
library(RColorBrewer)
# Load the flights dataset into the workspace.
data(flights)

create an initial scatterplot with loess smoother for distance to delays

filter NA values from data

# Keep only flights for which both distance and arrival delay are recorded.
flights_nona <- filter(
  flights,
  !is.na(distance),
  !is.na(arr_delay)
)

use group_by and summarise to create a summary table

# Per-aircraft summary: one row for each tail number.
by_tailnum <- summarise(
  group_by(flights_nona, tailnum),
  count = n(),             # number of flights for the tail number
  dist = mean(distance),   # mean distance flown
  delay = mean(arr_delay)  # mean arrival delay
)
# Restrict to aircraft with more than 20 flights and a mean distance below
# 2000 miles.
delay <- by_tailnum %>%
  filter(count > 20, dist < 2000)

create a dataframe that is composed of summary statistics

# Per-destination summary, sorted from the highest to the lowest delay cost.
#   count     - number of flights to the destination
#   dist      - mean distance flown to the destination
#   delay     - mean arrival delay at the destination
#   delaycost - delay cost index: (number of flights) * delay / distance
# Inside summarise(), count, delay and dist are already single values per
# destination, so the index is computed directly without wrapping it in mean().
delays <- flights_nona %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    dist = mean(distance),
    delay = mean(arr_delay),
    delaycost = count * delay / dist
  ) %>%
  arrange(desc(delaycost))

# Show the six destinations with the highest delay cost.
head(delays) # look at the data
## # A tibble: 6 × 5
##   dest  count  dist delay delaycost
##   <chr> <int> <dbl> <dbl>     <dbl>
## 1 DCA    9111  211.  9.07      391.
## 2 IAD    5383  225. 13.9       332.
## 3 ATL   16837  757. 11.3       251.
## 4 BOS   15022  191.  2.91      230.
## 5 CLT   13674  538.  7.36      187.
## 6 RDU    7770  427. 10.1       183.

use the knitr package with the function kable to display all destinations

load knitr

library(knitr)
# Render the full destination table, with values rounded to 2 decimal places.
kable(
  x = delays,
  digits = 2,
  caption = "Table of Mean Distance, Mean Arrival Delay, and Highest Delay Costs"
)
Table of Mean Distance, Mean Arrival Delay, and Highest Delay Costs
dest count dist delay delaycost
DCA 9111 211.08 9.07 391.36
IAD 5383 224.74 13.86 332.08
ATL 16837 757.14 11.30 251.29
BOS 15022 190.74 2.91 229.53
CLT 13674 538.01 7.36 187.07
RDU 7770 426.73 10.05 183.04
RIC 2346 281.27 20.11 167.74
PHL 1541 94.34 10.13 165.42
BUF 4570 296.87 8.95 137.71
ORD 16566 729.02 5.88 133.54
ROC 2358 259.36 11.56 105.11
BWI 1687 179.35 10.73 100.90
CVG 3725 575.23 15.36 99.50
DTW 9031 498.20 5.43 98.43
CLE 4394 414.00 9.18 97.45
PWM 2288 276.03 11.66 96.65
BNA 6084 758.22 11.81 94.78
FLL 11897 1070.06 8.08 89.86
BTV 2510 265.12 8.95 84.74
MCO 13967 943.11 5.45 80.78
CMH 3326 476.55 10.60 73.99
SYR 1707 206.07 8.90 73.76
MDW 4025 718.09 12.36 69.30
MHT 932 207.38 14.79 66.46
PIT 2746 334.10 7.68 63.13
TPA 7390 1003.93 7.41 54.53
ORF 1434 288.55 10.95 54.41
PBI 6487 1028.82 8.56 53.99
MKE 2709 733.37 14.17 52.33
STL 4142 878.83 11.08 52.21
MSP 6929 1017.46 7.27 49.51
GSO 1492 449.79 14.11 46.81
CHS 2759 632.96 10.59 46.17
ALB 418 143.00 14.40 42.08
CAK 842 397.00 19.70 41.78
DEN 7169 1614.69 8.61 38.21
JAX 2623 824.71 11.84 37.67
PVD 358 160.00 16.23 36.32
DAY 1399 536.91 12.68 33.04
IND 1981 652.26 9.94 30.19
BDL 412 116.00 7.05 25.03
MCI 1885 1097.65 14.51 24.92
GRR 728 605.71 18.19 21.86
TYS 578 638.34 24.07 21.79
SDF 1104 645.96 12.67 21.65
IAH 7085 1407.18 4.24 21.35
GSP 790 595.98 15.94 21.12
MSY 3715 1177.73 6.49 20.47
MEM 1686 954.48 10.65 18.80
SAV 749 709.27 15.13 15.98
MSN 556 803.93 20.20 13.97
SFO 13173 2577.93 2.67 13.66
OMA 817 1135.56 14.70 10.58
RSW 3502 1072.85 3.24 10.57
HOU 2083 1420.26 7.18 10.52
DSM 523 1020.56 19.01 9.74
AUS 2411 1514.25 6.02 9.58
SJU 5773 1599.84 2.52 9.10
TUL 294 1215.00 33.66 8.14
BGR 358 378.00 8.03 7.60
CAE 106 603.70 41.76 7.33
OKC 315 1325.00 30.62 7.28
XNA 992 1142.44 7.47 6.48
ACK 264 199.00 4.85 6.44
BHM 269 866.00 16.88 5.24
BQN 888 1578.99 8.25 4.64
PHX 4606 2141.34 2.10 4.51
CRW 134 444.00 14.67 4.43
AVL 261 583.61 8.00 3.58
LAX 16026 2468.62 0.55 3.55
SRQ 1201 1044.64 3.08 3.54
SAN 2709 2437.28 3.14 3.49
MIA 11593 1091.54 0.30 3.18
SAT 659 1578.18 6.95 2.90
PDX 1342 2445.61 5.14 2.82
DFW 8388 1383.06 0.32 1.95
TVC 95 652.45 12.97 1.89
PSE 358 1617.00 7.87 1.74
CHO 46 305.00 9.50 1.43
SMF 282 2521.00 12.11 1.35
BUR 370 2465.00 8.18 1.23
ILM 107 500.00 4.64 0.99
EGE 207 1735.80 6.30 0.75
LAS 5952 2240.98 0.26 0.68
ABQ 254 1826.00 4.38 0.61
MYR 58 550.67 4.60 0.48
SJC 328 2569.00 3.45 0.44
OAK 309 2576.00 3.08 0.37
JAC 21 1875.90 28.10 0.31
SLC 2451 1986.99 0.18 0.22
BZN 35 1882.00 7.60 0.14
SBN 10 645.40 6.50 0.10
EYW 17 1207.00 6.35 0.09
HDN 14 1728.00 2.14 0.02
MTJ 14 1795.00 1.79 0.01
ANC 8 3370.00 -2.50 -0.01
LGB 661 2465.00 -0.06 -0.02
LEX 1 604.00 -22.00 -0.04
PSP 18 2378.00 -12.72 -0.10
HNL 701 4972.76 -1.37 -0.19
MVY 210 173.00 -0.29 -0.35
STT 518 1626.99 -3.84 -1.22
SEA 3885 2412.68 -1.10 -1.77
SNA 812 2434.00 -7.87 -2.62

get the top 100 delay costs to create a heatmap of those flights

# Select the first 100 rows of `delays` (the 100 largest delay costs, given
# that `delays` is sorted in descending order), then sort ascending so the
# heatmap displays descending costs.
# The result is converted to a plain data.frame before row names are assigned,
# because setting row names on a tibble is deprecated and raises a warning.
top100 <- delays %>%
  head(100) %>%
  arrange(delaycost) %>%
  as.data.frame()
# Label the rows with the destination airport codes.
row.names(top100) <- top100$dest

convert the dataframe to matrix form

# heatmap() requires a numeric matrix. data.matrix() converts every column,
# including the character airport codes, to numbers.
delays_mat <- data.matrix(top100)
# Keep only the four numeric measures; the airport code column is redundant
# because the codes are already the row names.
delays_mat2 <- delays_mat[, c("count", "dist", "delay", "delaycost")]

create a heatmap using the viridis palette

# Heatmap of the destination summary: one row per arrival airport, columns
# scaled independently, no dendrograms, 25-step viridis palette.
# Only documented heatmap() arguments are passed. Abbreviated names such as
# `s` or `v` would partially match `symm` and `verbose`, and `verbose` makes
# heatmap() print its layout matrix to the console.
heatmap(
  delays_mat2,
  Rowv = NA,
  Colv = NA,
  scale = "column",
  col = viridis(25),
  margins = c(7, 10),
  main = "Cost of Late Arrivals",
  xlab = "Flight Characteristics",
  ylab = "Arrival Airport",
  labCol = c("Flights", "Distance", "Delay", "Cost Index"),
  cexCol = 1,
  cexRow = 1
)

# install and load devtools/libraries to create a streamgraph

# Install streamgraph from GitHub only when it is not already available, so
# that re-running the script does not attempt an installation every time.
if (!requireNamespace("streamgraph", quietly = TRUE)) {
  devtools::install_github("hrbrmstr/streamgraph")
}
library(dplyr)
library(babynames)
library(streamgraph)

trivial streamgraph using simulated names over time

# Simulated data: 27 years (1990-2016), 10 names (a-j) per year, and one random
# value in [0, 1] per year/name pair, drawn without replacement.
year <- rep(seq(1990, 2016), each = 10)
name <- rep(letters[1:10], times = 27)
value <- sample(seq(0, 1, 0.0001), size = length(year))
data <- data.frame(year = year, name = name, value = value)
# Minimal stream graph: pass the data frame plus the names of the key, value
# and date columns.
streamgraph(
  data = data,
  key = "name",
  value = "value",
  date = "year"
)
## Warning in widget_html(name, package, id = x$id, style = css(width =
## validateCssUnit(sizeInfo$width), : streamgraph_html returned an object of class
## `list` instead of a `shiny.tag`.
## Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
## p(), etc.), not objects of type 'list'.

look at the babynames dataset

# Number of columns in the babynames dataset.
ncol(babynames)
## [1] 5
# First six rows of the babynames dataset.
head(babynames)
## # A tibble: 6 × 5
##    year sex   name          n   prop
##   <dbl> <chr> <chr>     <int>  <dbl>
## 1  1880 F     Mary       7065 0.0724
## 2  1880 F     Anna       2604 0.0267
## 3  1880 F     Emma       2003 0.0205
## 4  1880 F     Elizabeth  1939 0.0199
## 5  1880 F     Minnie     1746 0.0179
## 6  1880 F     Margaret   1578 0.0162
# Column types and overall dimensions of the babynames dataset.
str(babynames)
## tibble [1,924,665 × 5] (S3: tbl_df/tbl/data.frame)
##  $ year: num [1:1924665] 1880 1880 1880 1880 1880 1880 1880 1880 1880 1880 ...
##  $ sex : chr [1:1924665] "F" "F" "F" "F" ...
##  $ name: chr [1:1924665] "Mary" "Anna" "Emma" "Elizabeth" ...
##  $ n   : int [1:1924665] 7065 2604 2003 1939 1746 1578 1472 1414 1320 1288 ...
##  $ prop: num [1:1924665] 0.0724 0.0267 0.0205 0.0199 0.0179 ...

babynames streamgraph

# Stream graph of yearly counts for every name beginning with "Kr".
# Counts are summed per year and name before plotting.
babynames %>%
  filter(startsWith(name, "Kr")) %>%
  group_by(year, name) %>%
  tally(wt = n) %>%
  streamgraph(key = "name", value = "n", date = "year")
## Warning in widget_html(name, package, id = x$id, style = css(width =
## validateCssUnit(sizeInfo$width), : streamgraph_html returned an object of class
## `list` instead of a `shiny.tag`.
## Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
## p(), etc.), not objects of type 'list'.

load alluvial package

library(alluvial) # this package contains the refugee dataset we will use
library(ggalluvial) # this is the improved alluvial package 

write refugees prebuilt data in alluvial package as csv

# Copy the Refugees dataset shipped with the alluvial package into the
# workspace, then save it as a CSV file in the working directory.
Refugees <- alluvial::Refugees
write_csv(x = Refugees, file = "refugees.csv")

preview to check data

# First six rows: country, year and number of refugees.
head(Refugees)
##       country year refugees
## 1 Afghanistan 2003  2136043
## 2     Burundi 2003   531637
## 3   Congo DRC 2003   453465
## 4        Iraq 2003   368580
## 5     Myanmar 2003   151384
## 6   Palestine 2003   350568

create the alluvial plot to show UNHCR-recognised refugees in the top 10 countries from 2003-2013

# Alluvial time-series (bump) chart of refugee numbers: one flow per country,
# filled by country.
ggalluv <- ggplot(
  Refugees,
  aes(x = year, y = refugees, alluvium = country)
) +
  theme_bw() +
  geom_alluvium(
    aes(fill = country),
    color = "white",
    width = .1,
    alpha = .8,
    decreasing = FALSE
  ) +
  # Spectral has enough colors for all countries listed.
  scale_fill_brewer(palette = "Spectral") +
  # `limits` is written out in full rather than relying on partial matching
  # of the abbreviation `lim`.
  scale_x_continuous(limits = c(2002, 2013)) +
  # \n breaks the long title across lines.
  ggtitle("UNHCR-Recognised Refugees \n Top 10 Countries(2003-2013)\n") +
  ylab("Number of Refugees")

ggalluv

The y-values are in scientific notation. We can convert them to standard notation by setting the scipen option with the options() function.

# Set a large scipen value so the refugee counts are printed in standard
# rather than scientific notation. This changes the option for the rest of
# the session.
options(scipen = 999)

# Draw the chart again with the new setting in effect.
ggalluv