How to create heatmaps
NBA Stats Heatmap
Load the data from url and display the first 6 rows of the data
nba <- read.csv("http://datasets.flowingdata.com/ppg2008.csv")
head(nba)
## Name G MIN PTS FGM FGA FGP FTM FTA FTP X3PM X3PA X3PP ORB
## 1 Dwyane Wade 79 38.6 30.2 10.8 22.0 0.491 7.5 9.8 0.765 1.1 3.5 0.317 1.1
## 2 LeBron James 81 37.7 28.4 9.7 19.9 0.489 7.3 9.4 0.780 1.6 4.7 0.344 1.3
## 3 Kobe Bryant 82 36.2 26.8 9.8 20.9 0.467 5.9 6.9 0.856 1.4 4.1 0.351 1.1
## 4 Dirk Nowitzki 81 37.7 25.9 9.6 20.0 0.479 6.0 6.7 0.890 0.8 2.1 0.359 1.1
## 5 Danny Granger 67 36.2 25.8 8.5 19.1 0.447 6.0 6.9 0.878 2.7 6.7 0.404 0.7
## 6 Kevin Durant 74 39.0 25.3 8.9 18.8 0.476 6.1 7.1 0.863 1.3 3.1 0.422 1.0
## DRB TRB AST STL BLK TO PF
## 1 3.9 5.0 7.5 2.2 1.3 3.4 2.3
## 2 6.3 7.6 7.2 1.7 1.1 3.0 1.7
## 3 4.1 5.2 4.9 1.5 0.5 2.6 2.3
## 4 7.3 8.4 2.4 0.8 0.8 1.9 2.2
## 5 4.4 5.1 2.7 1.0 1.4 2.5 3.1
## 6 5.5 6.5 2.8 1.3 0.7 3.0 1.8
Create a cool color heat map ordered by points.
nba <- nba[order(nba$PTS),]
row.names(nba) <- nba$Name
nba <- nba[2:19,]
nba_matrix <- data.matrix(nba)
nba_heatmap <- heatmap(nba_matrix, Rowv=NA, Colv=NA,
col = cm.colors(80), scale="column", margins=c(5,10),
xlab = "NBA Player Stats",
ylab = "NBA Players",
main = "NBA Player Stats in 2008")
Cool colored map is changed to a warm color heatmap
nba_heatmap <- heatmap(nba_matrix, Rowv=NA, Colv=NA, col = heat.colors(80),
scale="column", margins=c(5,10),
xlab = "NBA Player Stats",
ylab = "NBA Players",
main = "NBA Player Stats in 2008")
Warm color heatmap is changed to Viridis color palette
library(viridis)
## Loading required package: viridisLite
nba_heatmap <- heatmap(nba_matrix, Rowv=NA, col = viridis(25),
scale="column", margins=c(5,10),
xlab = "NBA Player Stats",
ylab = "NBA Players",
main = "NBA Payer Stats in 2008")
Late Arrivals heatmap
Load data from a prebuilt dataset
library(nycflights13)
library(RColorBrewer)
data(flights)
Clean the data to ignore NA Values
flights_nona <- flights %>%
filter(!is.na(distance) & !is.na(arr_delay))
Create a summary table
by_tailnum <- flights_nona %>%
group_by(tailnum) %>%
summarise(count = n(),
dist = mean(distance),
delay = mean(arr_delay))
delay <- filter(by_tailnum, count > 20, dist < 2000)
Calculate the cost of usage cost of late arrivals
delays <- flights_nona %>%
group_by (dest) %>%
summarize (count = n(),
dist = mean (distance),
delay = mean (arr_delay),
delaycost = mean(count*delay/dist))
delays <- arrange(delays, desc(delaycost))
head(delays)
## # A tibble: 6 × 5
## dest count dist delay delaycost
## <chr> <int> <dbl> <dbl> <dbl>
## 1 DCA 9111 211. 9.07 391.
## 2 IAD 5383 225. 13.9 332.
## 3 ATL 16837 757. 11.3 251.
## 4 BOS 15022 191. 2.91 230.
## 5 CLT 13674 538. 7.36 187.
## 6 RDU 7770 427. 10.1 183.
DCA and IAD have the highest delay cost
displayed using knitr package and kable function
library(knitr)
kable(delays,
caption = "Table of Mean Distance, Mean Arrival Delay, and Highest Delay Costs",
digits = 2)
Table of Mean Distance, Mean Arrival Delay, and Highest Delay
Costs
DCA
9111
211.08
9.07
391.36
IAD
5383
224.74
13.86
332.08
ATL
16837
757.14
11.30
251.29
BOS
15022
190.74
2.91
229.53
CLT
13674
538.01
7.36
187.07
RDU
7770
426.73
10.05
183.04
RIC
2346
281.27
20.11
167.74
PHL
1541
94.34
10.13
165.42
BUF
4570
296.87
8.95
137.71
ORD
16566
729.02
5.88
133.54
ROC
2358
259.36
11.56
105.11
BWI
1687
179.35
10.73
100.90
CVG
3725
575.23
15.36
99.50
DTW
9031
498.20
5.43
98.43
CLE
4394
414.00
9.18
97.45
PWM
2288
276.03
11.66
96.65
BNA
6084
758.22
11.81
94.78
FLL
11897
1070.06
8.08
89.86
BTV
2510
265.12
8.95
84.74
MCO
13967
943.11
5.45
80.78
CMH
3326
476.55
10.60
73.99
SYR
1707
206.07
8.90
73.76
MDW
4025
718.09
12.36
69.30
MHT
932
207.38
14.79
66.46
PIT
2746
334.10
7.68
63.13
TPA
7390
1003.93
7.41
54.53
ORF
1434
288.55
10.95
54.41
PBI
6487
1028.82
8.56
53.99
MKE
2709
733.37
14.17
52.33
STL
4142
878.83
11.08
52.21
MSP
6929
1017.46
7.27
49.51
GSO
1492
449.79
14.11
46.81
CHS
2759
632.96
10.59
46.17
ALB
418
143.00
14.40
42.08
CAK
842
397.00
19.70
41.78
DEN
7169
1614.69
8.61
38.21
JAX
2623
824.71
11.84
37.67
PVD
358
160.00
16.23
36.32
DAY
1399
536.91
12.68
33.04
IND
1981
652.26
9.94
30.19
BDL
412
116.00
7.05
25.03
MCI
1885
1097.65
14.51
24.92
GRR
728
605.71
18.19
21.86
TYS
578
638.34
24.07
21.79
SDF
1104
645.96
12.67
21.65
IAH
7085
1407.18
4.24
21.35
GSP
790
595.98
15.94
21.12
MSY
3715
1177.73
6.49
20.47
MEM
1686
954.48
10.65
18.80
SAV
749
709.27
15.13
15.98
MSN
556
803.93
20.20
13.97
SFO
13173
2577.93
2.67
13.66
OMA
817
1135.56
14.70
10.58
RSW
3502
1072.85
3.24
10.57
HOU
2083
1420.26
7.18
10.52
DSM
523
1020.56
19.01
9.74
AUS
2411
1514.25
6.02
9.58
SJU
5773
1599.84
2.52
9.10
TUL
294
1215.00
33.66
8.14
BGR
358
378.00
8.03
7.60
CAE
106
603.70
41.76
7.33
OKC
315
1325.00
30.62
7.28
XNA
992
1142.44
7.47
6.48
ACK
264
199.00
4.85
6.44
BHM
269
866.00
16.88
5.24
BQN
888
1578.99
8.25
4.64
PHX
4606
2141.34
2.10
4.51
CRW
134
444.00
14.67
4.43
AVL
261
583.61
8.00
3.58
LAX
16026
2468.62
0.55
3.55
SRQ
1201
1044.64
3.08
3.54
SAN
2709
2437.28
3.14
3.49
MIA
11593
1091.54
0.30
3.18
SAT
659
1578.18
6.95
2.90
PDX
1342
2445.61
5.14
2.82
DFW
8388
1383.06
0.32
1.95
TVC
95
652.45
12.97
1.89
PSE
358
1617.00
7.87
1.74
CHO
46
305.00
9.50
1.43
SMF
282
2521.00
12.11
1.35
BUR
370
2465.00
8.18
1.23
ILM
107
500.00
4.64
0.99
EGE
207
1735.80
6.30
0.75
LAS
5952
2240.98
0.26
0.68
ABQ
254
1826.00
4.38
0.61
MYR
58
550.67
4.60
0.48
SJC
328
2569.00
3.45
0.44
OAK
309
2576.00
3.08
0.37
JAC
21
1875.90
28.10
0.31
SLC
2451
1986.99
0.18
0.22
BZN
35
1882.00
7.60
0.14
SBN
10
645.40
6.50
0.10
EYW
17
1207.00
6.35
0.09
HDN
14
1728.00
2.14
0.02
MTJ
14
1795.00
1.79
0.01
ANC
8
3370.00
-2.50
-0.01
LGB
661
2465.00
-0.06
-0.02
LEX
1
604.00
-22.00
-0.04
PSP
18
2378.00
-12.72
-0.10
HNL
701
4972.76
-1.37
-0.19
MVY
210
173.00
-0.29
-0.35
STT
518
1626.99
-3.84
-1.22
SEA
3885
2412.68
-1.10
-1.77
SNA
812
2434.00
-7.87
-2.62
Top 100 of the largest delay costs
top100 <- delays %>%
head(100) %>%
arrange(delaycost)
row.names(top100) <- top100$dest
## Warning: Setting row names on a tibble is deprecated.
Convert dataframe to matrix
delays_mat <- data.matrix(top100)
delays_mat2 <-delays_mat[,2:5]
Create a heatmap of the cost of late arrivals
heatmap(delays_mat2,
Rowv = NA, Colv = NA,
col= viridis(25),
s=0.6, v=1, scale = "column",
margins = c(7,10),
main = "Cost of Late Arrivals",
xlab = "Flight Characteristics",
ylab = "Arrival Airport",
labCol = c("Flights", "Distance", "Delay", "Cost Index"),
cexCol=1, cexRow=1)
## layout: widths = 0.05 4 , heights = 0.25 4 ; lmat=
## [,1] [,2]
## [1,] 0 3
## [2,] 2 1
How to create Streamgraphs
Load data from a prebuilt dataset
library(dplyr)
library(streamgraph)
library(babynames)
babynames <- babynames
Create a steamgraph showing names over time
year=rep(seq(1990,2016), each=10)
name=rep(letters[1:10], 27)
value=sample(seq(0,1,0.0001), length(year))
data=data.frame(year, name, value)
streamgraph(data, key="name", value="value", date="year")
## Warning in widget_html(name, package, id = x$id, style = css(width =
## validateCssUnit(sizeInfo$width), : streamgraph_html returned an object of class
## `list` instead of a `shiny.tag`.
## Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
## p(), etc.), not objects of type 'list'.
Examine the babynames dataset
ncol(babynames)
## [1] 5
head(babynames)
## # A tibble: 6 × 5
## year sex name n prop
## <dbl> <chr> <chr> <int> <dbl>
## 1 1880 F Mary 7065 0.0724
## 2 1880 F Anna 2604 0.0267
## 3 1880 F Emma 2003 0.0205
## 4 1880 F Elizabeth 1939 0.0199
## 5 1880 F Minnie 1746 0.0179
## 6 1880 F Margaret 1578 0.0162
str(babynames)
## tibble [1,924,665 × 5] (S3: tbl_df/tbl/data.frame)
## $ year: num [1:1924665] 1880 1880 1880 1880 1880 1880 1880 1880 1880 1880 ...
## $ sex : chr [1:1924665] "F" "F" "F" "F" ...
## $ name: chr [1:1924665] "Mary" "Anna" "Emma" "Elizabeth" ...
## $ n : int [1:1924665] 7065 2604 2003 1939 1746 1578 1472 1414 1320 1288 ...
## $ prop: num [1:1924665] 0.0724 0.0267 0.0205 0.0199 0.0179 ...
Babynames that begin with S and their popularity over the years
babynames %>%
filter(grepl("^Si", name)) %>%
group_by(year,name) %>%
tally(wt=n) %>%
streamgraph("name", "n", "year")
## Warning in widget_html(name, package, id = x$id, style = css(width =
## validateCssUnit(sizeInfo$width), : streamgraph_html returned an object of class
## `list` instead of a `shiny.tag`.
## Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
## p(), etc.), not objects of type 'list'.
How to create alluvials
Load the alluvial package and a prebuilt dataset
library(alluvial)
library(ggalluvial)
data(Refugees)
Show the top 10 most affected countries
ggalluv <- ggplot(Refugees,
aes(x = year, y = refugees, alluvium = country)) +
theme_bw() +
geom_alluvium(aes(fill = country),
color = "white",
width = .1,
alpha = .8,
decreasing = FALSE) +
scale_fill_brewer(palette = "Paired") +
scale_x_continuous(lim = c(2002,2013))+
ggtitle("UNHCR Recognized Refugees \n Top 10 Countries(2003-2013)\n")+
ylab("Number of Refugees")
ggalluv
Change the y axis to show numerical values
options(scipen=999)
ggalluv