# How to make a heatmap
nba <- read.csv("http://datasets.flowingdata.com/ppg2008.csv")
#apparently you have to use read.csv here instead of read_csv
head(nba)
flights_nona <- flights %>%
filter(!is.na(distance) & !is.na(arr_delay)) # remove na's for distance and arr_delay
by_tailnum <- flights_nona %>%
group_by(tailnum) %>% # group all tailnumbers together
summarise(count = n(), # counts totals for each tailnumber
dist = mean(distance), # calculates the mean distance traveled
delay = mean(arr_delay)) # calculates the mean arrival delay
delay <- filter(by_tailnum, count > 20, dist < 2000) # only include counts > 20 and distance < 2000 mi
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
delays <- flights_nona %>% # create a delays dataframe by:
group_by (dest) %>% # grouping by point of destination
summarize (count = n(), # creating variables: number of flights to each destination,
dist = mean (distance), # the mean distance flown to each destination,
delay = mean (arr_delay), # the mean delay of arrival to each destination,
delaycost = mean(count*delay/dist)) # delay cost index defined as:
# [(number of flights)*delay/distance] for a destination
delays <- arrange(delays, desc(delaycost)) # sort the rows by delay cost
head(delays) # look at the data
#install.packages("knitr")
library(knitr)
kable(delays,
caption = "Table of Mean Distance, Mean Arrival Delay, and Highest Delay Costs",
digits = 2) # round values to 2 decimal places
Table of Mean Distance, Mean Arrival Delay, and Highest Delay
Costs
dest
count
dist
delay
delaycost
DCA
9111
211.08
9.07
391.36
IAD
5383
224.74
13.86
332.08
ATL
16837
757.14
11.30
251.29
BOS
15022
190.74
2.91
229.53
CLT
13674
538.01
7.36
187.07
RDU
7770
426.73
10.05
183.04
RIC
2346
281.27
20.11
167.74
PHL
1541
94.34
10.13
165.42
BUF
4570
296.87
8.95
137.71
ORD
16566
729.02
5.88
133.54
ROC
2358
259.36
11.56
105.11
BWI
1687
179.35
10.73
100.90
CVG
3725
575.23
15.36
99.50
DTW
9031
498.20
5.43
98.43
CLE
4394
414.00
9.18
97.45
PWM
2288
276.03
11.66
96.65
BNA
6084
758.22
11.81
94.78
FLL
11897
1070.06
8.08
89.86
BTV
2510
265.12
8.95
84.74
MCO
13967
943.11
5.45
80.78
CMH
3326
476.55
10.60
73.99
SYR
1707
206.07
8.90
73.76
MDW
4025
718.09
12.36
69.30
MHT
932
207.38
14.79
66.46
PIT
2746
334.10
7.68
63.13
TPA
7390
1003.93
7.41
54.53
ORF
1434
288.55
10.95
54.41
PBI
6487
1028.82
8.56
53.99
MKE
2709
733.37
14.17
52.33
STL
4142
878.83
11.08
52.21
MSP
6929
1017.46
7.27
49.51
GSO
1492
449.79
14.11
46.81
CHS
2759
632.96
10.59
46.17
ALB
418
143.00
14.40
42.08
CAK
842
397.00
19.70
41.78
DEN
7169
1614.69
8.61
38.21
JAX
2623
824.71
11.84
37.67
PVD
358
160.00
16.23
36.32
DAY
1399
536.91
12.68
33.04
IND
1981
652.26
9.94
30.19
BDL
412
116.00
7.05
25.03
MCI
1885
1097.65
14.51
24.92
GRR
728
605.71
18.19
21.86
TYS
578
638.34
24.07
21.79
SDF
1104
645.96
12.67
21.65
IAH
7085
1407.18
4.24
21.35
GSP
790
595.98
15.94
21.12
MSY
3715
1177.73
6.49
20.47
MEM
1686
954.48
10.65
18.80
SAV
749
709.27
15.13
15.98
MSN
556
803.93
20.20
13.97
SFO
13173
2577.93
2.67
13.66
OMA
817
1135.56
14.70
10.58
RSW
3502
1072.85
3.24
10.57
HOU
2083
1420.26
7.18
10.52
DSM
523
1020.56
19.01
9.74
AUS
2411
1514.25
6.02
9.58
SJU
5773
1599.84
2.52
9.10
TUL
294
1215.00
33.66
8.14
BGR
358
378.00
8.03
7.60
CAE
106
603.70
41.76
7.33
OKC
315
1325.00
30.62
7.28
XNA
992
1142.44
7.47
6.48
ACK
264
199.00
4.85
6.44
BHM
269
866.00
16.88
5.24
BQN
888
1578.99
8.25
4.64
PHX
4606
2141.34
2.10
4.51
CRW
134
444.00
14.67
4.43
AVL
261
583.61
8.00
3.58
LAX
16026
2468.62
0.55
3.55
SRQ
1201
1044.64
3.08
3.54
SAN
2709
2437.28
3.14
3.49
MIA
11593
1091.54
0.30
3.18
SAT
659
1578.18
6.95
2.90
PDX
1342
2445.61
5.14
2.82
DFW
8388
1383.06
0.32
1.95
TVC
95
652.45
12.97
1.89
PSE
358
1617.00
7.87
1.74
CHO
46
305.00
9.50
1.43
SMF
282
2521.00
12.11
1.35
BUR
370
2465.00
8.18
1.23
ILM
107
500.00
4.64
0.99
EGE
207
1735.80
6.30
0.75
LAS
5952
2240.98
0.26
0.68
ABQ
254
1826.00
4.38
0.61
MYR
58
550.67
4.60
0.48
SJC
328
2569.00
3.45
0.44
OAK
309
2576.00
3.08
0.37
JAC
21
1875.90
28.10
0.31
SLC
2451
1986.99
0.18
0.22
BZN
35
1882.00
7.60
0.14
SBN
10
645.40
6.50
0.10
EYW
17
1207.00
6.35
0.09
HDN
14
1728.00
2.14
0.02
MTJ
14
1795.00
1.79
0.01
ANC
8
3370.00
-2.50
-0.01
LGB
661
2465.00
-0.06
-0.02
LEX
1
604.00
-22.00
-0.04
PSP
18
2378.00
-12.72
-0.10
HNL
701
4972.76
-1.37
-0.19
MVY
210
173.00
-0.29
-0.35
STT
518
1626.99
-3.84
-1.22
SEA
3885
2412.68
-1.10
-1.77
SNA
812
2434.00
-7.87
-2.62
top100 <- delays %>% # select the 100 largest delay costs
head(100) %>%
arrange(delaycost) # sort ascending so the heatmap displays descending costs
row.names(top100) <- top100$dest # rename the rows according to destination airport codes
## Warning: Setting row names on a tibble is deprecated.
delays_mat <- data.matrix(top100) # convert delays dataframe to a matrix (required by heatmap)
delays_mat2 <- delays_mat[,2:5] # remove the redundant column of destination airport codes
# Create data:
year=rep(seq(1990,2016) , each=10)
name=rep(letters[1:10] , 27)
value=sample( seq(0,1,0.0001) , length(year))
data=data.frame(year, name, value)
# Basic stream graph: just give the 3 arguments
streamgraph(data, key="name", value="value", date="year")
## Warning in widget_html(name, package, id = x$id, style = css(width =
## validateCssUnit(sizeInfo$width), : streamgraph_html returned an object of class
## `list` instead of a `shiny.tag`.
## Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
## p(), etc.), not objects of type 'list'.
ncol(babynames)
## [1] 5
head(babynames)
## # A tibble: 6 × 5
## year sex name n prop
## <dbl> <chr> <chr> <int> <dbl>
## 1 1880 F Mary 7065 0.0724
## 2 1880 F Anna 2604 0.0267
## 3 1880 F Emma 2003 0.0205
## 4 1880 F Elizabeth 1939 0.0199
## 5 1880 F Minnie 1746 0.0179
## 6 1880 F Margaret 1578 0.0162
## Warning in widget_html(name, package, id = x$id, style = css(width =
## validateCssUnit(sizeInfo$width), : streamgraph_html returned an object of class
## `list` instead of a `shiny.tag`.
## Warning: `bindFillRole()` only works on htmltools::tag() objects (e.g., div(),
## p(), etc.), not objects of type 'list'.
library(alluvial)
library(ggalluvial)
data(Refugees)
#write_csv(Refugees, "refugees.csv") # if you want to save this dataset to your own folder
ggalluv <- ggplot(Refugees,
aes(x = year, y = refugees, alluvium = country)) + # time series bump chart (quintic flows)
theme_bw() +
geom_alluvium(aes(fill = country),
color = "white",
width = .1,
alpha = .8,
decreasing = FALSE) +
scale_fill_brewer(palette = "Spectral") + # Spectral has enough colors for all countries listed
scale_x_continuous(lim = c(2002, 2013))+
ggtitle("UNHCR-Recognised Refugees \n Top 10 Countries(2003-2013)\n")+ # \n breaks the long title
ylab("Number of Refugees")