The data is a CSV file that relates the number of views and the number of comments to the various categories of Yau’s visualization posts.
library(treemap)
## Warning: package 'treemap' was built under R version 4.0.5
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.0.3
# Download the FlowingData post statistics (columns: id, views, comments,
# category) and preview the first six rows.
data <- read.csv(file = "http://datasets.flowingdata.com/post-data.txt")
head(data, n = 6L)
## id views comments category
## 1 5019 148896 28 Artistic Visualization
## 2 1416 81374 26 Visualization
## 3 1416 81374 26 Featured
## 4 3485 80819 37 Featured
## 5 3485 80819 37 Mapping
## 6 3485 80819 37 Data Sources
This dataset appears to contain 2008 NBA player statistics.
# Download the per-player NBA statistics table and print all of it.
nba <- read.csv(file = "http://datasets.flowingdata.com/ppg2008.csv")
nba
## Name G MIN PTS FGM FGA FGP FTM FTA FTP X3PM X3PA
## 1 Dwyane Wade 79 38.6 30.2 10.8 22.0 0.491 7.5 9.8 0.765 1.1 3.5
## 2 LeBron James 81 37.7 28.4 9.7 19.9 0.489 7.3 9.4 0.780 1.6 4.7
## 3 Kobe Bryant 82 36.2 26.8 9.8 20.9 0.467 5.9 6.9 0.856 1.4 4.1
## 4 Dirk Nowitzki 81 37.7 25.9 9.6 20.0 0.479 6.0 6.7 0.890 0.8 2.1
## 5 Danny Granger 67 36.2 25.8 8.5 19.1 0.447 6.0 6.9 0.878 2.7 6.7
## 6 Kevin Durant 74 39.0 25.3 8.9 18.8 0.476 6.1 7.1 0.863 1.3 3.1
## 7 Kevin Martin 51 38.2 24.6 6.7 15.9 0.420 9.0 10.3 0.867 2.3 5.4
## 8 Al Jefferson 50 36.6 23.1 9.7 19.5 0.497 3.7 5.0 0.738 0.0 0.1
## 9 Chris Paul 78 38.5 22.8 8.1 16.1 0.503 5.8 6.7 0.868 0.8 2.3
## 10 Carmelo Anthony 66 34.5 22.8 8.1 18.3 0.443 5.6 7.1 0.793 1.0 2.6
## 11 Chris Bosh 77 38.1 22.7 8.0 16.4 0.487 6.5 8.0 0.817 0.2 0.6
## 12 Brandon Roy 78 37.2 22.6 8.1 16.9 0.480 5.3 6.5 0.824 1.1 2.8
## 13 Antawn Jamison 81 38.2 22.2 8.3 17.8 0.468 4.2 5.6 0.754 1.4 3.9
## 14 Tony Parker 72 34.1 22.0 8.9 17.5 0.506 3.9 5.0 0.782 0.3 0.9
## 15 Amare Stoudemire 53 36.8 21.4 7.6 14.1 0.539 6.1 7.3 0.835 0.1 0.1
## 16 Joe Johnson 79 39.5 21.4 7.8 18.0 0.437 3.8 4.6 0.826 1.9 5.2
## 17 Devin Harris 69 36.1 21.3 6.6 15.1 0.438 7.2 8.8 0.820 0.9 3.2
## 18 Michael Redd 33 36.4 21.2 7.5 16.6 0.455 4.0 4.9 0.814 2.1 5.8
## 19 David West 76 39.3 21.0 8.0 17.0 0.472 4.8 5.5 0.884 0.1 0.3
## 20 Zachary Randolph 50 35.1 20.8 8.3 17.5 0.475 3.6 4.9 0.734 0.6 1.9
## 21 Caron Butler 67 38.6 20.8 7.3 16.2 0.453 5.1 6.0 0.858 1.0 3.1
## 22 Vince Carter 80 36.8 20.8 7.4 16.8 0.437 4.2 5.1 0.817 1.9 4.9
## 23 Stephen Jackson 59 39.7 20.7 7.0 16.9 0.414 5.0 6.0 0.826 1.7 5.2
## 24 Ben Gordon 82 36.6 20.7 7.3 16.0 0.455 4.0 4.7 0.864 2.1 5.1
## 25 Dwight Howard 79 35.7 20.6 7.1 12.4 0.572 6.4 10.7 0.594 0.0 0.0
## 26 Paul Pierce 81 37.4 20.5 6.7 14.6 0.457 5.7 6.8 0.830 1.5 3.8
## 27 Al Harrington 73 34.9 20.1 7.3 16.6 0.439 3.2 4.0 0.793 2.3 6.4
## 28 Jamal Crawford 65 38.1 19.7 6.4 15.7 0.410 4.6 5.3 0.872 2.2 6.1
## 29 Yao Ming 77 33.6 19.7 7.4 13.4 0.548 4.9 5.7 0.866 0.0 0.0
## 30 Richard Jefferson 82 35.9 19.6 6.5 14.9 0.439 5.1 6.3 0.805 1.4 3.6
## 31 Jason Terry 74 33.6 19.6 7.3 15.8 0.463 2.7 3.0 0.880 2.3 6.2
## 32 Deron Williams 68 36.9 19.4 6.8 14.5 0.471 4.8 5.6 0.849 1.0 3.3
## 33 Tim Duncan 75 33.7 19.3 7.4 14.8 0.504 4.5 6.4 0.692 0.0 0.0
## 34 Monta Ellis 25 35.6 19.0 7.8 17.2 0.451 3.1 3.8 0.830 0.3 1.0
## 35 Rudy Gay 79 37.3 18.9 7.2 16.0 0.453 3.3 4.4 0.767 1.1 3.1
## 36 Pau Gasol 81 37.1 18.9 7.3 12.9 0.567 4.2 5.4 0.781 0.0 0.0
## 37 Andre Iguodala 82 39.8 18.8 6.6 14.0 0.473 4.6 6.4 0.724 1.0 3.2
## 38 Corey Maggette 51 31.1 18.6 5.7 12.4 0.461 6.7 8.1 0.824 0.5 1.9
## 39 O.J. Mayo 82 38.0 18.5 6.9 15.6 0.438 3.0 3.4 0.879 1.8 4.6
## 40 John Salmons 79 37.5 18.3 6.5 13.8 0.472 3.6 4.4 0.830 1.6 3.8
## 41 Richard Hamilton 67 34.0 18.3 7.0 15.6 0.447 3.3 3.9 0.848 1.0 2.8
## 42 Ray Allen 79 36.3 18.2 6.3 13.2 0.480 3.0 3.2 0.952 2.5 6.2
## 43 LaMarcus Aldridge 81 37.1 18.1 7.4 15.3 0.484 3.2 4.1 0.781 0.1 0.3
## 44 Josh Howard 52 31.9 18.0 6.8 15.1 0.451 3.3 4.2 0.782 1.1 3.2
## 45 Maurice Williams 81 35.0 17.8 6.5 13.9 0.467 2.6 2.8 0.912 2.3 5.2
## 46 Shaquille O'neal 75 30.1 17.8 6.8 11.2 0.609 4.1 6.9 0.595 0.0 0.0
## 47 Rashard Lewis 79 36.2 17.7 6.1 13.8 0.439 2.8 3.4 0.836 2.8 7.0
## 48 Chauncey Billups 79 35.3 17.7 5.2 12.4 0.418 5.3 5.8 0.913 2.1 5.0
## 49 Allen Iverson 57 36.7 17.5 6.1 14.6 0.417 4.8 6.1 0.781 0.5 1.7
## 50 Nate Robinson 74 29.9 17.2 6.1 13.9 0.437 3.4 4.0 0.841 1.7 5.2
## X3PP ORB DRB TRB AST STL BLK TO PF
## 1 0.317 1.1 3.9 5.0 7.5 2.2 1.3 3.4 2.3
## 2 0.344 1.3 6.3 7.6 7.2 1.7 1.1 3.0 1.7
## 3 0.351 1.1 4.1 5.2 4.9 1.5 0.5 2.6 2.3
## 4 0.359 1.1 7.3 8.4 2.4 0.8 0.8 1.9 2.2
## 5 0.404 0.7 4.4 5.1 2.7 1.0 1.4 2.5 3.1
## 6 0.422 1.0 5.5 6.5 2.8 1.3 0.7 3.0 1.8
## 7 0.415 0.6 3.0 3.6 2.7 1.2 0.2 2.9 2.3
## 8 0.000 3.4 7.5 11.0 1.6 0.8 1.7 1.8 2.8
## 9 0.364 0.9 4.7 5.5 11.0 2.8 0.1 3.0 2.7
## 10 0.371 1.6 5.2 6.8 3.4 1.1 0.4 3.0 3.0
## 11 0.245 2.8 7.2 10.0 2.5 0.9 1.0 2.3 2.5
## 12 0.377 1.3 3.4 4.7 5.1 1.1 0.3 1.9 1.6
## 13 0.351 2.4 6.5 8.9 1.9 1.2 0.3 1.5 2.7
## 14 0.292 0.4 2.7 3.1 6.9 0.9 0.1 2.6 1.5
## 15 0.429 2.2 5.9 8.1 2.0 0.9 1.1 2.8 3.1
## 16 0.360 0.8 3.6 4.4 5.8 1.1 0.2 2.5 2.2
## 17 0.291 0.4 2.9 3.3 6.9 1.7 0.2 3.1 2.4
## 18 0.366 0.7 2.5 3.2 2.7 1.1 0.1 1.6 1.4
## 19 0.240 2.1 6.4 8.5 2.3 0.6 0.9 2.1 2.7
## 20 0.330 3.1 6.9 10.1 2.1 0.9 0.3 2.3 2.7
## 21 0.310 1.8 4.4 6.2 4.3 1.6 0.3 3.1 2.5
## 22 0.385 0.9 4.2 5.1 4.7 1.0 0.5 2.1 2.9
## 23 0.338 1.2 3.9 5.1 6.5 1.5 0.5 3.9 2.6
## 24 0.410 0.6 2.8 3.5 3.4 0.9 0.3 2.4 2.2
## 25 0.000 4.3 9.6 13.8 1.4 1.0 2.9 3.0 3.4
## 26 0.391 0.7 5.0 5.6 3.6 1.0 0.3 2.8 2.7
## 27 0.364 1.4 4.9 6.2 1.4 1.2 0.3 2.2 3.1
## 28 0.360 0.4 2.6 3.0 4.4 0.9 0.2 2.3 1.4
## 29 1.000 2.6 7.2 9.9 1.8 0.4 1.9 3.0 3.3
## 30 0.397 0.7 3.9 4.6 2.4 0.8 0.2 2.0 3.1
## 31 0.366 0.5 1.9 2.4 3.4 1.3 0.3 1.6 1.9
## 32 0.310 0.4 2.5 2.9 10.7 1.1 0.3 3.4 2.0
## 33 0.000 2.7 8.0 10.7 3.5 0.5 1.7 2.2 2.3
## 34 0.308 0.6 3.8 4.3 3.7 1.6 0.3 2.7 2.7
## 35 0.351 1.4 4.2 5.5 1.7 1.2 0.7 2.6 2.8
## 36 0.500 3.2 6.4 9.6 3.5 0.6 1.0 1.9 2.1
## 37 0.307 1.1 4.6 5.7 5.3 1.6 0.4 2.7 1.9
## 38 0.253 1.0 4.6 5.5 1.8 0.9 0.2 2.4 3.8
## 39 0.384 0.7 3.1 3.8 3.2 1.1 0.2 2.8 2.5
## 40 0.417 0.7 3.5 4.2 3.2 1.1 0.3 2.1 2.3
## 41 0.368 0.7 2.4 3.1 4.4 0.6 0.1 2.0 2.6
## 42 0.409 0.8 2.7 3.5 2.8 0.9 0.2 1.7 2.0
## 43 0.250 2.9 4.6 7.5 1.9 1.0 1.0 1.5 2.6
## 44 0.345 1.1 3.9 5.1 1.6 1.1 0.6 1.7 2.6
## 45 0.436 0.6 2.9 3.4 4.1 0.9 0.1 2.2 2.7
## 46 0.000 2.5 5.9 8.4 1.7 0.7 1.4 2.2 3.4
## 47 0.397 1.2 4.6 5.7 2.6 1.0 0.6 2.0 2.5
## 48 0.408 0.4 2.6 3.0 6.4 1.2 0.2 2.2 2.0
## 49 0.283 0.5 2.5 3.0 5.0 1.5 0.1 2.6 1.5
## 50 0.325 1.3 2.6 3.9 4.1 1.3 0.1 1.9 2.8
# Sort the players by points per game (PTS), ascending. In `nba[rows, cols]`
# the comma separates the row index from the column index; leaving the column
# slot empty keeps every column.
nba <- nba[order(nba$PTS), ]

# Use the player names as row names, then keep columns 2 through 19 only.
# This drops column 1 (Name, now stored in the row names) and, going by the
# printed table, the last two columns (TO and PF) as well.
rownames(nba) <- nba$Name
nba <- nba[, seq(2, 19)]

# heatmap() needs a numeric matrix rather than a data frame.
nba_matrix <- data.matrix(nba)

# Heatmap with the cyan-magenta palette. Rowv = NA and Colv = NA switch off
# the dendrograms and the reordering; scale = "column" rescales each statistic
# separately.
nba_heatmap <- heatmap(
  nba_matrix,
  Rowv = NA, Colv = NA,
  scale = "column",
  col = cm.colors(256),
  margins = c(5, 10),
  main = "NBA Player Stats in 2008",
  xlab = "NBA Player Stats",
  ylab = "NBA Player"
)

# Same heatmap with the heat.colors palette; this overwrites nba_heatmap.
nba_heatmap <- heatmap(
  nba_matrix,
  Rowv = NA, Colv = NA,
  scale = "column",
  col = heat.colors(256),
  margins = c(5, 10),
  main = "NBA Player Stats in 2008",
  xlab = "NBA Player Stats",
  ylab = "NBA Player"
)
Try using direction = -1 and then try removing it; you will see that it reverses the heatmap levels.
library(viridis)
## Warning: package 'viridis' was built under R version 4.0.5
## Loading required package: viridisLite
## Warning: package 'viridisLite' was built under R version 4.0.5
# Heatmap drawn with 25 viridis colours; direction = -1 reverses the order of
# the palette.
nba_heatmap <- heatmap(
  nba_matrix,
  Rowv = NA, Colv = NA,
  scale = "column",
  col = viridis(n = 25, direction = -1),
  margins = c(5, 10),
  main = "NBA Player Stats in 2008",
  xlab = "NBA Player Stats",
  ylab = "NBA Players"
)
library(treemap)
library(tidyverse)
library(RColorBrewer)
# Read the FlowingData post statistics for the treemap and preview the first
# six rows.
data <- read.csv(file = "http://datasets.flowingdata.com/post-data.txt")
head(data, n = 6L)
## id views comments category
## 1 5019 148896 28 Artistic Visualization
## 2 1416 81374 26 Visualization
## 3 1416 81374 26 Featured
## 4 3485 80819 37 Featured
## 5 3485 80819 37 Mapping
## 6 3485 80819 37 Data Sources
# Treemap of the post categories: rectangle size comes from `views`, colour
# from `comments` (type = "value").
treemap(
  data,
  index = "category",
  vSize = "views",
  vColor = "comments",
  type = "value",
  palette = "RdYlBu"
)
**Note:** I could not run the `treemap` function inside the chunk, but it worked in another console. I don’t know why.
# Same treemap, but with type = "manual" for the colour mapping of `comments`.
treemap(
  data,
  index = "category",
  vSize = "views",
  vColor = "comments",
  type = "manual",
  palette = "RdYlBu"
)
# install.packages("nycflights13")
library(nycflights13)
## Warning: package 'nycflights13' was built under R version 4.0.5
library(RColorBrewer)
# Copy the flights table from nycflights13 into the workspace, open it in the
# data viewer, and print per-column summary statistics.
flights <- nycflights13::flights
view(flights)
summary(flights)
## year month day dep_time sched_dep_time
## Min. :2013 Min. : 1.000 Min. : 1.00 Min. : 1 Min. : 106
## 1st Qu.:2013 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.: 907 1st Qu.: 906
## Median :2013 Median : 7.000 Median :16.00 Median :1401 Median :1359
## Mean :2013 Mean : 6.549 Mean :15.71 Mean :1349 Mean :1344
## 3rd Qu.:2013 3rd Qu.:10.000 3rd Qu.:23.00 3rd Qu.:1744 3rd Qu.:1729
## Max. :2013 Max. :12.000 Max. :31.00 Max. :2400 Max. :2359
## NA's :8255
## dep_delay arr_time sched_arr_time arr_delay
## Min. : -43.00 Min. : 1 Min. : 1 Min. : -86.000
## 1st Qu.: -5.00 1st Qu.:1104 1st Qu.:1124 1st Qu.: -17.000
## Median : -2.00 Median :1535 Median :1556 Median : -5.000
## Mean : 12.64 Mean :1502 Mean :1536 Mean : 6.895
## 3rd Qu.: 11.00 3rd Qu.:1940 3rd Qu.:1945 3rd Qu.: 14.000
## Max. :1301.00 Max. :2400 Max. :2359 Max. :1272.000
## NA's :8255 NA's :8713 NA's :9430
## carrier flight tailnum origin
## Length:336776 Min. : 1 Length:336776 Length:336776
## Class :character 1st Qu.: 553 Class :character Class :character
## Mode :character Median :1496 Mode :character Mode :character
## Mean :1972
## 3rd Qu.:3465
## Max. :8500
##
## dest air_time distance hour
## Length:336776 Min. : 20.0 Min. : 17 Min. : 1.00
## Class :character 1st Qu.: 82.0 1st Qu.: 502 1st Qu.: 9.00
## Mode :character Median :129.0 Median : 872 Median :13.00
## Mean :150.7 Mean :1040 Mean :13.18
## 3rd Qu.:192.0 3rd Qu.:1389 3rd Qu.:17.00
## Max. :695.0 Max. :4983 Max. :23.00
## NA's :9430
## minute time_hour
## Min. : 0.00 Min. :2013-01-01 05:00:00
## 1st Qu.: 8.00 1st Qu.:2013-04-04 13:00:00
## Median :29.00 Median :2013-07-03 10:00:00
## Mean :26.23 Mean :2013-07-03 05:22:54
## 3rd Qu.:44.00 3rd Qu.:2013-10-01 07:00:00
## Max. :59.00 Max. :2013-12-31 23:00:00
##
# nycflights13::flights
# One group per aircraft (tail number).
by_tailnum <- flights %>%
  group_by(tailnum)

# Per aircraft: number of flights, mean distance and mean arrival delay.
# Then keep aircraft with more than 20 flights and a mean distance under 2000.
delay <- by_tailnum %>%
  summarise(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  filter(count > 20, dist < 2000)

# Interestingly, the average delay is only slightly related to the average
# distance flown by a plane.
ggplot(data = delay, mapping = aes(x = dist, y = delay)) +
  geom_point(mapping = aes(size = count), alpha = 1 / 2) +
  geom_smooth() +
  scale_size_area()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Use RColorBrewer to change the palette to RdYlBu
# Install streamgraph from GitHub only when it is not already installed, so
# that re-running the document does not call the installer every time.
if (!requireNamespace("streamgraph", quietly = TRUE)) {
  devtools::install_github("hrbrmstr/streamgraph")
}
## Skipping install of 'streamgraph' from a github remote, the SHA1 (76f7173e) has not changed since last install.
## Use `force = TRUE` to force installation
#install.packages("babynames")
#devtools::session_info()
library(dplyr)
library(streamgraph)
library(babynames)
## Warning: package 'babynames' was built under R version 4.0.5
# Create a synthetic long-format data set with one row per (year, name) pair.
# NOTE: this reuses the name `data`, replacing the post data read earlier.
year <- rep(seq(1990, 2016), each = 10) # each year from 1990 to 2016, repeated 10 times
# Cycle the first 10 letters to the length of `year`, so the two vectors stay
# in step if the year range changes (27 years -> 27 cycles).
name <- rep(letters[1:10], length.out = length(year))
# Draw one value per row, without replacement, from 0.0000 to 1.0000.
value <- sample(seq(0, 1, 0.0001), length(year))
data <- data.frame(year, name, value)
# Basic stream graph: name the key, value and date columns.
streamgraph(
  data,
  key = "name",
  value = "value",
  date = "year"
)
# Number of columns in the babynames table.
ncol(babynames)
## [1] 5
# Preview the first six rows of babynames.
head(babynames, n = 6L)
## # A tibble: 6 x 5
## year sex name n prop
## <dbl> <chr> <chr> <int> <dbl>
## 1 1880 F Mary 7065 0.0724
## 2 1880 F Anna 2604 0.0267
## 3 1880 F Emma 2003 0.0205
## 4 1880 F Elizabeth 1939 0.0199
## 5 1880 F Minnie 1746 0.0179
## 6 1880 F Margaret 1578 0.0162
# Show the column names and types of babynames.
utils::str(babynames)
## tibble [1,924,665 x 5] (S3: tbl_df/tbl/data.frame)
## $ year: num [1:1924665] 1880 1880 1880 1880 1880 1880 1880 1880 1880 1880 ...
## $ sex : chr [1:1924665] "F" "F" "F" "F" ...
## $ name: chr [1:1924665] "Mary" "Anna" "Emma" "Elizabeth" ...
## $ n : int [1:1924665] 7065 2604 2003 1939 1746 1578 1472 1414 1320 1288 ...
## $ prop: num [1:1924665] 0.0724 0.0267 0.0205 0.0199 0.0179 ...
Mouse over the colors and years to look at the pattern of various names
babynames %>%
  # Keep only the names that begin with "Ra".
  filter(startsWith(name, "Ra")) %>%
  # One group per (year, name). The row count drops from 27,095 to 24,620 in
  # the next step, presumably because rows that differ only in `sex` fall
  # into the same group.
  group_by(year, name) %>%
  # tally(wt = n) sums the n column within each group, i.e. it behaves like
  # summarise(n = sum(n)) rather than counting rows.
  tally(wt = n) %>%
  streamgraph(key = "name", value = "n", date = "year")
Load the alluvial package
# install.packages("alluvial")
library(alluvial)
## Warning: package 'alluvial' was built under R version 4.0.5
If you want to save the prebuilt dataset to your folder, use the write_csv function
# install.packages("ggalluvial")
library(ggalluvial)
## Warning: package 'ggalluvial' was built under R version 4.0.5
# Keep a workspace copy of the packaged Refugees data set, then write the data
# set to "refugees.csv".
# NOTE(review): `Refugee` (singular) is not used again in the visible code.
Refugee <- Refugees
readr::write_csv(Refugees, "refugees.csv")
For help understanding the package ggalluvial, in a chunk, use the syntax:
# Open the help page for ggalluvial.
?ggalluvial
## starting httpd help server ... done
# Copy the packaged Refugees data set into the workspace.
Refugees <- Refugees
library(ggalluvial)

# Alluvial plot of refugee counts by year, with one alluvium per country.
ggalluv <- ggplot(Refugees, aes(x = year, y = refugees, alluvium = country)) +
  theme_bw() +
  # Author's note: I tried the different curve types but 'sigmoid' looks better.
  # NOTE(review): the code below uses "quintic", not "sigmoid" — confirm which
  # curve type is intended.
  geom_alluvium(
    aes(fill = country),
    width = .1, alpha = .75, decreasing = FALSE, curve_type = "quintic"
  ) +
  scale_fill_brewer(palette = "Spectral") +
  # `limits` is spelled out in full instead of relying on partial matching.
  scale_x_continuous(limits = c(2002, 2013)) +
  ggtitle("UNHCR-Recognised Refugees\nTop 10 Countries (2003-2013)\n") +
  ylab("Number of Refugees")
ggalluv
Code from “https://corybrunson.github.io/ggalluvial/reference/geom_alluvium.html”
# Faceted alluvial plot with one panel per country. Every row gets the same
# `id`, so each panel contains a single alluvium.
alluvial::Refugees %>%
  transform(id = 1) %>%
  ggplot(aes(x = year, y = refugees, alluvium = id)) +
  facet_wrap(~ country) +
  geom_alluvium(aes(fill = country), alpha = .75) +
  scale_x_continuous(breaks = seq(from = 2004, to = 2012, by = 4))
## Warning in f(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.