This is my first network visualization for a Tidy Tuesday project. It follows the steps outlined on this useful blog:
https://www.jessesadler.com/post/network-analysis-with-r/
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'readr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## ── Conflicts ──────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(igraph)
## Warning: package 'igraph' was built under R version 3.4.4
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(tidygraph)
## Warning: package 'tidygraph' was built under R version 3.4.4
##
## Attaching package: 'tidygraph'
## The following object is masked from 'package:igraph':
##
## groups
## The following object is masked from 'package:stats':
##
## filter
library(ggraph)
## Warning: package 'ggraph' was built under R version 3.4.4
library(networkD3)
train.dat <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/full_trains.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## service = col_character(),
## departure_station = col_character(),
## arrival_station = col_character(),
## comment_cancellations = col_logical(),
## comment_delays_at_departure = col_logical(),
## comment_delays_on_arrival = col_character()
## )
## See spec(...) for full column specifications.
# create nodes list
departs <- train.dat %>%
dplyr::distinct(departure_station) %>%
rename(city = departure_station)
arrives <- train.dat %>%
dplyr::distinct(arrival_station) %>%
rename(city = arrival_station)
nodes <- full_join(departs, arrives, by = "city") %>%
rowid_to_column("id")
# create edge list
per_route <- train.dat %>%
filter(year == 2018) %>%
group_by(departure_station, arrival_station) %>%
summarise(weight = mean(journey_time_avg)) %>%
ungroup()
head(per_route)
## # A tibble: 6 x 3
## departure_station arrival_station weight
## <chr> <chr> <dbl>
## 1 AIX EN PROVENCE TGV PARIS LYON 186.
## 2 ANGERS SAINT LAUD PARIS MONTPARNASSE 94.1
## 3 ANGOULEME PARIS MONTPARNASSE 128.
## 4 ANNECY PARIS LYON 227.
## 5 ARRAS PARIS NORD 51.5
## 6 AVIGNON TGV PARIS LYON 160.
edges <- per_route %>%
left_join(nodes, by = c("departure_station" = "city")) %>%
rename(from = id) %>%
left_join(nodes, by = c("arrival_station" = "city")) %>%
rename(to = id)
edges <- select(edges, from, to, weight)
routes_tidy <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE)
ggraph(routes_tidy, layout='graphopt')+
geom_edge_link(alpha=0.15)+
geom_node_point()+
theme_graph()+
geom_node_text(aes(label = city), repel=TRUE, cex=2.5)
nodes_d3 <- mutate(nodes, id = id-1)
edges_d3 <- mutate(edges, from = from-1, to = to-1)
forceNetwork(Links = edges_d3, Nodes = nodes_d3, Source = "from", Target = "to",
NodeID = "city", Group = "id", Value = "weight", opacity = 0.5,
fontSize = 20, zoom = TRUE)
## Links is a tbl_df. Converting to a plain data frame.
## Nodes is a tbl_df. Converting to a plain data frame.