This is my first network visualization for a Tidy Tuesday project. It follows the steps outlined on this useful blog:

https://www.jessesadler.com/post/network-analysis-with-r/

Load Packages and Data

library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0       ✔ purrr   0.2.5  
## ✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.2       ✔ stringr 1.3.1  
## ✔ readr   1.3.1       ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'tidyr' was built under R version 3.4.4
## Warning: package 'readr' was built under R version 3.4.4
## Warning: package 'purrr' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'stringr' was built under R version 3.4.4
## ── Conflicts ──────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(igraph)
## Warning: package 'igraph' was built under R version 3.4.4
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(tidygraph)
## Warning: package 'tidygraph' was built under R version 3.4.4
## 
## Attaching package: 'tidygraph'
## The following object is masked from 'package:igraph':
## 
##     groups
## The following object is masked from 'package:stats':
## 
##     filter
library(ggraph)
## Warning: package 'ggraph' was built under R version 3.4.4
library(networkD3)

train.dat <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/full_trains.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   service = col_character(),
##   departure_station = col_character(),
##   arrival_station = col_character(),
##   comment_cancellations = col_logical(),
##   comment_delays_at_departure = col_logical(),
##   comment_delays_on_arrival = col_character()
## )
## See spec(...) for full column specifications.

Prepare Data for Network Analysis

# create nodes list

departs <- train.dat %>%
  dplyr::distinct(departure_station) %>%
  rename(city = departure_station)

arrives <- train.dat %>%
  dplyr::distinct(arrival_station) %>%
  rename(city = arrival_station)

nodes <- full_join(departs, arrives, by = "city") %>%
  rowid_to_column("id")

# create edge list

per_route <- train.dat %>%
  filter(year == 2018) %>%
  group_by(departure_station, arrival_station) %>%
  summarise(weight = mean(journey_time_avg)) %>%
  ungroup()

head(per_route)
## # A tibble: 6 x 3
##   departure_station   arrival_station    weight
##   <chr>               <chr>               <dbl>
## 1 AIX EN PROVENCE TGV PARIS LYON          186. 
## 2 ANGERS SAINT LAUD   PARIS MONTPARNASSE   94.1
## 3 ANGOULEME           PARIS MONTPARNASSE  128. 
## 4 ANNECY              PARIS LYON          227. 
## 5 ARRAS               PARIS NORD           51.5
## 6 AVIGNON TGV         PARIS LYON          160.
edges <- per_route %>%
  left_join(nodes, by = c("departure_station" = "city")) %>%
  rename(from = id) %>%
  left_join(nodes, by = c("arrival_station" = "city")) %>%
  rename(to = id) 

edges <- select(edges, from, to, weight)

Plot Network Diagram

routes_tidy <- tbl_graph(nodes = nodes, edges = edges, directed = TRUE)

ggraph(routes_tidy, layout='graphopt')+
  geom_edge_link(alpha=0.15)+
  geom_node_point()+
  theme_graph()+
  geom_node_text(aes(label = city), repel=TRUE, cex=2.5)

Create Interactive Network Diagram

nodes_d3 <- mutate(nodes, id = id-1)
edges_d3 <- mutate(edges, from = from-1, to = to-1)

forceNetwork(Links = edges_d3, Nodes = nodes_d3, Source = "from", Target = "to",
             NodeID = "city", Group = "id", Value = "weight", opacity = 0.5, 
             fontSize = 20, zoom = TRUE)
## Links is a tbl_df. Converting to a plain data frame.
## Nodes is a tbl_df. Converting to a plain data frame.