First, let's load the following libraries.

Simple Functionality

You can create a simple graph with the create_*() functions and a random simple graph with the play_*() functions. The base R plot() function also works on these objects.

# Create: build a ring graph with 8 nodes and print its summary
create_ring(n = 8)
## # A tbl_graph: 8 nodes and 8 edges
## #
## # An undirected simple graph with 1 component
## #
## # Node Data: 8 x 0 (active)
## #
## # Edge Data: 8 x 2
##    from    to
##   <int> <int>
## 1     1     2
## 2     2     3
## 3     3     4
## # ... with 5 more rows
# Draw the same kind of ring graph with the base plot() generic
create_ring(n = 8) %>% plot()

# Play: draw a random graph with 8 nodes and 13 edges (seeded so it repeats)
set.seed(13)
play_erdos_renyi(n = 8, m = 13)
## # A tbl_graph: 8 nodes and 13 edges
## #
## # A directed simple graph with 1 component
## #
## # Node Data: 8 x 0 (active)
## #
## # Edge Data: 13 x 2
##    from    to
##   <int> <int>
## 1     1     6
## 2     2     3
## 3     3     4
## # ... with 10 more rows
# Plot an undirected random graph with 8 nodes and 13 edges.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
plot(play_erdos_renyi(n = 8, m = 13, directed = FALSE))

# Draw the random graph once and keep it, so that the printed summary and the
# plot below describe the same graph rather than two independent random draws.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
gnp_graph <- play_erdos_renyi(n = 8, p = 0.2, directed = FALSE)
gnp_graph
## # A tbl_graph: 8 nodes and 5 edges
## #
## # An undirected simple graph with 3 components
## #
## # Node Data: 8 x 0 (active)
## #
## # Edge Data: 5 x 2
##    from    to
##   <int> <int>
## 1     1     3
## 2     3     4
## 3     3     5
## # ... with 2 more rows
plot(gnp_graph)

Here I use a media dataset to demonstrate the tidygraph and ggraph packages. You can create a graph object with tbl_graph() by providing the node and edge datasets.

# Read the edge list and the node attributes of the media example
media.edge <- read.csv(file = "data/Dataset1-Media-Example-EDGES.csv")
media.node <- read.csv(file = "data/Dataset1-Media-Example-NODES.csv")

# Combine both tables into one tbl_graph and print its summary
media <- tbl_graph(nodes = media.node, edges = media.edge)
media
## # A tbl_graph: 17 nodes and 52 edges
## #
## # A directed multigraph with 1 component
## #
## # Node Data: 17 x 5 (active)
##   id    media               media.type type.label audience.size
##   <fct> <fct>                    <int> <fct>              <int>
## 1 s01   NY Times                     1 Newspaper             20
## 2 s02   Washington Post              1 Newspaper             25
## 3 s03   Wall Street Journal          1 Newspaper             30
## 4 s04   USA Today                    1 Newspaper             32
## 5 s05   LA Times                     1 Newspaper             20
## 6 s06   New York Post                1 Newspaper             50
## # ... with 11 more rows
## #
## # Edge Data: 52 x 4
##    from    to weight type     
##   <int> <int>  <int> <fct>    
## 1     1     2     10 hyperlink
## 2     1     2     12 hyperlink
## 3     1     3     22 hyperlink
## # ... with 49 more rows
# Quick look at the whole media graph with the base plot() generic
media %>% plot()

The activate() function gives access to either the node dataset or the edge dataset. Since tidygraph is built on top of dplyr, any functionality in the dplyr package can also be used.

# Edge table of the media graph as a tibble
media %>%
  activate(edges) %>%
  as_tibble()
## # A tibble: 52 x 4
##     from    to weight type     
##    <int> <int>  <int> <fct>    
##  1     1     2     10 hyperlink
##  2     1     2     12 hyperlink
##  3     1     3     22 hyperlink
##  4     1     4     21 hyperlink
##  5     4     5     22 mention  
##  6     6     7     21 mention  
##  7     8     9     21 mention  
##  8    10    11     11 mention  
##  9    10    11     12 mention  
## 10     3     4     22 hyperlink
## # ... with 42 more rows
# Node table of the media graph as a tibble
media %>%
  activate(nodes) %>%
  as_tibble()
## # A tibble: 17 x 5
##    id    media               media.type type.label audience.size
##    <fct> <fct>                    <int> <fct>              <int>
##  1 s01   NY Times                     1 Newspaper             20
##  2 s02   Washington Post              1 Newspaper             25
##  3 s03   Wall Street Journal          1 Newspaper             30
##  4 s04   USA Today                    1 Newspaper             32
##  5 s05   LA Times                     1 Newspaper             20
##  6 s06   New York Post                1 Newspaper             50
##  7 s07   CNN                          2 TV                    56
##  8 s08   MSNBC                        2 TV                    34
##  9 s09   FOX News                     2 TV                    60
## 10 s10   ABC                          2 TV                    23
## 11 s11   BBC                          2 TV                    34
## 12 s12   Yahoo News                   3 Online                33
## 13 s13   Google News                  3 Online                23
## 14 s14   Reuters.com                  3 Online                12
## 15 s15   NYTimes.com                  3 Online                24
## 16 s16   WashingtonPost.com           3 Online                28
## 17 s17   AOL.com                      3 Online                33
# Extract the `media` column (the outlet names) from the node table
media_list <- media %>%
  activate(nodes) %>%
  pull(media)
media_list
##  [1] NY Times            Washington Post     Wall Street Journal
##  [4] USA Today           LA Times            New York Post      
##  [7] CNN                 MSNBC               FOX News           
## [10] ABC                 BBC                 Yahoo News         
## [13] Google News         Reuters.com         NYTimes.com        
## [16] WashingtonPost.com  AOL.com            
## 17 Levels: ABC AOL.com BBC CNN FOX News Google News LA Times ... Yahoo News

Here are some examples of how to subset the graph based on a certain category.

# Sub-graph that keeps only the "hyperlink" edges
media_h <- media %>%
  activate(edges) %>%
  filter(type == "hyperlink")
plot(media_h)

# Sub-graph that keeps only the "mention" edges
media_m <- media %>%
  activate(edges) %>%
  filter(type == "mention")
plot(media_m)

Lastly, you can also join and bind graphs. See the results below to observe the difference between graph_join() and bind_graphs().

set.seed(100)
# Join the "mention" and "hyperlink" sub-graphs, then plot the joined graph
plot(graph_join(media_m, media_h))
## Joining, by = c("id", "media", "media.type", "type.label", "audience.size")

# Full graph, for comparison with the joined result
plot(media)

# The two sub-graphs on their own
plot(media_m)

plot(media_h)

# Bind the two sub-graphs into one graph object and plot it
plot(bind_graphs(media_m, media_h))

Ggraph Package

From now on, I use only media_m as the dataset. The ggraph package shares some properties with the igraph package and uses the same grammar as the ggplot2 package. You can use the graph layouts from igraph in ggraph. The list of layouts can be viewed by typing layout_with_ (and pressing the ‘Tab’ key) with the igraph package loaded. Once you decide to use layout_with_* from igraph in ggraph, you can just pass layout = "*" as an argument to the ggraph() function.

The standard graph visualization is as follows.

# Standard drawing: nodes as points and edges as straight links, "kk" layout
ggraph(media_m, layout = "kk") +
  geom_node_point() +
  geom_edge_link()

Here, you can adjust the node and edge styles using the geom_node_* and geom_edge_* functions provided by ggraph.

# Same graph with the "gem" layout and diagonal edges
ggraph(media_m, layout = "gem") +
  geom_node_point() +
  geom_edge_diagonal()

Furthermore, you can map data to visual properties using aes(), just as in ggplot(). You can also apply a theme here.

# Label each node with its media name, coloured by media type, on a
# "sugiyama" layout with faint gray diagonal edges
ggraph(media_m, layout = "sugiyama") +
  geom_node_text(aes(label = media, color = type.label), size = 3) +
  geom_edge_diagonal(color = "gray", alpha = 0.4) +
  theme_graph()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Notice that the directions/arrows of the edges do not appear, so the graph reads as an undirected graph. You can add the arrow argument, built with arrow(), to make them appear.

# Seeded before the plot is built so the figure can be reproduced
set.seed(100)
ggraph(media_m, layout = "graphopt") +
  # Draw each edge with an arrow head; end_cap stops the edge short of its
  # target node
  geom_edge_link(
    arrow = arrow(length = unit(2, "mm")),
    end_cap = circle(3, "mm")
  ) +
  geom_node_point(size = 3) +
  geom_node_text(
    aes(label = media, color = type.label),
    size = 3,
    # TRUE is spelled out because T is an ordinary variable that can be
    # reassigned
    repel = TRUE
  ) +
  theme_graph()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Faceting the graph by some category is also possible.

# Tag every node with the sub-graph it comes from, so the plot can facet on it
media_m1 <- media_m %>%
  activate(nodes) %>%
  mutate(graph = "Mention")
media_h1 <- media_h %>%
  activate(nodes) %>%
  mutate(graph = "Hyperlink")

# Bind both tagged graphs and draw one panel per value of `graph`
ggraph(bind_graphs(media_m1, media_h1), layout = "nicely") +
  geom_edge_link(
    arrow = arrow(length = unit(2, "mm")),
    end_cap = circle(3, "mm")
  ) +
  geom_node_point(size = 3) +
  geom_node_text(
    aes(label = media, color = type.label),
    size = 3,
    # TRUE is spelled out because T is an ordinary variable that can be
    # reassigned
    repel = TRUE
  ) +
  facet_nodes(~graph) +
  theme_graph(foreground = "steelblue")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Graph Manipulation

Here, the morph() function is introduced. This function plays a crucial role in graph manipulation: it provides graph manipulation techniques that can be applied without changing the graph structure. For example, here I demonstrate how to find the shortest path between “Reuters.com” and “FOX News”.

# Positions of the two path endpoints in the vector of media names
from <- which(media_list == "Reuters.com")
to <- which(media_list == "FOX News")

# Morph the "mention" graph into the shortest path between the endpoints,
# using the `weight` column as edge weights
shortest <- morph(media_m, to_shortest_path, from, to, weights = weight)

# Example of unmorph() to create a new tbl_graph
unmorph(mutate(shortest, selected_node = TRUE))
## # A tbl_graph: 17 nodes and 21 edges
## #
## # A directed multigraph with 3 components
## #
## # Edge Data: 21 x 5 (active)
##    from    to weight type    selected_node
##   <int> <int>  <int> <fct>   <lgl>        
## 1     4     5     22 mention NA           
## 2     6     7     21 mention NA           
## 3     8     9     21 mention NA           
## 4    10    11     11 mention NA           
## 5    10    11     12 mention NA           
## 6     1     7     20 mention NA           
## # ... with 15 more rows
## #
## # Node Data: 17 x 5
##   id    media               media.type type.label audience.size
##   <fct> <fct>                    <int> <fct>              <int>
## 1 s01   NY Times                     1 Newspaper             20
## 2 s02   Washington Post              1 Newspaper             25
## 3 s03   Wall Street Journal          1 Newspaper             30
## # ... with 14 more rows
# While the graph is still morphed, flag its nodes and then its edges,
# and finally merge the flags back into the original graph
flagged_nodes <- mutate(activate(shortest, nodes), selected_node = TRUE)
flagged_edges <- mutate(activate(flagged_nodes, edges), selected_edge = TRUE)
shortest <- unmorph(flagged_edges)

# Edge table after unmorphing
as_tibble(activate(shortest, edges))
## # A tibble: 21 x 5
##     from    to weight type    selected_edge
##    <int> <int>  <int> <fct>   <lgl>        
##  1     4     5     22 mention NA           
##  2     6     7     21 mention NA           
##  3     8     9     21 mention NA           
##  4    10    11     11 mention NA           
##  5    10    11     12 mention NA           
##  6     1     7     20 mention NA           
##  7    12     9     21 mention NA           
##  8    11    13     21 mention NA           
##  9    10    14     21 mention NA           
## 10    14    10     22 mention NA           
## # ... with 11 more rows
# Node table after unmorphing
as_tibble(activate(shortest, nodes))
## # A tibble: 17 x 6
##    id    media            media.type type.label audience.size selected_node
##    <fct> <fct>                 <int> <fct>              <int> <lgl>        
##  1 s01   NY Times                  1 Newspaper             20 NA           
##  2 s02   Washington Post           1 Newspaper             25 NA           
##  3 s03   Wall Street Jou~          1 Newspaper             30 NA           
##  4 s04   USA Today                 1 Newspaper             32 NA           
##  5 s05   LA Times                  1 Newspaper             20 NA           
##  6 s06   New York Post             1 Newspaper             50 NA           
##  7 s07   CNN                       2 TV                    56 NA           
##  8 s08   MSNBC                     2 TV                    34 NA           
##  9 s09   FOX News                  2 TV                    60 TRUE         
## 10 s10   ABC                       2 TV                    23 NA           
## 11 s11   BBC                       2 TV                    34 NA           
## 12 s12   Yahoo News                3 Online                33 NA           
## 13 s13   Google News               3 Online                23 NA           
## 14 s14   Reuters.com               3 Online                12 TRUE         
## 15 s15   NYTimes.com               3 Online                24 NA           
## 16 s16   WashingtonPost.~          3 Online                28 TRUE         
## 17 s17   AOL.com                   3 Online                33 TRUE
# Recode the flags (NA becomes 1, TRUE becomes 2), then order the edges
# by that code
shortest <- shortest %>%
  activate(nodes) %>%
  mutate(selected_node = ifelse(is.na(selected_node), 1, 2)) %>%
  activate(edges) %>%
  mutate(selected_edge = ifelse(is.na(selected_edge), 1, 2)) %>%
  arrange(selected_edge)

# Map the edge code to transparency so the two groups of edges differ visually
ggraph(shortest, layout = "graphopt") +
  geom_edge_link(
    aes(alpha = selected_edge),
    color = "gray",
    arrow = arrow(length = unit(2, "mm")),
    end_cap = circle(3, "mm")
  ) +
  geom_node_point(size = 3) +
  geom_node_text(
    aes(label = media, color = type.label),
    size = 3,
    # TRUE is spelled out because T is an ordinary variable that can be
    # reassigned
    repel = TRUE
  )

Additional: Centrality measurement. There are several measures of centrality in a graph, and some of them are provided in tidygraph. They can be accessed through centrality_* when you ‘activate’ the nodes of the graph.

For instance, centrality_betweenness() measures the centrality of every node by calculating how many shortest paths in the network contain that node. This can be visualized to show how important each node is in terms of graph/network connectivity.

set.seed(100)

# Attach a betweenness score to every node of the "mention" graph
media_central <- mutate(
  activate(media_m, nodes),
  centrality = centrality_betweenness()
)

# Node size and colour both encode the betweenness score
ggraph(media_central, layout = "graphopt") +
  geom_edge_link(width = 1, colour = "lightgray") +
  geom_node_point(aes(size = centrality, colour = centrality)) +
  geom_node_text(aes(label = media), repel = TRUE) +
  scale_color_gradient(low = "yellow", high = "red") +
  theme_graph()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Reference: