Yeast Protein Interaction Network

Loading Packages

library(tidyverse)

library(igraph)

library(igraphdata)

library(ggraph)

library(latex2exp)

Loading the Data Set

Data from igraphdata::yeast

Data Source: von Mering, C., Krause, R., Snel, B. et al. Comparative assessment of large-scale data sets of protein–protein interactions. Nature 417, 399–403 (2002). https://doi.org/10.1038/nature750

# Load the `yeast` data set into the workspace.
data(yeast)

# Bind the graph to the shorter name `g`, which the rest of the script uses.
g <- yeast
# Print the vertex sequence; the recorded output shows 2617 named vertices.
V(g)
## + 2617/2617 vertices, named, from 65c41bb:
##    [1] YLR197W   YOR039W   YDR473C   YOR332W   YER090W   YDR394W   YER021W  
##    [8] YPR029C   YIL106W   YKL166C   YGL026C   YOR061W   YGL115W   YGL049C  
##   [15] YDL140C   YLR291C   YGR158C   YDR328C   YOL094C   YDR460W   YBR154C  
##   [22] YOR116C   YIL062C   YPR010C   YER027C   YPL093W   YER006W   YFR052W  
##   [29] YOR261C   YHR052W   YDR280W   YOR260W   YMR193W   YGR162W   YPR187W  
##   [36] YDR101C   YOL041C   YHR197W   YBL045C   YOR207C   YPL259C   YLL008W  
##   [43] YPL043W   YGL220W   YOR117W   YOR310C   YBL046W   YNL002C   YBR126C  
##   [50] YKL014C   YCR077C   YLR208W   YHR107C   YPL012W   YGL237C   YOR341W  
##   [57] YPL203W   YMR049C   YLR409C   YMR146C   YMR061W   YIL112W   YJL203W  
##   [64] YLR222C   YBR251W   YDL160C   YLR370C   YLL036C   YBR135W   YIL021W  
## + ... omitted several vertices
# Print the edge sequence; the recorded output shows 11855 edges, each written
# as a pair of vertex names.
E(g)
## + 11855/11855 edges from 65c41bb (vertex names):
##  [1] YLR197W--YDL014W YOR039W--YOR061W YDR473C--YPR178W YOR332W--YLR447C
##  [5] YER090W--YKL211C YDR394W--YGR232W YER021W--YPR108W YPR029C--YKL135C
##  [9] YIL106W--YGR092W YKL166C--YIL033C YGL026C--YKL211C YOR061W--YGL019W
## [13] YGL115W--YER027C YGL049C--YGR162W YDR394W--YOR117W YDL140C--YML010W
## [17] YLR291C--YKR026C YGR158C--YDL111C YDR328C--YDL132W YOL094C--YNL290W
## [21] YDR460W--YPR025C YBR154C--YOR341W YBR154C--YOR116C YIL062C--YKL013C
## [25] YBR154C--YOR207C YBR154C--YPR010C YER027C--YDR477W YLR291C--YGR083C
## [29] YPL093W--YDR496C YER006W--YMR049C YER006W--YMR290C YFR052W--YHR200W
## [33] YOR261C--YFR004W YHR052W--YDR496C YDL140C--YBR154C YDR394W--YOR259C
## [37] YDR280W--YGR195W YOR260W--YDR211W YMR193W--YML009C YGR162W--YOL139C
## + ... omitted several edges
# Find the connected components once and reuse the result for both summaries,
# rather than recomputing components(g) for each field.
comps <- components(g)
# Number of connected components.
comps$no
## [1] 92
# Number of vertices in each component; the recorded output shows one
# component of 2375 vertices and many very small ones.
comps$csize
##  [1] 2375    3    5    5    7    6    4    3    5    2    7    2    2    7    3
## [16]    2    2    4    2    2    2    2    2    2    2    2    2    2    2    2
## [31]    2    4    2    2    2    2    2    5    2    2    3    2    2    3    2
## [46]    2    5    3    3    2    2    2    2    3    3    2    2    2    2    2
## [61]    2    2    2    2    2    2    2    2    2    2    2    3    5    3    3
## [76]    4    2    2    2    2    2    3    2    2    4    2    2    2    2    2
## [91]    2    2
# Structure of the vertex attributes stored on the graph.
g %>% vertex_attr() %>% glimpse()
## List of 3
##  $ name       : chr [1:2617] "YLR197W" "YOR039W" "YDR473C" "YOR332W" ...
##  $ Class      : chr [1:2617] "T" "T" "T" "O" ...
##  $ Description: chr [1:2617] "SIK1 involved in pre-rRNA processing" "CKB2 casein kinase II beta' chain" "PRP3 essential splicing factor" "VMA4 H+-ATPase V1 domain 27 KD subunit, vacuolar" ...
# Structure of the edge attributes stored on the graph.
g %>% edge_attr() %>% glimpse()
## List of 1
##  $ Confidence: chr [1:11855] "high" "high" "high" "high" ...
# First ten values of the `Class` vertex attribute.
head(vertex_attr(g, name = 'Class'), 10)
##  [1] "T" "T" "T" "O" "G" "F" "F" "O" "D" "C"
# First ten values of the `Confidence` edge attribute.
head(edge_attr(g, name = 'Confidence'), 10)
##  [1] "high" "high" "high" "high" "high" "high" "high" "high" "high" "high"

Visualizing the Yeast Network

# Draw the yeast network with one ggraph layout.
#
# The three figures below differ only in the layout name and the subtitle, so
# the shared plotting code lives in this helper instead of being repeated.
#
# layout:   layout name handed to ggraph().
# subtitle: subtitle text describing the layout.
# graph:    graph to draw; defaults to the yeast network `g`.
# seed:     value passed to set.seed() right before the layout is computed,
#           so repeated runs produce the same picture.
#
# Returns the ggraph plot object (printed when called at top level).
plot_yeast_layout <- function(layout, subtitle, graph = g, seed = 42) {
  set.seed(seed)

  ggraph(graph, layout = layout) +
    geom_edge_fan(edge_linetype = 3, color = 'dark blue', alpha = 0.25) +
    geom_node_point(color = 'dark red', size = 1, alpha = 0.75) +
    theme_graph(base_family = 'Helvetica') +
    labs(title = 'Yeast Interaction Network',
         subtitle = subtitle)
}

plot_yeast_layout('lgl', 'Displayed Using Layout Generator for Larger Graphs')

plot_yeast_layout('drl', 'Displayed Using Distributed Recursive Layout')

plot_yeast_layout('mds', 'Displayed Using Multidimensional Scaling Layout')

Summary Statistics of the Yeast Network

# Per-vertex statistics, one row per vertex.
#
# The columns are built directly by name rather than binding four enframe()
# results and then picking the auto-repaired columns (`name...1`, `value...2`,
# ...). That keeps the code independent of the name-repair scheme and removes
# the need to silence its messages.
#
# `clustering` is the local clustering coefficient; it can be missing for some
# vertices, which is why later summaries pass `na.rm = TRUE`.
df <- tibble(
  name         = V(g)$name,
  eccentricity = unname(eccentricity(g)),
  betweenness  = unname(betweenness(g)),
  degree       = unname(degree(g)),
  clustering   = transitivity(g, type = 'local')
)

# Look at the first and last rows, then list every column with its type.
df %>% head()
df %>% tail()
df %>% glimpse()
## Rows: 2,617
## Columns: 5
## $ name         <chr> "YLR197W", "YOR039W", "YDR473C", "YOR332W", "YER090W", "Y…
## $ eccentricity <dbl> 9, 9, 11, 10, 9, 8, 9, 14, 13, 9, 10, 9, 11, 9, 10, 10, 9…
## $ betweenness  <dbl> 4522.729086, 1274.738111, 12293.315437, 2009.662274, 9015…
## $ degree       <dbl> 40, 19, 9, 13, 21, 37, 21, 5, 6, 2, 15, 23, 5, 24, 43, 13…
## $ clustering   <dbl> 0.48333333, 0.69005848, 0.77777778, 0.57692308, 0.1952381…
# One-row summary of the network: mean and maximum degree, share of vertices
# whose degree does not exceed the mean, largest and smallest eccentricity,
# mean local clustering coefficient (missing values dropped), and the mean
# shortest-path distance.
df %>%
  summarise(
    avg_deg = mean(degree),
    delta = max(degree),
    prop = sum(degree <= avg_deg) / n(),
    diam = max(eccentricity),
    radius = min(eccentricity),
    avg_cc = mean(clustering, na.rm = TRUE),
    avg_distance = mean_distance(g, directed = FALSE, unconnected = TRUE)
  )
# Keep the mean distance in `d` (it is reused when plotting the distance
# distribution) and print it.
d <- mean_distance(g, directed = FALSE, unconnected = TRUE)
d
## [1] 5.095629
# Averaging the full distance matrix instead gives Inf, as the output shows.
g %>% distances() %>% mean()
## [1] Inf

Fig. 2.18(a) on p. 66

# Count the vertex pairs at each shortest-path distance. The table is computed
# once and reused below instead of calling distance_table(g) repeatedly.
dist_tab <- distance_table(g)
dist_tab
## $res
##  [1]  11855  67910 276506 670068 786151 550965 274661 114390  43508  16152
## [11]   5473   1378    306     56     16
## 
## $unconnected
## [1] 603641
# x: distance (position in `$res`); y: share of the counted pairs at that
# distance. seq_along() replaces 1:length(), which misbehaves on empty input.
D <- data.frame(x = seq_along(dist_tab$res),
                y = dist_tab$res / sum(dist_tab$res))

D %>% 
  ggplot(aes(x = x, y = y)) + 
  geom_point() + 
  # Mark the mean distance `d` with a proper vertical reference line rather
  # than a line geom whose x is a constant.
  geom_vline(xintercept = d, color = 'blue') +
  labs(title = 'Distribution of Distance (Proportions) in the Yeast Network') +
  labs(x = 'distance', y = 'density')

The Degree Distribution

# Degree distribution, shown as a kernel density estimate and as a histogram,
# first for all vertices and then restricted to vertices of degree <= 20.
# The computed density is requested with after_stat(density); the older
# `..density..` notation is deprecated in ggplot2.

# KDE over all vertices.
df %>% 
  ggplot(aes(x = degree, y = after_stat(density))) + 
  geom_density(fill = 'red') +
  labs(title = 'KDE of Degrees in the Yeast Network')

# Histogram over all vertices, one bin per degree value, scaled to density.
df %>% 
  ggplot(aes(x = degree, y = after_stat(density))) + 
  geom_histogram(binwidth = 1, fill = 'blue') +
  labs(title = 'Histogram of Degrees in the Yeast Network')

# KDE restricted to vertices with degree <= 20.
df %>% 
  filter(degree <= 20) %>% 
  ggplot(aes(x = degree, y = after_stat(density))) + 
  geom_density(fill = 'red') + 
  labs(title = 'KDE of Degrees in the Yeast Network',
       subtitle = TeX('for Nodes with Degree $\\leq 20$'))

# Histogram restricted to vertices with degree <= 20.
df %>% 
  filter(degree <= 20) %>% 
  ggplot(aes(x = degree, y = after_stat(density))) + 
  geom_histogram(binwidth = 1, fill = 'blue') +
  labs(title = 'Histogram of Degrees in the Yeast Network',
       subtitle = TeX('for Nodes with Degree $\\leq 20$'))

Fig. 2.18(b) on p. 66

# Mean local clustering coefficient for each degree value, on log-log axes.
# Missing clustering values are dropped before averaging, and points that
# cannot be drawn are removed silently by geom_point(na.rm = TRUE).
df %>% 
  group_by(degree) %>% 
  summarise(cc_deg = mean(clustering, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = degree, y = cc_deg)) + 
  geom_point(na.rm = TRUE, color = 'blue') + 
  scale_x_log10() + 
  scale_y_log10() +
  labs(title = 'Relation Between Local Clustering Coefficient and Degree',
       subtitle = 'in the Yeast Network') +
  # The x axis shows the degree k itself, so it is labelled k (not p_k).
  labs(x = TeX('$k$'), y = TeX('$C_k$'))

Local Clustering Coefficient Distribution

# Distribution of the local clustering coefficient, as a kernel density
# estimate and as a histogram with bins of width 0.1. Missing coefficients are
# dropped silently (na.rm = TRUE). after_stat(density) replaces the deprecated
# `..density..` notation.
df %>% 
  ggplot(aes(x = clustering, y = after_stat(density))) + 
  geom_density(fill = 'red', na.rm = TRUE) +
  labs(title = 'KDE of Local Clustering Coefficients in the Yeast Network')

df %>% 
  ggplot(aes(x = clustering, y = after_stat(density))) + 
  geom_histogram(binwidth = .1, fill = 'blue', na.rm = TRUE) +
  labs(title = 'Histogram of Local Clustering Coefficients in the Yeast Network')

# Ratio of log(number of vertices) to log(mean degree), printed next to the
# observed mean distance and the diameter for comparison.
log(vcount(g)) / log(mean(df$degree))
## [1] 3.570896
mean_distance(g, directed = FALSE, unconnected = TRUE)
## [1] 5.095629
diameter(g)
## [1] 15
# Mean local clustering coefficient over all vertices (missing values dropped).
C <- df %>% pull(clustering) %>% mean(na.rm = TRUE)

# Mean degree over all vertices.
M <- df %>% pull(degree) %>% mean()

# Mean local clustering coefficient for each degree value.
df %>%
  group_by(degree) %>%
  summarize(cc_deg = mean(clustering), .groups = 'drop')

Fig. 3.13(d) on p. 96

# Mean local clustering coefficient per degree on log-log axes, with two
# horizontal reference levels:
#   blue - `C`, the network's average local clustering coefficient;
#   red  - `M / gorder(g)`, mean degree divided by the number of vertices,
#          which the subtitle describes as the random-model prediction.
# The levels are drawn with geom_hline() instead of a line geom fed a constant
# y. Missing clustering values are dropped before averaging, as in the earlier
# per-degree plot.
df %>% 
  group_by(degree) %>% 
  summarise(cc_deg = mean(clustering, na.rm = TRUE)) %>% 
  ggplot(aes(x = degree, y = cc_deg)) + 
  geom_point(na.rm = TRUE, color = 'blue') + 
  geom_hline(yintercept = C, color = 'blue') + 
  geom_hline(yintercept = M / gorder(g), color = 'red') + 
  scale_x_log10() + 
  scale_y_log10() + 
  labs(title =  'Relation Between Local Clustering Coefficient and Degree',
       subtitle = 'The blue line is the average local clustering coefficient; \nthe red one is the one predicted by the random model.') +
  labs(x = 'k', y = TeX('$C(k)$'))

Visualizing Other Relations with Degree

# Scatter plot of betweenness centrality against degree, one point per vertex.
ggplot(data = df, mapping = aes(x = degree, y = betweenness)) +
  geom_point(color = 'red', size = 0.5, na.rm = TRUE) +
  ggtitle('Relationship Between Betweenness Centrality and Degree')

# Betweenness against degree with a log10 y axis. A tiny offset is added so
# vertices with zero betweenness still have a finite position on the log scale.
df %>% 
  ggplot(aes(x = degree, y = betweenness + 0.00000001)) + 
  geom_point(na.rm = TRUE, size = 0.5, color = 'red') + 
  scale_y_log10() +
  labs(title = TeX('Relationship Between $\\log_{10}$ of Betweenness Centrality and Degree')) +
  # The label must go through TeX(); a bare string would print the LaTeX
  # source literally on the axis.
  labs(y = TeX('$\\log_{10}$(betweenness)'))

# Same log-scale view, but keeping only vertices with strictly positive
# betweenness so no offset is needed.
positive_betweenness <- filter(df, betweenness > 0)

ggplot(data = positive_betweenness,
       mapping = aes(x = degree, y = betweenness)) +
  geom_point(color = 'red', size = 0.5, na.rm = TRUE) +
  scale_y_log10() +
  labs(
    title = TeX('Relationship Between $\\log_{10}$ of Betweenness Centrality and Degree'),
    y = TeX('$\\log_{10}$(betweenness)')
  )

# Eccentricity against degree, one point per vertex.
ggplot(data = df, mapping = aes(x = degree, y = eccentricity)) +
  geom_point(color = 'orange', size = 0.5, na.rm = TRUE) +
  ggtitle('Relationship Between Eccentricity and Degree')

# Local clustering coefficient against degree; vertices without a clustering
# value are dropped silently.
ggplot(data = df, mapping = aes(x = degree, y = clustering)) +
  geom_point(color = 'blue', size = 0.5, na.rm = TRUE) +
  ggtitle('Relationship Between Local Clustering Coefficient and Degree')

References