library(tidyverse)
library(igraph)
library(igraphdata)
library(ggraph)
library(latex2exp)
# Data: igraphdata::yeast. Source: von Mering, C., Krause, R., Snel, B. et al. Comparative assessment of large-scale data sets of protein–protein interactions. Nature 417, 399–403 (2002). https://doi.org/10.1038/nature750
# Load the `yeast` graph shipped with igraphdata and keep it under the short
# name `g`, which the rest of the script uses.
data("yeast", package = "igraphdata")
g <- yeast

# Vertex sequence of the graph (rendered output kept below).
V(g)
## + 2617/2617 vertices, named, from 65c41bb:
## [1] YLR197W YOR039W YDR473C YOR332W YER090W YDR394W YER021W
## [8] YPR029C YIL106W YKL166C YGL026C YOR061W YGL115W YGL049C
## [15] YDL140C YLR291C YGR158C YDR328C YOL094C YDR460W YBR154C
## [22] YOR116C YIL062C YPR010C YER027C YPL093W YER006W YFR052W
## [29] YOR261C YHR052W YDR280W YOR260W YMR193W YGR162W YPR187W
## [36] YDR101C YOL041C YHR197W YBL045C YOR207C YPL259C YLL008W
## [43] YPL043W YGL220W YOR117W YOR310C YBL046W YNL002C YBR126C
## [50] YKL014C YCR077C YLR208W YHR107C YPL012W YGL237C YOR341W
## [57] YPL203W YMR049C YLR409C YMR146C YMR061W YIL112W YJL203W
## [64] YLR222C YBR251W YDL160C YLR370C YLL036C YBR135W YIL021W
## + ... omitted several vertices

# Edge sequence of the graph.
E(g)
## + 11855/11855 edges from 65c41bb (vertex names):
## [1] YLR197W--YDL014W YOR039W--YOR061W YDR473C--YPR178W YOR332W--YLR447C
## [5] YER090W--YKL211C YDR394W--YGR232W YER021W--YPR108W YPR029C--YKL135C
## [9] YIL106W--YGR092W YKL166C--YIL033C YGL026C--YKL211C YOR061W--YGL019W
## [13] YGL115W--YER027C YGL049C--YGR162W YDR394W--YOR117W YDL140C--YML010W
## [17] YLR291C--YKR026C YGR158C--YDL111C YDR328C--YDL132W YOL094C--YNL290W
## [21] YDR460W--YPR025C YBR154C--YOR341W YBR154C--YOR116C YIL062C--YKL013C
## [25] YBR154C--YOR207C YBR154C--YPR010C YER027C--YDR477W YLR291C--YGR083C
## [29] YPL093W--YDR496C YER006W--YMR049C YER006W--YMR290C YFR052W--YHR200W
## [33] YOR261C--YFR004W YHR052W--YDR496C YDL140C--YBR154C YDR394W--YOR259C
## [37] YDR280W--YGR195W YOR260W--YDR211W YMR193W--YML009C YGR162W--YOL139C
## + ... omitted several edges
# Connected components: run the decomposition once and reuse the result
# instead of calling components() again for every field that is inspected.
comp <- components(g)

# Number of connected components.
comp$no
## [1] 92

# Size of each component, in component-id order.
comp$csize
## [1] 2375 3 5 5 7 6 4 3 5 2 7 2 2 7 3
## [16] 2 2 4 2 2 2 2 2 2 2 2 2 2 2 2
## [31] 2 4 2 2 2 2 2 5 2 2 3 2 2 3 2
## [46] 2 5 3 3 2 2 2 2 3 3 2 2 2 2 2
## [61] 2 2 2 2 2 2 2 2 2 2 2 3 5 3 3
## [76] 4 2 2 2 2 2 3 2 2 4 2 2 2 2 2
## [91] 2 2
# Structure of the vertex attributes stored on the graph.
v_attrs <- vertex_attr(g)
glimpse(v_attrs)
## List of 3
## $ name : chr [1:2617] "YLR197W" "YOR039W" "YDR473C" "YOR332W" ...
## $ Class : chr [1:2617] "T" "T" "T" "O" ...
## $ Description: chr [1:2617] "SIK1 involved in pre-rRNA processing" "CKB2 casein kinase II beta' chain" "PRP3 essential splicing factor" "VMA4 H+-ATPase V1 domain 27 KD subunit, vacuolar" ...

# Structure of the edge attributes stored on the graph.
e_attrs <- edge_attr(g)
glimpse(e_attrs)
## List of 1
## $ Confidence: chr [1:11855] "high" "high" "high" "high" ...

# First ten values of the vertex attribute `Class`.
head(vertex_attr(g, name = 'Class'), n = 10)
## [1] "T" "T" "T" "O" "G" "F" "F" "O" "D" "C"

# First ten values of the edge attribute `Confidence`.
head(edge_attr(g, name = 'Confidence'), n = 10)
## [1] "high" "high" "high" "high" "high" "high" "high" "high" "high" "high"
# Draw a graph with the named ggraph/igraph layout, using the shared styling
# of the three network overview figures.
#
# graph    : the graph to draw.
# layout   : layout name passed on to ggraph().
# subtitle : subtitle shown under the common title.
# seed     : RNG seed set immediately before the layout is computed, so that
#            any randomness in the layout is reproducible between runs.
plot_yeast_layout <- function(graph, layout, subtitle, seed = 42) {
  set.seed(seed)
  ggraph(graph, layout = layout) +
    geom_edge_fan(edge_linetype = 3, color = 'dark blue', alpha = 0.25) +
    geom_node_point(color = 'dark red', size = 1, alpha = 0.75) +
    theme_graph(base_family = 'Helvetica') +
    labs(title = 'Yeast Interaction Network', subtitle = subtitle)
}

plot_yeast_layout(g, 'lgl',
                  'Displayed Using Layout Generator for Larger Graphs')
plot_yeast_layout(g, 'drl',
                  'Displayed Using Distributed Recursive Layout')
plot_yeast_layout(g, 'mds',
                  'Displayed Using Multidimensional Scaling Layout')
# Per-vertex structural measures, one row per vertex of `g`.
#
# The tibble is built column by column rather than via bind_cols(enframe(...)):
# that route depends on auto-repaired column names (`name...1`, `value...2`,
# ...) and needs suppressMessages() to hide the name-repair notices, so it
# breaks silently if the repair scheme or the column order ever changes.
# unname() drops the vertex names that igraph attaches to the measure vectors,
# matching what enframe() stored in its `value` column.
df <- tibble(
  name = V(g)$name,
  eccentricity = unname(eccentricity(g)),
  betweenness = unname(betweenness(g)),
  degree = unname(degree(g)),
  clustering = unname(transitivity(g, type = 'local'))
)

# Quick look at both ends of the table and at its column types.
head(df)
tail(df)
glimpse(df)
## Rows: 2,617
## Columns: 5
## $ name <chr> "YLR197W", "YOR039W", "YDR473C", "YOR332W", "YER090W", "Y…
## $ eccentricity <dbl> 9, 9, 11, 10, 9, 8, 9, 14, 13, 9, 10, 9, 11, 9, 10, 10, 9…
## $ betweenness <dbl> 4522.729086, 1274.738111, 12293.315437, 2009.662274, 9015…
## $ degree <dbl> 40, 19, 9, 13, 21, 37, 21, 5, 6, 2, 15, 23, 5, 24, 43, 13…
## $ clustering <dbl> 0.48333333, 0.69005848, 0.77777778, 0.57692308, 0.1952381…
# One-row summary of the network computed from the per-vertex table:
#   avg_deg      - mean degree
#   delta        - maximum degree
#   prop         - share of vertices whose degree does not exceed the mean
#   diam, radius - largest / smallest vertex eccentricity
#   avg_cc       - mean local clustering coefficient (missing values dropped)
#   avg_distance - mean_distance() over the graph with unconnected = TRUE
# NOTE(review): the component sizes printed earlier show the graph is
# disconnected, so min(eccentricity) may come from a small component rather
# than the large one; confirm this is the intended "radius".
avg_path_length <- mean_distance(g, directed = FALSE, unconnected = TRUE)

df %>%
  summarise(
    avg_deg = mean(degree),
    delta = max(degree),
    prop = sum(degree <= avg_deg) / n(),
    diam = max(eccentricity),
    radius = min(eccentricity),
    avg_cc = mean(clustering, na.rm = TRUE),
    avg_distance = avg_path_length
  )
# Mean path length as reported by mean_distance() with unconnected = TRUE;
# stored in `d` for the distance-distribution plot further down.
d <- mean_distance(g, directed = FALSE, unconnected = TRUE)
d
## [1] 5.095629

# Averaging the full distance matrix instead gives Inf (see output below),
# presumably because unreachable vertex pairs are reported as Inf.
all_pairs <- distances(g)
mean(all_pairs)
## [1] Inf

# Path-length counts ($res) and the count of unconnected pairs ($unconnected).
distance_table(g)
## $res
## [1] 11855 67910 276506 670068 786151 550965 274661 114390 43508 16152
## [11] 5473 1378 306 56 16
##
## $unconnected
## [1] 603641
# Empirical distribution of path lengths.
# distance_table() is evaluated once and reused (the original recomputed it
# three times), and seq_along() replaces 1:length() so an empty table cannot
# produce the c(1, 0) index sequence.
dist_counts <- distance_table(g)$res
D <- data.frame(x = seq_along(dist_counts),
                y = dist_counts / sum(dist_counts))

# Proportion of vertex pairs at each distance. The mean distance `d`
# (computed earlier in the script) is marked with a vertical reference line;
# geom_vline() states that intent directly instead of a geom_line() with a
# constant x, and spans the full panel height.
D %>%
  ggplot(aes(x = x, y = y)) +
  geom_point() +
  geom_vline(xintercept = d, color = 'blue') +
  labs(title = 'Distribution of Distance (Proportions) in the Yeast Network',
       x = 'distance', y = 'density')
# Degree distribution over all vertices, as a kernel density estimate and as
# a unit-width histogram on the density scale.
# after_stat(density) replaces the deprecated `..density..` notation.
df %>%
  ggplot(aes(x = degree, y = after_stat(density))) +
  geom_density(fill = 'red') +
  labs(title = 'KDE of Degrees in the Yeast Network')

df %>%
  ggplot(aes(x = degree, y = after_stat(density))) +
  geom_histogram(binwidth = 1, fill = 'blue') +
  labs(title = 'Histogram of Degrees in the Yeast Network')
# Same two degree-distribution views, restricted to vertices with degree of
# at most 20 so the bulk of the distribution is readable.
# after_stat(density) replaces the deprecated `..density..` notation.
df %>%
  filter(degree <= 20) %>%
  ggplot(aes(x = degree, y = after_stat(density))) +
  geom_density(fill = 'red') +
  labs(title = 'KDE of Degrees in the Yeast Network',
       subtitle = TeX('for Nodes with Degree $\\leq 20$'))

df %>%
  filter(degree <= 20) %>%
  ggplot(aes(x = degree, y = after_stat(density))) +
  geom_histogram(binwidth = 1, fill = 'blue') +
  labs(title = 'Histogram of Degrees in the Yeast Network',
       subtitle = TeX('for Nodes with Degree $\\leq 20$'))
# Mean local clustering coefficient per degree value, on log-log axes.
# Missing clustering values are dropped before averaging; degrees whose mean
# is still undefined are removed by geom_point(na.rm = TRUE) without a warning.
# The x axis shows the degree itself, so it is labelled k (the original label
# `p_k` did not match the plotted variable).
df %>%
  group_by(degree) %>%
  summarise(cc_deg = mean(clustering, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = degree, y = cc_deg)) +
  geom_point(na.rm = TRUE, color = 'blue') +
  scale_x_log10() +
  scale_y_log10() +
  labs(title = 'Relation Between Local Clustering Coefficient and Degree',
       subtitle = 'in the Yeast Network',
       x = TeX('$k$'), y = TeX('$C_k$'))
# Distribution of the local clustering coefficient, as a kernel density
# estimate and as a histogram with bins of width 0.1. Missing coefficients
# are dropped silently via na.rm = TRUE.
# after_stat(density) replaces the deprecated `..density..` notation.
df %>%
  ggplot(aes(x = clustering, y = after_stat(density))) +
  geom_density(fill = 'red', na.rm = TRUE) +
  labs(title = 'KDE of Local Clustering Coefficients in the Yeast Network')

df %>%
  ggplot(aes(x = clustering, y = after_stat(density))) +
  geom_histogram(binwidth = .1, fill = 'blue', na.rm = TRUE) +
  labs(title = 'Histogram of Local Clustering Coefficients in the Yeast Network')
# Ratio log(N) / log(mean degree), printed next to the observed mean distance
# and the diameter for comparison.
n_vertices <- gorder(g)
avg_degree <- mean(df$degree)
log(n_vertices) / log(avg_degree)
## [1] 3.570896

# Observed mean path length.
mean_distance(g, directed = FALSE, unconnected = TRUE)
## [1] 5.095629

# Observed diameter.
diameter(g)
## [1] 15
# Reference levels for the C(k) plot:
#   C - average local clustering coefficient (missing values dropped)
#   M - mean degree; M / gorder(g) is drawn as the random-model level
C <- mean(df$clustering, na.rm = TRUE)
M <- mean(df$degree)

# Mean local clustering coefficient per degree value. Computed once and
# reused for both the printed table and the plot (the original ran the same
# summary twice). na.rm = TRUE matches the earlier per-degree summary, so one
# undefined coefficient cannot blank out a whole degree group.
cc_by_degree <- df %>%
  group_by(degree) %>%
  summarise(cc_deg = mean(clustering, na.rm = TRUE)) %>%
  ungroup()
cc_by_degree

# C(k) against k on log-log axes, with the two reference levels drawn as
# horizontal lines. geom_hline() replaces geom_line() with a constant y and
# spans the full panel width.
cc_by_degree %>%
  ggplot(aes(x = degree, y = cc_deg)) +
  geom_point(na.rm = TRUE, color = 'blue') +
  geom_hline(yintercept = C, color = 'blue') +
  geom_hline(yintercept = M / gorder(g), color = 'red') +
  scale_x_log10() +
  scale_y_log10() +
  labs(title = 'Relation Between Local Clustering Coefficient and Degree',
       subtitle = 'The blue line is the average local clustering coefficient; \nthe red one is the one predicted by the random model.',
       x = 'k', y = TeX('$C(k)$'))
# Scatter plot of betweenness against degree on linear axes.
ggplot(data = df, mapping = aes(x = degree, y = betweenness)) +
  geom_point(na.rm = TRUE, size = 0.5, color = 'red') +
  labs(title = 'Relationship Between Betweenness Centrality and Degree')
# Betweenness against degree with a log10 y axis.
# A tiny offset is added so that vertices with zero betweenness stay on the
# log scale (they appear at 1e-08) instead of being dropped.
# The y label is wrapped in TeX() like the title; without it the raw LaTeX
# source ("$\log_{10}$(betweenness)") was printed on the axis.
df %>%
  ggplot(aes(x = degree, y = betweenness + 0.00000001)) +
  geom_point(na.rm = TRUE, size = 0.5, color = 'red') +
  scale_y_log10() +
  labs(title = TeX('Relationship Between $\\log_{10}$ of Betweenness Centrality and Degree'),
       y = TeX('$\\log_{10}$(betweenness)'))
# Betweenness against degree with a log10 y axis, keeping only vertices with
# strictly positive betweenness so that no offset is needed.
positive_btw <- filter(df, betweenness > 0)

ggplot(data = positive_btw, mapping = aes(x = degree, y = betweenness)) +
  geom_point(na.rm = TRUE, size = 0.5, color = 'red') +
  scale_y_log10() +
  labs(title = TeX('Relationship Between $\\log_{10}$ of Betweenness Centrality and Degree'),
       y = TeX('$\\log_{10}$(betweenness)'))
# Scatter plot of eccentricity against degree.
ggplot(data = df, mapping = aes(x = degree, y = eccentricity)) +
  geom_point(na.rm = TRUE, size = 0.5, color = 'orange') +
  labs(title = 'Relationship Between Eccentricity and Degree')

# Scatter plot of the local clustering coefficient against degree; vertices
# without a defined coefficient are dropped silently via na.rm = TRUE.
ggplot(data = df, mapping = aes(x = degree, y = clustering)) +
  geom_point(na.rm = TRUE, size = 0.5, color = 'blue') +
  labs(title = 'Relationship Between Local Clustering Coefficient and Degree')