# configuring figure size
options(repr.plot.width = 6, repr.plot.height = 4)
# uncomment the following lines if you have not installed qdread/forestscaling; for more information, see https://github.com/qdread/forestscaling/
#install.packages('remotes')
#remotes::install_github('qdread/forestscaling')
library(tidyverse)
library(igraph)
library(igraphdata)
library(ggraph)
library(e1071)
library(forestscaling)
library(latex2exp)
igraphdata::yeastData Source: von Mering, C., Krause, R., Snel, B. et al. Comparative assessment of large-scale data sets of protein–protein interactions. Nature 417, 399–403 (2002). https://doi.org/10.1038/nature750
data(yeast)
g <- yeast
set.seed(42)
ggraph(g, layout = 'mds') +
geom_edge_fan(edge_linetype = 3, color = 'dark blue', alpha = 0.25) +
geom_node_point(color = 'dark red', size = .75, alpha = 0.75) +
theme_graph(base_family = 'Helvetica') +
labs(title = 'Yeast Interaction Network',
subtitle = 'Displayed Using Multidimensional Scaling Layout')
suppressMessages(df <- bind_cols(enframe(eccentricity(g)),
enframe(betweenness(g)),
enframe(degree(g)),
enframe(transitivity(g, type = c('local')))))
df <- df %>% select(name...1, value...2, value...4, value...6, value...8)
names(df) <- c('name', 'eccentricity', 'betweenness', 'degree', 'clustering')
head(df)
tail(df)
glimpse(df)
## Rows: 2,617
## Columns: 5
## $ name <chr> "YLR197W", "YOR039W", "YDR473C", "YOR332W", "YER090W", "Y…
## $ eccentricity <dbl> 9, 9, 11, 10, 9, 8, 9, 14, 13, 9, 10, 9, 11, 9, 10, 10, 9…
## $ betweenness <dbl> 4522.729086, 1274.738111, 12293.315437, 2009.662274, 9015…
## $ degree <dbl> 40, 19, 9, 13, 21, 37, 21, 5, 6, 2, 15, 23, 5, 24, 43, 13…
## $ clustering <dbl> 0.48333333, 0.69005848, 0.77777778, 0.57692308, 0.1952381…
df %>%
summarize(avg_deg = mean(degree),
delta = max(degree),
prop = sum(degree <= avg_deg) / n(),
diam = max(eccentricity),
radius = min(eccentricity),
avg_cc = mean(clustering, na.rm = TRUE),
avg_distance = mean_distance(g, directed = FALSE, unconnected = TRUE))
df %>%
ggplot(aes(x = degree, y = ..density..)) +
geom_density(fill = 'red') +
labs(title = 'KDE of Degrees in the Yeast Data Set')
df %>%
ggplot(aes(x = degree, y = ..density..)) +
geom_histogram(binwidth = 1, fill = 'blue') +
labs(title = 'Histogram of Degrees in the Yeast Data Set')
# the top 20 degree values
df %>% arrange(desc(degree)) %>% head(20)
df %>%
summarize(avg_deg = mean(df$degree),
sd_deg = sd(df$degree),
second_moment_deg = moment(df$degree, 2),
var_deg = moment(df$degree, 2) - moment(df$degree,1)**2,
sd_deg_formula = sqrt(moment(df$degree, 2) - moment(df$degree,1)**2))
(pl.fit <- fit_power_law(df$degree, implementation = 'plfit'))
## $continuous
## [1] FALSE
##
## $alpha
## [1] 2.445538
##
## $xmin
## [1] 13
##
## $logLik
## [1] -2056.231
##
## $KS.stat
## [1] 0.0694019
##
## $KS.p
## [1] 0.01166682
fit_power_law(df$degree, implementation = 'plfit', force.continuous = TRUE)
## $continuous
## [1] TRUE
##
## $alpha
## [1] 2.465169
##
## $xmin
## [1] 12
##
## $logLik
## [1] -2172.849
##
## $KS.stat
## [1] 0.08090561
##
## $KS.p
## [1] 0.001090263
# maximum likelihood optimizing function
fit_power_law(df$degree, implementation = 'R.mle')
##
## Call:
## stats4::mle(minuslogl = mlogl, start = list(alpha = start))
##
## Coefficients:
## alpha
## 1.502747
# maximum likelihood optimizing function
fit_power_law(df$degree, xmin = 13, implementation = 'R.mle')
##
## Call:
## stats4::mle(minuslogl = mlogl, start = list(alpha = start))
##
## Coefficients:
## alpha
## 2.444644
total <- sum(df$degree)
df %>%
group_by(degree) %>%
mutate(k = degree, p_k = n() / gorder(g)) %>%
ungroup() %>%
ggplot(aes(x = k, y = p_k)) +
geom_point(color = 'blue', size = .5) +
scale_x_log10() +
scale_y_log10() +
labs(title = 'Log-Log Scale, Linear Binning for Degree Distribution',
subtitle = 'in the Yeast Data Set') +
labs(y = TeX('$p_k$'))
df %>%
group_by(degree) %>%
mutate(k = degree, count = n()) %>%
ungroup() %>%
ggplot(aes(x = k, y = count)) +
geom_point(color = 'red', size = .5) +
labs(title = 'Degree Distribution (Counts) in the Yeast Data Set')
df %>%
group_by(degree) %>%
mutate(k = degree, p_k = n() / gorder(g)) %>%
ungroup() %>%
ggplot(aes(x = k, y = p_k)) +
geom_point(color = 'blue', size = .5) +
labs(title = 'Degree Distribution (Proportions) in the Yeast Data Set') +
labs(y = TeX('$p_k$'))
df %>%
group_by(degree) %>%
mutate(k = degree, p_k = n() / gorder(g)) %>%
ungroup() %>%
ggplot(aes(x = k, y = p_k)) +
geom_point(color = 'blue', size = .5) +
geom_line(aes(x = degree, y = degree ** (-pl.fit$alpha)),
color = 'red') +
scale_x_log10() +
scale_y_log10() +
labs(y = TeX('$p_k$')) +
labs(title = 'Log-Log Scale, Linear Binning for Degree Distribution',
subtitle = 'with Fitted Power Law Curve in the Yeast Data Set')
df2 <- logbin(df$degree, n = 100) #change n to 10, 15, 20, 25, 50, 75 to compare
df2
total2 <- sum(df2$bin_value)
df2 %>%
filter(bin_count != 0) %>%
ggplot(aes(x = bin_midpoint, y = bin_value / total2)) +
geom_point(color = 'blue', size = .5) +
geom_line(data = df,
aes(x = degree, y = degree ** (-pl.fit$alpha)),
color = 'red') +
scale_x_log10() +
scale_y_log10() +
labs(y = TeX('$p_k$'), x = 'k') +
labs(title = 'Log-Log Scale, Logarithmic Binning for Degree Distribution',
subtitle = 'with Fitted Power Law Curve in the Yeast Data Set')
mean_distance(g, directed = FALSE, unconnected = TRUE)
## [1] 5.095629
log(log(vcount(g)))
## [1] 2.063031
set.seed(42)
h <- sample_degseq(df$degree, method = 'simple.no.multiple') # method = c('simple', 'vl', 'simple.no.multiple')
set.seed(42)
ggraph(h, layout = 'mds') +
geom_edge_fan(edge_linetype = 3, color = 'dark blue', alpha = 0.25) +
geom_node_point(color = 'dark red', size = .75, alpha = 0.75) +
theme_graph(base_family = 'Helvetica') +
labs(title = 'A Reconfigured Yeast Interaction Network',
subtitle = 'Displayed Using Multidimensional Scaling Layout')
suppressMessages(df3 <- bind_cols(enframe(eccentricity(h)),
enframe(betweenness(h)),
enframe(degree(h)),
enframe(transitivity(h, type = c('local')))))
df3 <- df3 %>% select(name...1, value...2, value...4, value...6, value...8)
names(df3) <- c('name', 'eccentricity', 'betweenness', 'degree', 'clustering')
df3 %>%
summarize(avg_deg = mean(degree),
delta = max(degree),
prop = sum(degree <= avg_deg) / n(),
diam = max(eccentricity),
radius = min(eccentricity),
avg_cc = mean(clustering, na.rm = TRUE))
df3 %>%
ggplot(aes(x = degree, y = ..density..)) +
geom_density(fill = 'red') +
labs(title = 'KDE of Degrees in the Reconfigured Yeast Data Set')
df3 %>%
ggplot(aes(x = degree, y = ..density..)) +
geom_histogram(binwidth = 1, fill = 'blue') +
labs(title = 'Histogram of Degrees in the Reconfigured Yeast Data Set')
(pl.fit2 <- fit_power_law(df3$degree, implementation = 'plfit'))
## $continuous
## [1] FALSE
##
## $alpha
## [1] 2.445538
##
## $xmin
## [1] 13
##
## $logLik
## [1] -2056.231
##
## $KS.stat
## [1] 0.0694019
##
## $KS.p
## [1] 0.01166682
df3 %>%
group_by(degree) %>%
mutate(k = degree, p_k = n() / gorder(h)) %>%
ungroup() %>%
ggplot(aes(x = k, y = p_k)) +
geom_point(color = 'blue', size = .5) +
labs(title = 'Degree Distribution (Proportions) in the Reconfigured Yeast Data Set') +
labs(y = TeX('$p_k$'))
df4 <- logbin(df3$degree, n = 100)
total4 <- sum(df4$bin_value)
df4 %>%
filter(bin_count != 0) %>%
ggplot(aes(x = bin_midpoint, y = bin_value / total4)) +
geom_point(color = 'blue', size = .5) +
geom_line(data = df3,
aes(x = degree, y = degree ** (-pl.fit2$alpha)),
color = 'red') +
scale_x_log10() +
scale_y_log10() +
labs(y = TeX('$p_k$'), x = 'k') +
labs(title = 'Log-Log Scale, Logarithmic Binning for Degree Distribution',
subtitle = 'with Fitted Power Law Curve in the Reconfigured Yeast Data Set')
set.seed(42)
data(yeast)
f <- yeast
f <- rewire(f, keeping_degseq(niter = gorder(f) * 10))
set.seed(42)
ggraph(f, layout = 'mds') +
geom_edge_fan(edge_linetype = 3, color = 'dark blue', alpha = 0.25) +
geom_node_point(color = 'dark red', size = .75, alpha = 0.75) +
theme_graph(base_family = 'Helvetica') +
labs(title = 'A Randomized Yeast Interaction Network',
subtitle = 'Displayed Using Multidimensional Scaling Layout')
suppressMessages(df5 <- bind_cols(enframe(eccentricity(f)),
enframe(betweenness(f)),
enframe(degree(f)),
enframe(transitivity(f, type = c('local')))))
df5 <- df5 %>% select(name...1, value...2, value...4, value...6, value...8)
names(df5) <- c('name', 'eccentricity', 'betweenness', 'degree', 'clustering')
df5 %>%
summarize(avg_deg = mean(degree),
delta = max(degree),
prop = sum(degree <= avg_deg) / n(),
diam = max(eccentricity),
radius = min(eccentricity),
avg_cc = mean(clustering, na.rm = TRUE))
df5 %>%
ggplot(aes(x = degree, y = ..density..)) +
geom_density(fill = 'red') +
labs(title = 'KDE of Degrees in the Randomized Yeast Data Set')
df5 %>%
ggplot(aes(x = degree, y = ..density..)) +
geom_histogram(binwidth = 1, fill = 'blue') +
labs(title = 'Histrogram of Degrees in the Randomized Yeast Data Set')
(pl.fit3 <- fit_power_law(df5$degree, implementation = 'plfit'))
## $continuous
## [1] FALSE
##
## $alpha
## [1] 2.445538
##
## $xmin
## [1] 13
##
## $logLik
## [1] -2056.231
##
## $KS.stat
## [1] 0.0694019
##
## $KS.p
## [1] 0.01166682
df5 %>%
group_by(degree) %>%
mutate(k = degree, p_k = n() / gorder(f)) %>%
ungroup() %>%
ggplot(aes(x = k, y = p_k)) +
geom_point(color = 'blue', size = .5) +
labs(title = 'Degree Distribution (Proportions) in the Randomized Yeast Data Set') +
labs(y = TeX('$p_k$'))
df6 <- logbin(df5$degree, n = 100)
total6 <- sum(df6$bin_value)
df6 %>% filter(bin_count != 0) %>%
ggplot(aes(x = bin_midpoint, y = bin_value/total6)) +
geom_point(color = 'blue', size = .5) +
geom_line(data = df5,
aes(x = degree, y = degree ** (-pl.fit3$alpha)),
color = 'red') +
scale_x_log10() +
scale_y_log10() +
labs(y = TeX('$p_k$'), x = 'k') +
labs(title = 'Log-Log Scale, Logarithmic Binning for Degree Distribution',
subtitle = 'with Fitted Power Law Curve in the Randomized Yeast Data Set')