Fly (Drosophila Medulla) Connectome

Loading Packages

library(tidyverse)

library(igraph)

library(igraphdata)

library(ggraph)

library(latex2exp)

Loading the Data Set

Data from NeuroData’s Graph DataBase

Data Source: Takemura, Sy., Bharioke, A., Lu, Z. et al. A visual motion detection circuit suggested by Drosophila connectomics. Nature 500, 175–181 (2013). https://doi.org/10.1038/nature12450

g <- read_graph('https://s3.amazonaws.com/connectome-graphs/fly/drosophila_medulla_1.graphml', 
                format = 'graphml')
V(g)
## + 1781/1781 vertices, named, from e3ccd35:
##    [1] 200                   R7 205                C2 445362            
##    [4] C2 214                R8 469338             606290               
##    [7] R7 217                R8 111                545947               
##   [10] L3 206                1607897               R8 216               
##   [13] 5708                  518376                221                  
##   [16] 373383                539827                510627               
##   [19] Tangential 531715     487051                L1 209               
##   [22] Dm9 178127            5738                  L3 520639            
##   [25] 494524                510632                510128               
##   [28] L1 189938             L3 507445             502293               
## + ... omitted several vertices
E(g)
## + 33641/33641 edges from e3ccd35 (vertex names):
##  [1] 200      ->201       200      ->unknown   200      ->unknown  
##  [4] 200      ->unknown   R7 205   ->L3 206    R7 205   ->204      
##  [7] C2 445362->unknown   C2 445362->L3 520639 C2 445362->unknown  
## [10] 200      ->L2 198    200      ->unknown   200      ->unknown  
## [13] 200      ->L3 491118 200      ->unknown   200      ->unknown  
## [16] 200      ->L5 226    C2 214   ->Mi15 222  C2 214   ->204      
## [19] C2 214   ->L5 208    C2 214   ->L3 206    R7 205   ->L3 206   
## [22] R7 205   ->L2 212    C2 214   ->Mi15 222  C2 214   ->L1 209   
## [25] C2 214   ->L3 206    C2 214   ->L5 208    R7 205   ->204      
## [28] R7 205   ->R8 111    R8 469338->L1 196    200      ->L5 226   
## + ... omitted several edges
components(g)$no
## [1] 6
components(g)$csize
## [1] 1770    3    2    2    2    2
glimpse(vertex_attr(g))
## List of 2
##  $ name: chr [1:1781] "200" "R7 205" "C2 445362" "C2 214" ...
##  $ id  : chr [1:1781] "n0" "n1" "n2" "n3" ...
glimpse(edge_attr(g))
## List of 7
##  $ pre.x               : num [1:33641] 5697 5697 5697 5697 4240 ...
##  $ post.x              : num [1:33641] 5726 5734 5703 5738 4179 ...
##  $ pre.y               : num [1:33641] 7408 7408 7408 7408 5833 ...
##  $ post.y              : num [1:33641] 7373 7417 7449 7456 5809 ...
##  $ pre.z               : num [1:33641] 168 168 168 168 169 169 169 169 169 169 ...
##  $ post.z              : num [1:33641] 166 167 167 168 169 171 168 169 169 169 ...
##  $ proofreading.details: chr [1:33641] "2 reached same anchor body" "Both are orphan" "Both are orphan" "Both are orphan" ...
vertex_attr(g, name = 'name')[1:10]
##  [1] "200"       "R7 205"    "C2 445362" "C2 214"    "R8 469338" "606290"   
##  [7] "R7 217"    "R8 111"    "545947"    "L3 206"
edge_attr(g, name = 'proofreading.details')[1:10]
##  [1] "2 reached same anchor body" "Both are orphan"           
##  [3] "Both are orphan"            "Both are orphan"           
##  [5] "Densely Named"              "Densely Named"             
##  [7] "Both are orphan"            "Densely Named"             
##  [9] "Both are orphan"            "1 reached named body"

Visualizing the Fly Connectome

set.seed(42)

ggraph(g, layout = 'lgl') + 
  geom_edge_fan(edge_linetype = 3, color = 'dark blue', alpha = 0.25) + 
  geom_node_point(color = 'dark red', size = 1, alpha = 0.75) + 
  theme_graph(base_family = 'Helvetica') +
  labs(title = 'Fly Connectome',
       subtitle = 'Displayed Using Layout Generator for Larger Graphs')

set.seed(42)

ggraph(g, layout = 'drl') + 
  geom_edge_fan(edge_linetype = 3, color = 'dark blue', alpha = 0.25) + 
  geom_node_point(color = 'dark red', size = 1, alpha = 0.75) + 
  theme_graph(base_family = 'Helvetica') +
  labs(title = 'Fly Connectome',
       subtitle = 'Displayed Using Distributed Recursive Layout')

set.seed(42)

ggraph(g, layout = 'mds') + 
  geom_edge_fan(edge_linetype = 3, color = 'dark blue', alpha = 0.25) + 
  geom_node_point(color = 'dark red', size = 1, alpha = 0.75) + 
  theme_graph(base_family = 'Helvetica') +
  labs(title = 'Fly Connectome',
       subtitle = 'Displayed Using Multidimensional Scaling Layout')

Summary Statistics of the Fly Connectome

suppressMessages(df <- bind_cols(enframe(eccentricity(g, mode = 'out')), 
                                 enframe(betweenness(g)), 
                                 enframe(degree(g, mode = 'out')), 
                                 enframe(transitivity(g, type = c('local')))))

df <- df %>% select(name...1, value...2, value...4, value...6, value...8)

names(df) <- c('name', 'eccentricity', 'betweenness', 'outdegree', 'clustering')

head(df)
tail(df)
glimpse(df)
## Rows: 1,781
## Columns: 5
## $ name         <chr> "200", "R7 205", "C2 445362", "C2 214", "R8 469338", "606…
## $ eccentricity <dbl> 8, 7, 7, 7, 8, 7, 8, 6, 8, 6, 7, 7, 6, 7, 7, 8, 7, 8, 7, …
## $ betweenness  <dbl> 0.000000, 4367.658291, 9871.554739, 11257.573198, 0.00000…
## $ outdegree    <dbl> 44, 92, 44, 171, 1, 1, 8, 157, 4, 199, 7, 23, 21, 9, 3, 9…
## $ clustering   <dbl> 0.33333333, 0.12000000, 0.40000000, 0.15238095, NaN, NaN,…
df %>% 
  summarize(avg_deg = mean(outdegree), 
            delta = max(outdegree), 
            prop = sum(outdegree <= avg_deg) / n(), 
            diam = max(eccentricity),
            radius = min(eccentricity),
            avg_cc = mean(clustering, na.rm = TRUE),
            avg_distance = mean_distance(g, directed = TRUE, unconnected = TRUE))
(d <- mean_distance(g, directed = TRUE, unconnected = TRUE))
## [1] 4.020874
mean(distances(g))
## [1] Inf

Fig. 2.18(a) on p. 66

distance_table(g)
## $res
##  [1]   9630  98354 397533 543397 322438 112376  27478   5277    701     79
## [11]     13
## 
## $unconnected
## [1] 1652904
D <- data.frame(1:length(distance_table(g)$res), 
                distance_table(g)$res / sum(distance_table(g)$res))

names(D) <- c('x', 'y')

D %>% 
  ggplot(aes(x = x, y = y)) + 
  geom_point() + 
  geom_line(aes(x = d), color = 'blue') +
  labs(title = 'Distribution of Distance (Proportions) in the Fly Connectome') +
  labs(x = 'distance', y = 'density')

The Outdegree Distribution

df %>% 
  ggplot(aes(x = outdegree, y = ..density..)) + 
  geom_density(fill = 'red') +
  labs(title = 'KDE of Outdegrees in the Fly Connectome')

df %>% 
  ggplot(aes(x = outdegree, y = ..density..)) + 
  geom_histogram(binwidth = 1, fill = 'blue') +
  labs(title = 'Histogram of Outdegrees in the Fly Connectome')

df %>% 
  filter(outdegree <= 20) %>% 
  ggplot(aes(x = outdegree, y = ..density..)) + 
  geom_density(fill = 'red') + 
  labs(title = 'KDE of Outdegrees in the Fly Connectome',
       subtitle = TeX('for Nodes with Outdegree $\\leq 20$'))

df %>% 
  filter(outdegree <= 20) %>% 
  ggplot(aes(x = outdegree, y = ..density..)) + 
  geom_histogram(binwidth = 1, fill = 'blue') +
  labs(title = 'Histogram of Outdegrees in the Fly Connectome',
       subtitle = TeX('for Nodes with Outdegree $\\leq 20$'))

Fig. 2.18(b) on p. 66

df %>% 
  group_by(outdegree) %>% 
  summarise(cc_deg = mean(clustering, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(outdegree > 0) %>%
  ggplot(aes(x = outdegree, y = cc_deg)) + 
  geom_point(na.rm = TRUE, color = 'blue') + 
  scale_x_log10() + 
  scale_y_log10() +
  labs(title = 'Relation Between Local Clustering Coefficient and Outdegree',
       subtitle = 'in the Fly Connectome') +
  labs(x = TeX('$p_k$'), y = TeX('$C_k$'))

Local Clustering Coefficient Distribution

df %>% 
  ggplot(aes(x = clustering, y = ..density..)) + 
  geom_density(fill = 'red', na.rm = TRUE) +
  labs(title = 'KDE of Local Clustering Coefficients in the Fly Connectome')

df %>% 
ggplot(aes(x = clustering, y = ..density..)) + 
  geom_histogram(binwidth = .1, fill = 'blue', na.rm = TRUE) +
  labs(title = 'Histogram of Local Clustering Coefficients in the Fly Connectome')

log(gorder(g)) / log(mean(df$outdegree))
## [1] 2.547133
mean_distance(g, directed = TRUE, unconnected = TRUE)
## [1] 4.020874
diameter(g)
## [1] 11
C <- mean(df$clustering, na.rm = TRUE)

M <- mean(df$outdegree)

df %>%
  group_by(outdegree) %>% 
  summarise(cc_deg = mean(clustering)) %>%
  ungroup()

Fig. 3.13(d) on p. 96

df %>% 
  group_by(outdegree) %>% 
  summarise(cc_deg = mean(clustering)) %>%
  filter(outdegree > 0) %>%
  ggplot(aes(x = outdegree, y = cc_deg)) + 
  geom_point(na.rm = TRUE, color = 'blue') + 
  geom_line(aes(y = C), color = 'blue') + 
  geom_line(aes(y = M / gorder(g)), color = 'red') + 
  scale_x_log10() + 
  scale_y_log10() + 
  labs(title =  'Relation Between Local Clustering Coefficient and Outdegree',
       subtitle = 'The blue line is the average local clustering coefficient; \nthe red one is the one predicted by the random model.') +
  labs(x = 'k', y = TeX('$C(k)$'))

Visualizing Other Relations with Outdegree

df %>%
  filter(outdegree > 0) %>%
  ggplot(aes(x = outdegree, y = betweenness)) + 
  geom_point(na.rm = TRUE, size = 0.5, color = 'red') + 
  scale_x_log10() +
  labs(title = 'Relationship Between Betweenness Centrality and Outdegree') +
  labs(x = TeX('$\\log_{10}$(outdegree)'))

df %>% 
  filter(outdegree > 0) %>%
  ggplot(aes(x = outdegree, y = betweenness + 0.00000001)) + 
  geom_point(na.rm = TRUE, size = 0.5, color = 'red') + 
  scale_y_log10() +
  labs(title = TeX('Relationship Between $\\log_{10}$ of Betweenness Centrality and Outdegree')) +
  labs(y = TeX('$\\log_{10}$(betweenness)'))

df %>% 
  filter(betweenness > 0, outdegree > 0) %>% 
  ggplot(aes(x = outdegree, y = betweenness)) + 
  geom_point(na.rm = TRUE, size = 0.5, color = 'red') + 
  scale_y_log10() +
  scale_x_log10() +
  labs(title = TeX('Relationship Between $\\log_{10}$ of Betweenness Centrality and $\\log_{10}$ of Outdegree')) +
  labs(y = TeX('$\\log_{10}$(betweenness)'),
       x = TeX('$\\log_{10}$(outdegree)'))

df %>% 
  filter(outdegree > 0) %>%
  ggplot(aes(x = outdegree, y = eccentricity)) + 
  geom_point(na.rm = TRUE, size = 0.5, color = 'orange') + 
  scale_x_log10() +
  labs(title = TeX('Relationship Between Eccentricity and $\\log_{10}$ of Outdegree')) +
  labs(x = TeX('$\\log_{10}$(outdegree)'))

df %>% 
  filter(outdegree > 0) %>%
  ggplot(aes(x = outdegree, y = clustering)) + 
  geom_point(na.rm = TRUE, size = 0.5, color = 'blue') +
  scale_x_log10() +
  labs(title = TeX('Relationship Between Local Clustering Coefficient and $\\log_{10}$ of Outdegree')) +
  labs(x = TeX('$\\log_{10}$(outdegree)'))

References