Disease Transmission Network Analysis

Author

Timothy Achala

Published

May 4, 2026

# Install packages if needed
pkgs <- c("igraph", "ggraph", "tidygraph", "ggplot2", "dplyr", "tibble", "scales", "knitr")
invisible(lapply(pkgs, function(p) {
  if (!requireNamespace(p, quietly = TRUE)) install.packages(p, quiet = TRUE)
}))

library(igraph)
library(ggraph)
library(tidygraph)
library(ggplot2)
library(dplyr)
library(tibble)
library(scales)
library(knitr)

set.seed(42)
n <- 30
communities <- rep(1:4, times = c(8, 8, 7, 7))
names(communities) <- paste0("P", sprintf("%02d", 1:n))

nodes <- tibble(
  name        = names(communities),
  community   = factor(paste0("Community ", communities)),
  status      = sample(c("Infected","Recovered","Susceptible"), n,
                       replace = TRUE, prob = c(0.40, 0.35, 0.25)),
  age_group   = sample(c("<18","18-40","41-60",">60"), n,
                       replace = TRUE, prob = c(0.15, 0.35, 0.30, 0.20)),
  degree_cent = NA_real_   # filled after graph build
)

# Edges: within-community (high prob) + cross-community (low prob)
edge_list <- list()
for (i in 1:(n - 1)) {
  for (j in (i + 1):n) {
    same <- communities[i] == communities[j]
    p    <- ifelse(same, 0.28, 0.05)
    if (runif(1) < p) {
      edge_list[[length(edge_list) + 1]] <-
        c(names(communities)[i], names(communities)[j])
    }
  }
}
edges <- as.data.frame(do.call(rbind, edge_list),
                       stringsAsFactors = FALSE)
colnames(edges) <- c("from", "to")
edges$transmission_prob <- runif(nrow(edges), 0.05, 0.95)
edges$contact_type <- sample(c("Household","Workplace","Social"),
                             nrow(edges), replace = TRUE, prob = c(0.3, 0.4, 0.3))

# Build igraph object
g <- graph_from_data_frame(edges, directed = FALSE, vertices = nodes)

# Centrality measures
nodes$degree_cent    <- degree(g, normalized = TRUE)
nodes$betweenness    <- betweenness(g, normalized = TRUE)
nodes$closeness      <- closeness(g, normalized = TRUE)
V(g)$degree_cent     <- nodes$degree_cent
V(g)$betweenness     <- nodes$betweenness
V(g)$status          <- nodes$status
V(g)$community       <- as.character(nodes$community)

tg <- as_tbl_graph(g)

# Summary statistics
n_infected  <- sum(nodes$status == "Infected")
n_recovered <- sum(nodes$status == "Recovered")
n_suscept   <- sum(nodes$status == "Susceptible")
attack_rate <- round(n_infected / n * 100, 1)

1 Executive Summary

This report analyses a simulated disease outbreak affecting 30 individuals across 4 communities. Network methods identify high-risk individuals, transmission pathways, and key structural features that drive epidemic spread.

Metric Value
Total individuals 30
Infected 7 (23.3%)
Recovered 12
Susceptible 11
Total contacts 41
Network density 0.094

2 Transmission Network Structure

status_cols <- c("Infected" = "#D32F2F", "Recovered" = "#388E3C", "Susceptible" = "#1976D2")

ggraph(tg, layout = "fr") +
  geom_edge_link(
    aes(width = transmission_prob, colour = contact_type),
    alpha = 0.55, show.legend = TRUE
  ) +
  geom_node_point(
    aes(colour = status, size = degree_cent * 15 + 3)
  ) +
  geom_node_text(
    aes(label = name), size = 2.3, repel = TRUE,
    max.overlaps = 15, colour = "grey20"
  ) +
  scale_edge_width(range = c(0.3, 2.2), name = "Trans. Prob.") +
  scale_edge_colour_manual(
    values = c("Household" = "#E65100", "Workplace" = "#6A1B9A", "Social" = "#00838F"),
    name = "Contact Type"
  ) +
  scale_colour_manual(values = status_cols, name = "Status") +
  scale_size_identity() +
  theme_graph(base_family = "sans") +
  theme(legend.position = "right",
        plot.title = element_text(face = "bold", size = 13)) +
  labs(title = "Disease Transmission Network")

Figure 1: Figure 1. Disease transmission network. Node colour indicates infection status; node size reflects degree centrality (connectivity). Edge width reflects transmission probability.

Interpretation: Densely connected nodes (larger circles) act as potential super-spreaders. Cross-community edges (bridges) are critical conduits for inter-cluster transmission.


3 Community-Level Analysis

comm_summary <- nodes %>%
  group_by(community, status) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(community) %>%
  mutate(pct = n / sum(n))

ggplot(comm_summary, aes(x = community, y = n, fill = status)) +
  geom_col(position = "stack", colour = "white", linewidth = 0.3) +
  geom_text(
    aes(label = ifelse(pct > 0.08, paste0(round(pct * 100), "%"), "")),
    position = position_stack(vjust = 0.5),
    size = 3.2, colour = "white", fontface = "bold"
  ) +
  scale_fill_manual(values = status_cols, name = "Status") +
  labs(
    title  = "Infection Status by Community",
    x = NULL, y = "Number of Individuals"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "top",
        panel.grid.major.x = element_blank(),
        plot.title = element_text(face = "bold"))

Figure 2: Figure 2. Infection status by community. Bar heights show counts; numbers inside bars show percentages.

4 Centrality & Superspreader Risk

ggplot(nodes, aes(x = degree_cent, y = betweenness, colour = status, size = closeness)) +
  geom_point(alpha = 0.85) +
  geom_vline(xintercept = median(nodes$degree_cent), linetype = "dashed",
             colour = "grey50", linewidth = 0.5) +
  geom_hline(yintercept = median(nodes$betweenness), linetype = "dashed",
             colour = "grey50", linewidth = 0.5) +
  ggrepel::geom_text_repel(
    data = nodes %>% filter(betweenness > quantile(betweenness, 0.75)),
    aes(label = name), size = 2.8, max.overlaps = 10
  ) +
  scale_colour_manual(values = status_cols, name = "Status") +
  scale_size(range = c(2, 7), name = "Closeness") +
  annotate("text", x = max(nodes$degree_cent) * 0.82,
           y = max(nodes$betweenness) * 0.92,
           label = "High-risk\nsuperspreaders", colour = "#D32F2F",
           size = 3, fontface = "italic") +
  labs(
    title = "Centrality Analysis: Superspreader Identification",
    x = "Degree Centrality", y = "Betweenness Centrality"
  ) +
  theme_minimal(base_size = 11) +
  theme(plot.title = element_text(face = "bold"),
        legend.position = "right")

Figure 3: Figure 3. Betweenness vs degree centrality. Individuals in the upper-right quadrant are high-priority targets for intervention.

5 Transmission Probability Distribution

ggplot(edges, aes(x = transmission_prob, fill = contact_type)) +
  geom_histogram(binwidth = 0.08, colour = "white", linewidth = 0.25,
                 position = "identity", alpha = 0.75) +
  scale_fill_manual(
    values = c("Household" = "#E65100", "Workplace" = "#6A1B9A", "Social" = "#00838F"),
    name = "Contact Type"
  ) +
  facet_wrap(~contact_type, ncol = 3) +
  labs(
    title = "Transmission Probability by Contact Type",
    x = "Transmission Probability", y = "Count"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "none",
        strip.text = element_text(face = "bold"),
        plot.title = element_text(face = "bold"))

Figure 4: Figure 4. Distribution of transmission probabilities by contact type.

6 Key Findings & Recommendations

top5 <- nodes %>%
  arrange(desc(betweenness)) %>%
  slice_head(n = 5) %>%
  select(Individual = name, Community = community,
         Status = status, Degree = degree_cent,
         Betweenness = betweenness) %>%
  mutate(across(c(Degree, Betweenness), ~round(., 3)))

kable(top5, caption = "Top 5 individuals by betweenness centrality (highest intervention priority)")
Top 5 individuals by betweenness centrality (highest intervention priority)
Individual Community Status Degree Betweenness
P17 Community 3 Susceptible 0.172 0.358
P13 Community 2 Susceptible 0.138 0.266
P07 Community 1 Recovered 0.172 0.236
P10 Community 2 Recovered 0.138 0.181
P02 Community 1 Susceptible 0.138 0.174

Key findings:

  1. Attack rate stands at 23.3% — indicating active community spread.
  2. Top superspreaders (Table above) bridge multiple communities and should be prioritised for contact tracing, testing, and isolation.
  3. Household contacts carry the highest per-contact risk; workplace contacts drive the most connections by volume.
  4. Cross-community bridges are the primary mechanism of inter-cluster spread; targeted intervention on these links can significantly reduce outbreak size.

Recommended actions:

  • Immediately isolate/test the top 5 high-betweenness individuals.
  • Strengthen household transmission controls (ventilation, masking) in communities with the highest infection rates.
  • Deploy targeted contact tracing along workplace contact chains.
  • Monitor and reinforce cross-community bridge edges with ring-vaccination or prophylactic treatment.