# Install packages if needed
pkgs <- c("igraph", "ggraph", "tidygraph", "ggplot2", "dplyr", "tibble", "scales", "knitr")
invisible(lapply(pkgs, function(p) {
if (!requireNamespace(p, quietly = TRUE)) install.packages(p, quiet = TRUE)
}))
library(igraph)
library(ggraph)
library(tidygraph)
library(ggplot2)
library(dplyr)
library(tibble)
library(scales)
library(knitr)
set.seed(42)Disease Transmission Network Analysis
n <- 30
communities <- rep(1:4, times = c(8, 8, 7, 7))
names(communities) <- paste0("P", sprintf("%02d", 1:n))
nodes <- tibble(
name = names(communities),
community = factor(paste0("Community ", communities)),
status = sample(c("Infected","Recovered","Susceptible"), n,
replace = TRUE, prob = c(0.40, 0.35, 0.25)),
age_group = sample(c("<18","18-40","41-60",">60"), n,
replace = TRUE, prob = c(0.15, 0.35, 0.30, 0.20)),
degree_cent = NA_real_ # filled after graph build
)
# Edges: within-community (high prob) + cross-community (low prob)
edge_list <- list()
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
same <- communities[i] == communities[j]
p <- ifelse(same, 0.28, 0.05)
if (runif(1) < p) {
edge_list[[length(edge_list) + 1]] <-
c(names(communities)[i], names(communities)[j])
}
}
}
edges <- as.data.frame(do.call(rbind, edge_list),
stringsAsFactors = FALSE)
colnames(edges) <- c("from", "to")
edges$transmission_prob <- runif(nrow(edges), 0.05, 0.95)
edges$contact_type <- sample(c("Household","Workplace","Social"),
nrow(edges), replace = TRUE, prob = c(0.3, 0.4, 0.3))
# Build igraph object
g <- graph_from_data_frame(edges, directed = FALSE, vertices = nodes)
# Centrality measures
nodes$degree_cent <- degree(g, normalized = TRUE)
nodes$betweenness <- betweenness(g, normalized = TRUE)
nodes$closeness <- closeness(g, normalized = TRUE)
V(g)$degree_cent <- nodes$degree_cent
V(g)$betweenness <- nodes$betweenness
V(g)$status <- nodes$status
V(g)$community <- as.character(nodes$community)
tg <- as_tbl_graph(g)
# Summary statistics
n_infected <- sum(nodes$status == "Infected")
n_recovered <- sum(nodes$status == "Recovered")
n_suscept <- sum(nodes$status == "Susceptible")
attack_rate <- round(n_infected / n * 100, 1)1 Executive Summary
This report analyses a simulated disease outbreak affecting 30 individuals across 4 communities. Network methods identify high-risk individuals, transmission pathways, and key structural features that drive epidemic spread.
| Metric | Value |
|---|---|
| Total individuals | 30 |
| Infected | 7 (23.3%) |
| Recovered | 12 |
| Susceptible | 11 |
| Total contacts | 41 |
| Network density | 0.094 |
2 Transmission Network Structure
status_cols <- c("Infected" = "#D32F2F", "Recovered" = "#388E3C", "Susceptible" = "#1976D2")
ggraph(tg, layout = "fr") +
geom_edge_link(
aes(width = transmission_prob, colour = contact_type),
alpha = 0.55, show.legend = TRUE
) +
geom_node_point(
aes(colour = status, size = degree_cent * 15 + 3)
) +
geom_node_text(
aes(label = name), size = 2.3, repel = TRUE,
max.overlaps = 15, colour = "grey20"
) +
scale_edge_width(range = c(0.3, 2.2), name = "Trans. Prob.") +
scale_edge_colour_manual(
values = c("Household" = "#E65100", "Workplace" = "#6A1B9A", "Social" = "#00838F"),
name = "Contact Type"
) +
scale_colour_manual(values = status_cols, name = "Status") +
scale_size_identity() +
theme_graph(base_family = "sans") +
theme(legend.position = "right",
plot.title = element_text(face = "bold", size = 13)) +
labs(title = "Disease Transmission Network")Interpretation: Densely connected nodes (larger circles) act as potential super-spreaders. Cross-community edges (bridges) are critical conduits for inter-cluster transmission.
3 Community-Level Analysis
comm_summary <- nodes %>%
group_by(community, status) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(community) %>%
mutate(pct = n / sum(n))
ggplot(comm_summary, aes(x = community, y = n, fill = status)) +
geom_col(position = "stack", colour = "white", linewidth = 0.3) +
geom_text(
aes(label = ifelse(pct > 0.08, paste0(round(pct * 100), "%"), "")),
position = position_stack(vjust = 0.5),
size = 3.2, colour = "white", fontface = "bold"
) +
scale_fill_manual(values = status_cols, name = "Status") +
labs(
title = "Infection Status by Community",
x = NULL, y = "Number of Individuals"
) +
theme_minimal(base_size = 11) +
theme(legend.position = "top",
panel.grid.major.x = element_blank(),
plot.title = element_text(face = "bold"))4 Centrality & Superspreader Risk
ggplot(nodes, aes(x = degree_cent, y = betweenness, colour = status, size = closeness)) +
geom_point(alpha = 0.85) +
geom_vline(xintercept = median(nodes$degree_cent), linetype = "dashed",
colour = "grey50", linewidth = 0.5) +
geom_hline(yintercept = median(nodes$betweenness), linetype = "dashed",
colour = "grey50", linewidth = 0.5) +
ggrepel::geom_text_repel(
data = nodes %>% filter(betweenness > quantile(betweenness, 0.75)),
aes(label = name), size = 2.8, max.overlaps = 10
) +
scale_colour_manual(values = status_cols, name = "Status") +
scale_size(range = c(2, 7), name = "Closeness") +
annotate("text", x = max(nodes$degree_cent) * 0.82,
y = max(nodes$betweenness) * 0.92,
label = "High-risk\nsuperspreaders", colour = "#D32F2F",
size = 3, fontface = "italic") +
labs(
title = "Centrality Analysis: Superspreader Identification",
x = "Degree Centrality", y = "Betweenness Centrality"
) +
theme_minimal(base_size = 11) +
theme(plot.title = element_text(face = "bold"),
legend.position = "right")5 Transmission Probability Distribution
ggplot(edges, aes(x = transmission_prob, fill = contact_type)) +
geom_histogram(binwidth = 0.08, colour = "white", linewidth = 0.25,
position = "identity", alpha = 0.75) +
scale_fill_manual(
values = c("Household" = "#E65100", "Workplace" = "#6A1B9A", "Social" = "#00838F"),
name = "Contact Type"
) +
facet_wrap(~contact_type, ncol = 3) +
labs(
title = "Transmission Probability by Contact Type",
x = "Transmission Probability", y = "Count"
) +
theme_minimal(base_size = 11) +
theme(legend.position = "none",
strip.text = element_text(face = "bold"),
plot.title = element_text(face = "bold"))6 Key Findings & Recommendations
top5 <- nodes %>%
arrange(desc(betweenness)) %>%
slice_head(n = 5) %>%
select(Individual = name, Community = community,
Status = status, Degree = degree_cent,
Betweenness = betweenness) %>%
mutate(across(c(Degree, Betweenness), ~round(., 3)))
kable(top5, caption = "Top 5 individuals by betweenness centrality (highest intervention priority)")| Individual | Community | Status | Degree | Betweenness |
|---|---|---|---|---|
| P17 | Community 3 | Susceptible | 0.172 | 0.358 |
| P13 | Community 2 | Susceptible | 0.138 | 0.266 |
| P07 | Community 1 | Recovered | 0.172 | 0.236 |
| P10 | Community 2 | Recovered | 0.138 | 0.181 |
| P02 | Community 1 | Susceptible | 0.138 | 0.174 |
Key findings:
- Attack rate stands at 23.3% — indicating active community spread.
- Top superspreaders (Table above) bridge multiple communities and should be prioritised for contact tracing, testing, and isolation.
- Household contacts carry the highest per-contact risk; workplace contacts drive the most connections by volume.
- Cross-community bridges are the primary mechanism of inter-cluster spread; targeted intervention on these links can significantly reduce outbreak size.
Recommended actions:
- Immediately isolate/test the top 5 high-betweenness individuals.
- Strengthen household transmission controls (ventilation, masking) in communities with the highest infection rates.
- Deploy targeted contact tracing along workplace contact chains.
- Monitor and reinforce cross-community bridge edges with ring-vaccination or prophylactic treatment.