# =========================
# 1. LOAD PACKAGES
# =========================
# if (!requireNamespace("igraph", quietly = TRUE)) install.packages("igraph")
# if (!requireNamespace("visNetwork", quietly = TRUE)) install.packages("visNetwork")
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(visNetwork)
# =========================
# 2. FILE NAMES
# =========================
files <- c(
"control_01.report",
"control_02.report",
"treatment_01.report",
"treatment_02_report"
)
# =========================
# 3. PARSE KRAKEN REPORTS (GENUS LEVEL)
# =========================
data_list <- lapply(files, function(f) {
lines <- readLines(f)
parsed <- lapply(lines, function(line) {
parts <- strsplit(line, "\t")[[1]]
if (length(parts) < 6) return(NULL)
data.frame(
percent = as.numeric(parts[1]),
rank = parts[4],
name = trimws(parts[6]),
stringsAsFactors = FALSE
)
})
df <- do.call(rbind, parsed)
# keep genus level
df <- df[df$rank == "G", ]
df <- df[, c("name", "percent")]
colnames(df) <- c("Taxon", f)
df
})
# =========================
# 4. MERGE INTO MATRIX
# =========================
merged <- Reduce(function(x, y) merge(x, y, by="Taxon", all=TRUE), data_list)
merged[is.na(merged)] <- 0
rownames(merged) <- merged$Taxon
merged$Taxon <- NULL
data_t <- t(merged)
# =========================
# 2. START FROM ALL TAXA
# data_t should already exist:
# rows = samples, cols = taxa
# =========================
mat <- data_t
# =========================
# SELECT MOST RELEVANT TAXA (~25%)
# =========================
# remove zero-variance taxa
vars <- apply(mat, 2, var, na.rm = TRUE)
mat <- mat[, vars > 0, drop = FALSE]
# compute abundance + variance scores
means <- colMeans(mat, na.rm = TRUE)
variances <- apply(mat, 2, var, na.rm = TRUE)
# normalize both to same scale
means_scaled <- (means - min(means)) / (max(means) - min(means))
vars_scaled <- (variances - min(variances)) / (max(variances) - min(variances))
# combined score (equal weight)
score <- means_scaled + vars_scaled
# keep top 25%
n_keep <- ceiling(length(score) * 0.25)
top_taxa <- names(sort(score, decreasing = TRUE))[1:n_keep]
mat <- mat[, top_taxa, drop = FALSE]
# check result
dim(mat)
## [1] 4 41
# remove taxa with zero variance across samples
vars <- apply(mat, 2, var, na.rm = TRUE)
mat <- mat[, vars > 0, drop = FALSE]
# keep only the most informative taxa so the graph is readable
# change 80 to 50 or 100 if needed
taxa_means <- colMeans(mat, na.rm = TRUE)
top_n <- min(80, ncol(mat))
top_taxa <- names(sort(taxa_means, decreasing = TRUE))[1:top_n]
mat <- mat[, top_taxa, drop = FALSE]
# =========================
# 3. CORRELATION MATRIX
# =========================
cor_matrix <- cor(mat, method = "spearman", use = "pairwise.complete.obs")
# =========================
# 4. THRESHOLD EDGES
# =========================
threshold <- 0.6
adj_signed <- cor_matrix
adj_signed[abs(adj_signed) < threshold] <- 0
diag(adj_signed) <- 0
# remove isolated nodes
keep_nodes <- colSums(abs(adj_signed)) > 0
adj_signed <- adj_signed[keep_nodes, keep_nodes, drop = FALSE]
# safety check
if (ncol(adj_signed) == 0) {
stop("No nodes survived the threshold. Lower 'threshold' to 0.5 or 0.4.")
}
# =========================
# 5. BUILD GRAPH
# use absolute weights for layout only
# =========================
adj_abs <- abs(adj_signed)
graph <- graph_from_adjacency_matrix(
adj_abs,
mode = "undirected",
weighted = TRUE,
diag = FALSE
)
# =========================
# 6. EDGE COLORS
# blue = positive, red = negative
# =========================
edge_list <- as_edgelist(graph)
edge_colors <- apply(edge_list, 1, function(e) {
val <- adj_signed[e[1], e[2]]
if (val > 0) {
"#2563EB" # blue
} else {
"#DC2626" # red
}
})
E(graph)$color <- edge_colors
E(graph)$width <- pmax(E(graph)$weight * 6, 1)
# =========================
# 7. NODE COLORS
# red = higher in ACVD
# blue = higher in control
# =========================
group <- c("control", "control", "ACVD", "ACVD")
diff <- colMeans(mat[group == "ACVD", , drop = FALSE], na.rm = TRUE) -
colMeans(mat[group == "control", , drop = FALSE], na.rm = TRUE)
node_diff <- diff[match(V(graph)$name, names(diff))]
V(graph)$color <- ifelse(node_diff > 0, "#DC2626", "#2563EB")
# =========================
# 8. NODE SIZE + LABELS
# =========================
deg <- degree(graph)
V(graph)$size <- scales::rescale(deg, to = c(4, 10))
V(graph)$label <- V(graph)$name
V(graph)$label.cex <- 1.0
# =========================
# 9. LAYOUT
# more iterations = more spread
# =========================
set.seed(1)
lay <- layout_with_fr(graph, niter = 5000)
# expand layout a bit more
lay <- lay * 1.8
# =========================
# 10. STATIC PLOT
# =========================
plot(
graph,
layout = lay,
vertex.size = V(graph)$size,
vertex.color = V(graph)$color,
vertex.label = V(graph)$label,
vertex.label.cex = 1.0,
vertex.label.color = "black",
vertex.frame.color = NA,
edge.width = E(graph)$width,
edge.color = E(graph)$color,
edge.curved = 0.15,
margin = 0.2
)
# =========================
# 11. INTERACTIVE VERSION
# hover / click / zoom
# =========================
nodes <- data.frame(
id = V(graph)$name,
label = V(graph)$name,
color = V(graph)$color,
value = V(graph)$size,
title = paste0(
"<b>", V(graph)$name, "</b><br>",
"Degree: ", degree(graph)
),
font.size = 40
)
edges <- data.frame(
from = edge_list[, 1],
to = edge_list[, 2],
color = E(graph)$color,
width = E(graph)$width,
title = paste0("Correlation sign: ", ifelse(E(graph)$color == "#2563EB", "positive", "negative"))
)
visNetwork(nodes, edges, width = "100%", height = "800px") %>%
visEdges(smooth = list(enabled = TRUE, type = "dynamic")) %>%
visNodes(font = list(size = 28, face = "arial")) %>%
visPhysics(
solver = "forceAtlas2Based",
forceAtlas2Based = list(
gravitationalConstant = -80,
centralGravity = 0.005,
springLength = 180,
springConstant = 0.08,
damping = 0.9,
avoidOverlap = 1
),
stabilization = list(enabled = TRUE, iterations = 2000)
) %>%
visInteraction(
hover = TRUE,
navigationButtons = TRUE,
zoomView = TRUE,
dragView = TRUE
) %>%
visOptions(
highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
nodesIdSelection = TRUE
)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.