# =========================
# 1. LOAD PACKAGES
# =========================
# if (!requireNamespace("igraph", quietly = TRUE)) install.packages("igraph")
# if (!requireNamespace("visNetwork", quietly = TRUE)) install.packages("visNetwork")

library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(visNetwork)


# =========================
# 2. FILE NAMES
# =========================
files <- c(
  "control_01.report",
  "control_02.report",
  "treatment_01.report",
  "treatment_02_report"
)

# =========================
# 3. PARSE KRAKEN REPORTS (GENUS LEVEL)
# =========================
data_list <- lapply(files, function(f) {
  
  lines <- readLines(f)
  
  parsed <- lapply(lines, function(line) {
    parts <- strsplit(line, "\t")[[1]]
    
    if (length(parts) < 6) return(NULL)
    
    data.frame(
      percent = as.numeric(parts[1]),
      rank = parts[4],
      name = trimws(parts[6]),
      stringsAsFactors = FALSE
    )
  })
  
  df <- do.call(rbind, parsed)
  
  # keep genus level
  df <- df[df$rank == "G", ]
  
  df <- df[, c("name", "percent")]
  colnames(df) <- c("Taxon", f)
  
  df
})

# =========================
# 4. MERGE INTO MATRIX
# =========================
merged <- Reduce(function(x, y) merge(x, y, by="Taxon", all=TRUE), data_list)
merged[is.na(merged)] <- 0

rownames(merged) <- merged$Taxon
merged$Taxon <- NULL

data_t <- t(merged)

# =========================
# 2. START FROM ALL TAXA
#    data_t should already exist:
#    rows = samples, cols = taxa
# =========================
mat <- data_t

# =========================
# SELECT MOST RELEVANT TAXA (~25%)
# =========================

# remove zero-variance taxa
vars <- apply(mat, 2, var, na.rm = TRUE)
mat <- mat[, vars > 0, drop = FALSE]

# compute abundance + variance scores
means <- colMeans(mat, na.rm = TRUE)
variances <- apply(mat, 2, var, na.rm = TRUE)

# normalize both to same scale
means_scaled <- (means - min(means)) / (max(means) - min(means))
vars_scaled  <- (variances - min(variances)) / (max(variances) - min(variances))

# combined score (equal weight)
score <- means_scaled + vars_scaled

# keep top 25%
n_keep <- ceiling(length(score) * 0.25)
top_taxa <- names(sort(score, decreasing = TRUE))[1:n_keep]

mat <- mat[, top_taxa, drop = FALSE]

# check result
dim(mat)
## [1]  4 41
# remove taxa with zero variance across samples
vars <- apply(mat, 2, var, na.rm = TRUE)
mat <- mat[, vars > 0, drop = FALSE]


# keep only the most informative taxa so the graph is readable
# change 80 to 50 or 100 if needed
taxa_means <- colMeans(mat, na.rm = TRUE)
top_n <- min(80, ncol(mat))
top_taxa <- names(sort(taxa_means, decreasing = TRUE))[1:top_n]
mat <- mat[, top_taxa, drop = FALSE]

# =========================
# 3. CORRELATION MATRIX
# =========================
cor_matrix <- cor(mat, method = "spearman", use = "pairwise.complete.obs")

# =========================
# 4. THRESHOLD EDGES
# =========================
threshold <- 0.6

adj_signed <- cor_matrix
adj_signed[abs(adj_signed) < threshold] <- 0
diag(adj_signed) <- 0

# remove isolated nodes
keep_nodes <- colSums(abs(adj_signed)) > 0
adj_signed <- adj_signed[keep_nodes, keep_nodes, drop = FALSE]

# safety check
if (ncol(adj_signed) == 0) {
  stop("No nodes survived the threshold. Lower 'threshold' to 0.5 or 0.4.")
}

# =========================
# 5. BUILD GRAPH
#    use absolute weights for layout only
# =========================
adj_abs <- abs(adj_signed)

graph <- graph_from_adjacency_matrix(
  adj_abs,
  mode = "undirected",
  weighted = TRUE,
  diag = FALSE
)

# =========================
# 6. EDGE COLORS
#    blue = positive, red = negative
# =========================
edge_list <- as_edgelist(graph)

edge_colors <- apply(edge_list, 1, function(e) {
  val <- adj_signed[e[1], e[2]]
  if (val > 0) {
    "#2563EB"   # blue
  } else {
    "#DC2626"   # red
  }
})

E(graph)$color <- edge_colors
E(graph)$width <- pmax(E(graph)$weight * 6, 1)

# =========================
# 7. NODE COLORS
#    red = higher in ACVD
#    blue = higher in control
# =========================
group <- c("control", "control", "ACVD", "ACVD")

diff <- colMeans(mat[group == "ACVD", , drop = FALSE], na.rm = TRUE) -
  colMeans(mat[group == "control", , drop = FALSE], na.rm = TRUE)

node_diff <- diff[match(V(graph)$name, names(diff))]
V(graph)$color <- ifelse(node_diff > 0, "#DC2626", "#2563EB")

# =========================
# 8. NODE SIZE + LABELS
# =========================
deg <- degree(graph)
V(graph)$size <- scales::rescale(deg, to = c(4, 10))
V(graph)$label <- V(graph)$name
V(graph)$label.cex <- 1.0

# =========================
# 9. LAYOUT
#    more iterations = more spread
# =========================
set.seed(1)
lay <- layout_with_fr(graph, niter = 5000)

# expand layout a bit more
lay <- lay * 1.8

# =========================
# 10. STATIC PLOT
# =========================
plot(
  graph,
  layout = lay,
  vertex.size = V(graph)$size,
  vertex.color = V(graph)$color,
  vertex.label = V(graph)$label,
  vertex.label.cex = 1.0,
  vertex.label.color = "black",
  vertex.frame.color = NA,
  edge.width = E(graph)$width,
  edge.color = E(graph)$color,
  edge.curved = 0.15,
  margin = 0.2
)

# =========================
# 11. INTERACTIVE VERSION
#    hover / click / zoom
# =========================
nodes <- data.frame(
  id = V(graph)$name,
  label = V(graph)$name,
  color = V(graph)$color,
  value = V(graph)$size,
  title = paste0(
    "<b>", V(graph)$name, "</b><br>",
    "Degree: ", degree(graph)
  ),
  font.size = 40
)

edges <- data.frame(
  from = edge_list[, 1],
  to = edge_list[, 2],
  color = E(graph)$color,
  width = E(graph)$width,
  title = paste0("Correlation sign: ", ifelse(E(graph)$color == "#2563EB", "positive", "negative"))
)

visNetwork(nodes, edges, width = "100%", height = "800px") %>%
  visEdges(smooth = list(enabled = TRUE, type = "dynamic")) %>%
  visNodes(font = list(size = 28, face = "arial")) %>%
  visPhysics(
    solver = "forceAtlas2Based",
    forceAtlas2Based = list(
      gravitationalConstant = -80,
      centralGravity = 0.005,
      springLength = 180,
      springConstant = 0.08,
      damping = 0.9,
      avoidOverlap = 1
    ),
    stabilization = list(enabled = TRUE, iterations = 2000)
  ) %>%
  visInteraction(
    hover = TRUE,
    navigationButtons = TRUE,
    zoomView = TRUE,
    dragView = TRUE
  ) %>%
  visOptions(
    highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
    nodesIdSelection = TRUE
  )

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.