Code
# Attach each package once, one call per line.
library(gggenomes)
library(ggnewscale)
library(dplyr)
library(ggplot2)

We have 3 bacterial genomes (strains A, B, C) with:
# ── SEQUENCES (one row per genome/contig) ──────────────────────────
# One record per genome: identifier, display description, and length.
# The identifiers and descriptions are derived from the letters A-C.
seqs <- tibble(
  seq_id   = paste0("Genome", LETTERS[1:3]),
  seq_desc = paste("Strain", LETTERS[1:3]),
  length   = c(12000, 11500, 13000)
)
# ── GENES (features on each genome) ───────────────────────────────
# Written row-wise so each gene reads as one record: the genome it sits
# on, its start/end coordinates, its strand (1 or -1), and its name.
genes <- tribble(
  ~seq_id,   ~start, ~end,  ~strand, ~name,
  # GenomeA: six genes
  "GenomeA",    500,  1200,       1, "dnaA",
  "GenomeA",   2000,  3000,      -1, "gyrB",
  "GenomeA",   4000,  5200,       1, "recA",
  "GenomeA",   6000,  7200,       1, "mutS",
  "GenomeA",   8500,  9800,      -1, "rpoB",
  "GenomeA",  10500, 11500,       1, "16S",
  # GenomeB: five genes (no mutS row)
  "GenomeB",    400,  1100,       1, "dnaA",
  "GenomeB",   2200,  3300,      -1, "gyrB",
  "GenomeB",   4500,  5800,       1, "recA",
  "GenomeB",   7000,  8200,      -1, "rpoB",
  "GenomeB",   9500, 11000,       1, "16S",
  # GenomeC: six genes
  "GenomeC",    300,  1100,       1, "dnaA",
  "GenomeC",   2100,  3100,       1, "gyrB",
  "GenomeC",   4200,  5500,      -1, "recA",
  "GenomeC",   6500,  7700,       1, "mutS",
  "GenomeC",   8800, 10200,      -1, "rpoB",
  "GenomeC",  11000, 12500,       1, "16S"
)

# Sequential feature ids g1..g17, in row order, appended as last column.
genes$feat_id <- paste0("g", seq_len(nrow(genes)))
# ── LINKS (pairwise similarity between gene regions) ──────────────
# Each row pairs a region on `seq_id` (start/end) with a region on
# `seq_id2` (start2/end2) and carries an identity value for that pair.
# Both regions of a row use the coordinates of the same gene in `genes`,
# so every link joins a gene to its counterpart in the other genome:
#   A dnaA - B dnaA, A gyrB - B gyrB, A dnaA - C dnaA,
#   B gyrB - C gyrB, B recA - C recA.
# NOTE(review): GenomeA and GenomeC are not neighbours in `seqs`;
# gggenomes appears to draw only links between adjacent tracks by
# default (`adjacent_only`), so the A-C row may not be rendered - confirm.
links <- tibble(
  seq_id   = c("GenomeA", "GenomeA", "GenomeA", "GenomeB", "GenomeB"),
  start    = c(500, 2000, 500, 2200, 4500),
  end      = c(1200, 3000, 1200, 3300, 5800),
  seq_id2  = c("GenomeB", "GenomeB", "GenomeC", "GenomeC", "GenomeC"),
  start2   = c(400, 2200, 300, 2100, 4200),
  end2     = c(1100, 3300, 1100, 3100, 5500),
  identity = c(98.2, 91.5, 95.0, 88.3, 92.1)
)

# Fill colour per gene name, used by the manual fill scale of the plot.
# `16S` is backtick-quoted because it is not a syntactic R name.
gene_colors <- c(
  dnaA  = "#E63946",
  gyrB  = "#F4A261",
  recA  = "#2A9D8F",
  mutS  = "#457B9D",
  rpoB  = "#8338EC",
  `16S` = "#06D6A0"
)
# ── PLOT ──────────────────────────────────────────────────────────
# Layers are added in drawing order: links first, then the sequence
# lines and their labels, then the genes and gene tags on top.
gggenomes(seqs = seqs, genes = genes, links = links) +
  # Links are filled by identity on a continuous gradient.
  geom_link(aes(fill = identity), alpha = 0.6) +
  scale_fill_gradient(
    low = "#cce5f6", high = "#084c8d",
    name = "BLAST Identity (%)",
    guide = guide_colorbar(order = 2)
  ) +
  geom_seq() +
  geom_seq_label() +
  # Start a second, independent fill scale so genes can use discrete
  # colours while links keep the gradient above.
  new_scale_fill() +
  geom_gene(aes(fill = name), size = 5) +
  scale_fill_manual(
    values = gene_colors,
    name = "Gene / Function",
    guide = guide_legend(order = 1)
  ) +
  geom_gene_tag(aes(label = name), size = 2.8, nudge_y = 0.12) +
  theme_gggenomes_clean() +
  labs(
    title = "Comparative Genomics: Strains A, B & C"
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10, color = "grey40"),
    legend.position = "right"
  )

| Link | Identity | Interpretation |
|---|---|---|
| A ↔ B (dnaA) | 98.2% | Near-identical replication origin, very close strains |
| A ↔ B (gyrB) | 91.5% | Moderate divergence in DNA gyrase |
| A ↔ C (dnaA) | 95.0% | Good conservation but more distant than A–B |
| B ↔ C (gyrB) | 88.3% | Lowest identity, likely different clade |
| B ↔ C (recA) | 92.1% | Moderate conservation of recombination gene |

- Core genes (dnaA, gyrB, recA, rpoB, 16S) are present in all 3 genomes: housekeeping genes under purifying selection.
- mutS (mismatch repair) is absent in Genome B → possible loss of function; an elevated mutation rate is predicted.
- Differences in gyrB and mutS across strains suggest genomic rearrangements or inversion events.

Strains A and B are most closely related (highest synteny identity). Strain C shares conserved core functions but has diverged in gene order and genome size, consistent with a more distant phylogenetic position.