# ==== EDIT THESE THREE LINES ====
filename <- "ClassProject_votus_6samples_coverm.tsv" # Excel file name
tpm_threshold <- 10 # keep vOTUs with max TPM > this
heatmap_colors <- c("#440154", "#31688e", "#35b779", "#fde725")
# =================================
library(readr)
library(readxl)
library(pheatmap)
# 1. Read the Excel file
cov <- read_tsv(filename)
## Rows: 72 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Contig
## dbl (18): sample1_jbs172_sorted Mean, sample1_jbs172_sorted RPKM, sample1_jb...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2. Keep Contig and TPM columns only
tpm_cols <- grepl("TPM$", names(cov))
cov_tpm <- cov[ , c("Contig", names(cov)[tpm_cols])]
# Remove S1k141_26921_full
cov_tpm <- subset(cov_tpm, Contig != "S1k141_26921||full")
# 3. Optional filter: drop very low-abundance vOTUs
cov_tpm$max_tpm <- apply(cov_tpm[ , -1], 1, max, na.rm = TRUE)
cov_tpm <- subset(cov_tpm, max_tpm > tpm_threshold)
cov_tpm$max_tpm <- NULL
# If everything got filtered out (threshold too high), warn and stop
if (nrow(cov_tpm) == 0) {
stop("No vOTUs passed the TPM threshold. Try lowering tpm_threshold.")
}
# 4. Make matrix for heatmap (rows = vOTUs, cols = samples)
mat <- as.matrix(cov_tpm[ , -1])
rownames(mat) <- cov_tpm$Contig
# 5. Log-transform for nicer color scaling
mat_log <- log10(mat + 1)
# 6. Draw heat map
pheatmap(mat_log,
cluster_rows = TRUE,
cluster_cols = TRUE,
scale = "none",
color = colorRampPalette(heatmap_colors)(100),
fontsize_row = 8,
fontsize_col = 10,
main = "vOTU relative abundance (log10 TPM + 1)")

## Each session:
library(vegan)
## Loading required package: permute
library(pheatmap)
library(RColorBrewer)
library(readr)
## 1. Read your exact file
abund_raw <- read_tsv(
"ClassProject_votus_6samples_coverm.tsv"
)
## Rows: 72 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Contig
## dbl (18): sample1_jbs172_sorted Mean, sample1_jbs172_sorted RPKM, sample1_jb...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## 2. First column is vOTU / Contig ID. Make that the rownames.
rownames(abund_raw) <- abund_raw$Contig
## Warning: Setting row names on a tibble is deprecated.
## 3. Drop the Contig column so we have a numeric matrix only
abund <- abund_raw[, -1]
## 4. Quick sanity check
dim(abund)
## [1] 72 18
head(rownames(abund)) # vOTU IDs
## [1] "1" "2" "3" "4" "5" "6"
colnames(abund) # sample1_jbs172_sorted TPM, etc.
## [1] "sample1_jbs172_sorted Mean" "sample1_jbs172_sorted RPKM"
## [3] "sample1_jbs172_sorted TPM" "sample2_mjd356_sorted Mean"
## [5] "sample2_mjd356_sorted RPKM" "sample2_mjd356_sorted TPM"
## [7] "sample3_mrk143_sorted Mean" "sample3_mrk143_sorted RPKM"
## [9] "sample3_mrk143_sorted TPM" "sample6_mmw162_sorted Mean"
## [11] "sample6_mmw162_sorted RPKM" "sample6_mmw162_sorted TPM"
## [13] "sample7_pj288_sorted Mean" "sample7_pj288_sorted RPKM"
## [15] "sample7_pj288_sorted TPM" "SRR37587572_sorted Mean"
## [17] "SRR37587572_sorted RPKM" "SRR37587572_sorted TPM"
##per sample richness and diversity
## Transpose: now rows = samples, columns = vOTUs
abund_t <- t(abund)
## Optional: drop samples with total abundance = 0
abund_t <- abund_t[rowSums(abund_t) > 0, , drop = FALSE]
## 1. Species richness per sample (number of vOTUs with >0)
richness <- specnumber(abund_t) # named vector, names are sample IDs
## 2. Shannon diversity per sample
shannon <- diversity(abund_t, index = "shannon")
abund <- abund_raw[, -1]
# Force numeric
abund <- as.data.frame(lapply(abund, as.numeric))
sapply(abund, is.numeric)
## sample1_jbs172_sorted.Mean sample1_jbs172_sorted.RPKM
## TRUE TRUE
## sample1_jbs172_sorted.TPM sample2_mjd356_sorted.Mean
## TRUE TRUE
## sample2_mjd356_sorted.RPKM sample2_mjd356_sorted.TPM
## TRUE TRUE
## sample3_mrk143_sorted.Mean sample3_mrk143_sorted.RPKM
## TRUE TRUE
## sample3_mrk143_sorted.TPM sample6_mmw162_sorted.Mean
## TRUE TRUE
## sample6_mmw162_sorted.RPKM sample6_mmw162_sorted.TPM
## TRUE TRUE
## sample7_pj288_sorted.Mean sample7_pj288_sorted.RPKM
## TRUE TRUE
## sample7_pj288_sorted.TPM SRR37587572_sorted.Mean
## TRUE TRUE
## SRR37587572_sorted.RPKM SRR37587572_sorted.TPM
## TRUE TRUE
## 3. Combine into a data.frame
alpha_div <- data.frame(
Sample = rownames(abund_t),
Richness = richness,
Shannon = shannon
)
alpha_div
## Sample Richness Shannon
## sample1_jbs172_sorted Mean sample1_jbs172_sorted Mean 17 0.07833997
## sample1_jbs172_sorted RPKM sample1_jbs172_sorted RPKM 17 0.08150328
## sample1_jbs172_sorted TPM sample1_jbs172_sorted TPM 17 0.08150325
## sample2_mjd356_sorted Mean sample2_mjd356_sorted Mean 15 0.05610716
## sample2_mjd356_sorted RPKM sample2_mjd356_sorted RPKM 15 0.05830023
## sample2_mjd356_sorted TPM sample2_mjd356_sorted TPM 15 0.05830022
## sample3_mrk143_sorted Mean sample3_mrk143_sorted Mean 47 0.39097504
## sample3_mrk143_sorted RPKM sample3_mrk143_sorted RPKM 48 0.41819529
## sample3_mrk143_sorted TPM sample3_mrk143_sorted TPM 48 0.41819532
## sample6_mmw162_sorted Mean sample6_mmw162_sorted Mean 27 1.67629073
## sample6_mmw162_sorted RPKM sample6_mmw162_sorted RPKM 27 1.77657424
## sample6_mmw162_sorted TPM sample6_mmw162_sorted TPM 27 1.77657421
## sample7_pj288_sorted Mean sample7_pj288_sorted Mean 31 2.14928297
## sample7_pj288_sorted RPKM sample7_pj288_sorted RPKM 31 2.20659071
## sample7_pj288_sorted TPM sample7_pj288_sorted TPM 31 2.20659070
## Barplot of richness PER SAMPLE
barplot(
alpha_div$Richness,
names.arg = alpha_div$Sample, # force sample IDs on x-axis
las = 2,
ylab = "vOTU richness (count)",
main = "Per-sample viral richness"
)

## Barplot of Shannon PER SAMPLE
barplot(
alpha_div$Shannon,
names.arg = alpha_div$Sample,
las = 2,
ylab = "Shannon diversity",
main = "Per-sample viral diversity"
)
