This analysis performs differential gene expression analysis on RNA-seq data from Crow et al. (2020), examining maize apical tissue samples across multiple experimental factors:
The analysis uses the limma-voom pipeline to identify genes that respond to:
This model describes the log-transformed expression \(y_{g,i}\) of gene \(g\) in sample \(i\):
\[ \begin{aligned} y_{g,i} &= \beta_{g,0} + \beta_{g,D}\,D_i + \sum_{t=1}^{4}\beta_{g,T_t}\,T_{t,i} + \beta_{g,\text{temp}}\,\text{temp}_i \\ &\quad + \beta_{g,G}\,G_i + \beta_{g,\text{temp} \times G}\,(\text{temp}_i \times G_i) + \varepsilon_{g,i} \end{aligned} \]
with \(\varepsilon_{g,i} \sim N(0, \phi_i\sigma^2_{g})\)
| Symbol | Description | Notes |
|---|---|---|
| \(y_{g,i}\) | log\(_2\)(CPM) expression | Response variable |
| \(\beta_{g,0}\) | Intercept | Baseline: PT, V1_Root, CTRL temp, CTRL genotype |
| \(\beta_{g,D}\) | Donor effect (Mi21 vs PT) | Batch correction |
| \(\beta_{g,T_t}\) | Tissue effects | 4 dummy variables (V1_Root is reference) |
| \(\beta_{g,\text{temp}}\) | Cold temperature effect | Main effect |
| \(\beta_{g,G}\) | Inv4m genotype effect | Main effect |
| \(\beta_{g,\text{temp} \times G}\) | temperature × Genotype | Interaction (key effect) |
| \(\varepsilon_{g,i}\) | Residual error | Voom-weighted |
is_DEG)is_hiconf_DEG)is_selected_DEG)library(edgeR) # Differential expression analysis
library(limma) # Linear models for RNA-seq
library(rtracklayer) # Genomic annotation handling
library(GenomicRanges) # Genomic ranges operations
library(dplyr) # Data manipulation
library(tidyr) # Data manipulation
library(stringr) # String manipulation
library(ggplot2) # Plotting
library(ggpubr) # Publication ready plots
library(ggfx) # Extra effects for ggplots (shadows)
library(ggtext) # Formatted text in plots
library(robustbase) # Robust statistics (MCD for Mahalanobis)
counts <- read.csv("~/Desktop/crow2020_gene_xp.csv")
{
cat("Loaded expression data:\n")
cat(" Dimensions:", dim(counts), "\n")
cat(" Genes:", nrow(counts), "\n")
cat(" Samples:", ncol(counts) - 1, "\n")
}
## Loaded expression data:
## Dimensions: 39756 347
## Genes: 39756
## Samples: 346
quant_dir <- "/Volumes/DOE_CAREER/inv4m/run/results_quantify/kallisto_quant"
# Define paths
sra_file <- "/Volumes/DOE_CAREER/inv4m/crow2020/SraRunTable.csv"
sample_submission_file <- "/Volumes/DOE_CAREER/inv4m/crow2020/gc7_sample_submission.csv"
# Load data
sra_meta <- read.csv(sra_file, stringsAsFactors = FALSE, check.names = FALSE)
submission_meta <- read.csv(sample_submission_file, stringsAsFactors = FALSE)
# Standardize SRA metadata
sra_meta <- sra_meta %>%
rename(
samp = `Library Name`,
genotype = genotype,
temp = temp,
tissue = tissue,
Run = Run
)
# Join core metadata and filter to NIL lines of interest
metadata <- submission_meta %>%
filter(old_line %in% c("PT_NIL", "Mi21_NIL")) %>%
select(samp, old_line, ID, source, harvest_date, tube_num_full) %>%
inner_join(sra_meta, by = "samp") %>%
arrange(Run)
# Create analysis factors and sampling covariates
metadata <- metadata %>%
mutate(
# === Biological factors ===
# Donor
Donor = case_when(
grepl("^PT_", old_line) ~ "PT",
grepl("^Mi21_", old_line) ~ "Mi21",
TRUE ~ NA_character_
),
Donor = factor(Donor, levels = c("PT", "Mi21")),
# Genotype
Genotype = factor(genotype, levels = c("B73", "Inv4m")),
Genotype = recode(Genotype, "B73" = "CTRL", "Inv4m" = "Inv4m"),
# temperature
temp = factor(temp, levels = c("warm", "cold")),
temp = recode(temp, "warm" = "CTRL", "cold" = "cold"),
# Tissue — with leaf subtypes
Tissue = case_when(
tissue == "V1_Leaf_tissue" ~ "V1_Leaf",
tissue == "V1_Primary_root" ~ "V1_Root",
tissue == "V3_Leaf_tissue" ~ "V3_Leaf",
tissue == "V3_leaf_base" ~ "V3_Leaf_Base",
tissue == "V3_leaf_sheath" ~ "V3_Leaf_Sheath",
tissue == "V3_S2_leaf_base" ~ "V3_S2_Leaf_Base",
tissue == "V3_S2_leaf_tip" ~ "V3_S2_Leaf_Tip",
tissue == "V3_Primary_root" ~ "V3_Root",
tissue == "V3_Stem_SAM" ~ "V3_SAM",
TRUE ~ NA_character_
),
Tissue = factor(Tissue, levels = c(
"V1_Root", "V3_Root", "V1_Leaf", "V3_Leaf",
"V3_Leaf_Base", "V3_Leaf_Sheath",
"V3_S2_Leaf_Base", "V3_S2_Leaf_Tip", "V3_SAM"
)),
# === Sampling & technical covariates ===
# Unique plant identifier (from submission metadata)
plant_id = ID,
# Collection team (source column = technician/team ID)
collector_team = as.factor(source),
# Harvest date (biological sampling day)
harvest_date = as.Date(harvest_date, format = "%m/%d/%Y"),
# Plant serial number (numeric suffix of ID; proxy for planting/sampling order)
plant_serial_number = as.numeric(stringr::str_sub(ID, 5)),
# Within-plant tissue order (0 = leaf, 1 = root, 2 = base, etc.)
tissue_order_index = as.numeric(
stringr::str_extract(tube_num_full, "\\.[0-9]+")
) %>%
dplyr::coalesce(0) %>%
abs(),
# SRA upload time (proxy for library prep + sequencing batch)
sra_upload_time = as.POSIXct(create_date, format = "%Y-%m-%dT%H:%M:%OS", tz = "UTC"),
# Combined harvest batch (date + team)
harvest_batch = interaction(harvest_date, collector_team, drop = TRUE)
) %>%
# Remove samples with unmapped tissue
filter(!is.na(Tissue))
metadata$harvest_date_f <- factor(metadata$harvest_date)
{
cat("\nSample metadata:\n")
cat(" Total samples:", nrow(metadata), "\n")
cat(" Genotypes:", paste(unique(metadata$Genotype), collapse = ", "), "\n")
cat(" temperatures:", paste(unique(metadata$temp), collapse = ", "), "\n")
cat(" Donors:", paste(unique(metadata$Donor), collapse = ", "), "\n")
cat(" Tissues:", paste(levels(metadata$Tissue), collapse = ", "), "\n")
cat(" Collection teams:", paste(levels(metadata$collector_team), collapse = ", "), "\n")
cat(" Harvest dates:", paste(sort(unique(metadata$harvest_date)), collapse = ", "), "\n")
cat("\nTissue counts:\n")
print(sort(table(metadata$Tissue), decreasing = TRUE))
}
##
## Sample metadata:
## Total samples: 346
## Genotypes: CTRL, Inv4m
## temperatures: CTRL, cold
## Donors: PT, Mi21
## Tissues: V1_Root, V3_Root, V1_Leaf, V3_Leaf, V3_Leaf_Base, V3_Leaf_Sheath, V3_S2_Leaf_Base, V3_S2_Leaf_Tip, V3_SAM
## Collection teams: 1, 2
## Harvest dates: 2017-03-07, 2017-03-08, 2017-03-09, 2017-03-13, 2017-03-14, 2017-03-15, 2017-03-16, 2017-03-17, 2017-03-24, 2017-03-28, 2017-04-27, 2017-04-28, 2017-05-02, 2017-05-03, 2017-05-04, 2017-05-12, 2017-05-15, 2017-05-16
##
## Tissue counts:
##
## V3_Root V1_Root V3_SAM V1_Leaf V3_S2_Leaf_Base
## 42 41 40 39 38
## V3_Leaf V3_Leaf_Sheath V3_Leaf_Base V3_S2_Leaf_Tip
## 37 37 36 36
{
cat("\n=== Experimental Design Balance ===\n")
cat("\nDonor × Genotype × temperature:\n")
print(with(metadata, table(Donor, Genotype, temp)))
cat("\nTissue × Genotype × temperature:\n")
print(with(metadata, table(Tissue, Genotype, temp)))
cat("\nDonor × Genotype (overall):\n")
print(with(metadata, table(Donor, Genotype)))
}
##
## === Experimental Design Balance ===
##
## Donor × Genotype × temperature:
## , , temp = CTRL
##
## Genotype
## Donor CTRL Inv4m
## PT 58 40
## Mi21 51 50
##
## , , temp = cold
##
## Genotype
## Donor CTRL Inv4m
## PT 42 31
## Mi21 39 35
##
##
## Tissue × Genotype × temperature:
## , , temp = CTRL
##
## Genotype
## Tissue CTRL Inv4m
## V1_Root 11 10
## V3_Root 13 11
## V1_Leaf 10 9
## V3_Leaf 12 9
## V3_Leaf_Base 12 11
## V3_Leaf_Sheath 13 11
## V3_S2_Leaf_Base 13 9
## V3_S2_Leaf_Tip 12 9
## V3_SAM 13 11
##
## , , temp = cold
##
## Genotype
## Tissue CTRL Inv4m
## V1_Root 11 9
## V3_Root 11 7
## V1_Leaf 12 8
## V3_Leaf 10 6
## V3_Leaf_Base 6 7
## V3_Leaf_Sheath 7 6
## V3_S2_Leaf_Base 8 8
## V3_S2_Leaf_Tip 8 7
## V3_SAM 8 8
##
##
## Donor × Genotype (overall):
## Genotype
## Donor CTRL Inv4m
## PT 100 71
## Mi21 90 85
# Extract gene IDs
gene_ids <- counts[, 1]
# Convert to matrix
counts_matrix <- as.matrix(counts[, -1])
rownames(counts_matrix) <- gene_ids
# Match sample names to metadata
# Assuming column names in counts are Run IDs
sample_order <- match(colnames(counts_matrix), metadata$Run)
metadata <- metadata[sample_order, ]
{
cat("\nCount matrix prepared:\n")
cat(" Genes:", nrow(counts_matrix), "\n")
cat(" Samples:", ncol(counts_matrix), "\n")
cat(" All samples matched:",
all(colnames(counts_matrix) == metadata$Run), "\n")
}
##
## Count matrix prepared:
## Genes: 39756
## Samples: 346
## All samples matched: TRUE
# Create DGEList with counts and sample information
y <- DGEList(counts = counts_matrix, samples = metadata)
# Define groups from experimental factors
y$samples$group <- interaction(
y$samples$temp,
y$samples$Genotype,
y$samples$Tissue
)
{
cat("\nDGEList object created\n")
cat(" Number of groups:", nlevels(y$samples$group), "\n")
head(y$samples)
}
##
## DGEList object created
## Number of groups: 36
# Keep genes with sufficient expression
keep <- filterByExpr(y, group = y$samples$group)
y_filtered <- y[keep, ]
{
cat("\nExpression filtering:\n")
cat(" Genes before:", nrow(y), "\n")
cat(" Genes after:", nrow(y_filtered), "\n")
cat(" Genes removed:", sum(!keep), "\n")
}
##
## Expression filtering:
## Genes before: 39756
## Genes after: 26490
## Genes removed: 13266
# Histogram of library sizes
hist(
y_filtered$samples$lib.size / 1e6,
main = "Library Size Distribution",
xlab = "Library Size (millions of reads)",
breaks = 20
)
{
cat("\nLibrary size summary:\n")
print(summary(y_filtered$samples$lib.size))
cat("\nSamples with lib.size < 10 million:",
sum(y_filtered$samples$lib.size < 1e7), "\n")
}
##
## Library size summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 203085 1797064 3155701 3597346 4886950 18016248
##
## Samples with lib.size < 10 million: 336
# Flag and remove low count libraries (< 10M reads)
y_filtered$samples$lowCount <- y_filtered$samples$lib.size < 1e5
y_filtered <- y_filtered[, !y_filtered$samples$lowCount]
{
cat("\nLow quality libraries removed:\n")
cat(" Retained:", ncol(y_filtered), "\n")
cat(" Removed:", sum(y$samples$lib.size < 1e5), "\n")
}
##
## Low quality libraries removed:
## Retained: 346
## Removed: 0
{
with(y_filtered$samples, {
cat("\n=== Sample Distribution After QC ===\n")
cat("\n-- Genotype --\n")
print(table(Genotype))
cat("\n-- Temperature --\n")
print(table(temp))
cat("\n-- Tissue --\n")
print(table(Tissue))
cat("\n-- Donor --\n")
print(table(Donor))
})
}
##
## === Sample Distribution After QC ===
##
## -- Genotype --
## Genotype
## CTRL Inv4m
## 190 156
##
## -- Temperature --
## temp
## CTRL cold
## 199 147
##
## -- Tissue --
## Tissue
## V1_Root V3_Root V1_Leaf V3_Leaf V3_Leaf_Base
## 41 42 39 37 36
## V3_Leaf_Sheath V3_S2_Leaf_Base V3_S2_Leaf_Tip V3_SAM
## 37 38 36 40
##
## -- Donor --
## Donor
## PT Mi21
## 171 175
# TMM normalization
y_filtered <- calcNormFactors(y_filtered, method = "TMM")
# Compute MDS
mds <- plotMDS(y_filtered, plot = FALSE)
# Use 10 dimensions based on scree plot
n_mds_dims <- 10
# Step 1: Prepare metadata columns (from your processed metadata)
meta_cols <- metadata %>%
select(
Run,
temp, Genotype, Donor, Tissue,
plant_id, collector_team, harvest_date,
plant_serial_number, tissue_order_index,
sra_upload_time, harvest_batch
)
# Step 2: Prepare DGE sample info (from y_filtered)
dge_cols <- y_filtered$samples %>%
select(Run, lib.size, norm.factors, group)
# Step 3: Prepare MDS coordinates
mds_coords <- tibble(Run = y_filtered$samples$Run)
for (i in 1:n_mds_dims) {
mds_coords[[paste0("dim", i)]] <- mds$eigen.vectors[, i] * sqrt(mds$var.explained[i])
}
# Step 4: Merge all three cleanly
d <- meta_cols %>%
inner_join(dge_cols, by = "Run") %>%
inner_join(mds_coords, by = "Run")
# Report variance explained
var_explained <- mds$var.explained[1:n_mds_dims]
cumvar_pct <- cumsum(var_explained) / sum(mds$var.explained[mds$var.explained > 0]) * 100
var_explained_df <- tibble(
dimension = paste0("dim", 1:n_mds_dims),
var_explained = round(var_explained, 4),
cum_var_pct = round(cumvar_pct, 1)
)
cat("Variance explained by first", n_mds_dims, "MDS dimensions:\n")
## Variance explained by first 10 MDS dimensions:
print(var_explained_df)
## # A tibble: 10 × 3
## dimension var_explained cum_var_pct
## <chr> <dbl> <dbl>
## 1 dim1 0.305 30.5
## 2 dim2 0.112 41.7
## 3 dim3 0.0524 47
## 4 dim4 0.0416 51.1
## 5 dim5 0.0292 54
## 6 dim6 0.0243 56.5
## 7 dim7 0.0201 58.5
## 8 dim8 0.0119 59.7
## 9 dim9 0.0107 60.7
## 10 dim10 0.0104 61.8
# Define all covariates to test
biological_covs <- c("temp", "Genotype", "Donor", "Tissue")
technical_covs <- c(
"collector_team",
"harvest_date",
"plant_serial_number",
"tissue_order_index",
"sra_upload_time"
)
all_covs <- c(biological_covs, technical_covs)
# Prepare a numeric version of d for correlation
d_numeric <- d %>%
mutate(
# Convert factors to numeric (ordered levels)
temp_n = as.numeric(temp),
Genotype_n = as.numeric(Genotype),
Donor_n = as.numeric(Donor),
Tissue_n = as.numeric(Tissue),
collector_team_n = as.numeric(collector_team),
# Date/time as numeric (days since origin)
harvest_date_n = as.numeric(harvest_date),
sra_upload_time_n = as.numeric(sra_upload_time)
# plant_serial_number and tissue_order_index are already numeric
)
# Get MDS dimension names (dim1 to dim10)
mds_dims <- paste0("dim", 1:10)
# Initialize results tibble
cor_results <- tibble(
dimension = character(),
covariate = character(),
correlation = numeric(),
p_value = numeric()
)
# Perform correlations
for (dim in mds_dims) {
for (cov in all_covs) {
cov_n <- paste0(cov, "_n")
# Skip if numeric version doesn't exist (e.g., already numeric)
if (!cov_n %in% names(d_numeric)) {
if (cov %in% c("plant_serial_number", "tissue_order_index")) {
cov_n <- cov # these are already numeric
} else {
next
}
}
x <- d_numeric[[dim]]
y <- d_numeric[[cov_n]]
# Skip if no variation
if (sd(x, na.rm = TRUE) == 0 || sd(y, na.rm = TRUE) == 0) {
next
}
# Spearman correlation (robust, handles ties)
test_result <- tryCatch({
cor.test(x, y, method = "spearman", exact = FALSE)
}, error = function(e) {
NULL
})
if (!is.null(test_result)) {
cor_results <- cor_results %>%
add_row(
dimension = dim,
covariate = cov,
correlation = test_result$estimate,
p_value = test_result$p.value
)
}
}
}
# Adjust p-values (FDR across all tests)
cor_results <- cor_results %>%
mutate(
adj_p_value = p.adjust(p_value, method = "fdr"),
significant = adj_p_value < 0.05,
correlation = round(correlation, 3),
p_value = signif(p_value, 3),
adj_p_value = signif(adj_p_value, 3)
) %>%
arrange(desc(abs(correlation)))
# Display top significant results
cat("\nTop significant MDS correlations (FDR < 0.05):\n")
##
## Top significant MDS correlations (FDR < 0.05):
print(cor_results %>% filter(significant) %>% head(15))
## # A tibble: 15 × 6
## dimension covariate correlation p_value adj_p_value significant
## <chr> <chr> <dbl> <dbl> <dbl> <lgl>
## 1 dim2 tissue_order_index -0.702 1.03e-52 9.25e-51 TRUE
## 2 dim7 temp 0.578 3.29e-32 1.48e-30 TRUE
## 3 dim2 Tissue -0.563 2.35e-30 7.05e-29 TRUE
## 4 dim5 temp -0.557 1.35e-29 3.05e-28 TRUE
## 5 dim7 plant_serial_number 0.545 3.38e-28 6.08e-27 TRUE
## 6 dim3 Tissue 0.535 5.10e-27 7.65e-26 TRUE
## 7 dim6 harvest_date 0.479 2.97e-21 3.82e-20 TRUE
## 8 dim4 temp -0.397 1.7 e-14 1.72e-13 TRUE
## 9 dim5 plant_serial_number -0.397 1.72e-14 1.72e-13 TRUE
## 10 dim7 harvest_date 0.352 1.54e-11 1.38e-10 TRUE
## 11 dim4 plant_serial_number -0.35 2.15e-11 1.76e-10 TRUE
## 12 dim8 harvest_date 0.321 1.04e- 9 7.81e- 9 TRUE
## 13 dim1 Tissue 0.307 5.50e- 9 3.81e- 8 TRUE
## 14 dim10 sra_upload_time -0.244 4.20e- 6 2.70e- 5 TRUE
## 15 dim5 harvest_date -0.22 3.66e- 5 2.2 e- 4 TRUE
# Optional: Save full results for later
mds_cor_full <- cor_results
library(ggpubr)
# 1. strongest effect
p1 <- ggplot(d, aes(x = dim1, y = dim2, color = harvest_date)) +
geom_point(size = 3) +
scale_color_viridis_c(option = "plasma", name = "Harvest Date") +
theme_classic(base_size = 14) +
theme(legend.position = "top")
# 2. Collection team (technical)
p2 <- ggplot(d, aes(x = dim1, y = dim2, color = collector_team)) +
geom_point(size = 3) +
theme_classic(base_size = 14) +
theme(legend.position = "top",
plot.margin = margin(r = 60))
# 3. Temperature (biological)
p3 <- ggplot(d, aes(x = dim1, y = dim2, color = temp)) +
geom_point(size = 3) +
theme_classic(base_size = 14) +
theme(legend.position = "top")
# 4. Genotype (biological)
p4 <- ggplot(d, aes(x = dim1, y = dim2, color = Genotype)) +
geom_point(size = 3) +
theme_classic(base_size = 14) +
theme(legend.position = "top",
plot.margin = margin(r = 60))
# 5. Donor (biological)
p5 <- ggplot(d, aes(x = dim1, y = dim2, color = Donor)) +
geom_point(size = 3) +
theme_classic(base_size = 14) +
theme(legend.position = "top",
plot.margin = margin(r = 60))
# 6. Tissue (biological)
p6 <- ggplot(d, aes(x = dim1, y = dim2, color = Tissue)) +
geom_point(size = 3) +
theme_classic(base_size = 14) +
theme(legend.position = "top",
plot.margin = margin(r = 80))
# Arrange in 2 columns, 3 rows
ggarrange(p1, p2, p3, p4, p5, p6, ncol = 2, nrow = 3)
# Design matrix
design <- model.matrix(
~ Donor + Tissue*Genotype+ temp*Genotype,
data = y_filtered$samples
)
# Voom transformation with precision weights
voomR <- voom(y_filtered, design = design, plot = TRUE)
# Save normalized expression
saveRDS(voomR$E, file = "../results/normalized_expression_logCPM_crow2020.rds")
saveRDS(voomR, file = "../results/normalized_expression_voom_object_crow2020.rds")
{
cat("Normalization factors range:",
range(y_filtered$samples$norm.factors), "\n\n")
cat("Design matrix:", nrow(design), "samples ×", ncol(design),
"coefficients\n")
cat("\nCoefficients:\n")
print(colnames(design))
cat("\nVoom expression matrix:", nrow(voomR$E), "genes ×",
ncol(voomR$E), "samples\n")
}
## Normalization factors range: 0.5098395 1.649046
##
## Design matrix: 346 samples × 21 coefficients
##
## Coefficients:
## [1] "(Intercept)" "DonorMi21"
## [3] "TissueV3_Root" "TissueV1_Leaf"
## [5] "TissueV3_Leaf" "TissueV3_Leaf_Base"
## [7] "TissueV3_Leaf_Sheath" "TissueV3_S2_Leaf_Base"
## [9] "TissueV3_S2_Leaf_Tip" "TissueV3_SAM"
## [11] "GenotypeInv4m" "tempcold"
## [13] "TissueV3_Root:GenotypeInv4m" "TissueV1_Leaf:GenotypeInv4m"
## [15] "TissueV3_Leaf:GenotypeInv4m" "TissueV3_Leaf_Base:GenotypeInv4m"
## [17] "TissueV3_Leaf_Sheath:GenotypeInv4m" "TissueV3_S2_Leaf_Base:GenotypeInv4m"
## [19] "TissueV3_S2_Leaf_Tip:GenotypeInv4m" "TissueV3_SAM:GenotypeInv4m"
## [21] "GenotypeInv4m:tempcold"
##
## Voom expression matrix: 26490 genes × 346 samples
# Remove collection order and donor batch effect for visualization
# Design matrix with ONLY technical effects to remove
design_batch <- model.matrix(
~ Donor,
data = metadata
)
# Remove ONLY these effects
corrected_logCPM <- removeBatchEffect(
voomR$E,
design = design_batch
)
# Save for downstream visualization
saveRDS(
corrected_logCPM,
"../results/normalized_expression_batch_corrected_crow2020.rds"
)
{
cat("\nBatch-corrected expression saved (for visualization only)\n")
cat("Use voomR for differential expression analysis\n")
}
##
## Batch-corrected expression saved (for visualization only)
## Use voomR for differential expression analysis
# Fit linear model
fit <- lmFit(voomR, design)
# Apply empirical Bayes moderation
ebfit <- eBayes(fit)
{
cat("Model fitted:", nrow(fit$coefficients), "genes ×",
ncol(fit$coefficients), "coefficients\n")
cat("\nSignificant genes per coefficient (FDR < 0.05):\n")
print(colSums(abs(decideTests(ebfit))))
}
## Model fitted: 26490 genes × 21 coefficients
##
## Significant genes per coefficient (FDR < 0.05):
## (Intercept) DonorMi21
## 24030 2009
## TissueV3_Root TissueV1_Leaf
## 197 20368
## TissueV3_Leaf TissueV3_Leaf_Base
## 20153 19640
## TissueV3_Leaf_Sheath TissueV3_S2_Leaf_Base
## 18818 18863
## TissueV3_S2_Leaf_Tip TissueV3_SAM
## 20418 14898
## GenotypeInv4m tempcold
## 175 14600
## TissueV3_Root:GenotypeInv4m TissueV1_Leaf:GenotypeInv4m
## 0 26
## TissueV3_Leaf:GenotypeInv4m TissueV3_Leaf_Base:GenotypeInv4m
## 29 28
## TissueV3_Leaf_Sheath:GenotypeInv4m TissueV3_S2_Leaf_Base:GenotypeInv4m
## 19 44
## TissueV3_S2_Leaf_Tip:GenotypeInv4m TissueV3_SAM:GenotypeInv4m
## 30 8
## GenotypeInv4m:tempcold
## 14
# Map coefficients (excluding Intercept and Donor batch)
predictor_map <- c(
"DonorMi21" = "Mi21",
"TissueV1_Leaf" = "V1L",
"TissueV3_Root" = "V3R",
"TissueV3_Leaf" = "V3L",
"TissueV3_Leaf_Base" = "V3LBase",
"TissueV3_Leaf_Sheath" = "V3LSheath",
"TissueV3_S2_Leaf_Base" = "V3L2Base",
"TissueV3_S2_Leaf_Tip" = "V3L2Tip",
"TissueV3_SAM" = "V3SAM",
"GenotypeInv4m" = "Inv4m",
"tempcold" = "cold",
"TissueV3_Root:GenotypeInv4m" = "V3R:Inv4m",
"TissueV1_Leaf:GenotypeInv4m" = "V1L:Inv4m",
"TissueV3_Leaf:GenotypeInv4m" = "V3L:Inv4m",
"TissueV3_Leaf_Base:GenotypeInv4m"= "V3LB:Inv4m",
"TissueV3_Leaf_Sheath:GenotypeInv4m" = "V3LS:Inv4m",
"TissueV3_S2_Leaf_Base:GenotypeInv4m" = "V3L2B:Inv4m",
"TissueV3_S2_Leaf_Tip:GenotypeInv4m" = "V3L2T:Inv4m",
"TissueV3_SAM:GenotypeInv4m" = "V3SAM:Inv4m",
"GenotypeInv4m:tempcold" = "Inv4m:cold" # ← FIXED: matches actual coef name
)
{
cat("\nExtracting coefficients:\n")
for (i in seq_along(predictor_map)) {
cat(" ", names(predictor_map)[i], "→", predictor_map[i], "\n")
}
}
##
## Extracting coefficients:
## DonorMi21 → Mi21
## TissueV1_Leaf → V1L
## TissueV3_Root → V3R
## TissueV3_Leaf → V3L
## TissueV3_Leaf_Base → V3LBase
## TissueV3_Leaf_Sheath → V3LSheath
## TissueV3_S2_Leaf_Base → V3L2Base
## TissueV3_S2_Leaf_Tip → V3L2Tip
## TissueV3_SAM → V3SAM
## GenotypeInv4m → Inv4m
## tempcold → cold
## TissueV3_Root:GenotypeInv4m → V3R:Inv4m
## TissueV1_Leaf:GenotypeInv4m → V1L:Inv4m
## TissueV3_Leaf:GenotypeInv4m → V3L:Inv4m
## TissueV3_Leaf_Base:GenotypeInv4m → V3LB:Inv4m
## TissueV3_Leaf_Sheath:GenotypeInv4m → V3LS:Inv4m
## TissueV3_S2_Leaf_Base:GenotypeInv4m → V3L2B:Inv4m
## TissueV3_S2_Leaf_Tip:GenotypeInv4m → V3L2T:Inv4m
## TissueV3_SAM:GenotypeInv4m → V3SAM:Inv4m
## GenotypeInv4m:tempcold → Inv4m:cold
#' Extract differential expression results for specified predictors
#'
#' @param ebfit An eBayes fitted model object from limma
#' @param predictor_map Named vector mapping coefficient names to display names
#'
#' @return A data frame with DE results for all specified predictors
extract_predictor_effects <- function(ebfit, predictor_map) {
coef_names <- colnames(coef(ebfit))
coef_indices <- match(names(predictor_map), coef_names)
if (any(is.na(coef_indices))) {
missing <- names(predictor_map)[is.na(coef_indices)]
stop("Coefficients not found: ", paste(missing, collapse = ", "),
"\nAvailable: ", paste(coef_names, collapse = ", "))
}
result_list <- lapply(seq_along(coef_indices), function(i) {
idx <- coef_indices[i]
tt <- topTable(ebfit, coef = idx, sort.by = "none", n = Inf)
# Calculate 95% confidence intervals
crit_value <- qt(0.975, ebfit$df.residual + ebfit$df.prior)
std_errors <- ebfit$stdev.unscaled[, idx] * sqrt(ebfit$s2.post)
tt$std_err <- std_errors
tt$upper <- tt$logFC + crit_value * std_errors
tt$lower <- tt$logFC - crit_value * std_errors
tt$predictor <- predictor_map[i]
tt$response <- rownames(tt)
tt
})
effects <- do.call(rbind, result_list)
rownames(effects) <- NULL
effects %>%
select(predictor, response, everything()) %>%
arrange(adj.P.Val)
}
# Define effect order by number of DEGs detected
effect_order <- c(
"V3L2Tip",
"V1L",
"V3L",
"V3LBase",
"V3L2Base",
"V3LSheath",
"V3SAM",
"cold",
"Mi21",
"V3R",
"Inv4m",
"V3L2B:Inv4m",
"V3L2T:Inv4m",
"V3L:Inv4m",
"V3LB:Inv4m",
"V1L:Inv4m",
"V3LS:Inv4m",
"Inv4m:cold",
"V3SAM:Inv4m",
"V3R:Inv4m"
)
effects_df <- extract_predictor_effects(ebfit, predictor_map) %>%
dplyr::rename(gene = response) %>%
mutate(predictor = factor(predictor, levels = effect_order)) %>%
mutate(neglogP = -log10(adj.P.Val))
{
cat("\nTotal tests:", nrow(effects_df), "\n")
cat("Tests per predictor:\n")
print(table(effects_df$predictor))
}
##
## Total tests: 529800
## Tests per predictor:
##
## V3L2Tip V1L V3L V3LBase V3L2Base V3LSheath
## 26490 26490 26490 26490 26490 26490
## V3SAM cold Mi21 V3R Inv4m V3L2B:Inv4m
## 26490 26490 26490 26490 26490 26490
## V3L2T:Inv4m V3L:Inv4m V3LB:Inv4m V1L:Inv4m V3LS:Inv4m Inv4m:cold
## 26490 26490 26490 26490 26490 26490
## V3SAM:Inv4m V3R:Inv4m
## 26490 26490
effects_df <- effects_df %>%
mutate(is_DEG = adj.P.Val < 0.05) %>%
mutate(regulation = case_when(
is_DEG & logFC > 0 ~ "Upregulated",
is_DEG & logFC < 0 ~ "Downregulated",
.default = "Unregulated"
))
{
cat("\nDEG counts:\n")
print(with(effects_df, table(predictor, is_DEG)))
cat("\nRegulation:\n")
print(with(effects_df, table(predictor, regulation)))
}
##
## DEG counts:
## is_DEG
## predictor FALSE TRUE
## V3L2Tip 6072 20418
## V1L 6122 20368
## V3L 6337 20153
## V3LBase 6850 19640
## V3L2Base 7627 18863
## V3LSheath 7672 18818
## V3SAM 11592 14898
## cold 11890 14600
## Mi21 24481 2009
## V3R 26293 197
## Inv4m 26315 175
## V3L2B:Inv4m 26446 44
## V3L2T:Inv4m 26460 30
## V3L:Inv4m 26461 29
## V3LB:Inv4m 26462 28
## V1L:Inv4m 26464 26
## V3LS:Inv4m 26471 19
## Inv4m:cold 26476 14
## V3SAM:Inv4m 26482 8
## V3R:Inv4m 26490 0
##
## Regulation:
## regulation
## predictor Downregulated Unregulated Upregulated
## V3L2Tip 11313 6072 9105
## V1L 11139 6122 9229
## V3L 11112 6337 9041
## V3LBase 10819 6850 8821
## V3L2Base 10230 7627 8633
## V3LSheath 10227 7672 8591
## V3SAM 6729 11592 8169
## cold 7719 11890 6881
## Mi21 913 24481 1096
## V3R 99 26293 98
## Inv4m 98 26315 77
## V3L2B:Inv4m 16 26446 28
## V3L2T:Inv4m 17 26460 13
## V3L:Inv4m 10 26461 19
## V3LB:Inv4m 9 26462 19
## V1L:Inv4m 8 26464 18
## V3LS:Inv4m 10 26471 9
## Inv4m:cold 5 26476 9
## V3SAM:Inv4m 6 26482 2
## V3R:Inv4m 0 26490 0
# Modified helper function with sample size check
calculate_robust_distance <- function(per_predictor, mcd_alpha) {
# Require at least 10 significant DEGs for robust estimation
if (nrow(per_predictor) < 10) {
per_predictor$mahalanobis <- NA_real_
return(per_predictor)
}
bivariate <- per_predictor %>%
select(logFC, neglogP) %>%
as.matrix()
# Check for variance (all same values would cause singularity)
if (var(bivariate[, 1]) < 1e-10 || var(bivariate[, 2]) < 1e-10) {
per_predictor$mahalanobis <- NA_real_
return(per_predictor)
}
tryCatch({
mcd_result <- covMcd(bivariate, alpha = mcd_alpha)
per_predictor$mahalanobis <- mahalanobis(
x = bivariate,
center = mcd_result$center,
cov = mcd_result$cov
)
}, error = function(e) {
warning("Could not calculate Mahalanobis distance for group: ",
unique(per_predictor$predictor), " - ",
unique(per_predictor$regulation))
per_predictor$mahalanobis <- NA_real_
})
per_predictor
}
# Modified main function
add_mahalanobis_outliers <- function(
data = NULL,
distance_quantile = 0.05,
FDR = 0.05,
mcd_alpha = 0.75
) {
# Calculate distances per predictor AND regulation
data <- data %>%
group_by(predictor, regulation) %>%
group_split() %>%
lapply(calculate_robust_distance, mcd_alpha = mcd_alpha) %>%
bind_rows()
cutoff <- qchisq(p = 1 - distance_quantile, df = 2)
data$is_mh_outlier <- (data$adj.P.Val < FDR) &
(data$mahalanobis > cutoff) &
!is.na(data$mahalanobis)
data %>%
ungroup() %>%
group_by(predictor, regulation) %>%
arrange(-mahalanobis, .by_group = TRUE) %>%
ungroup()
}
effects_df <- add_mahalanobis_outliers(
effects_df,
distance_quantile = 0.05,
FDR = 0.05
)
{
cat("\nMahalanobis outliers detected:\n")
print(with(effects_df, table(predictor, is_mh_outlier, useNA = "ifany")))
cat("\nGroups with insufficient data (NA):\n")
na_summary <- effects_df %>%
filter(is.na(mahalanobis)) %>%
count(predictor, regulation)
print(na_summary)
}
##
## Mahalanobis outliers detected:
## is_mh_outlier
## predictor FALSE TRUE
## V3L2Tip 21859 4631
## V1L 21701 4789
## V3L 21866 4624
## V3LBase 21966 4524
## V3L2Base 21604 4886
## V3LSheath 22051 4439
## V3SAM 22789 3701
## cold 23183 3307
## Mi21 25963 527
## V3R 26445 45
## Inv4m 26446 44
## V3L2B:Inv4m 26486 4
## V3L2T:Inv4m 26488 2
## V3L:Inv4m 26487 3
## V3LB:Inv4m 26489 1
## V1L:Inv4m 26488 2
## V3LS:Inv4m 26489 1
## Inv4m:cold 26490 0
## V3SAM:Inv4m 26490 0
## V3R:Inv4m 26490 0
##
## Groups with insufficient data (NA):
## # A tibble: 15 × 3
## predictor regulation n
## <fct> <chr> <int>
## 1 V3L2B:Inv4m Unregulated 26446
## 2 V3L2T:Inv4m Unregulated 26460
## 3 V3L:Inv4m Unregulated 26461
## 4 V3LB:Inv4m Downregulated 9
## 5 V3LB:Inv4m Unregulated 26462
## 6 V1L:Inv4m Downregulated 8
## 7 V1L:Inv4m Unregulated 26464
## 8 V3LS:Inv4m Unregulated 26471
## 9 V3LS:Inv4m Upregulated 9
## 10 Inv4m:cold Downregulated 5
## 11 Inv4m:cold Upregulated 9
## 12 V3SAM:Inv4m Downregulated 6
## 13 V3SAM:Inv4m Unregulated 26482
## 14 V3SAM:Inv4m Upregulated 2
## 15 V3R:Inv4m Unregulated 26490
# Gene symbols and locus names
gene_symbol <- read.table(
"../data/gene_symbol.tab",
quote = "",
header = TRUE,
sep = "\t",
na.strings = ""
)
# Pannzer functional annotations
pannzer <- read.table(
"../data/PANNZER_DESC.tab",
quote = "",
header = TRUE,
sep = "\t"
) %>%
group_by(gene_model) %>%
dplyr::slice(1) %>%
select(gene_model, desc)
# Merge annotations
gene_symbol_unique <- gene_symbol %>%
group_by(gene_model, locus_symbol) %>%
dplyr::slice(1) %>%
ungroup()
gene_pannzer <- gene_symbol_unique %>%
left_join(pannzer, by = "gene_model") %>%
group_by(gene_model) %>%
dplyr::slice(1) %>%
ungroup() %>%
select(gene_model, locus_symbol, locus_name, desc)
# Genomic coordinates
v5_gff_file <- "/Users/fvrodriguez/ref/zea/Zea_mays.Zm-B73-REFERENCE-NAM-5.0.59.chr.gff3"
genes_gr <- rtracklayer::import(v5_gff_file) %>%
subset(type == "gene" & seqnames %in% 1:10)
genes <- as.data.frame(genes_gr)
genes$ID <- gsub("gene:", "", genes$ID)
{
cat("Annotations loaded:\n")
cat(" Gene symbols:", nrow(gene_symbol), "\n")
cat(" Pannzer descriptions:", nrow(pannzer), "\n")
cat(" Genomic features:", nrow(genes), "genes\n")
}
## Annotations loaded:
## Gene symbols: 44364
## Pannzer descriptions: 28308
## Genomic features: 43459 genes
# Inv4m inversion boundaries
inv4m_start <- genes[genes$ID == "Zm00001eb190470", "start"]
inv4m_end <- genes[genes$ID == "Zm00001eb194800", "end"]
# Shared introgression boundaries
introgression_start <- 157012149
introgression_end <- 195900523
# Extract gene IDs
inv4m_gene_ids <- genes %>%
filter(seqnames == 4, start >= inv4m_start, end <= inv4m_end) %>%
pull(ID)
shared_introgression_gene_ids <- genes %>%
filter(seqnames == 4, start >= introgression_start, end <= introgression_end) %>%
pull(ID)
flanking_introgression_gene_ids <- shared_introgression_gene_ids[
!(shared_introgression_gene_ids %in% inv4m_gene_ids)
]
{
cat("Inv4m inversion:", length(inv4m_gene_ids), "genes\n")
cat("Shared introgression:", length(shared_introgression_gene_ids), "genes\n")
cat("Flanking:", length(flanking_introgression_gene_ids), "genes\n")
}
## Inv4m inversion: 434 genes
## Shared introgression: 1099 genes
## Flanking: 665 genes
# Define fold change thresholds
is_large_effect <- rep(FALSE, nrow(effects_df))
is_interaction <- effects_df$predictor == "cold:Inv4m"
is_large_effect[is_interaction & abs(effects_df$logFC) > 0.5] <- TRUE
is_large_effect[!is_interaction & abs(effects_df$logFC) > 1.5] <- TRUE
effects_df <- effects_df %>%
mutate(is_hiconf_DEG = is_DEG & is_large_effect)
{
cat("\nHigh-Confidence DEGs:\n")
print(with(effects_df, table(predictor, is_hiconf_DEG)))
}
##
## High-Confidence DEGs:
## is_hiconf_DEG
## predictor FALSE TRUE
## V3L2Tip 15181 11309
## V1L 15236 11254
## V3L 15368 11122
## V3LBase 15898 10592
## V3L2Base 17614 8876
## V3LSheath 17189 9301
## V3SAM 21776 4714
## cold 26172 318
## Mi21 26388 102
## V3R 26436 54
## Inv4m 26392 98
## V3L2B:Inv4m 26456 34
## V3L2T:Inv4m 26465 25
## V3L:Inv4m 26469 21
## V3LB:Inv4m 26467 23
## V1L:Inv4m 26466 24
## V3LS:Inv4m 26476 14
## Inv4m:cold 26482 8
## V3SAM:Inv4m 26483 7
## V3R:Inv4m 26490 0
rank_threshold <- 10
effects_df <- effects_df %>%
group_by(is_hiconf_DEG, predictor, regulation) %>%
mutate(
pval_rank = row_number(dplyr::desc(neglogP)),
mahal_rank = row_number(dplyr::desc(mahalanobis))
) %>%
ungroup() %>%
mutate(
is_selected_DEG = (pval_rank <= rank_threshold & is_hiconf_DEG) |
(mahal_rank <= rank_threshold & is_hiconf_DEG)
)
{
cat("\nSelected DEGs (Top", rank_threshold, "by rank):\n")
print(with(
effects_df %>% filter(is_selected_DEG & regulation != "Unregulated"),
table(regulation, predictor)
))
}
##
## Selected DEGs (Top 10 by rank):
## predictor
## regulation V3L2Tip V1L V3L V3LBase V3L2Base V3LSheath V3SAM cold Mi21 V3R
## Downregulated 14 14 13 12 13 11 11 10 11 10
## Upregulated 10 12 11 11 14 11 10 11 11 11
## predictor
## regulation Inv4m V3L2B:Inv4m V3L2T:Inv4m V3L:Inv4m V3LB:Inv4m V1L:Inv4m
## Downregulated 11 12 12 7 5 6
## Upregulated 10 13 11 12 14 12
## predictor
## regulation V3LS:Inv4m Inv4m:cold V3SAM:Inv4m V3R:Inv4m
## Downregulated 6 1 5 0
## Upregulated 8 7 2 0
effects_df <- effects_df %>%
left_join(
gene_pannzer,
by = c(gene = "gene_model"),
relationship = "many-to-many"
) %>%
mutate(desc_merged = coalesce(locus_name, desc)) %>%
select(predictor, regulation, gene, locus_symbol, desc_merged, everything()) %>%
inner_join(
genes %>%
select(gene = ID, CHR = seqnames, BP = start) %>%
mutate(CHR = as.character(CHR) %>% as.integer()),
by = "gene"
) %>%
mutate(
locus_label = case_when(
is.na(locus_symbol) ~ NA_character_,
grepl("^si\\d*[a-h]", locus_symbol) ~ NA_character_,
grepl("^umc", locus_symbol) ~ NA_character_,
grepl("^Zm00001eb", locus_symbol) ~ NA_character_,
TRUE ~ locus_symbol
)
)
{
cat("\nAnnotations added:", ncol(effects_df), "columns\n")
}
##
## Annotations added: 27 columns
effects_df <- effects_df %>%
mutate(
in_Inv4m = gene %in% inv4m_gene_ids,
in_cis = gene %in% shared_introgression_gene_ids,
in_flank = gene %in% flanking_introgression_gene_ids,
in_trans = !in_cis
)
# Summarize genomic region classification by predictor
region_summary <- effects_df %>%
group_by(predictor) %>%
summarise(
in_Inv4m = sum(in_Inv4m, na.rm = TRUE),
in_cis = sum(in_cis, na.rm = TRUE),
in_flank = sum(in_flank, na.rm = TRUE),
in_trans = sum(in_trans, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(match(predictor, effect_order)) # keep your desired order
# Print as a clean table
{
cat("\nRegion classification by predictor:\n")
print(region_summary, row.names = FALSE)
}
##
## Region classification by predictor:
## # A tibble: 20 × 5
## predictor in_Inv4m in_cis in_flank in_trans
## <fct> <int> <int> <int> <int>
## 1 V3L2Tip 293 682 389 25587
## 2 V1L 293 682 389 25587
## 3 V3L 293 682 389 25587
## 4 V3LBase 293 682 389 25587
## 5 V3L2Base 293 682 389 25587
## 6 V3LSheath 293 682 389 25587
## 7 V3SAM 293 682 389 25587
## 8 cold 293 682 389 25587
## 9 Mi21 293 682 389 25587
## 10 V3R 293 682 389 25587
## 11 Inv4m 293 682 389 25587
## 12 V3L2B:Inv4m 293 682 389 25587
## 13 V3L2T:Inv4m 293 682 389 25587
## 14 V3L:Inv4m 293 682 389 25587
## 15 V3LB:Inv4m 293 682 389 25587
## 16 V1L:Inv4m 293 682 389 25587
## 17 V3LS:Inv4m 293 682 389 25587
## 18 Inv4m:cold 293 682 389 25587
## 19 V3SAM:Inv4m 293 682 389 25587
## 20 V3R:Inv4m 293 682 389 25587
top_degs_qc <- effects_df %>%
filter(is_DEG, regulation != "Unregulated") %>%
group_by(predictor, regulation) %>%
arrange(-neglogP, .by_group = TRUE) %>%
dplyr::slice(1:10) %>%
select(predictor, gene, locus_symbol, desc_merged, logFC, neglogP) %>%
arrange(predictor, regulation, -neglogP)
top_degs_qc
overall_summary <- effects_df %>%
group_by(predictor) %>%
summarise(
total_genes = n(),
n_DEG = sum(is_DEG),
n_hiconf_DEG = sum(is_hiconf_DEG),
n_selected_DEG = sum(is_selected_DEG),
pct_DEG = round(100 * n_DEG / total_genes, 2)
)
overall_summary
trans_effects <- read.csv("~/Desktop/network_effects.csv")
selected_degs <- effects_df %>%
filter(is_selected_DEG) %>%
select(
predictor,
regulation,
gene,
locus_symbol,
locus_label,
desc_merged,
logFC,
neglogP,
mahalanobis,
pval_rank,
mahal_rank
) %>%
arrange(predictor, regulation, -neglogP)
{
cat("\nSelected DEGs for manuscript:", nrow(selected_degs), "\n")
print(with(selected_degs, table(predictor, regulation)))
}
##
## Selected DEGs for manuscript: 383
## regulation
## predictor Downregulated Upregulated
## V3L2Tip 14 10
## V1L 14 12
## V3L 13 11
## V3LBase 12 11
## V3L2Base 13 14
## V3LSheath 11 11
## V3SAM 11 10
## cold 10 11
## Mi21 11 11
## V3R 10 11
## Inv4m 11 10
## V3L2B:Inv4m 12 13
## V3L2T:Inv4m 12 11
## V3L:Inv4m 7 12
## V3LB:Inv4m 5 14
## V1L:Inv4m 6 12
## V3LS:Inv4m 6 8
## Inv4m:cold 1 5
## V3SAM:Inv4m 5 2
## V3R:Inv4m 0 0
# Full effects table
write.csv(
effects_df,
file = "../results/predictor_effects_crow2020.csv",
row.names = FALSE
)
# Selected DEGs
write.csv(
selected_degs,
file = "../results/selected_DEGs_crow2020.csv",
row.names = FALSE
)
{
cat("\nResults exported:\n")
cat(" predictor_effects_crow2020.csv\n")
cat(" selected_DEGs_crow2020.csv\n")
}
This analysis identified differentially expressed genes across three main effects:
Donor batch effect was controlled as a fixed effect in the model. Expression data was normalized using TMM and transformed with voom for variance stabilization.
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] robustbase_0.99-6 ggtext_0.1.2 ggfx_1.0.3
## [4] ggpubr_0.6.2 ggplot2_4.0.1 stringr_1.6.0
## [7] tidyr_1.3.1 dplyr_1.1.4 rtracklayer_1.70.0
## [10] GenomicRanges_1.62.0 Seqinfo_1.0.0 IRanges_2.44.0
## [13] S4Vectors_0.48.0 BiocGenerics_0.56.0 generics_0.1.4
## [16] edgeR_4.8.0 limma_3.66.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 viridisLite_0.4.2
## [3] farver_2.1.2 Biostrings_2.78.0
## [5] S7_0.2.1 bitops_1.0-9
## [7] fastmap_1.2.0 RCurl_1.98-1.17
## [9] GenomicAlignments_1.46.0 XML_3.99-0.20
## [11] digest_0.6.39 lifecycle_1.0.4
## [13] statmod_1.5.1 magrittr_2.0.4
## [15] compiler_4.5.1 rlang_1.1.6
## [17] sass_0.4.10 tools_4.5.1
## [19] utf8_1.2.6 yaml_2.3.10
## [21] knitr_1.50 ggsignif_0.6.4
## [23] labeling_0.4.3 S4Arrays_1.10.0
## [25] curl_7.0.0 DelayedArray_0.36.0
## [27] xml2_1.5.0 RColorBrewer_1.1-3
## [29] abind_1.4-8 BiocParallel_1.44.0
## [31] withr_3.0.2 purrr_1.2.0
## [33] grid_4.5.1 scales_1.4.0
## [35] SummarizedExperiment_1.40.0 cli_3.6.5
## [37] rmarkdown_2.30 crayon_1.5.3
## [39] ragg_1.5.0 rstudioapi_0.17.1
## [41] httr_1.4.7 rjson_0.2.23
## [43] cachem_1.1.0 parallel_4.5.1
## [45] XVector_0.50.0 restfulr_0.0.16
## [47] matrixStats_1.5.0 vctrs_0.6.5
## [49] Matrix_1.7-4 jsonlite_2.0.0
## [51] carData_3.0-5 car_3.1-3
## [53] rstatix_0.7.3 Formula_1.2-5
## [55] systemfonts_1.3.1 magick_2.9.0
## [57] locfit_1.5-9.12 jquerylib_0.1.4
## [59] glue_1.8.0 DEoptimR_1.1-4
## [61] codetools_0.2-20 cowplot_1.2.0
## [63] stringi_1.8.7 gtable_0.3.6
## [65] BiocIO_1.20.0 tibble_3.3.0
## [67] pillar_1.11.1 htmltools_0.5.8.1
## [69] R6_2.6.1 textshaping_1.0.4
## [71] evaluate_1.0.5 lattice_0.22-7
## [73] Biobase_2.70.0 backports_1.5.0
## [75] Rsamtools_2.26.0 cigarillo_1.0.0
## [77] gridtext_0.1.5 broom_1.0.10
## [79] bslib_0.9.0 Rcpp_1.1.0
## [81] SparseArray_1.10.2 xfun_0.54
## [83] MatrixGenerics_1.22.0 pkgconfig_2.0.3