library(GenomicRanges)
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
setwd("/Users/joelbrunomendes/Documents/Methylation_data/run0/Control_DMP")
# Function to read BED.gz file
read_bed_gz <- function(bed_file) {
# Read the BED file (assuming standard BED format: chr, start, end, ...)
bed_data <- read.table(bed_file, header = FALSE, sep = "\t", stringsAsFactors = FALSE)
# Create GRanges object (assuming first 3 columns are chr, start, end)
gr <- GRanges(
seqnames = bed_data[,1],
ranges = IRanges(start = bed_data[,2] + 1, end = bed_data[,3]), # Convert to 1-based
strand = "*"
)
# Add additional columns if they exist
if(ncol(bed_data) > 3) {
mcols(gr) <- bed_data[,4:ncol(bed_data)]
}
return(gr)
}
# Function to analyze DMR distances
analyze_dmr_distances <- function(dmrs, genes, dataset_name) {
cat(sprintf("Analyzing %s dataset...\n", dataset_name))
# Use distanceToNearest which returns a Hits object
nearest_results <- distanceToNearest(dmrs, genes, ignore.strand = TRUE)
# Extract the components
query_hits <- queryHits(nearest_results)
subject_hits <- subjectHits(nearest_results)
distances <- mcols(nearest_results)$distance
cat(sprintf("%s: DMRs with nearest genes found: %d out of %d\n",
dataset_name, length(query_hits), length(dmrs)))
# Check for missing matches
missing_dmrs <- setdiff(1:length(dmrs), query_hits)
if(length(missing_dmrs) > 0) {
cat(sprintf("Warning: %d DMRs in %s had no nearest gene match\n",
length(missing_dmrs), dataset_name))
}
# Create results data frame
results <- data.frame(
dataset = dataset_name,
dmr_index = query_hits,
nearest_gene_index = subject_hits,
distance = distances,
gene_id = mcols(genes)$gene_id[subject_hits],
gene_name = ifelse(is.null(mcols(genes)$gene_name),
mcols(genes)$gene_id[subject_hits],
mcols(genes)$gene_name[subject_hits])
)
return(list(results = results, distances = distances))
}
# Function to create distance categories
create_distance_categories <- function(distances, dataset_name) {
data.frame(
Dataset = dataset_name,
Category = c("Overlapping (0 bp)", "≤1 kb", "≤5 kb", "≤10 kb", "≤50 kb", ">50 kb"),
Count = c(
sum(distances == 0),
sum(distances <= 1000),
sum(distances <= 5000),
sum(distances <= 10000),
sum(distances <= 50000),
sum(distances > 50000)
),
Total = length(distances)
)
}
# Read your files
cat("Reading first DMR BED file...\n")
## Reading first DMR BED file...
dmrs1 <- read_bed_gz("/Users/joelbrunomendes/Documents/Methylation_data/run0/Control_DMP/GSE186458_blocks.s205.bed.gz") # Replace with your first DMR file path
dataset1_name <- "Reference" # Change this name as needed
seqlevels(dmrs1) <- sub("^chr", "", seqlevels(dmrs1))
cat("Reading second DMR BED file...\n")
## Reading second DMR BED file...
dmrs2 <- read_bed_gz("/Users/joelbrunomendes/Documents/Methylation_data/run0/Control_DMP/dmp_gr_for_ucsc1.bed") # Replace with your second DMR file path
dataset2_name <- "My DMPs" # Change this name as needed
seqlevels(dmrs2) <- sub("^chr", "", seqlevels(dmrs2))
cat("Reading GTF file...\n")
## Reading GTF file...
gtf <- import("/Users/joelbrunomendes/Documents/Methylation_data/run0/Control_DMP/GRCh38.gtf") # Replace with your actual file path
# Filter GTF for genes only (not transcripts/exons)
genes <- gtf[gtf$type == "gene"]
# If no 'gene' entries, try 'transcript' and collapse to gene level
if(length(genes) == 0) {
transcripts <- gtf[gtf$type == "transcript"]
# Group by gene_id to get gene boundaries
genes_df <- as.data.frame(transcripts) %>%
group_by(gene_id) %>%
summarise(
seqnames = first(seqnames),
start = min(start),
end = max(end),
strand = first(strand),
gene_name = first(gene_name),
.groups = 'drop'
)
genes <- GRanges(
seqnames = genes_df$seqnames,
ranges = IRanges(start = genes_df$start, end = genes_df$end),
strand = genes_df$strand,
gene_id = genes_df$gene_id,
gene_name = genes_df$gene_name
)
}
cat(sprintf("Found %d DMRs in dataset 1, %d DMRs in dataset 2, and %d genes\n",
length(dmrs1), length(dmrs2), length(genes)))
## Found 7104162 DMRs in dataset 1, 8042 DMRs in dataset 2, and 78686 genes
# Analyze both datasets
analysis1 <- analyze_dmr_distances(dmrs1, genes, dataset1_name)
## Analyzing Reference dataset...
## Warning in .merge_two_Seqinfo_objects(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': M
## - in 'y': MT
## Make sure to always combine/compare objects based on the same reference
## genome (use suppressWarnings() to suppress this warning).
## Reference: DMRs with nearest genes found: 7104055 out of 7104162
## Warning: 107 DMRs in Reference had no nearest gene match
analysis2 <- analyze_dmr_distances(dmrs2, genes, dataset2_name)
## Analyzing My DMPs dataset...
## My DMPs: DMRs with nearest genes found: 8042 out of 8042
distances1 <- analysis1$distances
distances2 <- analysis2$distances
# Combine results
combined_results <- rbind(analysis1$results, analysis2$results)
# Summary statistics comparison
cat("\n=== COMPARATIVE SUMMARY STATISTICS ===\n")
##
## === COMPARATIVE SUMMARY STATISTICS ===
cat(sprintf("Dataset 1 (%s):\n", dataset1_name))
## Dataset 1 (Reference):
cat(sprintf(" Count: %d DMRs\n", length(distances1)))
## Count: 7104055 DMRs
cat(sprintf(" Median: %.0f bp\n", median(distances1)))
## Median: 0 bp
cat(sprintf(" Mean: %.0f bp\n", mean(distances1)))
## Mean: 18539 bp
cat(sprintf(" Q1: %.0f bp, Q3: %.0f bp\n", quantile(distances1, 0.25), quantile(distances1, 0.75)))
## Q1: 0 bp, Q3: 2001 bp
cat(sprintf("\nDataset 2 (%s):\n", dataset2_name))
##
## Dataset 2 (My DMPs):
cat(sprintf(" Count: %d DMRs\n", length(distances2)))
## Count: 8042 DMRs
cat(sprintf(" Median: %.0f bp\n", median(distances2)))
## Median: 0 bp
cat(sprintf(" Mean: %.0f bp\n", mean(distances2)))
## Mean: 63 bp
cat(sprintf(" Q1: %.0f bp, Q3: %.0f bp\n", quantile(distances2, 0.25), quantile(distances2, 0.75)))
## Q1: 0 bp, Q3: 0 bp
# Statistical test
wilcox_test <- wilcox.test(distances1, distances2)
cat(sprintf("\nWilcoxon rank-sum test p-value: %.2e\n", wilcox_test$p.value))
##
## Wilcoxon rank-sum test p-value: 1.62e-202
# Distance categories comparison
cat_data1 <- create_distance_categories(distances1, dataset1_name)
cat_data2 <- create_distance_categories(distances2, dataset2_name)
cat_data1$Percentage <- round(cat_data1$Count / cat_data1$Total * 100, 1)
cat_data2$Percentage <- round(cat_data2$Count / cat_data2$Total * 100, 1)
combined_categories <- rbind(cat_data1, cat_data2)
cat("\n=== DISTANCE CATEGORIES COMPARISON ===\n")
##
## === DISTANCE CATEGORIES COMPARISON ===
print(combined_categories)
## Dataset Category Count Total Percentage
## 1 Reference Overlapping (0 bp) 5052329 7104055 71.1
## 2 Reference ≤1 kb 5201746 7104055 73.2
## 3 Reference ≤5 kb 5619392 7104055 79.1
## 4 Reference ≤10 kb 5939978 7104055 83.6
## 5 Reference ≤50 kb 6719248 7104055 94.6
## 6 Reference >50 kb 384807 7104055 5.4
## 7 My DMPs Overlapping (0 bp) 6576 8042 81.8
## 8 My DMPs ≤1 kb 7927 8042 98.6
## 9 My DMPs ≤5 kb 8041 8042 100.0
## 10 My DMPs ≤10 kb 8041 8042 100.0
## 11 My DMPs ≤50 kb 8042 8042 100.0
## 12 My DMPs >50 kb 0 8042 0.0
# Create comparative visualizations
cat("\nCreating comparative plots...\n")
##
## Creating comparative plots...
# Prepare data for plotting
plot_data <- data.frame(
distance = c(distances1, distances2),
dataset = c(rep(dataset1_name, length(distances1)),
rep(dataset2_name, length(distances2)))
)
# 1. Comparative histogram
p1 <- ggplot(plot_data, aes(x = distance, fill = dataset)) +
geom_histogram(bins = 50, alpha = 0.7, position = "identity") +
scale_x_log10(labels = scales::comma) +
scale_fill_manual(values = c("steelblue", "coral")) +
labs(title = "Comparative Distribution of DMR-to-Gene Distances",
subtitle = paste("Dataset 1: n =", length(distances1),
"| Dataset 2: n =", length(distances2)),
x = "Distance to nearest gene (bp, log scale)",
y = "Count",
fill = "Dataset") +
theme_minimal() +
theme(legend.position = "top")
# 2. Comparative density plots
p2 <- ggplot(plot_data, aes(x = distance, fill = dataset)) +
geom_density(alpha = 0.5) +
scale_x_log10(labels = scales::comma) +
scale_fill_manual(values = c("steelblue", "coral")) +
labs(title = "Comparative Density Distribution",
x = "Distance to nearest gene (bp, log scale)",
y = "Density",
fill = "Dataset") +
theme_minimal() +
theme(legend.position = "top")
# 3. Side-by-side boxplots
p3 <- ggplot(plot_data, aes(x = dataset, y = distance, fill = dataset)) +
geom_boxplot(alpha = 0.7) +
scale_y_log10(labels = scales::comma) +
scale_fill_manual(values = c("steelblue", "coral")) +
labs(title = "Distance Distribution Comparison",
subtitle = paste("Wilcoxon test p-value:", format(wilcox_test$p.value, scientific = TRUE)),
x = "Dataset",
y = "Distance (bp, log scale)") +
theme_minimal() +
theme(legend.position = "none")
# 4. Cumulative distribution comparison
distances1_sorted <- sort(distances1)
distances2_sorted <- sort(distances2)
cum_data <- data.frame(
distance = c(distances1_sorted, distances2_sorted),
cumulative = c((1:length(distances1_sorted)) / length(distances1_sorted),
(1:length(distances2_sorted)) / length(distances2_sorted)),
dataset = c(rep(dataset1_name, length(distances1_sorted)),
rep(dataset2_name, length(distances2_sorted)))
)
p4 <- ggplot(cum_data, aes(x = distance, y = cumulative, color = dataset)) +
geom_line(size = 1) +
scale_x_log10(labels = scales::comma) +
scale_y_continuous(labels = scales::percent) +
scale_color_manual(values = c("steelblue", "coral")) +
labs(title = "Cumulative Distribution Comparison",
x = "Distance to nearest gene (bp, log scale)",
y = "Cumulative percentage of DMRs",
color = "Dataset") +
theme_minimal() +
theme(legend.position = "top")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# 5. Category comparison barplot
p5 <- ggplot(combined_categories, aes(x = Category, y = Percentage, fill = Dataset)) +
geom_bar(stat = "identity", position = "dodge", alpha = 0.8) +
scale_fill_manual(values = c("steelblue", "coral")) +
labs(title = "Distance Categories Comparison",
x = "Distance Category",
y = "Percentage of DMRs",
fill = "Dataset") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top")
# Display plots
print(p1)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_bin()`).

print(p2)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_density()`).

print(p3)
## Warning in scale_y_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

print(p4)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.

print(p5)

# Create a summary plot combining key comparisons
p_summary <- grid.arrange(p3, p4, ncol = 2,
top = "DMR Distance Distribution Summary")
## Warning in scale_y_log10(labels = scales::comma): log-10 transformation introduced infinite values.
## Removed 5058905 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.

# Save results
cat("\nSaving results...\n")
##
## Saving results...
write.csv(combined_results, "comparative_dmr_gene_distances.csv", row.names = FALSE)
write.csv(combined_categories, "comparative_distance_summary.csv", row.names = FALSE)
# Save plots
ggsave("comparative_histogram.png", p1, width = 12, height = 8, dpi = 300)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggsave("comparative_density.png", p2, width = 12, height = 8, dpi = 300)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_density()`).
ggsave("comparative_boxplot.png", p3, width = 10, height = 8, dpi = 300)
## Warning in scale_y_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
ggsave("comparative_cumulative.png", p4, width = 12, height = 8, dpi = 300)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
ggsave("comparative_categories.png", p5, width = 12, height = 8, dpi = 300)
ggsave("comparative_summary.png", p_summary, width = 16, height = 8, dpi = 300)
# Additional analysis: overlap between datasets
cat("\n=== OVERLAP ANALYSIS ===\n")
##
## === OVERLAP ANALYSIS ===
overlap <- findOverlaps(dmrs1, dmrs2)
overlapping_dmrs1 <- unique(queryHits(overlap))
overlapping_dmrs2 <- unique(subjectHits(overlap))
cat(sprintf("DMRs overlapping between datasets:\n"))
## DMRs overlapping between datasets:
cat(sprintf(" Dataset 1: %d/%d (%.1f%%)\n",
length(overlapping_dmrs1), length(dmrs1),
length(overlapping_dmrs1)/length(dmrs1)*100))
## Dataset 1: 4426/7104162 (0.1%)
cat(sprintf(" Dataset 2: %d/%d (%.1f%%)\n",
length(overlapping_dmrs2), length(dmrs2),
length(overlapping_dmrs2)/length(dmrs2)*100))
## Dataset 2: 4932/8042 (61.3%)
cat("Analysis complete! Check the generated comparative plots and CSV files.\n")
## Analysis complete! Check the generated comparative plots and CSV files.
# Create comparative visualizations
cat("\nCreating comparative plots...\n")
##
## Creating comparative plots...
# Prepare data for plotting
plot_data <- data.frame(
distance = c(distances1, distances2),
dataset = c(rep(dataset1_name, length(distances1)),
rep(dataset2_name, length(distances2)))
)
# 1. Comparative histogram
p1 <- ggplot(plot_data, aes(x = distance, fill = dataset)) +
geom_histogram(bins = 50, alpha = 0.7, position = "identity") +
scale_x_log10(labels = scales::comma) +
scale_fill_manual(values = c("steelblue", "coral")) +
labs(title = "Comparative Distribution of DMR-to-Gene Distances",
subtitle = paste("Dataset 1: n =", length(distances1),
"| Dataset 2: n =", length(distances2)),
x = "Distance to nearest gene (bp, log scale)",
y = "Count",
fill = "Dataset") +
theme_minimal() +
theme(legend.position = "top")
# 2. Comparative density plots
p2 <- ggplot(plot_data, aes(x = distance, fill = dataset)) +
geom_density(alpha = 0.5) +
scale_x_log10(labels = scales::comma) +
scale_fill_manual(values = c("steelblue", "coral")) +
labs(title = "Comparative Density Distribution",
x = "Distance to nearest gene (bp, log scale)",
y = "Density",
fill = "Dataset") +
theme_minimal() +
theme(legend.position = "top")
# 3. Side-by-side boxplots
p3 <- ggplot(plot_data, aes(x = dataset, y = distance, fill = dataset)) +
geom_boxplot(alpha = 0.7) +
scale_y_log10(labels = scales::comma) +
scale_fill_manual(values = c("steelblue", "coral")) +
labs(title = "Distance Distribution Comparison",
subtitle = paste("Wilcoxon test p-value:", format(wilcox_test$p.value, scientific = TRUE)),
x = "Dataset",
y = "Distance (bp, log scale)") +
theme_minimal() +
theme(legend.position = "none")
# 4. Cumulative distribution comparison
distances1_sorted <- sort(distances1)
distances2_sorted <- sort(distances2)
cum_data <- data.frame(
distance = c(distances1_sorted, distances2_sorted),
cumulative = c((1:length(distances1_sorted)) / length(distances1_sorted),
(1:length(distances2_sorted)) / length(distances2_sorted)),
dataset = c(rep(dataset1_name, length(distances1_sorted)),
rep(dataset2_name, length(distances2_sorted)))
)
p4 <- ggplot(cum_data, aes(x = distance, y = cumulative, color = dataset)) +
geom_line(size = 1) +
scale_x_log10(labels = scales::comma) +
scale_y_continuous(labels = scales::percent) +
scale_color_manual(values = c("steelblue", "coral")) +
labs(title = "Cumulative Distribution Comparison",
x = "Distance to nearest gene (bp, log scale)",
y = "Cumulative percentage of DMRs",
color = "Dataset") +
theme_minimal() +
theme(legend.position = "top")
# 5. Category comparison barplot
p5 <- ggplot(combined_categories, aes(x = Category, y = Percentage, fill = Dataset)) +
geom_bar(stat = "identity", position = "dodge", alpha = 0.8) +
scale_fill_manual(values = c("steelblue", "coral")) +
labs(title = "Distance Categories Comparison",
x = "Distance Category",
y = "Percentage of DMRs",
fill = "Dataset") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top")
# Display plots
print(p1)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_bin()`).

print(p2)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_density()`).

print(p3)
## Warning in scale_y_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

print(p4)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.

print(p5)

# Create a summary plot combining key comparisons
p_summary <- grid.arrange(p3, p4, ncol = 2,
top = "DMR Distance Distribution Summary")
## Warning in scale_y_log10(labels = scales::comma): log-10 transformation introduced infinite values.
## Removed 5058905 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.

# Save results
cat("\nSaving results...\n")
##
## Saving results...
write.csv(combined_results, "comparative_dmr_gene_distances.csv", row.names = FALSE)
write.csv(combined_categories, "comparative_distance_summary.csv", row.names = FALSE)
# Save plots
ggsave("comparative_histogram.png", p1, width = 12, height = 8, dpi = 300)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggsave("comparative_density.png", p2, width = 12, height = 8, dpi = 300)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_density()`).
ggsave("comparative_boxplot.png", p3, width = 10, height = 8, dpi = 300)
## Warning in scale_y_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
## Warning: Removed 5058905 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
ggsave("comparative_cumulative.png", p4, width = 12, height = 8, dpi = 300)
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation
## introduced infinite values.
ggsave("comparative_categories.png", p5, width = 12, height = 8, dpi = 300)
ggsave("comparative_summary.png", p_summary, width = 16, height = 8, dpi = 300)
# Additional analysis: overlap between datasets
cat("\n=== OVERLAP ANALYSIS ===\n")
##
## === OVERLAP ANALYSIS ===
overlap <- findOverlaps(dmrs1, dmrs2)
overlapping_dmrs1 <- unique(queryHits(overlap))
overlapping_dmrs2 <- unique(subjectHits(overlap))
cat(sprintf("DMRs overlapping between datasets:\n"))
## DMRs overlapping between datasets:
cat(sprintf(" Dataset 1: %d/%d (%.1f%%)\n",
length(overlapping_dmrs1), length(dmrs1),
length(overlapping_dmrs1)/length(dmrs1)*100))
## Dataset 1: 4426/7104162 (0.1%)
cat(sprintf(" Dataset 2: %d/%d (%.1f%%)\n",
length(overlapping_dmrs2), length(dmrs2),
length(overlapping_dmrs2)/length(dmrs2)*100))
## Dataset 2: 4932/8042 (61.3%)
cat("Analysis complete! Check the generated comparative plots and CSV files.\n")
## Analysis complete! Check the generated comparative plots and CSV files.