library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(readr)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0.9000 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
library(ggVennDiagram)
## Warning: package 'ggVennDiagram' was built under R version 4.3.2
##
## Attaching package: 'ggVennDiagram'
##
## The following object is masked from 'package:tidyr':
##
## unite
updated_Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION_UNIQUE <- read.csv("updated.Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION.UNIQUE.csv", row.names = 1)
# Load counts matrix
# Replace with your actual R object name if different
Astral.BV2.Turbo <- updated_Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION_UNIQUE
counts_mat <- Astral.BV2.Turbo
# ------------------------------------------
# Load Metadata
# Make sure your path is correct!
Astral_Metadata <- read_csv("~/Desktop/ERIC.NEWDATA.ANALYSIS.11JULY/ASTRAL.11JULY/Astral.Metadata.csv")
## New names:
## Rows: 28 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): ...1, Sample, Experiment, Group, Beads, Replicates
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
Astral_Metadata <- as.data.frame(Astral_Metadata)
# ------------------------------------------
# Clean metadata to match count matrix column names
meta <- Astral_Metadata %>%
mutate(
Sample_clean = gsub("\\.", "_", Sample), # Replace . with _
Sample_clean = gsub("\\s+", "", Sample_clean) # Remove spaces
)
# ------------------------------------------
# Helper function: Compute log2FC
# ------------------------------------------
calculate_log2fc <- function(bv2_samples, turbo_samples, counts_mat) {
mean_bv2 <- rowMeans(counts_mat[, bv2_samples, drop = FALSE], na.rm = TRUE)
mean_turbo <- rowMeans(counts_mat[, turbo_samples, drop = FALSE], na.rm = TRUE)
log2fc <- log2(mean_bv2 + 1e-6) - log2(mean_turbo + 1e-6)
return(log2fc)
}
# ------------------------------------------
# Define Comparisons
# ------------------------------------------
experiments <- c("Exp.3", "Exp.4", "Exp.5")
beads_conditions <- c("ON.Beads", "OFF.Beads")
replicates <- c("Rep1", "Rep2")
# ------------------------------------------
# Compute log2FC for all combinations
# ------------------------------------------
fc_results <- list()
for (exp in experiments) {
for (beads in beads_conditions) {
for (rep in replicates) {
# Label for this comparison
label <- paste(exp, beads, rep, sep = "_")
# Filter metadata for this combination
subset_meta <- meta %>%
filter(Experiment == exp, Beads == beads, Replicates == rep)
# Extract BV2 and Turbo samples using cleaned names
bv2_samples <- subset_meta$Sample_clean[grepl("Bv2", subset_meta$Sample_clean, ignore.case = TRUE)]
turbo_samples <- subset_meta$Sample_clean[grepl("Turbo", subset_meta$Sample_clean, ignore.case = TRUE)]
# Check if samples exist
if (length(bv2_samples) == 0 | length(turbo_samples) == 0) {
warning(paste("No samples found for", label))
fc_results[[label]] <- rep(NA, nrow(counts_mat))
} else {
fc_results[[label]] <- calculate_log2fc(bv2_samples, turbo_samples, counts_mat)
}
}
}
}
# ------------------------------------------
# Compute Fold-Change (FC) and Direction
# ------------------------------------------
fc_results_fc <- lapply(fc_results, function(x) 2^x)
fc_results_dir <- lapply(fc_results, function(x) {
case_when(
is.na(x) ~ NA_character_,
x > 0 ~ "Up",
x < 0 ~ "Down",
TRUE ~ "NoChange"
)
})
# ------------------------------------------
# Combine ALL Results into One DataFrame
# ------------------------------------------
combined_df <- data.frame(Gene = rownames(counts_mat))
for (name in names(fc_results)) {
combined_df[[paste0(name, "_log2FC")]] <- fc_results[[name]]
combined_df[[paste0(name, "_FC")]] <- fc_results_fc[[name]]
combined_df[[paste0(name, "_Direction")]] <- fc_results_dir[[name]]
}
# ------------------------------------------
# Write the final combined CSV
# ------------------------------------------
write.csv(combined_df, "Fold_Changes_log2FC_FC_Direction.csv", row.names = FALSE)
# ------------------------------------------
# Load Libraries
# ------------------------------------------
library(dplyr)
library(readr)
# ------------------------------------------
# Load counts matrix
Astral.BV2.Turbo <- updated_Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION_UNIQUE
counts_mat <- Astral.BV2.Turbo
# ------------------------------------------
# Load Metadata
Astral_Metadata <- read_csv("~/Desktop/ERIC.NEWDATA.ANALYSIS.11JULY/ASTRAL.11JULY/Astral.Metadata.csv")
## New names:
## Rows: 28 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): ...1, Sample, Experiment, Group, Beads, Replicates
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
Astral_Metadata <- as.data.frame(Astral_Metadata)
# ------------------------------------------
# Clean metadata
meta <- Astral_Metadata %>%
mutate(
Sample_clean = gsub("\\.", "_", Sample),
Sample_clean = gsub("\\s+", "", Sample_clean)
)
# ------------------------------------------
# Helper function: Compute log2FC
calculate_log2fc <- function(bv2_samples, turbo_samples, counts_mat) {
mean_bv2 <- rowMeans(counts_mat[, bv2_samples, drop = FALSE], na.rm = TRUE)
mean_turbo <- rowMeans(counts_mat[, turbo_samples, drop = FALSE], na.rm = TRUE)
log2fc <- log2(mean_bv2 + 1e-6) - log2(mean_turbo + 1e-6)
return(log2fc)
}
# ------------------------------------------
# Define conditions
experiments <- c("Exp.3", "Exp.4", "Exp.5")
beads_conditions <- c("ON.Beads", "OFF.Beads")
replicates <- c("Rep1", "Rep2")
# ------------------------------------------
# Compute log2FC
fc_results <- list()
for (exp in experiments) {
for (beads in beads_conditions) {
for (rep in replicates) {
label <- paste(exp, beads, rep, sep = "_")
subset_meta <- meta %>%
filter(Experiment == exp, Beads == beads, Replicates == rep)
bv2_samples <- subset_meta$Sample_clean[grepl("Bv2", subset_meta$Sample_clean, ignore.case = TRUE)]
turbo_samples <- subset_meta$Sample_clean[grepl("Turbo", subset_meta$Sample_clean, ignore.case = TRUE)]
if (length(bv2_samples) == 0 | length(turbo_samples) == 0) {
warning(paste("No samples found for", label))
fc_results[[label]] <- rep(NA, nrow(counts_mat))
} else {
fc_results[[label]] <- calculate_log2fc(bv2_samples, turbo_samples, counts_mat)
}
}
}
}
# ------------------------------------------
# Compute Fold-Change (FC) and Direction
# ------------------------------------------
fc_results_fc <- lapply(fc_results, function(x) 2^x)
fc_results_dir <- lapply(fc_results, function(x) {
case_when(
is.na(x) ~ NA_character_,
x > 0 ~ "Up",
x < 0 ~ "Down",
TRUE ~ "NoChange"
)
})
# ------------------------------------------
# Combine into one dataframe
# ------------------------------------------
combined_df <- data.frame(Gene = rownames(counts_mat))
for (name in names(fc_results)) {
combined_df[[paste0(name, "_log2FC")]] <- fc_results[[name]]
combined_df[[paste0(name, "_FC")]] <- fc_results_fc[[name]]
combined_df[[paste0(name, "_Direction")]] <- fc_results_dir[[name]]
}
# ------------------------------------------
# Write CSV
# ------------------------------------------
write.csv(combined_df, "Fold_Changes_log2FC_FC_Direction.csv", row.names = FALSE)
library(readr)
combined_df <- read_csv("Fold_Changes_log2FC_FC_Direction.csv")
## Rows: 7200 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): Gene, Exp.3_ON.Beads_Rep1_Direction, Exp.3_ON.Beads_Rep2_Direction...
## dbl (24): Exp.3_ON.Beads_Rep1_log2FC, Exp.3_ON.Beads_Rep1_FC, Exp.3_ON.Beads...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Define conditions
# -------------------------------------------
experiments <- c("Exp.3", "Exp.4", "Exp.5")
beads_conditions <- c("ON.Beads", "OFF.Beads")
# -------------------------------------------
# Create output directory
# -------------------------------------------
dir.create("plots2", showWarnings = FALSE)
# Define Experiments and Beads
# -------------------------------------------
experiments <- c("Exp.3", "Exp.4", "Exp.5")
beads_conditions <- c("ON.Beads", "OFF.Beads")
# -------------------------------------------
# Create Output Directory
# -------------------------------------------
dir.create("plots", showWarnings = FALSE)
# -------------------------------------------
# Loop over Experiments and Beads
# -------------------------------------------
for (exp in experiments) {
for (beads in beads_conditions) {
message("Processing: ", exp, " ", beads)
# -----------------------
# Build column base
base1 <- paste0(exp, "_", beads, "_Rep1")
base2 <- paste0(exp, "_", beads, "_Rep2")
# -----------------------
# Extract relevant columns
df <- combined_df %>%
select(
Gene,
log2FC_1 = paste0(base1, "_log2FC"),
Dir_1 = paste0(base1, "_Direction"),
log2FC_2 = paste0(base2, "_log2FC"),
Dir_2 = paste0(base2, "_Direction")
)
# -----------------------
# Count Up/Down calls
count_rep1 <- df %>%
count(Dir_1) %>%
complete(Dir_1 = c("Up", "Down"), fill = list(n=0))
count_rep2 <- df %>%
count(Dir_2) %>%
complete(Dir_2 = c("Up", "Down"), fill = list(n=0))
summary_label <- paste0(
exp, " ", beads, "\n",
"Replicate 1 ... Up: ", count_rep1$n[count_rep1$Dir_1 == "Up"],
" | Down: ", count_rep1$n[count_rep1$Dir_1 == "Down"],
"\nReplicate 2 ... Up: ", count_rep2$n[count_rep2$Dir_2 == "Up"],
" | Down: ", count_rep2$n[count_rep2$Dir_2 == "Down"]
)
# -----------------------
# Prepare data for Barplot
df_long <- df %>%
pivot_longer(cols = c(log2FC_1, log2FC_2), names_to = "Replicate", values_to = "log2FC") %>%
mutate(Replicate = ifelse(grepl("1", Replicate), "Replicate 1", "Replicate 2"),
Direction = case_when(
log2FC > 0 ~ "Up-regulated",
log2FC < 0 ~ "Down-regulated",
TRUE ~ NA_character_
)) %>%
filter(!is.na(Direction))
# -----------------------
# Barplot: top genes
top_genes <- df_long %>%
group_by(Replicate, Direction) %>%
slice_max(abs(log2FC), n = 10) %>%
ungroup()
p <- ggplot(top_genes, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
geom_col(width = 0.7) +
facet_wrap(~Replicate, scales = "free") +
coord_flip() +
scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
theme_minimal() +
labs(title = paste0(exp, " ", beads),
subtitle = summary_label,
y = "log2 Fold Change",
x = "Protein Symbol",
fill = "Direction")
ggsave(filename = paste0("plots/Barplot_", exp, "_", beads, ".png"), plot = p, width = 10, height = 6)
# -----------------------
# Prepare sets for Venn
sets <- list(
REP1_UP = df$Gene[df$Dir_1 == "Up"],
REP2_UP = df$Gene[df$Dir_2 == "Up"],
REP1_DOWN = df$Gene[df$Dir_1 == "Down"],
REP2_DOWN = df$Gene[df$Dir_2 == "Down"]
)
# Compute all overlaps
get_overlap <- function(indices) Reduce(intersect, sets[indices])
n12 <- length(get_overlap(c("REP1_UP", "REP2_UP")))
n13 <- length(get_overlap(c("REP1_UP", "REP1_DOWN")))
n14 <- length(get_overlap(c("REP1_UP", "REP2_DOWN")))
n23 <- length(get_overlap(c("REP2_UP", "REP1_DOWN")))
n24 <- length(get_overlap(c("REP2_UP", "REP2_DOWN")))
n34 <- length(get_overlap(c("REP1_DOWN", "REP2_DOWN")))
n123 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN")))
n124 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP2_DOWN")))
n134 <- length(get_overlap(c("REP1_UP", "REP1_DOWN", "REP2_DOWN")))
n234 <- length(get_overlap(c("REP2_UP", "REP1_DOWN", "REP2_DOWN")))
n1234 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN", "REP2_DOWN")))
# -----------------------
# Draw the Venn
venn_plot_file <- paste0("plots/Venn_", exp, "_", beads, ".png")
png(venn_plot_file, width = 800, height = 800)
draw.quad.venn(
area1 = length(sets$REP1_UP),
area2 = length(sets$REP2_UP),
area3 = length(sets$REP1_DOWN),
area4 = length(sets$REP2_DOWN),
n12 = n12,
n13 = n13,
n14 = n14,
n23 = n23,
n24 = n24,
n34 = n34,
n123 = n123,
n124 = n124,
n134 = n134,
n234 = n234,
n1234 = n1234,
category = names(sets),
fill = c("red", "pink", "blue", "lightblue"),
cex = 1.5, cat.cex = 1.5, main = paste0(exp, " ", beads, ": Up/Down Rep1 & Rep2")
)
dev.off()
}
}
## Processing: Exp.3 ON.Beads
## Processing: Exp.3 OFF.Beads
## Processing: Exp.4 ON.Beads
## Processing: Exp.4 OFF.Beads
## Processing: Exp.5 ON.Beads
## Processing: Exp.5 OFF.Beads
Exp_3_OFF_counts <- read.csv("~/Desktop/ERIC.NEWDATA.ANALYSIS.11JULY/ASTRAL.11JULY/Exp.3.OFF.counts.csv", row.names = 1)
dim(Exp_3_OFF_counts)
## [1] 7200 4
Exp3_meta <- read_csv("~/Desktop/ERIC.NEWDATA.ANALYSIS.11JULY/ASTRAL.11JULY/Exp3.meta.csv")
## New names:
## Rows: 4 Columns: 3
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): ...1, Replicate, Group
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
Exp3_meta <- as.data.frame(Exp3_meta)
row.names(Exp3_meta) <- colnames(Exp_3_OFF_counts)
# Get sample columns
bv2_samples <- Exp3_meta %>% filter(Group == "BV2") %>% pull(...1)
turbo_samples <- Exp3_meta %>% filter(Group == "Turbo") %>% pull(...1)
# ------------------------------------------
# Compute log2FC
log2fc_df <- data.frame(Gene = rownames(Exp_3_OFF_counts))
for (rep in 1:2) {
bv2_col <- bv2_samples[rep]
turbo_col <- turbo_samples[rep]
log2fc <- log2(Exp_3_OFF_counts[[bv2_col]] + 1e-6) - log2(Exp_3_OFF_counts[[turbo_col]] + 1e-6)
log2fc_df[[paste0("Rep", rep, "_log2FC")]] <- log2fc
log2fc_df[[paste0("Rep", rep, "_Direction")]] <- ifelse(
is.na(log2fc), NA,
ifelse(log2fc > 0, "Up", "Down")
)
}
# ------------------------------------------
# Count Up/Down
updown_counts <- log2fc_df %>%
summarise(
Rep1_Up = sum(Rep1_Direction == "Up", na.rm = TRUE),
Rep1_Down = sum(Rep1_Direction == "Down", na.rm = TRUE),
Rep2_Up = sum(Rep2_Direction == "Up", na.rm = TRUE),
Rep2_Down = sum(Rep2_Direction == "Down", na.rm = TRUE)
)
print(updown_counts)
## Rep1_Up Rep1_Down Rep2_Up Rep2_Down
## 1 3854 3346 1356 5844
# ------------------------------------------
# Barplot Data
plot_data <- log2fc_df %>%
pivot_longer(
cols = c(Rep1_log2FC, Rep2_log2FC, Rep1_Direction, Rep2_Direction),
names_to = c("Replicate", ".value"),
names_pattern = "(Rep\\d)_(.*)"
) %>%
filter(Direction %in% c("Up", "Down"))
# Top 10 up/down per replicate
top_plot_data <- plot_data %>%
group_by(Replicate, Direction) %>%
slice_max(abs(log2FC), n = 10) %>%
ungroup() %>%
mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))
# ------------------------------------------
# Make Barplot
barplot <- ggplot(top_plot_data, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
geom_col(width = 0.7) +
facet_wrap(~Replicate, scales = "free") +
coord_flip() +
scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
theme_minimal() +
labs(
title = "Exp.3 OFF Beads - BV2 vs Turbo",
subtitle = paste0(
"Replicate 1 ... Up: ", updown_counts$Rep1_Up, " | Down: ", updown_counts$Rep1_Down, "\n",
"Replicate 2 ... Up: ", updown_counts$Rep2_Up, " | Down: ", updown_counts$Rep2_Down
),
y = "log2 Fold Change",
x = "Protein Symbol",
fill = "Direction"
)
# ------------------------------------------
# Venn Diagram Sets
sets <- list(
REP1_UP = log2fc_df$Gene[log2fc_df$Rep1_Direction == "Up"],
REP2_UP = log2fc_df$Gene[log2fc_df$Rep2_Direction == "Up"],
REP1_DOWN = log2fc_df$Gene[log2fc_df$Rep1_Direction == "Down"],
REP2_DOWN = log2fc_df$Gene[log2fc_df$Rep2_Direction == "Down"]
)
# Compute overlaps
get_overlap <- function(indices) Reduce(intersect, sets[indices])
n12 <- length(get_overlap(c("REP1_UP", "REP2_UP")))
n13 <- length(get_overlap(c("REP1_UP", "REP1_DOWN")))
n14 <- length(get_overlap(c("REP1_UP", "REP2_DOWN")))
n23 <- length(get_overlap(c("REP2_UP", "REP1_DOWN")))
n24 <- length(get_overlap(c("REP2_UP", "REP2_DOWN")))
n34 <- length(get_overlap(c("REP1_DOWN", "REP2_DOWN")))
n123 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN")))
n124 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP2_DOWN")))
n134 <- length(get_overlap(c("REP1_UP", "REP1_DOWN", "REP2_DOWN")))
n234 <- length(get_overlap(c("REP2_UP", "REP1_DOWN", "REP2_DOWN")))
n1234 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN", "REP2_DOWN")))
# ------------------------------------------
# Save Plots
dir.create("EXP3.OFFBeads_plots", showWarnings = FALSE)
ggsave("EXP3.OFFBeads_plots/Exp3_OFF_Barplot.png", barplot, width = 10, height = 6)
png("EXP3.OFFBeads_plots/Exp3_OFF_Venn.png", width = 800, height = 800)
draw.quad.venn(
area1 = length(sets$REP1_UP),
area2 = length(sets$REP2_UP),
area3 = length(sets$REP1_DOWN),
area4 = length(sets$REP2_DOWN),
n12 = n12,
n13 = n13,
n14 = n14,
n23 = n23,
n24 = n24,
n34 = n34,
n123 = n123,
n124 = n124,
n134 = n134,
n234 = n234,
n1234 = n1234,
category = names(sets),
fill = c("red", "pink", "blue", "lightblue"),
cex = 1.5,
cat.cex = 1.5,
main = "Exp.3-OFF Beads:UP/Down Rep1 & Rep2"
)
## (polygon[GRID.polygon.943], polygon[GRID.polygon.944], polygon[GRID.polygon.945], polygon[GRID.polygon.946], polygon[GRID.polygon.947], polygon[GRID.polygon.948], polygon[GRID.polygon.949], polygon[GRID.polygon.950], text[GRID.text.951], text[GRID.text.952], text[GRID.text.953], text[GRID.text.954], text[GRID.text.955], text[GRID.text.956], text[GRID.text.957], text[GRID.text.958], text[GRID.text.959], text[GRID.text.960], text[GRID.text.961], text[GRID.text.962], text[GRID.text.963], text[GRID.text.964], text[GRID.text.965], text[GRID.text.966], text[GRID.text.967], text[GRID.text.968], text[GRID.text.969])
dev.off()
## quartz_off_screen
## 2
Astral.BV2.Turbo <- updated_Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION_UNIQUE
counts_matrix <- Astral.BV2.Turbo
#Get Exp.3 ON
exp3_on_cols <- grep("^Exp_3.*On$", colnames(counts_matrix), value = TRUE)
Exp3_ON_counts <- counts_matrix[, exp3_on_cols]
#Same for Exp.4
exp4_off_cols <- grep("^Exp_4.*Off$", colnames(counts_matrix), value = TRUE)
Exp4_OFF_counts <- counts_matrix[, exp4_off_cols]
exp4_on_cols <- grep("^Exp_4.*On$", colnames(counts_matrix), value = TRUE)
Exp4_ON_counts <- counts_matrix[, exp4_on_cols]
#Same for Exp.5
exp5_off_cols <- grep("^Exp_5.*Off$", colnames(counts_matrix), value = TRUE)
Exp5_OFF_counts <- counts_matrix[, exp5_off_cols]
exp5_on_cols <- grep("^Exp_5.*On$", colnames(counts_matrix), value = TRUE)
Exp5_ON_counts <- counts_matrix[, exp5_on_cols]
write.csv(Exp3_ON_counts, "Exp3_ON_counts.csv")
write.csv(Exp4_OFF_counts, "Exp4_OFF_counts.csv")
write.csv(Exp4_ON_counts, "Exp4_ON_counts.csv")
write.csv(Exp5_OFF_counts, "Exp5_OFF_counts.csv")
write.csv(Exp5_ON_counts, "Exp5_ON_counts.csv")
##Subset metadata
# Assume your metadata is in:
meta <- Astral_Metadata
# ------------------------------------------
# Subset: Exp.3 OFF.Beads
meta_Exp3_OFF <- meta %>%
filter(Experiment == "Exp.3", Beads == "OFF.Beads")
# ------------------------------------------
# Subset: Exp.3 ON.Beads
meta_Exp3_ON <- meta %>%
filter(Experiment == "Exp.3", Beads == "ON.Beads")
# ------------------------------------------
# Subset: Exp.4 OFF.Beads
meta_Exp4_OFF <- meta %>%
filter(Experiment == "Exp.4", Beads == "OFF.Beads")
# ------------------------------------------
# Subset: Exp.4 ON.Beads
meta_Exp4_ON <- meta %>%
filter(Experiment == "Exp.4", Beads == "ON.Beads")
# ------------------------------------------
# Subset: Exp.5 OFF.Beads
meta_Exp5_OFF <- meta %>%
filter(Experiment == "Exp.5", Beads == "OFF.Beads")
# ------------------------------------------
# Subset: Exp.5 ON.Beads
meta_Exp5_ON <- meta %>%
filter(Experiment == "Exp.5", Beads == "ON.Beads")
##save files
write.csv(meta_Exp3_OFF, "meta_Exp3_OFF.csv", row.names = FALSE)
write.csv(meta_Exp3_ON, "meta_Exp3_ON.csv", row.names = FALSE)
write.csv(meta_Exp4_OFF, "meta_Exp4_OFF.csv", row.names = FALSE)
write.csv(meta_Exp4_ON, "meta_Exp4_ON.csv", row.names = FALSE)
write.csv(meta_Exp5_OFF, "meta_Exp5_OFF.csv", row.names = FALSE)
write.csv(meta_Exp5_ON, "meta_Exp5_ON.csv", row.names = FALSE)
#COUNT
dim(Exp3_ON_counts)
## [1] 7200 4
##Metadata
dim(meta_Exp3_ON)
## [1] 4 6
# ------------------------------------------
# Check data shapes
# ------------------------------------------
print(dim(Exp3_ON_counts))
## [1] 7200 4
print(dim(meta_Exp3_ON))
## [1] 4 6
meta_Exp3_ON <- meta_Exp3_ON %>%
mutate(
Sample_clean = gsub("\\.", "_", ...1),
Group = case_when(
grepl("Bv2", Sample_clean, ignore.case = TRUE) ~ "BV2",
grepl("Turbo", Sample_clean, ignore.case = TRUE) ~ "Turbo",
TRUE ~ NA_character_
)
)
# ------------------------------------------
# Make sure metadata sample names match count columns
# ------------------------------------------
#stopifnot(all(colnames(Exp3_ON_counts) %in% meta_Exp3_ON$Sample_clean))
# ------------------------------------------
# Get BV2 and Turbo sample columns
# ------------------------------------------
bv2_samples <- meta_Exp3_ON %>%
filter(Group == "BV2") %>%
pull(Sample_clean)
turbo_samples <- meta_Exp3_ON %>%
filter(Group == "Turbo") %>%
pull(Sample_clean)
# ------------------------------------------
# Compute log2FC
# ------------------------------------------
log2fc_df <- data.frame(Gene = rownames(Exp3_ON_counts))
for (rep in 1:2) {
bv2_col <- bv2_samples[rep]
turbo_col <- turbo_samples[rep]
log2fc <- log2(Exp3_ON_counts[[bv2_col]] + 1e-6) - log2(Exp3_ON_counts[[turbo_col]] + 1e-6)
log2fc_df[[paste0("Rep", rep, "_log2FC")]] <- log2fc
log2fc_df[[paste0("Rep", rep, "_Direction")]] <- ifelse(
is.na(log2fc), NA,
ifelse(log2fc > 0, "Up", "Down")
)
}
# ------------------------------------------
# Count Up/Down
# ------------------------------------------
updown_counts <- log2fc_df %>%
summarise(
Rep1_Up = sum(Rep1_Direction == "Up", na.rm = TRUE),
Rep1_Down = sum(Rep1_Direction == "Down", na.rm = TRUE),
Rep2_Up = sum(Rep2_Direction == "Up", na.rm = TRUE),
Rep2_Down = sum(Rep2_Direction == "Down", na.rm = TRUE)
)
print(updown_counts)
## Rep1_Up Rep1_Down Rep2_Up Rep2_Down
## 1 3829 3371 4654 2546
# ------------------------------------------
# Prepare Barplot Data
# ------------------------------------------
plot_data <- log2fc_df %>%
pivot_longer(
cols = c(Rep1_log2FC, Rep2_log2FC, Rep1_Direction, Rep2_Direction),
names_to = c("Replicate", ".value"),
names_pattern = "(Rep\\d)_(.*)"
) %>%
filter(Direction %in% c("Up", "Down"))
# Get top 10 genes by absolute log2FC per replicate/direction
top_plot_data <- plot_data %>%
group_by(Replicate, Direction) %>%
slice_max(abs(log2FC), n = 10, with_ties = FALSE) %>%
ungroup() %>%
mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))
# ------------------------------------------
# Create Barplot
# ------------------------------------------
barplot <- ggplot(top_plot_data, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
geom_col(width = 0.7) +
facet_wrap(~Replicate, scales = "free") +
coord_flip() +
scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
theme_minimal() +
labs(
title = "Exp.3 ON Beads - BV2 vs Turbo",
subtitle = paste0(
"Replicate 1 ... Up: ", updown_counts$Rep1_Up, " | Down: ", updown_counts$Rep1_Down, "\n",
"Replicate 2 ... Up: ", updown_counts$Rep2_Up, " | Down: ", updown_counts$Rep2_Down
),
y = "log2 Fold Change",
x = "Protein Symbol",
fill = "Direction"
)
# ------------------------------------------
# Venn Diagram Sets
# ------------------------------------------
sets <- list(
REP1_UP = log2fc_df$Gene[log2fc_df$Rep1_Direction == "Up"],
REP2_UP = log2fc_df$Gene[log2fc_df$Rep2_Direction == "Up"],
REP1_DOWN = log2fc_df$Gene[log2fc_df$Rep1_Direction == "Down"],
REP2_DOWN = log2fc_df$Gene[log2fc_df$Rep2_Direction == "Down"]
)
# Compute intersections
get_overlap <- function(indices) Reduce(intersect, sets[indices])
n12 <- length(get_overlap(c("REP1_UP", "REP2_UP")))
n13 <- length(get_overlap(c("REP1_UP", "REP1_DOWN")))
n14 <- length(get_overlap(c("REP1_UP", "REP2_DOWN")))
n23 <- length(get_overlap(c("REP2_UP", "REP1_DOWN")))
n24 <- length(get_overlap(c("REP2_UP", "REP2_DOWN")))
n34 <- length(get_overlap(c("REP1_DOWN", "REP2_DOWN")))
n123 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN")))
n124 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP2_DOWN")))
n134 <- length(get_overlap(c("REP1_UP", "REP1_DOWN", "REP2_DOWN")))
n234 <- length(get_overlap(c("REP2_UP", "REP1_DOWN", "REP2_DOWN")))
n1234 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN", "REP2_DOWN")))
# ------------------------------------------
# Save Plots
# ------------------------------------------
dir.create("EXP3.ONBeads_plots", showWarnings = FALSE)
ggsave("EXP3.ONBeads_plots/Exp3_ON_Barplot.png", barplot, width = 10, height = 6)
png("EXP3.ONBeads_plots/Exp3_ON_Venn.png", width = 800, height = 800)
draw.quad.venn(
area1 = length(sets$REP1_UP),
area2 = length(sets$REP2_UP),
area3 = length(sets$REP1_DOWN),
area4 = length(sets$REP2_DOWN),
n12 = n12,
n13 = n13,
n14 = n14,
n23 = n23,
n24 = n24,
n34 = n34,
n123 = n123,
n124 = n124,
n134 = n134,
n234 = n234,
n1234 = n1234,
category = names(sets),
fill = c("red", "pink", "blue", "lightblue"),
cex = 1.5,
cat.cex = 1.5,
main = "Exp.3 ON Beads: UP/Down Rep1 & Rep2"
)
## (polygon[GRID.polygon.1081], polygon[GRID.polygon.1082], polygon[GRID.polygon.1083], polygon[GRID.polygon.1084], polygon[GRID.polygon.1085], polygon[GRID.polygon.1086], polygon[GRID.polygon.1087], polygon[GRID.polygon.1088], text[GRID.text.1089], text[GRID.text.1090], text[GRID.text.1091], text[GRID.text.1092], text[GRID.text.1093], text[GRID.text.1094], text[GRID.text.1095], text[GRID.text.1096], text[GRID.text.1097], text[GRID.text.1098], text[GRID.text.1099], text[GRID.text.1100], text[GRID.text.1101], text[GRID.text.1102], text[GRID.text.1103], text[GRID.text.1104], text[GRID.text.1105], text[GRID.text.1106], text[GRID.text.1107])
dev.off()
## quartz_off_screen
## 2
Exp1.counts <- read.csv("Exp.1.counts.csv", row.names = 1)
dim(Exp1.counts)
## [1] 7200 2
EXP1.Meta <- read.csv("EXP1.Meta.csv", row.names = 1)
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on 'EXP1.Meta.csv'
EXP1.Meta
## Group
## Exp.1.Bv2_rep1_On BV2
## Exp.1.Turbo_rep1_On Turbo
row.names(EXP1.Meta) <- colnames(Exp1.counts)
# Libraries
library(ggplot2)
library(dplyr)
# Check
print(colnames(Exp1.counts))
## [1] "Exp.1.Bv2_rep1_On" "Exp.1.Turbo_rep1_On"
print(rownames(EXP1.Meta))
## [1] "Exp.1.Bv2_rep1_On" "Exp.1.Turbo_rep1_On"
# Make sure they match (they do here!)
stopifnot(all(colnames(Exp1.counts) %in% rownames(EXP1.Meta)))
# Identify columns
bv2_col <- rownames(EXP1.Meta)[EXP1.Meta$Group == "BV2"]
turbo_col <- rownames(EXP1.Meta)[EXP1.Meta$Group == "Turbo"]
print(bv2_col)
## [1] "Exp.1.Bv2_rep1_On"
print(turbo_col)
## [1] "Exp.1.Turbo_rep1_On"
# Compute log2FC
log2fc <- log2(Exp1.counts[[bv2_col]] + 1e-6) - log2(Exp1.counts[[turbo_col]] + 1e-6)
# Label direction
direction <- ifelse(log2fc > 0, "Up", "Down")
# Make dataframe
log2fc_df <- data.frame(
Gene = rownames(Exp1.counts),
log2FC = log2fc,
Direction = direction
)
head(log2fc_df)
## Gene log2FC Direction
## 1 Fcgr4 -0.50204817 Down
## 2 Qser1 0.01471213 Up
## 3 Afg2b 0.02560255 Up
## 4 Pglyrp3;Pglyrp4 -0.01945902 Down
## 5 Tbc1d25 0.30569050 Up
## 6 Mpeg1 0.22617434 Up
# Top 20 genes by absolute log2FC
top_log2fc_df <- log2fc_df %>%
arrange(desc(abs(log2FC))) %>%
slice_head(n = 20) %>%
mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))
top_up <- log2fc_df %>%
filter(Direction == "Up") %>%
slice_max(order_by = log2FC, n = 15)
top_down <- log2fc_df %>%
filter(Direction == "Down") %>%
slice_min(order_by = log2FC, n = 15)
top_plot_data <- bind_rows(top_up, top_down) %>%
mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))
# Plot
ggplot(top_plot_data, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
geom_col(width = 0.7) +
coord_flip() +
scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
theme_minimal() +
labs(
title = "Exp.1BV2 vs Turbo",
y = "log2 FC",
x = "Gene",
fill = "Direction"
)

# Load libraries
library(dplyr)
library(ggplot2)
INPUT <- read.csv("INPUT.csv", row.names = 1)
library(readr)
Input_Metadata <- read_csv("Input.Metadata.csv")
## New names:
## Rows: 2 Columns: 2
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): ...1, Group
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Inspect
head(INPUT)
## ISD S.trap
## Fcgr4 24.28207 23.84805
## Qser1 17.74668 17.78941
## Afg2b 18.95749 19.21245
## Pglyrp3;Pglyrp4 15.45789 16.51110
## Tbc1d25 12.58099 12.68490
## Mpeg1 15.94742 15.78730
Input_Metadata
## # A tibble: 2 × 2
## ...1 Group
## <chr> <chr>
## 1 ISD ISD
## 2 S-trap S-trap
# ------------------------------------------
# Extract columns
isd_col <- "ISD"
strap_col <- "S.trap"
# ------------------------------------------
# Compute log2FC
log2fc <- log2(INPUT[[strap_col]] + 1e-6) - log2(INPUT[[isd_col]] + 1e-6)
# Label direction
direction <- ifelse(log2fc > 0, "Up", "Down")
# Create dataframe
log2fc_df <- data.frame(
Gene = rownames(INPUT),
log2FC = log2fc,
Direction = direction
)
# Check results
head(log2fc_df)
## Gene log2FC Direction
## 1 Fcgr4 -0.026020172 Down
## 2 Qser1 0.003469381 Up
## 3 Afg2b 0.019273331 Up
## 4 Pglyrp3;Pglyrp4 0.095092413 Up
## 5 Tbc1d25 0.011866856 Up
## 6 Mpeg1 -0.014558238 Down
table(log2fc_df$Direction)
##
## Down Up
## 1802 5398
# ------------------------------------------
# Get top genes for plot
top_up <- log2fc_df %>%
filter(Direction == "Up") %>%
slice_max(order_by = log2FC, n = 10)
top_down <- log2fc_df %>%
filter(Direction == "Down") %>%
slice_min(order_by = log2FC, n = 10)
top_plot_data <- bind_rows(top_up, top_down) %>%
mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))
# ------------------------------------------
# Plot
ggplot(top_plot_data, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
geom_col(width = 0.7) +
coord_flip() +
scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
theme_minimal() +
labs(
title = "S-trap vs ISD - log2 Fold Change",
y = "log2 Fold Change",
x = "Gene",
fill = "Direction"
)
