Astral.BV2.vs.Turbo.11july

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(readr)
library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tidyr' was built under R version 4.3.2

## Warning: package 'purrr' was built under R version 4.3.3

## Warning: package 'lubridate' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0.9000     ✔ stringr   1.5.1     
## ✔ ggplot2   3.5.2          ✔ tibble    3.2.1     
## ✔ lubridate 1.9.4          ✔ tidyr     1.3.1     
## ✔ purrr     1.0.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(VennDiagram)

## Loading required package: grid
## Loading required package: futile.logger

library(ggVennDiagram)

## Warning: package 'ggVennDiagram' was built under R version 4.3.2

## 
## Attaching package: 'ggVennDiagram'
## 
## The following object is masked from 'package:tidyr':
## 
##     unite

updated_Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION_UNIQUE <- read.csv("updated.Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION.UNIQUE.csv", row.names = 1)

# Load counts matrix
# Replace with your actual R object name if different
Astral.BV2.Turbo <- updated_Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION_UNIQUE
counts_mat <- Astral.BV2.Turbo

# ------------------------------------------
# Load Metadata
# Make sure your path is correct!
Astral_Metadata <- read_csv("~/Desktop/ERIC.NEWDATA.ANALYSIS.11JULY/ASTRAL.11JULY/Astral.Metadata.csv")

## New names:
## Rows: 28 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): ...1, Sample, Experiment, Group, Beads, Replicates
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

Astral_Metadata <- as.data.frame(Astral_Metadata)

# ------------------------------------------
# Clean metadata to match count matrix column names
meta <- Astral_Metadata %>%
  mutate(
    Sample_clean = gsub("\\.", "_", Sample),    # Replace . with _
    Sample_clean = gsub("\\s+", "", Sample_clean)  # Remove spaces
  )

# ------------------------------------------
# Helper function: Compute log2FC
# ------------------------------------------
calculate_log2fc <- function(bv2_samples, turbo_samples, counts_mat) {
  mean_bv2 <- rowMeans(counts_mat[, bv2_samples, drop = FALSE], na.rm = TRUE)
  mean_turbo <- rowMeans(counts_mat[, turbo_samples, drop = FALSE], na.rm = TRUE)
  log2fc <- log2(mean_bv2 + 1e-6) - log2(mean_turbo + 1e-6)
  return(log2fc)
}

# ------------------------------------------
# Define Comparisons
# ------------------------------------------
experiments <- c("Exp.3", "Exp.4", "Exp.5")
beads_conditions <- c("ON.Beads", "OFF.Beads")
replicates <- c("Rep1", "Rep2")

# ------------------------------------------
# Compute log2FC for all combinations
# ------------------------------------------
fc_results <- list()

for (exp in experiments) {
  for (beads in beads_conditions) {
    for (rep in replicates) {
      
      # Label for this comparison
      label <- paste(exp, beads, rep, sep = "_")
      
      # Filter metadata for this combination
      subset_meta <- meta %>%
        filter(Experiment == exp, Beads == beads, Replicates == rep)
      
      # Extract BV2 and Turbo samples using cleaned names
      bv2_samples <- subset_meta$Sample_clean[grepl("Bv2", subset_meta$Sample_clean, ignore.case = TRUE)]
      turbo_samples <- subset_meta$Sample_clean[grepl("Turbo", subset_meta$Sample_clean, ignore.case = TRUE)]
      
      # Check if samples exist
      if (length(bv2_samples) == 0 | length(turbo_samples) == 0) {
        warning(paste("No samples found for", label))
        fc_results[[label]] <- rep(NA, nrow(counts_mat))
      } else {
        fc_results[[label]] <- calculate_log2fc(bv2_samples, turbo_samples, counts_mat)
      }
    }
  }
}

# ------------------------------------------
# Compute Fold-Change (FC) and Direction
# ------------------------------------------
fc_results_fc <- lapply(fc_results, function(x) 2^x)

fc_results_dir <- lapply(fc_results, function(x) {
  case_when(
    is.na(x) ~ NA_character_,
    x > 0 ~ "Up",
    x < 0 ~ "Down",
    TRUE ~ "NoChange"
  )
})

# ------------------------------------------
# Combine ALL Results into One DataFrame
# ------------------------------------------
combined_df <- data.frame(Gene = rownames(counts_mat))

for (name in names(fc_results)) {
  combined_df[[paste0(name, "_log2FC")]] <- fc_results[[name]]
  combined_df[[paste0(name, "_FC")]] <- fc_results_fc[[name]]
  combined_df[[paste0(name, "_Direction")]] <- fc_results_dir[[name]]
}

# ------------------------------------------
# Write the final combined CSV
# ------------------------------------------
write.csv(combined_df, "Fold_Changes_log2FC_FC_Direction.csv", row.names = FALSE)

# ------------------------------------------
# Load Libraries
# ------------------------------------------
library(dplyr)
library(readr)

# ------------------------------------------
# Load counts matrix
Astral.BV2.Turbo <- updated_Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION_UNIQUE
counts_mat <- Astral.BV2.Turbo

# ------------------------------------------
# Load Metadata
Astral_Metadata <- read_csv("~/Desktop/ERIC.NEWDATA.ANALYSIS.11JULY/ASTRAL.11JULY/Astral.Metadata.csv")

## New names:
## Rows: 28 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): ...1, Sample, Experiment, Group, Beads, Replicates
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

Astral_Metadata <- as.data.frame(Astral_Metadata)

# ------------------------------------------
# Clean metadata
meta <- Astral_Metadata %>%
  mutate(
    Sample_clean = gsub("\\.", "_", Sample),
    Sample_clean = gsub("\\s+", "", Sample_clean)
  )

# ------------------------------------------
# Helper function: Compute log2FC
calculate_log2fc <- function(bv2_samples, turbo_samples, counts_mat) {
  mean_bv2 <- rowMeans(counts_mat[, bv2_samples, drop = FALSE], na.rm = TRUE)
  mean_turbo <- rowMeans(counts_mat[, turbo_samples, drop = FALSE], na.rm = TRUE)
  log2fc <- log2(mean_bv2 + 1e-6) - log2(mean_turbo + 1e-6)
  return(log2fc)
}

# ------------------------------------------
# Define conditions
experiments <- c("Exp.3", "Exp.4", "Exp.5")
beads_conditions <- c("ON.Beads", "OFF.Beads")
replicates <- c("Rep1", "Rep2")

# ------------------------------------------
# Compute log2FC
fc_results <- list()

for (exp in experiments) {
  for (beads in beads_conditions) {
    for (rep in replicates) {
      
      label <- paste(exp, beads, rep, sep = "_")
      
      subset_meta <- meta %>%
        filter(Experiment == exp, Beads == beads, Replicates == rep)
      
      bv2_samples <- subset_meta$Sample_clean[grepl("Bv2", subset_meta$Sample_clean, ignore.case = TRUE)]
      turbo_samples <- subset_meta$Sample_clean[grepl("Turbo", subset_meta$Sample_clean, ignore.case = TRUE)]
      
      if (length(bv2_samples) == 0 | length(turbo_samples) == 0) {
        warning(paste("No samples found for", label))
        fc_results[[label]] <- rep(NA, nrow(counts_mat))
      } else {
        fc_results[[label]] <- calculate_log2fc(bv2_samples, turbo_samples, counts_mat)
      }
    }
  }
}

# ------------------------------------------
# Compute Fold-Change (FC) and Direction
# ------------------------------------------
fc_results_fc <- lapply(fc_results, function(x) 2^x)

fc_results_dir <- lapply(fc_results, function(x) {
  case_when(
    is.na(x) ~ NA_character_,
    x > 0 ~ "Up",
    x < 0 ~ "Down",
    TRUE ~ "NoChange"
  )
})

# ------------------------------------------
# Combine into one dataframe
# ------------------------------------------
combined_df <- data.frame(Gene = rownames(counts_mat))

for (name in names(fc_results)) {
  combined_df[[paste0(name, "_log2FC")]] <- fc_results[[name]]
  combined_df[[paste0(name, "_FC")]] <- fc_results_fc[[name]]
  combined_df[[paste0(name, "_Direction")]] <- fc_results_dir[[name]]
}

# ------------------------------------------
# Write CSV
# ------------------------------------------
write.csv(combined_df, "Fold_Changes_log2FC_FC_Direction.csv", row.names = FALSE)

library(readr)
combined_df <- read_csv("Fold_Changes_log2FC_FC_Direction.csv")

## Rows: 7200 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): Gene, Exp.3_ON.Beads_Rep1_Direction, Exp.3_ON.Beads_Rep2_Direction...
## dbl (24): Exp.3_ON.Beads_Rep1_log2FC, Exp.3_ON.Beads_Rep1_FC, Exp.3_ON.Beads...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Define conditions
# -------------------------------------------
experiments <- c("Exp.3", "Exp.4", "Exp.5")
beads_conditions <- c("ON.Beads", "OFF.Beads")

# -------------------------------------------
# Create output directory
# -------------------------------------------
dir.create("plots2", showWarnings = FALSE)

# Define Experiments and Beads
# -------------------------------------------
experiments <- c("Exp.3", "Exp.4", "Exp.5")
beads_conditions <- c("ON.Beads", "OFF.Beads")

# -------------------------------------------
# Create Output Directory
# -------------------------------------------
dir.create("plots", showWarnings = FALSE)

# -------------------------------------------
# Loop over Experiments and Beads
# -------------------------------------------
for (exp in experiments) {
  for (beads in beads_conditions) {

    message("Processing: ", exp, " ", beads)

    # -----------------------
    # Build column base
    base1 <- paste0(exp, "_", beads, "_Rep1")
    base2 <- paste0(exp, "_", beads, "_Rep2")
    
    # -----------------------
    # Extract relevant columns
    df <- combined_df %>%
      select(
        Gene,
        log2FC_1 = paste0(base1, "_log2FC"),
        Dir_1 = paste0(base1, "_Direction"),
        log2FC_2 = paste0(base2, "_log2FC"),
        Dir_2 = paste0(base2, "_Direction")
      )
    
    # -----------------------
    # Count Up/Down calls
    count_rep1 <- df %>%
      count(Dir_1) %>%
      complete(Dir_1 = c("Up", "Down"), fill = list(n=0))

    count_rep2 <- df %>%
      count(Dir_2) %>%
      complete(Dir_2 = c("Up", "Down"), fill = list(n=0))
    
    summary_label <- paste0(
      exp, " ", beads, "\n",
      "Replicate 1 ... Up: ", count_rep1$n[count_rep1$Dir_1 == "Up"], 
      " | Down: ", count_rep1$n[count_rep1$Dir_1 == "Down"],
      "\nReplicate 2 ... Up: ", count_rep2$n[count_rep2$Dir_2 == "Up"], 
      " | Down: ", count_rep2$n[count_rep2$Dir_2 == "Down"]
    )
    
    # -----------------------
    # Prepare data for Barplot
    df_long <- df %>%
      pivot_longer(cols = c(log2FC_1, log2FC_2), names_to = "Replicate", values_to = "log2FC") %>%
      mutate(Replicate = ifelse(grepl("1", Replicate), "Replicate 1", "Replicate 2"),
             Direction = case_when(
               log2FC > 0 ~ "Up-regulated",
               log2FC < 0 ~ "Down-regulated",
               TRUE ~ NA_character_
             )) %>%
      filter(!is.na(Direction))
    
    # -----------------------
    # Barplot: top genes
    top_genes <- df_long %>%
      group_by(Replicate, Direction) %>%
      slice_max(abs(log2FC), n = 10) %>%
      ungroup()
    
    p <- ggplot(top_genes, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
      geom_col(width = 0.7) +
      facet_wrap(~Replicate, scales = "free") +
      coord_flip() +
      scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
      theme_minimal() +
      labs(title = paste0(exp, " ", beads),
           subtitle = summary_label,
           y = "log2 Fold Change",
           x = "Protein Symbol",
           fill = "Direction")
    
    ggsave(filename = paste0("plots/Barplot_", exp, "_", beads, ".png"), plot = p, width = 10, height = 6)
    
    # -----------------------
    # Prepare sets for Venn
    sets <- list(
      REP1_UP = df$Gene[df$Dir_1 == "Up"],
      REP2_UP = df$Gene[df$Dir_2 == "Up"],
      REP1_DOWN = df$Gene[df$Dir_1 == "Down"],
      REP2_DOWN = df$Gene[df$Dir_2 == "Down"]
    )
    
    # Compute all overlaps
    get_overlap <- function(indices) Reduce(intersect, sets[indices])
    
    n12 <- length(get_overlap(c("REP1_UP", "REP2_UP")))
    n13 <- length(get_overlap(c("REP1_UP", "REP1_DOWN")))
    n14 <- length(get_overlap(c("REP1_UP", "REP2_DOWN")))
    n23 <- length(get_overlap(c("REP2_UP", "REP1_DOWN")))
    n24 <- length(get_overlap(c("REP2_UP", "REP2_DOWN")))
    n34 <- length(get_overlap(c("REP1_DOWN", "REP2_DOWN")))
    
    n123 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN")))
    n124 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP2_DOWN")))
    n134 <- length(get_overlap(c("REP1_UP", "REP1_DOWN", "REP2_DOWN")))
    n234 <- length(get_overlap(c("REP2_UP", "REP1_DOWN", "REP2_DOWN")))
    n1234 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN", "REP2_DOWN")))
    
    # -----------------------
    # Draw the Venn
    venn_plot_file <- paste0("plots/Venn_", exp, "_", beads, ".png")
    png(venn_plot_file, width = 800, height = 800)
    draw.quad.venn(
      area1 = length(sets$REP1_UP),
      area2 = length(sets$REP2_UP),
      area3 = length(sets$REP1_DOWN),
      area4 = length(sets$REP2_DOWN),
      n12 = n12,
      n13 = n13,
      n14 = n14,
      n23 = n23,
      n24 = n24,
      n34 = n34,
      n123 = n123,
      n124 = n124,
      n134 = n134,
      n234 = n234,
      n1234 = n1234,
      category = names(sets),
      fill = c("red", "pink", "blue", "lightblue"),
      cex = 1.5, cat.cex = 1.5, main = paste0(exp, " ", beads, ": Up/Down Rep1 & Rep2")
    )
    dev.off()
  }
}

## Processing: Exp.3 ON.Beads
## Processing: Exp.3 OFF.Beads
## Processing: Exp.4 ON.Beads
## Processing: Exp.4 OFF.Beads
## Processing: Exp.5 ON.Beads
## Processing: Exp.5 OFF.Beads

Exp_3_OFF_counts <- read.csv("~/Desktop/ERIC.NEWDATA.ANALYSIS.11JULY/ASTRAL.11JULY/Exp.3.OFF.counts.csv", row.names = 1)
dim(Exp_3_OFF_counts)

## [1] 7200    4

Exp3_meta <- read_csv("~/Desktop/ERIC.NEWDATA.ANALYSIS.11JULY/ASTRAL.11JULY/Exp3.meta.csv")

## New names:
## Rows: 4 Columns: 3
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): ...1, Replicate, Group
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

Exp3_meta <- as.data.frame(Exp3_meta)
row.names(Exp3_meta) <- colnames(Exp_3_OFF_counts)

# Get sample columns
bv2_samples <- Exp3_meta %>% filter(Group == "BV2") %>% pull(...1)
turbo_samples <- Exp3_meta %>% filter(Group == "Turbo") %>% pull(...1)

# ------------------------------------------
# Compute log2FC
log2fc_df <- data.frame(Gene = rownames(Exp_3_OFF_counts))

for (rep in 1:2) {
  bv2_col <- bv2_samples[rep]
  turbo_col <- turbo_samples[rep]
  
  log2fc <- log2(Exp_3_OFF_counts[[bv2_col]] + 1e-6) - log2(Exp_3_OFF_counts[[turbo_col]] + 1e-6)
  
  log2fc_df[[paste0("Rep", rep, "_log2FC")]] <- log2fc
  log2fc_df[[paste0("Rep", rep, "_Direction")]] <- ifelse(
    is.na(log2fc), NA,
    ifelse(log2fc > 0, "Up", "Down")
  )
}

# ------------------------------------------
# Count Up/Down
updown_counts <- log2fc_df %>%
  summarise(
    Rep1_Up = sum(Rep1_Direction == "Up", na.rm = TRUE),
    Rep1_Down = sum(Rep1_Direction == "Down", na.rm = TRUE),
    Rep2_Up = sum(Rep2_Direction == "Up", na.rm = TRUE),
    Rep2_Down = sum(Rep2_Direction == "Down", na.rm = TRUE)
  )

print(updown_counts)

##   Rep1_Up Rep1_Down Rep2_Up Rep2_Down
## 1    3854      3346    1356      5844

# ------------------------------------------
# Barplot Data
plot_data <- log2fc_df %>%
  pivot_longer(
    cols = c(Rep1_log2FC, Rep2_log2FC, Rep1_Direction, Rep2_Direction),
    names_to = c("Replicate", ".value"),
    names_pattern = "(Rep\\d)_(.*)"
  ) %>%
  filter(Direction %in% c("Up", "Down"))

# Top 10 up/down per replicate
top_plot_data <- plot_data %>%
  group_by(Replicate, Direction) %>%
  slice_max(abs(log2FC), n = 10) %>%
  ungroup() %>%
  mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))

# ------------------------------------------
# Make Barplot
barplot <- ggplot(top_plot_data, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
  geom_col(width = 0.7) +
  facet_wrap(~Replicate, scales = "free") +
  coord_flip() +
  scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
  theme_minimal() +
  labs(
    title = "Exp.3 OFF Beads - BV2 vs Turbo",
    subtitle = paste0(
      "Replicate 1 ... Up: ", updown_counts$Rep1_Up, " | Down: ", updown_counts$Rep1_Down, "\n",
      "Replicate 2 ... Up: ", updown_counts$Rep2_Up, " | Down: ", updown_counts$Rep2_Down
    ),
    y = "log2 Fold Change",
    x = "Protein Symbol",
    fill = "Direction"
  )

# ------------------------------------------
# Venn Diagram Sets
sets <- list(
  REP1_UP = log2fc_df$Gene[log2fc_df$Rep1_Direction == "Up"],
  REP2_UP = log2fc_df$Gene[log2fc_df$Rep2_Direction == "Up"],
  REP1_DOWN = log2fc_df$Gene[log2fc_df$Rep1_Direction == "Down"],
  REP2_DOWN = log2fc_df$Gene[log2fc_df$Rep2_Direction == "Down"]
)

# Compute overlaps
get_overlap <- function(indices) Reduce(intersect, sets[indices])

n12 <- length(get_overlap(c("REP1_UP", "REP2_UP")))
n13 <- length(get_overlap(c("REP1_UP", "REP1_DOWN")))
n14 <- length(get_overlap(c("REP1_UP", "REP2_DOWN")))
n23 <- length(get_overlap(c("REP2_UP", "REP1_DOWN")))
n24 <- length(get_overlap(c("REP2_UP", "REP2_DOWN")))
n34 <- length(get_overlap(c("REP1_DOWN", "REP2_DOWN")))

n123 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN")))
n124 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP2_DOWN")))
n134 <- length(get_overlap(c("REP1_UP", "REP1_DOWN", "REP2_DOWN")))
n234 <- length(get_overlap(c("REP2_UP", "REP1_DOWN", "REP2_DOWN")))
n1234 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN", "REP2_DOWN")))

# ------------------------------------------
# Save Plots
dir.create("EXP3.OFFBeads_plots", showWarnings = FALSE)

ggsave("EXP3.OFFBeads_plots/Exp3_OFF_Barplot.png", barplot, width = 10, height = 6)

png("EXP3.OFFBeads_plots/Exp3_OFF_Venn.png", width = 800, height = 800)
draw.quad.venn(
  area1 = length(sets$REP1_UP),
  area2 = length(sets$REP2_UP),
  area3 = length(sets$REP1_DOWN),
  area4 = length(sets$REP2_DOWN),
  n12 = n12,
  n13 = n13,
  n14 = n14,
  n23 = n23,
  n24 = n24,
  n34 = n34,
  n123 = n123,
  n124 = n124,
  n134 = n134,
  n234 = n234,
  n1234 = n1234,
  category = names(sets),
  fill = c("red", "pink", "blue", "lightblue"),
  cex = 1.5,
  cat.cex = 1.5,
  main = "Exp.3-OFF Beads:UP/Down Rep1 & Rep2"
)

## (polygon[GRID.polygon.943], polygon[GRID.polygon.944], polygon[GRID.polygon.945], polygon[GRID.polygon.946], polygon[GRID.polygon.947], polygon[GRID.polygon.948], polygon[GRID.polygon.949], polygon[GRID.polygon.950], text[GRID.text.951], text[GRID.text.952], text[GRID.text.953], text[GRID.text.954], text[GRID.text.955], text[GRID.text.956], text[GRID.text.957], text[GRID.text.958], text[GRID.text.959], text[GRID.text.960], text[GRID.text.961], text[GRID.text.962], text[GRID.text.963], text[GRID.text.964], text[GRID.text.965], text[GRID.text.966], text[GRID.text.967], text[GRID.text.968], text[GRID.text.969])

dev.off()

## quartz_off_screen 
##                 2

Astral.BV2.Turbo <- updated_Astral_BV2_Turbo_OP_Proteins_11july_IMPUTATION_UNIQUE
counts_matrix <- Astral.BV2.Turbo
#Get Exp.3 ON
exp3_on_cols <- grep("^Exp_3.*On$", colnames(counts_matrix), value = TRUE)
Exp3_ON_counts <- counts_matrix[, exp3_on_cols]

#Same for Exp.4
exp4_off_cols <- grep("^Exp_4.*Off$", colnames(counts_matrix), value = TRUE)
Exp4_OFF_counts <- counts_matrix[, exp4_off_cols]

exp4_on_cols <- grep("^Exp_4.*On$", colnames(counts_matrix), value = TRUE)
Exp4_ON_counts <- counts_matrix[, exp4_on_cols]

#Same for Exp.5
exp5_off_cols <- grep("^Exp_5.*Off$", colnames(counts_matrix), value = TRUE)
Exp5_OFF_counts <- counts_matrix[, exp5_off_cols]

exp5_on_cols <- grep("^Exp_5.*On$", colnames(counts_matrix), value = TRUE)
Exp5_ON_counts <- counts_matrix[, exp5_on_cols]


write.csv(Exp3_ON_counts, "Exp3_ON_counts.csv")
write.csv(Exp4_OFF_counts, "Exp4_OFF_counts.csv")
write.csv(Exp4_ON_counts, "Exp4_ON_counts.csv")
write.csv(Exp5_OFF_counts, "Exp5_OFF_counts.csv")
write.csv(Exp5_ON_counts, "Exp5_ON_counts.csv")

##Subset metadata

# Assume your metadata is in:
meta <- Astral_Metadata

# ------------------------------------------
# Subset: Exp.3 OFF.Beads
meta_Exp3_OFF <- meta %>%
  filter(Experiment == "Exp.3", Beads == "OFF.Beads")

# ------------------------------------------
# Subset: Exp.3 ON.Beads
meta_Exp3_ON <- meta %>%
  filter(Experiment == "Exp.3", Beads == "ON.Beads")

# ------------------------------------------
# Subset: Exp.4 OFF.Beads
meta_Exp4_OFF <- meta %>%
  filter(Experiment == "Exp.4", Beads == "OFF.Beads")

# ------------------------------------------
# Subset: Exp.4 ON.Beads
meta_Exp4_ON <- meta %>%
  filter(Experiment == "Exp.4", Beads == "ON.Beads")

# ------------------------------------------
# Subset: Exp.5 OFF.Beads
meta_Exp5_OFF <- meta %>%
  filter(Experiment == "Exp.5", Beads == "OFF.Beads")

# ------------------------------------------
# Subset: Exp.5 ON.Beads
meta_Exp5_ON <- meta %>%
  filter(Experiment == "Exp.5", Beads == "ON.Beads")

##save files
write.csv(meta_Exp3_OFF, "meta_Exp3_OFF.csv", row.names = FALSE)
write.csv(meta_Exp3_ON,  "meta_Exp3_ON.csv",  row.names = FALSE)
write.csv(meta_Exp4_OFF, "meta_Exp4_OFF.csv", row.names = FALSE)
write.csv(meta_Exp4_ON,  "meta_Exp4_ON.csv",  row.names = FALSE)
write.csv(meta_Exp5_OFF, "meta_Exp5_OFF.csv", row.names = FALSE)
write.csv(meta_Exp5_ON,  "meta_Exp5_ON.csv",  row.names = FALSE)

#COUNT
dim(Exp3_ON_counts)

## [1] 7200    4

##Metadata
dim(meta_Exp3_ON)

## [1] 4 6

# ------------------------------------------
# Check data shapes
# ------------------------------------------
print(dim(Exp3_ON_counts))

## [1] 7200    4

print(dim(meta_Exp3_ON))

## [1] 4 6

meta_Exp3_ON <- meta_Exp3_ON %>%
  mutate(
    Sample_clean = gsub("\\.", "_", ...1),
    Group = case_when(
      grepl("Bv2", Sample_clean, ignore.case = TRUE) ~ "BV2",
      grepl("Turbo", Sample_clean, ignore.case = TRUE) ~ "Turbo",
      TRUE ~ NA_character_
    )
  )


# ------------------------------------------
# Make sure metadata sample names match count columns
# ------------------------------------------
#stopifnot(all(colnames(Exp3_ON_counts) %in% meta_Exp3_ON$Sample_clean))


# ------------------------------------------
# Get BV2 and Turbo sample columns
# ------------------------------------------
bv2_samples <- meta_Exp3_ON %>%
  filter(Group == "BV2") %>%
  pull(Sample_clean)

turbo_samples <- meta_Exp3_ON %>%
  filter(Group == "Turbo") %>%
  pull(Sample_clean)


# ------------------------------------------
# Compute log2FC
# ------------------------------------------
log2fc_df <- data.frame(Gene = rownames(Exp3_ON_counts))

for (rep in 1:2) {
  bv2_col <- bv2_samples[rep]
  turbo_col <- turbo_samples[rep]
  
  log2fc <- log2(Exp3_ON_counts[[bv2_col]] + 1e-6) - log2(Exp3_ON_counts[[turbo_col]] + 1e-6)
  
  log2fc_df[[paste0("Rep", rep, "_log2FC")]] <- log2fc
  log2fc_df[[paste0("Rep", rep, "_Direction")]] <- ifelse(
    is.na(log2fc), NA,
    ifelse(log2fc > 0, "Up", "Down")
  )
}

# ------------------------------------------
# Count Up/Down
# ------------------------------------------
updown_counts <- log2fc_df %>%
  summarise(
    Rep1_Up = sum(Rep1_Direction == "Up", na.rm = TRUE),
    Rep1_Down = sum(Rep1_Direction == "Down", na.rm = TRUE),
    Rep2_Up = sum(Rep2_Direction == "Up", na.rm = TRUE),
    Rep2_Down = sum(Rep2_Direction == "Down", na.rm = TRUE)
  )

print(updown_counts)

##   Rep1_Up Rep1_Down Rep2_Up Rep2_Down
## 1    3829      3371    4654      2546

# ------------------------------------------
# Prepare Barplot Data
# ------------------------------------------
plot_data <- log2fc_df %>%
  pivot_longer(
    cols = c(Rep1_log2FC, Rep2_log2FC, Rep1_Direction, Rep2_Direction),
    names_to = c("Replicate", ".value"),
    names_pattern = "(Rep\\d)_(.*)"
  ) %>%
  filter(Direction %in% c("Up", "Down"))

# Get top 10 genes by absolute log2FC per replicate/direction
top_plot_data <- plot_data %>%
  group_by(Replicate, Direction) %>%
  slice_max(abs(log2FC), n = 10, with_ties = FALSE) %>%
  ungroup() %>%
  mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))

# ------------------------------------------
# Create Barplot
# ------------------------------------------
barplot <- ggplot(top_plot_data, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
  geom_col(width = 0.7) +
  facet_wrap(~Replicate, scales = "free") +
  coord_flip() +
  scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
  theme_minimal() +
  labs(
    title = "Exp.3 ON Beads - BV2 vs Turbo",
    subtitle = paste0(
      "Replicate 1 ... Up: ", updown_counts$Rep1_Up, " | Down: ", updown_counts$Rep1_Down, "\n",
      "Replicate 2 ... Up: ", updown_counts$Rep2_Up, " | Down: ", updown_counts$Rep2_Down
    ),
    y = "log2 Fold Change",
    x = "Protein Symbol",
    fill = "Direction"
  )

# ------------------------------------------
# Venn Diagram Sets
# ------------------------------------------
sets <- list(
  REP1_UP = log2fc_df$Gene[log2fc_df$Rep1_Direction == "Up"],
  REP2_UP = log2fc_df$Gene[log2fc_df$Rep2_Direction == "Up"],
  REP1_DOWN = log2fc_df$Gene[log2fc_df$Rep1_Direction == "Down"],
  REP2_DOWN = log2fc_df$Gene[log2fc_df$Rep2_Direction == "Down"]
)

# Compute intersections
get_overlap <- function(indices) Reduce(intersect, sets[indices])

n12 <- length(get_overlap(c("REP1_UP", "REP2_UP")))
n13 <- length(get_overlap(c("REP1_UP", "REP1_DOWN")))
n14 <- length(get_overlap(c("REP1_UP", "REP2_DOWN")))
n23 <- length(get_overlap(c("REP2_UP", "REP1_DOWN")))
n24 <- length(get_overlap(c("REP2_UP", "REP2_DOWN")))
n34 <- length(get_overlap(c("REP1_DOWN", "REP2_DOWN")))

n123 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN")))
n124 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP2_DOWN")))
n134 <- length(get_overlap(c("REP1_UP", "REP1_DOWN", "REP2_DOWN")))
n234 <- length(get_overlap(c("REP2_UP", "REP1_DOWN", "REP2_DOWN")))
n1234 <- length(get_overlap(c("REP1_UP", "REP2_UP", "REP1_DOWN", "REP2_DOWN")))

# ------------------------------------------
# Save Plots
# ------------------------------------------
dir.create("EXP3.ONBeads_plots", showWarnings = FALSE)

ggsave("EXP3.ONBeads_plots/Exp3_ON_Barplot.png", barplot, width = 10, height = 6)

png("EXP3.ONBeads_plots/Exp3_ON_Venn.png", width = 800, height = 800)
draw.quad.venn(
  area1 = length(sets$REP1_UP),
  area2 = length(sets$REP2_UP),
  area3 = length(sets$REP1_DOWN),
  area4 = length(sets$REP2_DOWN),
  n12 = n12,
  n13 = n13,
  n14 = n14,
  n23 = n23,
  n24 = n24,
  n34 = n34,
  n123 = n123,
  n124 = n124,
  n134 = n134,
  n234 = n234,
  n1234 = n1234,
  category = names(sets),
  fill = c("red", "pink", "blue", "lightblue"),
  cex = 1.5,
  cat.cex = 1.5,
  main = "Exp.3 ON Beads: UP/Down Rep1 & Rep2"
)

## (polygon[GRID.polygon.1081], polygon[GRID.polygon.1082], polygon[GRID.polygon.1083], polygon[GRID.polygon.1084], polygon[GRID.polygon.1085], polygon[GRID.polygon.1086], polygon[GRID.polygon.1087], polygon[GRID.polygon.1088], text[GRID.text.1089], text[GRID.text.1090], text[GRID.text.1091], text[GRID.text.1092], text[GRID.text.1093], text[GRID.text.1094], text[GRID.text.1095], text[GRID.text.1096], text[GRID.text.1097], text[GRID.text.1098], text[GRID.text.1099], text[GRID.text.1100], text[GRID.text.1101], text[GRID.text.1102], text[GRID.text.1103], text[GRID.text.1104], text[GRID.text.1105], text[GRID.text.1106], text[GRID.text.1107])

dev.off()

## quartz_off_screen 
##                 2

Exp1.counts <- read.csv("Exp.1.counts.csv", row.names = 1)
dim(Exp1.counts)

## [1] 7200    2

EXP1.Meta <- read.csv("EXP1.Meta.csv", row.names = 1)

## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on 'EXP1.Meta.csv'

EXP1.Meta

##                     Group
## Exp.1.Bv2_rep1_On     BV2
## Exp.1.Turbo_rep1_On Turbo

row.names(EXP1.Meta) <- colnames(Exp1.counts)

# Libraries
library(ggplot2)
library(dplyr)

# Check
print(colnames(Exp1.counts))

## [1] "Exp.1.Bv2_rep1_On"   "Exp.1.Turbo_rep1_On"

print(rownames(EXP1.Meta))

## [1] "Exp.1.Bv2_rep1_On"   "Exp.1.Turbo_rep1_On"

# Make sure they match (they do here!)
stopifnot(all(colnames(Exp1.counts) %in% rownames(EXP1.Meta)))

# Identify columns
bv2_col <- rownames(EXP1.Meta)[EXP1.Meta$Group == "BV2"]
turbo_col <- rownames(EXP1.Meta)[EXP1.Meta$Group == "Turbo"]

print(bv2_col)

## [1] "Exp.1.Bv2_rep1_On"

print(turbo_col)

## [1] "Exp.1.Turbo_rep1_On"

# Compute log2FC
log2fc <- log2(Exp1.counts[[bv2_col]] + 1e-6) - log2(Exp1.counts[[turbo_col]] + 1e-6)

# Label direction
direction <- ifelse(log2fc > 0, "Up", "Down")

# Make dataframe
log2fc_df <- data.frame(
  Gene = rownames(Exp1.counts),
  log2FC = log2fc,
  Direction = direction
)

head(log2fc_df)

##              Gene      log2FC Direction
## 1           Fcgr4 -0.50204817      Down
## 2           Qser1  0.01471213        Up
## 3           Afg2b  0.02560255        Up
## 4 Pglyrp3;Pglyrp4 -0.01945902      Down
## 5         Tbc1d25  0.30569050        Up
## 6           Mpeg1  0.22617434        Up

# Top 20 genes by absolute log2FC
top_log2fc_df <- log2fc_df %>%
  arrange(desc(abs(log2FC))) %>%
  slice_head(n = 20) %>%
  mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))


top_up <- log2fc_df %>%
  filter(Direction == "Up") %>%
  slice_max(order_by = log2FC, n = 15)

top_down <- log2fc_df %>%
  filter(Direction == "Down") %>%
  slice_min(order_by = log2FC, n = 15)

top_plot_data <- bind_rows(top_up, top_down) %>%
  mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))


# Plot
ggplot(top_plot_data, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
  geom_col(width = 0.7) +
  coord_flip() +
  scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
  theme_minimal() +
  labs(
    title = "Exp.1BV2 vs Turbo",
    y = "log2 FC",
    x = "Gene",
    fill = "Direction"
  )

# Load libraries
library(dplyr)
library(ggplot2)

INPUT <- read.csv("INPUT.csv", row.names = 1)


library(readr)
Input_Metadata <- read_csv("Input.Metadata.csv")

## New names:
## Rows: 2 Columns: 2
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): ...1, Group
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Inspect
head(INPUT)

##                      ISD   S.trap
## Fcgr4           24.28207 23.84805
## Qser1           17.74668 17.78941
## Afg2b           18.95749 19.21245
## Pglyrp3;Pglyrp4 15.45789 16.51110
## Tbc1d25         12.58099 12.68490
## Mpeg1           15.94742 15.78730

Input_Metadata

## # A tibble: 2 × 2
##   ...1   Group 
##   <chr>  <chr> 
## 1 ISD    ISD   
## 2 S-trap S-trap

# ------------------------------------------
# Extract columns
isd_col <- "ISD"
strap_col <- "S.trap"

# ------------------------------------------
# Compute log2FC
log2fc <- log2(INPUT[[strap_col]] + 1e-6) - log2(INPUT[[isd_col]] + 1e-6)

# Label direction
direction <- ifelse(log2fc > 0, "Up", "Down")

# Create dataframe
log2fc_df <- data.frame(
  Gene = rownames(INPUT),
  log2FC = log2fc,
  Direction = direction
)

# Check results
head(log2fc_df)

##              Gene       log2FC Direction
## 1           Fcgr4 -0.026020172      Down
## 2           Qser1  0.003469381        Up
## 3           Afg2b  0.019273331        Up
## 4 Pglyrp3;Pglyrp4  0.095092413        Up
## 5         Tbc1d25  0.011866856        Up
## 6           Mpeg1 -0.014558238      Down

table(log2fc_df$Direction)

## 
## Down   Up 
## 1802 5398

# ------------------------------------------
# Get top genes for plot
top_up <- log2fc_df %>%
  filter(Direction == "Up") %>%
  slice_max(order_by = log2FC, n = 10)

top_down <- log2fc_df %>%
  filter(Direction == "Down") %>%
  slice_min(order_by = log2FC, n = 10)

top_plot_data <- bind_rows(top_up, top_down) %>%
  mutate(Direction = ifelse(Direction == "Up", "Up-regulated", "Down-regulated"))

# ------------------------------------------
# Plot
ggplot(top_plot_data, aes(x = reorder(Gene, log2FC), y = log2FC, fill = Direction)) +
  geom_col(width = 0.7) +
  coord_flip() +
  scale_fill_manual(values = c("Up-regulated" = "red", "Down-regulated" = "blue")) +
  theme_minimal() +
  labs(
    title = "S-trap vs ISD - log2 Fold Change",
    y = "log2 Fold Change",
    x = "Gene",
    fill = "Direction"
  )

Astral.BV2.vs.Turbo.11july

Dr UPASNA

2025-07-10