Aldh1.Turbo.filtration.Pvaluebased

## Import imputation 
library(readr)
Exp3_OnBeads_Aldh1_filtered_Imputation <- read.csv("~/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Exp3.On.Beads/Exp3_OnBeads_Aldh1_filtered.Imputation.csv", row.names = 1)
dim(Exp3_OnBeads_Aldh1_filtered_Imputation)

## [1] 4920    4

boxplot(Exp3_OnBeads_Aldh1_filtered_Imputation)

Exp3_OnBeads_Meta <- read.csv("~/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Exp3.On.Beads/Exp3.OnBeads.Meta.csv", row.names = 1)

## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on
## '~/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Exp3.On.Beads/Exp3.OnBeads.Meta.csv'

row.names(Exp3_OnBeads_Meta) == colnames(Exp3_OnBeads_Aldh1_filtered_Imputation)

## [1] TRUE TRUE TRUE TRUE

## Median based normalization
#colsum median normalization 
col_sums <- colSums(Exp3_OnBeads_Aldh1_filtered_Imputation, na.rm = TRUE)
 # Step 2: Compute the median of column sums
median_sum <- median(col_sums)
 # Step 3: Compute normalization factors (scale factors) 
norm_factors <- median_sum / col_sums
 # Step 4: Multiply each column by its corresponding normalization factor 
Exp3_OnBeads_Aldh1_filtered_Imputation.normalized <- sweep(Exp3_OnBeads_Aldh1_filtered_Imputation, 2, norm_factors, FUN = "*")

dim(Exp3_OnBeads_Aldh1_filtered_Imputation.normalized)

## [1] 4920    4

boxplot(Exp3_OnBeads_Aldh1_filtered_Imputation.normalized)

saveRDS(Exp3_OnBeads_Aldh1_filtered_Imputation.normalized, file = 'Exp3_OnBeads_Aldh1_filtered_Imputation.normalized.rds')
write.csv(Exp3_OnBeads_Aldh1_filtered_Imputation.normalized, file = 'Exp3_OnBeads_Aldh1_filtered_Imputation.normalized.csv')


#########
## Median based normalization
#colsum median normalization 
col_sums <- colSums(Exp3_OnBeads_Aldh1_filtered_Imputation, na.rm = TRUE)
 # Step 2: Compute the median of column sums
median_sum <- median(col_sums)
 # Step 3: Compute normalization factors (scale factors) 
norm_factors <- median_sum / col_sums
 # Step 4: Multiply each column by its corresponding normalization factor 
Exp3_OnBeads_Aldh1_filtered_Imputation.normalized <- sweep(Exp3_OnBeads_Aldh1_filtered_Imputation, 2, norm_factors, FUN = "*")

dim(Exp3_OnBeads_Aldh1_filtered_Imputation.normalized)

## [1] 4920    4

#########


### Fold Cnage and P-values using filtration and imputation files no normalization

cleanDat <- Exp3_OnBeads_Aldh1_filtered_Imputation.normalized
numericMeta <- Exp3_OnBeads_Meta
Grouping <- numericMeta$Group
source("/Users/usri/Desktop/Screen/parANOVA.dex.R")
ANOVAout <- parANOVA.dex()

## - parallelThreads variable not set. Running with 2 threads only.
## - Network color assignment vector not supplied or not of length in rows of cleanDat; will not be included in output table and data frame.
## - twoGroupCorrMethod variable not set to a correction method for p.adjust when only 2 groups of samples specified in Grouping. Using Benjamini Hochberg 'BH' FDR.
## - fallbackIfSmallTukeyP variable not set. Using recommended Bonferroni t-test FDR for unreliable Tukey p values <10^-8.5

## 
## Attaching package: 'foreach'

## The following objects are masked from 'package:purrr':
## 
##     accumulate, when

## 
## ...Tukey p<10^-8.5 Fallback calculations using Bonferroni corrected T test: 2 [0.041%]

labelTop <- 15
FCmin= 0
flip = -3
signifP=0.05
plotVolc()

## - No comparison p value columns selected in selectComps. Using ALL comparisons.
##  - selectComps may not reference valid integer p value column indexes of ANOVAout (or CORout).
##    Output will be for all comparisons or correlation(s).
## - useNETcolors not set or not TRUE/FALSE. NETcolors not found in ANOVAout, so 3 color volcano(es) will be drawn.
## - Variable outputfigs not specified. Saving volcano plots to /Users/usri/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Aldh1.Turbo.filtration.25july2025/Aldh.P.value.based/Aldh.pvalue.based.analysis .
## 
## ...Thresholds:
##    [x] Applying a 0% minimum fold change threshold at + and - x=0 .
##    [y] with minimum significance cutoff p < 0.05, equivalent to -log10(p) y=1.3 .
## 
## Processing ANOVA column 3 (Turbo vs Negative) for volcano ...

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Generating PDF volcano for ANOVAout column 3 (Turbo vs Negative) ...

## Generating HTML volcano for ANOVAout column 3 (Turbo vs Negative) ...

## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

###
Exp3_OnBeads.DEP.result <- readRDS("~/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Aldh1.Turbo.filtration.25july2025/Aldh.P.value.based/Aldh.pvalue.based.analysis/Exp3_OnBeads.DEP.result.rds")


### Filter Results and add UP and Down as expression
# Step 4: Volcano plot with top genes
# ───────────────────────────────

library(ggplot2)
library(dplyr)
library(ggrepel)

# Step 4: Volcano plot with top genes
# ───────────────────────────────

# Prepare plot data
results_df <- Exp3_OnBeads.DEP.result
results_df$log10p <- -log10(results_df$P.value)  # Fix column name

# Set thresholds
fc_cutoff <- 0       # recommended fold change cutoff
p_cutoff <- 0.05

# Categorize significance
results_df$Significance <- "Not Sig"
results_df$Significance[results_df$log2FC >= fc_cutoff & results_df$P.value < p_cutoff] <- "Up"
results_df$Significance[results_df$log2FC <= -fc_cutoff & results_df$P.value < p_cutoff] <- "Down"

# Label top genes
top_up <- results_df %>% filter(Significance == "Up") %>% slice_max(order_by = log10p, n = 10)
top_down <- results_df %>% filter(Significance == "Down") %>% slice_max(order_by = log10p, n = 10)
top_genes <- bind_rows(top_up, top_down)

# Count number of genes per category safely
counts_summary <- table(results_df$Significance)
up_count <- ifelse("Up" %in% names(counts_summary), counts_summary["Up"], 0)
down_count <- ifelse("Down" %in% names(counts_summary), counts_summary["Down"], 0)
not_sig_count <- ifelse("Not Sig" %in% names(counts_summary), counts_summary["Not Sig"], 0)

# Plot
ggplot(results_df, aes(x = log2FC, y = log10p)) +
  geom_point(aes(color = Significance), alpha = 0.7, size = 1.8) +
  scale_color_manual(values = c("Down" = "blue", "Up" = "red", "Not Sig" = "grey")) +
  geom_vline(xintercept = c(-fc_cutoff, fc_cutoff), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(p_cutoff), linetype = "dashed", color = "black") +

  # Label UP genes in red
  geom_text_repel(
    data = top_genes %>% filter(Significance == "Up"),
    aes(label = Protein),
    size = 5,
    color = "red",
    box.padding = 0.4,
    max.overlaps = Inf
  ) +

  # Label DOWN genes in blue
  geom_text_repel(
    data = top_genes %>% filter(Significance == "Down"),
    aes(label = Protein),
    size = 5,
    color = "blue",
    box.padding = 0.4,
    max.overlaps = Inf
  ) +

  # Annotate counts
  annotate("text", x = 4, y = max(results_df$log10p, na.rm = TRUE),
           label = paste0("Upregulated: ", up_count), color = "red", hjust = 0) +
  annotate("text", x = -4, y = max(results_df$log10p, na.rm = TRUE),
           label = paste0("Downregulated: ", down_count), color = "blue", hjust = 1) +
  annotate("text", x = 0, y = 1,
           label = paste0("Not Sig: ", not_sig_count), color = "gray30", hjust = 0.5) +

  labs(
    title = "Exp3.ON.Beads: Turbo vs Negative",
    x = "log2FC (Turbo vs Negative)",
    y = "-log10(p-value)"
  ) +
  theme_minimal(base_size = 14)

library(readr)
library(readr)
Exp3_OFF_Beads_Aldh1_filtered_IMPUTTION <- read.csv("~/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Exp3.OFF.Beads/Exp3_OFF_Beads_Aldh1_filtered.IMPUTTION.csv", row.names = 1)
Exp3_OFF_Beads_Meta <- read.csv("~/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Exp3.OFF.Beads/Exp3.OFF.Beads.Meta.csv", row.names = 1 )

## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on
## '~/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Exp3.OFF.Beads/Exp3.OFF.Beads.Meta.csv'

row.names(Exp3_OFF_Beads_Meta) == colnames(Exp3_OFF_Beads_Aldh1_filtered_IMPUTTION)

## [1] TRUE TRUE TRUE TRUE

### Fold Cnage and P-values using filtration and imputation files no normalization

cleanDat <- Exp3_OFF_Beads_Aldh1_filtered_IMPUTTION
numericMeta <-Exp3_OFF_Beads_Meta
Grouping <- numericMeta$Group
source("/Users/usri/Desktop/Screen/parANOVA.dex.R")
ANOVAout <- parANOVA.dex()

## - parallelThreads variable not set. Running with 2 threads only.
## - Network color assignment vector not supplied or not of length in rows of cleanDat; will not be included in output table and data frame.
## - twoGroupCorrMethod variable not set to a correction method for p.adjust when only 2 groups of samples specified in Grouping. Using Benjamini Hochberg 'BH' FDR.
## - fallbackIfSmallTukeyP variable not set. Using recommended Bonferroni t-test FDR for unreliable Tukey p values <10^-8.5
## 
## ...Tukey p<10^-8.5 Fallback calculations using Bonferroni corrected T test: 6 [0.11%]

labelTop <- 15
FCmin= 0
flip = -3
signifP=0.05
plotVolc()

## - No comparison p value columns selected in selectComps. Using ALL comparisons.
##  - selectComps may not reference valid integer p value column indexes of ANOVAout (or CORout).
##    Output will be for all comparisons or correlation(s).
## - useNETcolors not set or not TRUE/FALSE. NETcolors not found in ANOVAout, so 3 color volcano(es) will be drawn.
## - Variable outputfigs not specified. Saving volcano plots to /Users/usri/Desktop/Eric_Aldha1l1_TurboID_Optimization.25july2025/Aldh1.Turbo.filtration.25july2025/Aldh.P.value.based/Aldh.pvalue.based.analysis .
## 
## ...Thresholds:
##    [x] Applying a 0% minimum fold change threshold at + and - x=0 .
##    [y] with minimum significance cutoff p < 0.05, equivalent to -log10(p) y=1.3 .
## 
## Processing ANOVA column 3 (Turbo vs Negative) for volcano ...

## Generating PDF volcano for ANOVAout column 3 (Turbo vs Negative) ...

## Generating HTML volcano for ANOVAout column 3 (Turbo vs Negative) ...

## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[3L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

### Import result
Exp3_OFF.Beads.DEP.result <- read_csv("Exp3OFF Beads.volcano/ANOVA_diffEx-ALL-unspecified_study.csv")

## New names:
## Rows: 5494 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (4): F-Value, FDR (BH), Turbo-Negative, diff Turbo-Negative
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

# Step 4: Volcano plot with top genes
Exp3_OFF.Beads.DEP.result$P.value <- Exp3_OFF.Beads.DEP.result$`Turbo-Negative`
Exp3_OFF.Beads.DEP.result$log2FC <- Exp3_OFF.Beads.DEP.result$`diff Turbo-Negative`
Exp3_OFF.Beads.DEP.result$Protein <- Exp3_OFF.Beads.DEP.result$...1

# Prepare plot data
results_df <- Exp3_OFF.Beads.DEP.result
results_df$log10p <- -log10(results_df$P.value)  # Fix column name

# Set thresholds
fc_cutoff <- 0       # recommended fold change cutoff
p_cutoff <- 0.05

# Categorize significance
results_df$Significance <- "Not Sig"
results_df$Significance[results_df$log2FC >= fc_cutoff & results_df$P.value < p_cutoff] <- "Up"
results_df$Significance[results_df$log2FC <= -fc_cutoff & results_df$P.value < p_cutoff] <- "Down"

# Label top genes
top_up <- results_df %>% filter(Significance == "Up") %>% slice_max(order_by = log10p, n = 10)
top_down <- results_df %>% filter(Significance == "Down") %>% slice_max(order_by = log10p, n = 10)
top_genes <- bind_rows(top_up, top_down)

# Count number of genes per category safely
counts_summary <- table(results_df$Significance)
up_count <- ifelse("Up" %in% names(counts_summary), counts_summary["Up"], 0)
down_count <- ifelse("Down" %in% names(counts_summary), counts_summary["Down"], 0)
not_sig_count <- ifelse("Not Sig" %in% names(counts_summary), counts_summary["Not Sig"], 0)

# Plot
ggplot(results_df, aes(x = log2FC, y = log10p)) +
  geom_point(aes(color = Significance), alpha = 0.7, size = 1.8) +
  scale_color_manual(values = c("Down" = "blue", "Up" = "red", "Not Sig" = "grey")) +
  geom_vline(xintercept = c(-fc_cutoff, fc_cutoff), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(p_cutoff), linetype = "dashed", color = "black") +

  # Label UP genes in red
  geom_text_repel(
    data = top_genes %>% filter(Significance == "Up"),
    aes(label = Protein),
    size = 5,
    color = "red",
    box.padding = 0.4,
    max.overlaps = Inf
  ) +

  # Label DOWN genes in blue
  geom_text_repel(
    data = top_genes %>% filter(Significance == "Down"),
    aes(label = Protein),
    size = 5,
    color = "blue",
    box.padding = 0.4,
    max.overlaps = Inf
  ) +

  # Annotate counts
  annotate("text", x = 2, y = max(results_df$log10p, na.rm = TRUE),
           label = paste0("UP: ", up_count), color = "red", hjust = 0) +
  annotate("text", x = -2, y = max(results_df$log10p, na.rm = TRUE),
           label = paste0("DOWN: ", down_count), color = "blue", hjust = 1) +
  annotate("text", x = 0, y = 1,
           label = paste0("Not Sig: ", not_sig_count), color = "gray30", hjust = 0.5) +

  labs(
    title = "Exp3.OFF.Beads: Turbo vs Negative",
    x = "log2FC (Turbo vs Negative)",
    y = "-log10(p-value)"
  ) +
  theme_minimal(base_size = 14)

#Exp3.OFF.Beads.Volcano

Aldh1.Turbo.filtration.Pvaluebased

Dr UPASNA

2025-08-06