Libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Functions

# Function to create the correlation plot
createCorrelationPlot <- function(group1, group2, group1_file, group2_file, cell_type, fdrq_cutoff) {
  
  # Read data
  setwd("/research/labs/neurology/fryer/m214960/Da_Mesquita")
  data1 <- read.delim2(group1_file)
  data2 <- read.delim2(group2_file)
  
  # Subset data based on cell type
  data1 <- data1[data1$cluster == cell_type,]
  data2 <- data2[data2$cluster == cell_type,]
  
  # change class
  data1$p_val_adj <- as.numeric(data1$p_val_adj)
  data1$avg_log2FC <- as.numeric(data1$avg_log2FC)
  data2$p_val_adj <- as.numeric(data2$p_val_adj)
  data2$avg_log2FC <- as.numeric(data2$avg_log2FC)
  
  # Subset data based on adjusted p-value
  data1 <- data1[data1$p_val_adj <= fdrq_cutoff,]
  data2 <- data2[data2$p_val_adj <= fdrq_cutoff,]
  
  # Find shared genes and subset
  shared_genes <- data1$gene[data1$gene %in% data2$gene]
  shared_genes <- shared_genes[!duplicated(shared_genes)]
  data1 <- data1[data1$gene %in% shared_genes,]
  data2 <- data2[data2$gene %in% shared_genes,]
  shared <- left_join(data1, data2, by = "gene", suffix = c("_data1", "_data2"))
  
  # Remove rows with NA values in avg_log2FC_data1 or avg_log2FC_data2
  shared <- shared %>%
    filter(!is.na(avg_log2FC_data1) & !is.na(avg_log2FC_data2))
  
  # Ensure there are enough data points
  if (nrow(shared) < 2) {
    print("Not enough data to plot")
    return(NULL)  # Not enough data to plot
  }
  
  # Spearman's correlation
  res <- cor.test(shared$avg_log2FC_data1, shared$avg_log2FC_data2, method = "spearman")
  p_value <- round(res$p.value, 5)
  rho_value <- round(res$estimate, 3)
  
  # Fit a manual linear model
  lm_fit <- lm(avg_log2FC_data2 ~ avg_log2FC_data1, data = shared)
  slope <- coef(lm_fit)[2]
  intercept <- coef(lm_fit)[1]
  
  # Calculate the limits for the plot based on the range of the data
  x_limits <- c(min(shared$avg_log2FC_data1, na.rm = TRUE), max(shared$avg_log2FC_data1, na.rm = TRUE))
  y_limits <- c(min(shared$avg_log2FC_data2, na.rm = TRUE), max(shared$avg_log2FC_data2, na.rm = TRUE))
  
  # Create the plot and manually add the regression line
  p <- ggplot(data = shared, 
              aes(x = avg_log2FC_data1, y = avg_log2FC_data2, text = paste(gene))) +
    # Restore the pink and blue color annotations
    annotate("rect",
             xmin = 0,
             xmax = max(shared$avg_log2FC_data1),
             ymin = max(shared$avg_log2FC_data2),
             ymax = 0,
             fill = "lightpink3",
             alpha = .5) +  
    annotate("rect",
             xmin = 0,
             xmax = min(shared$avg_log2FC_data1),
             ymin = 0,
             ymax = min(shared$avg_log2FC_data2),
             fill = "cadetblue3",
             alpha = .5) +
    geom_point(size = 2) +
    theme_bw() +
    scale_x_continuous(limits = x_limits) +  # Set x-axis limits
    scale_y_continuous(limits = y_limits) +  # Set y-axis limits
    geom_abline(slope = slope, intercept = intercept, color = "gray", 
                size = 0.8, linetype = "dashed") +  # Make the line thinner and dashed
    annotate("text",
             x = 0,
             y = 0,
             label = paste0("rho = ", as.numeric(rho_value)),
             parse = TRUE,
             color = "red",
             size = 5) +
    labs(x = paste0(group1, " LogFC"),
         y = paste0(group2, " LogFC"))
  
  return(ggplotly(p))
}

Macrophages: E4F vs E3F

  • Experiment 1 > E4F vs E3F > macrophages
  • Experiment 2 > E4CF vs E3CF > macrophages
# set working directory
setwd("/research/labs/neurology/fryer/m214960/Da_Mesquita")

# set variables
path <- "/results/all_clusters/downsampled/DEGs/DEG_tables/"
E4F_vs_E3F_pilot_path <- paste0("E3_E4_pilot", path, "E4_F_vs_E3_F_DEGs.tsv")
E4CF_vs_E3CF_diet_path <- paste0("PLX5622_diet", path, "E4CF_vs_E3CF_DEGs.tsv")

# plot
createCorrelationPlot(group1 = "Experiment 1: E4F vs E3F",
                      group2 = "Experiment 2: E4CF vs E3CF",
                      group1_file = E4F_vs_E3F_pilot_path,
                      group2_file = E4CF_vs_E3CF_diet_path,
                      fdrq_cutoff = 1,
                      cell_type = "Macrophages")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Macrophages: E4M vs E3M

  • Experiment 1 > E4M vs E3M > macrophages
  • Experiment 2 > E4CM vs E3CM > macrophages
# set working directory
setwd("/research/labs/neurology/fryer/m214960/Da_Mesquita")

# set variables
path <- "/results/all_clusters/downsampled/DEGs/DEG_tables/"
E4M_vs_E3M_pilot_path <- paste0("E3_E4_pilot", path, "E4_M_vs_E3_M_DEGs.tsv")
E4CM_vs_E3CM_diet_path <- paste0("PLX5622_diet", path, "E4CM_vs_E3CM_DEGs.tsv")

createCorrelationPlot(group1 = "Experiment 1: E4M vs E3M",
                      group2 = "Experiment 2: E4CM vs E3CM",
                      group1_file = E4M_vs_E3M_pilot_path,
                      group2_file = E4CM_vs_E3CM_diet_path,
                      fdrq_cutoff = 1,
                      cell_type = "Macrophages")