Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Functions
# Function to create the correlation plot
createCorrelationPlot <- function(group1, group2, group1_file, group2_file, cell_type, fdrq_cutoff) {
# Read data
setwd("/research/labs/neurology/fryer/m214960/Da_Mesquita")
data1 <- read.delim2(group1_file)
data2 <- read.delim2(group2_file)
# Subset data based on cell type
data1 <- data1[data1$cluster == cell_type,]
data2 <- data2[data2$cluster == cell_type,]
# change class
data1$p_val_adj <- as.numeric(data1$p_val_adj)
data1$avg_log2FC <- as.numeric(data1$avg_log2FC)
data2$p_val_adj <- as.numeric(data2$p_val_adj)
data2$avg_log2FC <- as.numeric(data2$avg_log2FC)
# Subset data based on adjusted p-value
data1 <- data1[data1$p_val_adj <= fdrq_cutoff,]
data2 <- data2[data2$p_val_adj <= fdrq_cutoff,]
# Find shared genes and subset
shared_genes <- data1$gene[data1$gene %in% data2$gene]
shared_genes <- shared_genes[!duplicated(shared_genes)]
data1 <- data1[data1$gene %in% shared_genes,]
data2 <- data2[data2$gene %in% shared_genes,]
shared <- left_join(data1, data2, by = "gene", suffix = c("_data1", "_data2"))
# Remove rows with NA values in avg_log2FC_data1 or avg_log2FC_data2
shared <- shared %>%
filter(!is.na(avg_log2FC_data1) & !is.na(avg_log2FC_data2))
# Ensure there are enough data points
if (nrow(shared) < 2) {
print("Not enough data to plot")
return(NULL) # Not enough data to plot
}
# Spearman's correlation
res <- cor.test(shared$avg_log2FC_data1, shared$avg_log2FC_data2, method = "spearman")
p_value <- round(res$p.value, 5)
rho_value <- round(res$estimate, 3)
# Fit a manual linear model
lm_fit <- lm(avg_log2FC_data2 ~ avg_log2FC_data1, data = shared)
slope <- coef(lm_fit)[2]
intercept <- coef(lm_fit)[1]
# Calculate the limits for the plot based on the range of the data
x_limits <- c(min(shared$avg_log2FC_data1, na.rm = TRUE), max(shared$avg_log2FC_data1, na.rm = TRUE))
y_limits <- c(min(shared$avg_log2FC_data2, na.rm = TRUE), max(shared$avg_log2FC_data2, na.rm = TRUE))
# Create the plot and manually add the regression line
p <- ggplot(data = shared,
aes(x = avg_log2FC_data1, y = avg_log2FC_data2, text = paste(gene))) +
# Restore the pink and blue color annotations
annotate("rect",
xmin = 0,
xmax = max(shared$avg_log2FC_data1),
ymin = max(shared$avg_log2FC_data2),
ymax = 0,
fill = "lightpink3",
alpha = .5) +
annotate("rect",
xmin = 0,
xmax = min(shared$avg_log2FC_data1),
ymin = 0,
ymax = min(shared$avg_log2FC_data2),
fill = "cadetblue3",
alpha = .5) +
geom_point(size = 2) +
theme_bw() +
scale_x_continuous(limits = x_limits) + # Set x-axis limits
scale_y_continuous(limits = y_limits) + # Set y-axis limits
geom_abline(slope = slope, intercept = intercept, color = "gray",
size = 0.8, linetype = "dashed") + # Make the line thinner and dashed
annotate("text",
x = 0,
y = 0,
label = paste0("rho = ", as.numeric(rho_value)),
parse = TRUE,
color = "red",
size = 5) +
labs(x = paste0(group1, " LogFC"),
y = paste0(group2, " LogFC"))
return(ggplotly(p))
}
Macrophages: E4F vs E3F
- Experiment 1 > E4F vs E3F > macrophages
- Experiment 2 > E4CF vs E3CF > macrophages
# set working directory
setwd("/research/labs/neurology/fryer/m214960/Da_Mesquita")
# set variables
path <- "/results/all_clusters/downsampled/DEGs/DEG_tables/"
E4F_vs_E3F_pilot_path <- paste0("E3_E4_pilot", path, "E4_F_vs_E3_F_DEGs.tsv")
E4CF_vs_E3CF_diet_path <- paste0("PLX5622_diet", path, "E4CF_vs_E3CF_DEGs.tsv")
# plot
createCorrelationPlot(group1 = "Experiment 1: E4F vs E3F",
group2 = "Experiment 2: E4CF vs E3CF",
group1_file = E4F_vs_E3F_pilot_path,
group2_file = E4CF_vs_E3CF_diet_path,
fdrq_cutoff = 1,
cell_type = "Macrophages")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Macrophages: E4M vs E3M
- Experiment 1 > E4M vs E3M > macrophages
- Experiment 2 > E4CM vs E3CM > macrophages
# set working directory
setwd("/research/labs/neurology/fryer/m214960/Da_Mesquita")
# set variables
path <- "/results/all_clusters/downsampled/DEGs/DEG_tables/"
E4M_vs_E3M_pilot_path <- paste0("E3_E4_pilot", path, "E4_M_vs_E3_M_DEGs.tsv")
E4CM_vs_E3CM_diet_path <- paste0("PLX5622_diet", path, "E4CM_vs_E3CM_DEGs.tsv")
createCorrelationPlot(group1 = "Experiment 1: E4M vs E3M",
group2 = "Experiment 2: E4CM vs E3CM",
group1_file = E4M_vs_E3M_pilot_path,
group2_file = E4CM_vs_E3CM_diet_path,
fdrq_cutoff = 1,
cell_type = "Macrophages")