#Differential Expression Analysis
#Load Seurat Object L7
load("../../../0-IMP-OBJECTS/Harmony_integrated_All_samples_Merged_with_PBMC10x_with_harmony_clustering.Robj")
All_samples_Merged
An object of class Seurat
64169 features across 59355 samples within 6 assays
Active assay: SCT (27417 features, 3000 variable features)
3 layers present: counts, data, scale.data
5 other assays present: RNA, ADT, prediction.score.celltype.l1, prediction.score.celltype.l2, prediction.score.celltype.l3
6 dimensional reductions calculated: integrated_dr, ref.umap, pca, umap, harmony, umap.harmony
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "cell_line",label = T, label.box = T)
DimPlot(All_samples_Merged, reduction = "umap.harmony", group.by = "Harmony_snn_res.0.9",label = T, label.box = T)
#Differential Expression Analysis
# Find all markers (positive and negative) for clusters 0 to 24
all_markers_pos_neg <- FindAllMarkers(All_samples_Merged,
only.pos = FALSE,
min.pct = 0.1,
logfc.threshold = 0.25)
Calculating cluster 0
Calculating cluster 1
Calculating cluster 2
Calculating cluster 3
Calculating cluster 4
Calculating cluster 5
Calculating cluster 6
Calculating cluster 7
Calculating cluster 8
Calculating cluster 9
Calculating cluster 10
Calculating cluster 11
Calculating cluster 12
Calculating cluster 13
Calculating cluster 14
Calculating cluster 15
Calculating cluster 16
Calculating cluster 17
Calculating cluster 18
Calculating cluster 19
Calculating cluster 20
Calculating cluster 21
Calculating cluster 22
Calculating cluster 23
# Save all markers (positive and negative) to CSV
write.csv(all_markers_pos_neg, "all_markers_positive_and_negative.csv", row.names = FALSE)
# Find only positive markers for clusters 0 to 24
all_markers_pos <- FindAllMarkers(All_samples_Merged,
only.pos = TRUE,
min.pct = 0.1,
logfc.threshold = 0.25)
Calculating cluster 0
Calculating cluster 1
Calculating cluster 2
Calculating cluster 3
Calculating cluster 4
Calculating cluster 5
Calculating cluster 6
Calculating cluster 7
Calculating cluster 8
Calculating cluster 9
Calculating cluster 10
Calculating cluster 11
Calculating cluster 12
Calculating cluster 13
Calculating cluster 14
Calculating cluster 15
Calculating cluster 16
Calculating cluster 17
Calculating cluster 18
Calculating cluster 19
Calculating cluster 20
Calculating cluster 21
Calculating cluster 22
Calculating cluster 23
View(all_markers_pos_neg)
# Save only positive markers to CSV
write.csv(all_markers_pos, "all_markers_positive_only.csv", row.names = FALSE)
# Extract top 5 positive markers for each cluster, considering both avg_log2FC and p_val_adj
top5_markers <- all_markers_pos %>%
group_by(cluster) %>%
filter(p_val_adj < 0.05) %>% # Filter for statistical significance
slice_max(n = 5, order_by = avg_log2FC) %>% # Select top 5 by avg_log2FC among significant genes
ungroup()
# Create heatmap for top 5 markers
heatmap_all <- DoHeatmap(All_samples_Merged,
features = unique(top5_markers$gene),
group.by = "Harmony_snn_res.0.9",
assay = "SCT")
Avis : The following features were omitted as they were not found in the scale.data slot for the SCT assay: CCDC169, MMP2, CPA3, AC092979.1, AC111000.4, SCGB1C1, AP001189.1, PDZK1IP1, LINC01539, BX255923.2, AC020651.2, AC104809.2, KCNA5, CLEC4C, SCT, AC097375.1, STEAP4, MARC1, MIR222HG, DLX2, CARD10, CCND1, PTGDR, PTCHD1, CTDSPL, NUAK1, VENTX, FCGR1A, VCAN-AS1, AL161785.1, RNASE2, LINC02046, LINC02113, DOCK1, NAALADL2-AS2, ADGB, LINC02406, PKHD1, SLIT3, HMGA2, DSG2, SKOR1, ITGAD, AC005842.1, SPON1, AP005229.1, AC090771.2, GSTM2, CD248
print(heatmap_all)
ggsave("heatmap_all_clusters.png", plot = heatmap_all, width = 12, height = 10)
# Create heatmap using normalized RNA data
heatmap_all2 <- DoHeatmap(All_samples_Merged,
features = unique(top5_markers$gene),
group.by = "Harmony_snn_res.0.9",
assay = "SCT") +
theme(axis.text.y = element_text(size = 6))
Avis : The following features were omitted as they were not found in the scale.data slot for the SCT assay: CCDC169, MMP2, CPA3, AC092979.1, AC111000.4, SCGB1C1, AP001189.1, PDZK1IP1, LINC01539, BX255923.2, AC020651.2, AC104809.2, KCNA5, CLEC4C, SCT, AC097375.1, STEAP4, MARC1, MIR222HG, DLX2, CARD10, CCND1, PTGDR, PTCHD1, CTDSPL, NUAK1, VENTX, FCGR1A, VCAN-AS1, AL161785.1, RNASE2, LINC02046, LINC02113, DOCK1, NAALADL2-AS2, ADGB, LINC02406, PKHD1, SLIT3, HMGA2, DSG2, SKOR1, ITGAD, AC005842.1, SPON1, AP005229.1, AC090771.2, GSTM2, CD248
print(heatmap_all)
ggsave("heatmap_all_clusters2.png", plot = heatmap_all2, width = 12, height = 10)
# Create heatmap with adjusted cluster label sizes
heatmap_all3 <- DoHeatmap(All_samples_Merged,
features = unique(top5_markers$gene),
group.by = "Harmony_snn_res.0.9",
assay = "SCT") +
theme(axis.text.y = element_text(size = 6), # Adjust gene text size
axis.text.x = element_text(size = 6)) + # Adjust cluster text size
theme(axis.title.x = element_text(size = 8), # Adjust x-axis title size if needed
axis.title.y = element_text(size = 8)) # Adjust y-axis title size if needed
Avis : The following features were omitted as they were not found in the scale.data slot for the SCT assay: CCDC169, MMP2, CPA3, AC092979.1, AC111000.4, SCGB1C1, AP001189.1, PDZK1IP1, LINC01539, BX255923.2, AC020651.2, AC104809.2, KCNA5, CLEC4C, SCT, AC097375.1, STEAP4, MARC1, MIR222HG, DLX2, CARD10, CCND1, PTGDR, PTCHD1, CTDSPL, NUAK1, VENTX, FCGR1A, VCAN-AS1, AL161785.1, RNASE2, LINC02046, LINC02113, DOCK1, NAALADL2-AS2, ADGB, LINC02406, PKHD1, SLIT3, HMGA2, DSG2, SKOR1, ITGAD, AC005842.1, SPON1, AP005229.1, AC090771.2, GSTM2, CD248
# Print and save the heatmap
print(heatmap_all3)
ggsave("heatmap_all_clusters3.png", plot = heatmap_all3, width = 12, height = 10)
# Create dotplot for top 5 markers
dotplot_all <- DotPlot(All_samples_Merged,
features = unique(top5_markers$gene),
group.by = "Harmony_snn_res.0.9") +
coord_flip() + # Flip coordinates to have genes on y-axis
theme(axis.text.y = element_text(size = 7), # Adjust gene text size
axis.text.x = element_text(size = 8)) # Adjust cluster text size
print(dotplot_all)
ggsave("dotplot_all_clusters.png", plot = dotplot_all, width = 12, height = 10)
# Set the identity to Harmony_snn_res.0.9
Idents(All_samples_Merged) <- "Harmony_snn_res.0.9"
# Define the specific cell line clusters and their order
cell_line_clusters <- c(3, 8, 10, 18, 1, 2, 13, 4, 6, 7, 9, 16, 19)
# Subset the Seurat object to include only the cell line clusters
All_samples_Merged_cell_lines <- subset(All_samples_Merged, idents = cell_line_clusters)
# Reorder the identities of the cell line clusters
All_samples_Merged_cell_lines$cell_line_order <- factor(Idents(All_samples_Merged_cell_lines),
levels = cell_line_clusters)
Idents(All_samples_Merged_cell_lines) <- "cell_line_order"
# Find markers for the cell line clusters (positive only)
cell_line_markers <- FindAllMarkers(All_samples_Merged_cell_lines,
only.pos = TRUE,
min.pct = 0.1,
logfc.threshold = 0.25)
# Extract top 5 markers for each cell line cluster, considering both avg_log2FC and p_val_adj
top5_cell_line_markers <- cell_line_markers %>%
group_by(cluster) %>%
filter(p_val_adj < 0.05) %>% # Filter for statistical significance
slice_max(n = 5, order_by = avg_log2FC) %>% # Select top 5 by avg_log2FC among significant genes
ungroup()
# Create heatmap for cell line clusters
heatmap_cell_lines <- DoHeatmap(All_samples_Merged_cell_lines,
features = unique(top5_cell_line_markers$gene),
group.by = "cell_line_order", # Use ordered identities
assay = "SCT") +
theme(axis.text.y = element_text(size = 6), # Adjust gene text size
axis.text.x = element_text(size = 6)) + # Adjust cluster text size
theme(axis.title.x = element_text(size = 8), # Adjust x-axis title size if needed
axis.title.y = element_text(size = 8)) # Adjust y-axis title size if needed
# Print and save the heatmap
print(heatmap_cell_lines)
ggsave("heatmap_cell_lines.png", plot = heatmap_cell_lines, width = 12, height = 10)
# Create dotplot for cell line clusters
dotplot_cell_lines <- DotPlot(All_samples_Merged_cell_lines,
features = unique(top5_cell_line_markers$gene),
group.by = "cell_line_order") + # Use ordered identities
coord_flip() + # Flip coordinates to have genes on y-axis
theme(axis.text.y = element_text(size = 7), # Adjust gene text size
axis.text.x = element_text(size = 8)) + # Adjust cluster text size
theme(axis.title.x = element_text(size = 8), # Adjust x-axis title size if needed
axis.title.y = element_text(size = 8)) # Adjust y-axis title size if needed
# Print and save the dotplot
print(dotplot_cell_lines)
ggsave("dotplot_cell_lines.png", plot = dotplot_cell_lines, width = 12, height = 10)
Idents(All_samples_Merged_cell_lines) <- "cell_line_order"
# Extract top 10 markers for each cell line cluster, considering both avg_log2FC and p_val_adj
top10_cell_line_markers <- cell_line_markers %>%
group_by(cluster) %>%
filter(p_val_adj < 0.05) %>% # Filter for statistical significance
slice_max(n = 10, order_by = avg_log2FC) %>% # Select top 10 by avg_log2FC among significant genes
ungroup()
# Create heatmap for cell line clusters using top 10 markers
heatmap_cell_lines <- DoHeatmap(All_samples_Merged_cell_lines,
features = unique(top10_cell_line_markers$gene),
group.by = "cell_line_order", # Use ordered identities
assay = "SCT") +
theme(axis.text.y = element_text(size = 6), # Adjust gene text size
axis.text.x = element_text(size = 6)) + # Adjust cluster text size
theme(axis.title.x = element_text(size = 8), # Adjust x-axis title size if needed
axis.title.y = element_text(size = 8)) # Adjust y-axis title size if needed
# Print and save the heatmap
print(heatmap_cell_lines)
ggsave("heatmap_cell_lines_top10.png", plot = heatmap_cell_lines, width = 12, height = 10)
# Create dotplot for cell line clusters using top 10 markers
dotplot_cell_lines <- DotPlot(All_samples_Merged_cell_lines,
features = unique(top10_cell_line_markers$gene),
group.by = "cell_line_order") + # Use ordered identities
coord_flip() + # Flip coordinates to have genes on y-axis
theme(axis.text.y = element_text(size = 7), # Adjust gene text size
axis.text.x = element_text(size = 8)) + # Adjust cluster text size
theme(axis.title.x = element_text(size = 8), # Adjust x-axis title size if needed
axis.title.y = element_text(size = 8)) # Adjust y-axis title size if needed
# Print and save the dotplot
print(dotplot_cell_lines)
ggsave("dotplot_cell_lines_top10.png", plot = dotplot_cell_lines, width = 12, height = 10)