title: “Identifying cluster biomarkers” author: “Upasna Srivastava”
date: “2023-06-30” output: html_document —
library(Seurat)
library(ggplot2)
library(cowplot)
cluster_markers <- FindAllMarkers(
seurat_obj,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 1.0,
method='MAST'
)
cluster_markers$CellType_cluster <- paste0(unlist(cluster_annotations[cluster_markers$cluster]), '-', cluster_markers$cluster)
write.csv(cluster_markers, file='data/cluster_markers.csv', quote=FALSE, row.names=FALSE)
# plot the number of DEGs per cluster:
df <- as.data.frame(rev(table(cluster_markers$CellType_cluster)))
colnames(df) <- c('cluster', 'n_DEGs')
# bar plot of the number of cells in each sample
p <- ggplot(df, aes(y=n_DEGs, x=reorder(cluster, -n_DEGs), fill=cluster)) +
geom_bar(stat='identity') +
scale_y_continuous(expand = c(0,0)) +
NoLegend() + RotatedAxis() +
ylab(expression(italic(N)[DEGs])) + xlab('') +
theme(
panel.grid.minor=element_blank(),
panel.grid.major.y=element_line(colour="lightgray", size=0.5),
)
png('figures/basic_DEGs_barplot.png', width=9, height=4, res=300, units='in')
print(p)
dev.off()
# plot the top 3 DEGs per cluster as a heatmap:
top_DEGs <- cluster_markers %>%
group_by(CellType_cluster) %>%
top_n(3, wt=avg_logFC) %>%
.$gene
png('figures/basic_DEGs_heatmap.png', width=10, height=10, res=300, units='in')
pdf('figures/basic_DEGs_heatmap.pdf', width=15, height=12, useDingbats=FALSE)
DoHeatmap(seurat_obj, features=top_DEGs, group.by='seurat_clusters', label=FALSE) + scale_fill_gradientn(colors=viridis(256)) + NoLegend()
dev.off()
#Saving and loading Seurat objects
# add barcode and UMAP to metadata
seurat_obj$barcode <- colnames(seurat_obj)
seurat_obj$UMAP_1 <- seurat_obj@reductions$umap@cell.embeddings[,1]
seurat_obj$UMAP_2 <- seurat_obj@reductions$umap@cell.embeddings[,2]
# save seurat object
saveRDS(seurat_obj, file='data/processed_seurat_object.rds')
# load seurat object
seurat_obj <- readRDS(ile='data/processed_seurat_object.rds')
cluster_markers <- FindAllMarkers(
seurat_obj,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 1.0,
method='MAST'
)
cluster_markers$CellType_cluster <- paste0(unlist(cluster_annotations[cluster_markers$cluster]), '-', cluster_markers$cluster)
write.csv(cluster_markers, file='data/cluster_markers.csv', quote=FALSE, row.names=FALSE)
cluster_markers <- FindAllMarkers(
seurat_obj,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 1.0,
method='MAST'
)
cluster_markers$CellType_cluster <- paste0(unlist(cluster_annotations[cluster_markers$cluster]), '-', cluster_markers$cluster)
write.csv(cluster_markers, file='data/cluster_markers.csv', quote=FALSE, row.names=FALSE)
write.csv(cluster_markers, file='data/cluster_markers.csv', quote=FALSE, row.names=FALSE)
# plot the number of DEGs per cluster:
df <- as.data.frame(rev(table(cluster_markers$CellType_cluster)))
colnames(df) <- c('cluster', 'n_DEGs')
# bar plot of the number of cells in each sample
p <- ggplot(df, aes(y=n_DEGs, x=reorder(cluster, -n_DEGs), fill=cluster)) +
geom_bar(stat='identity') +
scale_y_continuous(expand = c(0,0)) +
NoLegend() + RotatedAxis() +
ylab(expression(italic(N)[DEGs])) + xlab('') +
theme(
panel.grid.minor=element_blank(),
panel.grid.major.y=element_line(colour="lightgray", size=0.5),
)
png('figures/basic_DEGs_barplot.png', width=9, height=4, res=300, units='in')
print(p)
dev.off()
# plot the top 3 DEGs per cluster as a heatmap:
top_DEGs <- cluster_markers %>%
group_by(CellType_cluster) %>%
top_n(3, wt=avg_logFC) %>%
.$gene
png('figures/basic_DEGs_heatmap.png', width=10, height=10, res=300, units='in')
pdf('figures/basic_DEGs_heatmap.pdf', width=15, height=12, useDingbats=FALSE)
DoHeatmap(seurat_obj, features=top_DEGs, group.by='seurat_clusters', label=FALSE) + scale_fill_gradientn(colors=viridis(256)) + NoLegend()
dev.off()
`
LS0tCm91dHB1dDoKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdAotLS0KCnRpdGxlOiAiSWRlbnRpZnlpbmcgY2x1c3RlciBiaW9tYXJrZXJzIgphdXRob3I6ICJVcGFzbmEgU3JpdmFzdGF2YSIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6IGh0bWxfZG9jdW1lbnQKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KFNldXJhdCkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGNvd3Bsb3QpCmNsdXN0ZXJfbWFya2VycyA8LSBGaW5kQWxsTWFya2VycygKICBzZXVyYXRfb2JqLAogIG9ubHkucG9zID0gVFJVRSwKICBtaW4ucGN0ID0gMC4yNSwKICBsb2dmYy50aHJlc2hvbGQgPSAxLjAsCiAgbWV0aG9kPSdNQVNUJwopCmNsdXN0ZXJfbWFya2VycyRDZWxsVHlwZV9jbHVzdGVyIDwtIHBhc3RlMCh1bmxpc3QoY2x1c3Rlcl9hbm5vdGF0aW9uc1tjbHVzdGVyX21hcmtlcnMkY2x1c3Rlcl0pLCAnLScsIGNsdXN0ZXJfbWFya2VycyRjbHVzdGVyKQp3cml0ZS5jc3YoY2x1c3Rlcl9tYXJrZXJzLCBmaWxlPSdkYXRhL2NsdXN0ZXJfbWFya2Vycy5jc3YnLCBxdW90ZT1GQUxTRSwgcm93Lm5hbWVzPUZBTFNFKQpgYGAKCmBgYHtyfQojIHBsb3QgdGhlIG51bWJlciBvZiBERUdzIHBlciBjbHVzdGVyOgpkZiA8LSBhcy5kYXRhLmZyYW1lKHJldih0YWJsZShjbHVzdGVyX21hcmtlcnMkQ2VsbFR5cGVfY2x1c3RlcikpKQpjb2xuYW1lcyhkZikgPC0gYygnY2x1c3RlcicsICduX0RFR3MnKQoKIyBiYXIgcGxvdCBvZiB0aGUgbnVtYmVyIG9mIGNlbGxzIGluIGVhY2ggc2FtcGxlCnAgPC0gZ2dwbG90KGRmLCBhZXMoeT1uX0RFR3MsIHg9cmVvcmRlcihjbHVzdGVyLCAtbl9ERUdzKSwgZmlsbD1jbHVzdGVyKSkgKwogIGdlb21fYmFyKHN0YXQ9J2lkZW50aXR5JykgKwogIHNjYWxlX3lfY29udGludW91cyhleHBhbmQgPSBjKDAsMCkpICsKICBOb0xlZ2VuZCgpICsgUm90YXRlZEF4aXMoKSArCiAgeWxhYihleHByZXNzaW9uKGl0YWxpYyhOKVtERUdzXSkpICsgeGxhYignJykgKwogIHRoZW1lKAogICAgcGFuZWwuZ3JpZC5taW5vcj1lbGVtZW50X2JsYW5rKCksCiAgICBwYW5lbC5ncmlkLm1ham9yLnk9ZWxlbWVudF9saW5lKGNvbG91cj0ibGlnaHRncmF5Iiwgc2l6ZT0wLjUpLAogICkKCnBuZygnZmlndXJlcy9iYXNpY19ERUdzX2JhcnBsb3QucG5nJywgd2lkdGg9OSwgaGVpZ2h0PTQsIHJlcz0zMDAsIHVuaXRzPSdpbicpCnByaW50KHApCmRldi5vZmYoKQoKIyBwbG90IHRoZSB0b3AgMyBERUdzIHBlciBjbHVzdGVyIGFzIGEgaGVhdG1hcDoKdG9wX0RFR3MgPC0gY2x1c3Rlcl9tYXJrZXJzICU+JQogIGdyb3VwX2J5KENlbGxUeXBlX2NsdXN0ZXIpICU+JQogIHRvcF9uKDMsIHd0PWF2Z19sb2dGQykgJT4lCiAgLiRnZW5lCgpwbmcoJ2ZpZ3VyZXMvYmFzaWNfREVHc19oZWF0bWFwLnBuZycsIHdpZHRoPTEwLCBoZWlnaHQ9MTAsIHJlcz0zMDAsIHVuaXRzPSdpbicpCnBkZignZmlndXJlcy9iYXNpY19ERUdzX2hlYXRtYXAucGRmJywgd2lkdGg9MTUsIGhlaWdodD0xMiwgdXNlRGluZ2JhdHM9RkFMU0UpCkRvSGVhdG1hcChzZXVyYXRfb2JqLCBmZWF0dXJlcz10b3BfREVHcywgZ3JvdXAuYnk9J3NldXJhdF9jbHVzdGVycycsIGxhYmVsPUZBTFNFKSArIHNjYWxlX2ZpbGxfZ3JhZGllbnRuKGNvbG9ycz12aXJpZGlzKDI1NikpICsgTm9MZWdlbmQoKQpkZXYub2ZmKCkKYGBgCmBgYHtyfQojU2F2aW5nIGFuZCBsb2FkaW5nIFNldXJhdCBvYmplY3RzCiMgYWRkIGJhcmNvZGUgYW5kIFVNQVAgdG8gbWV0YWRhdGEKc2V1cmF0X29iaiRiYXJjb2RlIDwtIGNvbG5hbWVzKHNldXJhdF9vYmopCnNldXJhdF9vYmokVU1BUF8xIDwtIHNldXJhdF9vYmpAcmVkdWN0aW9ucyR1bWFwQGNlbGwuZW1iZWRkaW5nc1ssMV0Kc2V1cmF0X29iaiRVTUFQXzIgPC0gc2V1cmF0X29iakByZWR1Y3Rpb25zJHVtYXBAY2VsbC5lbWJlZGRpbmdzWywyXQoKIyBzYXZlIHNldXJhdCBvYmplY3QKc2F2ZVJEUyhzZXVyYXRfb2JqLCBmaWxlPSdkYXRhL3Byb2Nlc3NlZF9zZXVyYXRfb2JqZWN0LnJkcycpCgojIGxvYWQgc2V1cmF0IG9iamVjdApgYGAKCgpgYGB7cn0Kc2V1cmF0X29iaiA8LSByZWFkUkRTKGlsZT0nZGF0YS9wcm9jZXNzZWRfc2V1cmF0X29iamVjdC5yZHMnKQpgYGAKYGBge2Jhc2h9CmNsdXN0ZXJfbWFya2VycyA8LSBGaW5kQWxsTWFya2VycygKICBzZXVyYXRfb2JqLAogIG9ubHkucG9zID0gVFJVRSwKICBtaW4ucGN0ID0gMC4yNSwKICBsb2dmYy50aHJlc2hvbGQgPSAxLjAsCiAgbWV0aG9kPSdNQVNUJwopCmNsdXN0ZXJfbWFya2VycyRDZWxsVHlwZV9jbHVzdGVyIDwtIHBhc3RlMCh1bmxpc3QoY2x1c3Rlcl9hbm5vdGF0aW9uc1tjbHVzdGVyX21hcmtlcnMkY2x1c3Rlcl0pLCAnLScsIGNsdXN0ZXJfbWFya2VycyRjbHVzdGVyKQp3cml0ZS5jc3YoY2x1c3Rlcl9tYXJrZXJzLCBmaWxlPSdkYXRhL2NsdXN0ZXJfbWFya2Vycy5jc3YnLCBxdW90ZT1GQUxTRSwgcm93Lm5hbWVzPUZBTFNFKQpgYGAKYGBge3J9CmNsdXN0ZXJfbWFya2VycyA8LSBGaW5kQWxsTWFya2VycygKICBzZXVyYXRfb2JqLAogIG9ubHkucG9zID0gVFJVRSwKICBtaW4ucGN0ID0gMC4yNSwKICBsb2dmYy50aHJlc2hvbGQgPSAxLjAsCiAgbWV0aG9kPSdNQVNUJwopCmNsdXN0ZXJfbWFya2VycyRDZWxsVHlwZV9jbHVzdGVyIDwtIHBhc3RlMCh1bmxpc3QoY2x1c3Rlcl9hbm5vdGF0aW9uc1tjbHVzdGVyX21hcmtlcnMkY2x1c3Rlcl0pLCAnLScsIGNsdXN0ZXJfbWFya2VycyRjbHVzdGVyKQp3cml0ZS5jc3YoY2x1c3Rlcl9tYXJrZXJzLCBmaWxlPSdkYXRhL2NsdXN0ZXJfbWFya2Vycy5jc3YnLCBxdW90ZT1GQUxTRSwgcm93Lm5hbWVzPUZBTFNFKQpgYGAKCgpgYGB7cn0Kd3JpdGUuY3N2KGNsdXN0ZXJfbWFya2VycywgZmlsZT0nZGF0YS9jbHVzdGVyX21hcmtlcnMuY3N2JywgcXVvdGU9RkFMU0UsIHJvdy5uYW1lcz1GQUxTRSkKIyBwbG90IHRoZSBudW1iZXIgb2YgREVHcyBwZXIgY2x1c3RlcjoKZGYgPC0gYXMuZGF0YS5mcmFtZShyZXYodGFibGUoY2x1c3Rlcl9tYXJrZXJzJENlbGxUeXBlX2NsdXN0ZXIpKSkKY29sbmFtZXMoZGYpIDwtIGMoJ2NsdXN0ZXInLCAnbl9ERUdzJykKCiMgYmFyIHBsb3Qgb2YgdGhlIG51bWJlciBvZiBjZWxscyBpbiBlYWNoIHNhbXBsZQpwIDwtIGdncGxvdChkZiwgYWVzKHk9bl9ERUdzLCB4PXJlb3JkZXIoY2x1c3RlciwgLW5fREVHcyksIGZpbGw9Y2x1c3RlcikpICsKICBnZW9tX2JhcihzdGF0PSdpZGVudGl0eScpICsKICBzY2FsZV95X2NvbnRpbnVvdXMoZXhwYW5kID0gYygwLDApKSArCiAgTm9MZWdlbmQoKSArIFJvdGF0ZWRBeGlzKCkgKwogIHlsYWIoZXhwcmVzc2lvbihpdGFsaWMoTilbREVHc10pKSArIHhsYWIoJycpICsKICB0aGVtZSgKICAgIHBhbmVsLmdyaWQubWlub3I9ZWxlbWVudF9ibGFuaygpLAogICAgcGFuZWwuZ3JpZC5tYWpvci55PWVsZW1lbnRfbGluZShjb2xvdXI9ImxpZ2h0Z3JheSIsIHNpemU9MC41KSwKICApCgpwbmcoJ2ZpZ3VyZXMvYmFzaWNfREVHc19iYXJwbG90LnBuZycsIHdpZHRoPTksIGhlaWdodD00LCByZXM9MzAwLCB1bml0cz0naW4nKQpwcmludChwKQpkZXYub2ZmKCkKCiMgcGxvdCB0aGUgdG9wIDMgREVHcyBwZXIgY2x1c3RlciBhcyBhIGhlYXRtYXA6CnRvcF9ERUdzIDwtIGNsdXN0ZXJfbWFya2VycyAlPiUKICBncm91cF9ieShDZWxsVHlwZV9jbHVzdGVyKSAlPiUKICB0b3BfbigzLCB3dD1hdmdfbG9nRkMpICU+JQogIC4kZ2VuZQoKcG5nKCdmaWd1cmVzL2Jhc2ljX0RFR3NfaGVhdG1hcC5wbmcnLCB3aWR0aD0xMCwgaGVpZ2h0PTEwLCByZXM9MzAwLCB1bml0cz0naW4nKQpwZGYoJ2ZpZ3VyZXMvYmFzaWNfREVHc19oZWF0bWFwLnBkZicsIHdpZHRoPTE1LCBoZWlnaHQ9MTIsIHVzZURpbmdiYXRzPUZBTFNFKQpEb0hlYXRtYXAoc2V1cmF0X29iaiwgZmVhdHVyZXM9dG9wX0RFR3MsIGdyb3VwLmJ5PSdzZXVyYXRfY2x1c3RlcnMnLCBsYWJlbD1GQUxTRSkgKyBzY2FsZV9maWxsX2dyYWRpZW50bihjb2xvcnM9dmlyaWRpcygyNTYpKSArIE5vTGVnZW5kKCkKZGV2Lm9mZigpCmBgYAoKYA==