We load the annotation
library(biomaRt)
mart <- useMart("ENSEMBL_MART_ENSEMBL")
mart <- useDataset(mart=mart,dataset="ggallus_gene_ensembl")
myattributes <- c("ensembl_gene_id",
"entrezgene_id",
"external_gene_name",
"chromosome_name",
"start_position",
"end_position",
"strand",
"gene_biotype",
"description")
bdata <- getBM(mart=mart,attributes=myattributes,uniqueRows=T,
useCache=FALSE)
# remove duplicated gene_ids
annotation <- bdata[!duplicated(bdata$ensembl_gene_id),c('chromosome_name', 'ensembl_gene_id', 'external_gene_name', 'entrezgene_id', 'gene_biotype')]
colnames(annotation) <- c('chromosome', 'gene_id', 'Name', 'entrezgene_id', 'biotype')
annotation[(annotation$Name == ''), 'Name'] <- 'Uncharacterised'
library(Seurat)
path <- '~/Desktop/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/'
samples_info <- read.csv(paste0(path,'SampleSheet.csv'), skip = 1, header = T)
samples_info <- samples_info[(samples_info$Lane == 1),]
expression_list <- list()
for(s in 1:nrow(samples_info)){
sample <- samples_info$Sample_Name[s]
cat('\r', sample)
tmp <- Seurat::Read10X_h5(
filename = paste0(path,'Merged/', sample, '/filtered_feature_bc_matrix.h5'),
use.names = T)
tmp_list <- list(tmp)
names(tmp_list) <- gsub(pattern = 'UF-2986-', replacement = '', x = sample)
expression_list <- append(expression_list, tmp_list)
}
rm(tmp)
rm(tmp_list)
Then we create object and add group information.
samples <- unique(sapply(names(expression_list), function(x) substr(x,1,nchar(x)-2)))
variables <- gsub(x = names(expression_list), pattern = '-', replacement = '_') # Creating variable names to assing objects to them.
for(s in 1:length(expression_list)){
tmp <- CreateSeuratObject(expression_list[[s]], project = names(expression_list[s]))
tmp$type <- gsub(x = variables[s], pattern = '_rep.*', replacement = '')
assign(variables[s], tmp)
}
Merged:
all_data <- merge(E8_Retina_rep_1, c(E8_Retina_rep_2, E8_Retina_rep_3, E8_Retina_rep_4, E14_Retina_rep_1, E14_Retina_rep_2, E14_Retina_rep_3, E14_Retina_rep_4, MYCN_cellline_rep_1, MYCN_cellline_rep_2, MYCN_cellline_rep_3, MYCN_cellline_rep_4), add.cell.ids = variables )
rm(E8_Retina_rep_1, E8_Retina_rep_2, E8_Retina_rep_3, E8_Retina_rep_4, E14_Retina_rep_1, E14_Retina_rep_2, E14_Retina_rep_3, E14_Retina_rep_4, MYCN_cellline_rep_1, MYCN_cellline_rep_2, MYCN_cellline_rep_3, MYCN_cellline_rep_4)
gc()
Finding expression of MT
mito_genes <- as.vector(unique(annotation[(annotation$chromosome == 'MT'), 'Name'])[-1])
all_data <- PercentageFeatureSet(all_data, features = mito_genes, col.name = "percent_mito")
Finding ribosomal proteins
all_data <- PercentageFeatureSet(all_data, "^RP[SL]", col.name = "percent_ribo")
library(ggplot2)
library(ggpubr)
path <- '~/Desktop/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/'
feats <- c("nFeature_RNA","nCount_RNA","percent_mito","percent_ribo")
png(paste0(path, 'results/Expression_QC_all_replicates.png'), width = 4000, height = 2000, res = 200)
p <- VlnPlot(all_data, group.by= "orig.ident", features = feats, pt.size = 0.1,ncol = 4) + NoLegend()
print(p)
dev.off()
meta_data <- all_data@meta.data
png(paste0(path, 'results/Mitochondrial_Ribosomal_perc.png'), width = 4000, height = 2000, res = 200)
p1 <- ggplot(data = meta_data, aes(x = type, y = percent_mito)) + geom_boxplot() + xlab('') + ylab('Mitochodnrial genes%')
p2 <- ggplot(data = meta_data, aes(x = type, y = percent_ribo)) + geom_boxplot() + xlab('') + ylab('Ribosomal genes%')
ggarrange(p1,p2,nrow = 1,ncol = 2)
dev.off()
We can also visualize two features against each other (e.g. nFeature vs nCount)
FeatureScatter(all_data, "nCount_RNA" , "nFeature_RNA", group.by = "orig.ident", pt.size = .5)
Here we will only consider cells with at least 100 detected genes and genes need to be expressed in at least 3 cells.
selected_c <- WhichCells(all_data, expression = nFeature_RNA > 100)
selected_f <- rownames(all_data)[ Matrix::rowSums(all_data) > 3]
data.filt <- subset(all_data, features=selected_f, cells=selected_c)
dim(data.filt)
Now keep the cells fulfilling following criteria:
- MT% < 25
- Ribo% > 0 #No filtering
selected_mito <- WhichCells(data.filt, expression = percent_mito < 25)
selected_ribo <- WhichCells(data.filt, expression = percent_ribo >= 0)
# and subset the object to only keep those cells
data.filt <- subset(data.filt, cells = selected_mito)
data.filt <- subset(data.filt, cells = selected_ribo)
dim(data.filt)
as.data.frame(table(data.filt$orig.ident))
saveRDS(data.filt, file= paste0(path, '/results/Alldata_filt.rds'))
Now we are splitting the Seurat object by type ie. E8, E14 and MYCN
obj.list <- SplitObject(data.filt, split.by = "type")
E8_Retina <- obj.list[['E8_Retina']]
E14_Retina <- obj.list[['E14_Retina']]
MYCN_cellline <- obj.list[['MYCN_cellline']]
#E8 only
E8_Retina = NormalizeData(E8_Retina)
# remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
suppressMessages(require(DoubletFinder))
E8_Retina = FindVariableFeatures(E8_Retina, verbose = F) # Finding 2000 most variable genes
gc()
E8_Retina = ScaleData(E8_Retina, vars.to.regress = c("nFeature_RNA", "percent_mito"), verbose = F) #Scaling and ceteringcentring the features in the dataset. The data is regressed against nFeature_RNA and percent_mito resuingresulting into residuals that are scaled and centered.
gc()
E8_Retina = RunPCA(E8_Retina, verbose = F, npcs = 20) #Running PCA for total 20 PCs to compute
gc()
E8_Retina = RunUMAP(E8_Retina, dims = 1:10, verbose = F) #Running UMAP using 1:10 dimonsions
gc()
# define the expected number of doublet cellscells.
nExp <- round(ncol(E8_Retina)* 0.04) # expect 4% doublets
E8_Retina <- doubletFinder_v3(E8_Retina, pN=0.25, pK = 0.09, nExp = nExp, PCs = 1:10)
# name of the DF prediction can change, so extract the correct column name.
DF.name = colnames(E8_Retina@meta.data)[grepl("DF.classification", colnames(E8_Retina@meta.data))]
png(paste0(path, 'results/E8_Doublets_clustering.png'), width = 2000, height = 2000, res = 200)
cowplot::plot_grid( ncol = 1,
DimPlot(E8_Retina, group.by = "orig.ident") + NoAxes(),
DimPlot(E8_Retina, group.by = DF.name) + NoAxes()
)
dev.off()
png(paste0(path, 'results/E8_Doublets.png'), width = 2000, height = 2000, res = 200)
VlnPlot(E8_Retina, features = "nFeature_RNA", group.by = DF.name, pt.size = .1)
dev.off()
###E14 only
E14_Retina = NormalizeData(E14_Retina)
E14_Retina = FindVariableFeatures(E14_Retina, verbose = F) # Finding 2000 most variable genes
gc()
E14_Retina = ScaleData(E14_Retina, vars.to.regress = c("nFeature_RNA", "percent_mito"), verbose = F) #Scaling and ceteringcentring the features in the dataset. The data is regressed against nFeature_RNA and percent_mito resuingresulting into residuals that are scaled and centered.
gc()
E14_Retina = RunPCA(E14_Retina, verbose = F, npcs = 20) #Running PCA for total 20 PCs to compute
gc()
E14_Retina = RunUMAP(E14_Retina, dims = 1:10, verbose = F) #Running UMAP using 1:10 dimonsions
gc()
# define the expected number of doublet cellscells.
nExp <- round(ncol(E14_Retina)* 0.04) # expect 4% doublets
E14_Retina <- doubletFinder_v3(E14_Retina, pN=0.25, pK = 0.09, nExp = nExp, PCs = 1:10)
# name of the DF prediction can change, so extract the correct column name.
DF.name = colnames(E14_Retina@meta.data)[grepl("DF.classification", colnames(E14_Retina@meta.data))]
png(paste0(path, 'results/E14_Doublets_clustering.png'), width = 2000, height = 2000, res = 200)
cowplot::plot_grid( ncol = 1,
DimPlot(E14_Retina, group.by = "orig.ident") + NoAxes(),
DimPlot(E14_Retina, group.by = DF.name) + NoAxes()
)
dev.off()
png(paste0(path, 'results/E14_Doublets.png'), width = 2000, height = 2000, res = 200)
VlnPlot(E14_Retina, features = "nFeature_RNA", group.by = DF.name, pt.size = .1)
dev.off()
#MYCN only
MYCN_cellline = NormalizeData(MYCN_cellline)
MYCN_cellline = FindVariableFeatures(MYCN_cellline, verbose = F) # Finding 2000 most variable genes
gc()
MYCN_cellline = ScaleData(MYCN_cellline, vars.to.regress = c("nFeature_RNA", "percent_mito"), verbose = F) #Scaling and ceteringcentring the features in the dataset. The data is regressed against nFeature_RNA and percent_mito resuingresulting into residuals that are scaled and centered.
gc()
MYCN_cellline = RunPCA(MYCN_cellline, verbose = F, npcs = 20) #Running PCA for total 20 PCs to compute
gc()
MYCN_cellline = RunUMAP(MYCN_cellline, dims = 1:10, verbose = F) #Running UMAP using 1:10 dimonsions
gc()
# define the expected number of doublet cellscells.
nExp <- round(ncol(MYCN_cellline)* 0.04) # expect 4% doublets
MYCN_cellline <- doubletFinder_v3(MYCN_cellline, pN=0.25, pK = 0.09, nExp = nExp, PCs = 1:10)
# name of the DF prediction can change, so extract the correct column name.
DF.name = colnames(MYCN_cellline@meta.data)[grepl("DF.classification", colnames(MYCN_cellline@meta.data))]
png(paste0(path, 'results/MYCN_cellline_clustering.png'), width = 2000, height = 2000, res = 200)
cowplot::plot_grid( ncol = 1,
DimPlot(MYCN_cellline, group.by = "orig.ident") + NoAxes(),
DimPlot(MYCN_cellline, group.by = DF.name) + NoAxes()
)
dev.off()
png(paste0(path, 'results/MYCN_cellline_Doublets.png'), width = 2000, height = 2000, res = 200)
VlnPlot(MYCN_cellline, features = "nFeature_RNA", group.by = DF.name, pt.size = .1)
dev.off()
Now we will filter out the doublets from each cell type
E8_Retina_doublefilt <- subset(E8_Retina, subset = DF.classifications_0.25_0.09_1303 == 'Singlet')
dim(E8_Retina)
dim(E8_Retina_doublefilt)
E14_Retina_doublefilt <- subset(E14_Retina, subset = DF.classifications_0.25_0.09_1371 == 'Singlet')
dim(E14_Retina)
dim(E14_Retina_doublefilt)
MYCN_cellline_doublefilt <- subset(MYCN_cellline, subset = DF.classifications_0.25_0.09_745 == 'Singlet')
dim(MYCN_cellline)
dim(MYCN_cellline_doublefilt)
Reduce dimonsionality Seurat first identify the most variable genes (2000 genes) to use for downstream analysis by FindVariableFeatures. This function calculates the average expression and dispersion for each gene, and then these genes are placed into bins. For dispersioan within each bin a z-score is calculated. By this approach the relationship between average expression and variability is controlled. In another word, we try to identify outliers.
suppressWarnings(suppressMessages(E8_Retina_doublefilt <- FindVariableFeatures(E8_Retina_doublefilt, selection.method = "vst", nfeatures = 2000 ,verbose = FALSE,assay = "RNA")))
top20 <- head(VariableFeatures(E8_Retina_doublefilt), 20)
png(paste0(path, 'results//E8_Top_2000_variable_genes.png'), width = 2000, height = 2000, res = 200)
LabelPoints(plot = VariableFeaturePlot(E8_Retina_doublefilt), points = top20, repel = TRUE)
dev.off()
suppressWarnings(suppressMessages(E14_Retina_doublefilt <- FindVariableFeatures(E14_Retina_doublefilt, selection.method = "vst", nfeatures = 2000 ,verbose = FALSE,assay = "RNA")))
top20 <- head(VariableFeatures(E14_Retina_doublefilt), 20)
png(paste0(path, 'results//E14_Top_2000_variable_genes.png'), width = 2000, height = 2000, res = 200)
LabelPoints(plot = VariableFeaturePlot(E14_Retina_doublefilt), points = top20, repel = TRUE)
dev.off()
suppressWarnings(suppressMessages(MYCN_cellline_doublefilt <- FindVariableFeatures(MYCN_cellline_doublefilt, selection.method = "vst", nfeatures = 2000 ,verbose = FALSE,assay = "RNA")))
top20 <- head(VariableFeatures(MYCN_cellline_doublefilt), 20)
png(paste0(path, 'results//MYCN_Top_2000_variable_genes.png'), width = 2000, height = 2000, res = 200)
LabelPoints(plot = VariableFeaturePlot(MYCN_cellline_doublefilt), points = top20, repel = TRUE)
dev.off()
Then we scale the data to have mean = 0 and SD = 1 by ScaleData where we regress out (e.g. vars.to.regress= “percent_mito”, “nFeature_RNA” ).
E8_Retina_doublefilt <- ScaleData(E8_Retina_doublefilt, vars.to.regress = c("percent_mito", "nFeature_RNA"), assay = "RNA")
E14_Retina_doublefilt <- ScaleData(E14_Retina_doublefilt, vars.to.regress = c("percent_mito", "nFeature_RNA"), assay = "RNA")
MYCN_cellline_doublefilt <- ScaleData(MYCN_cellline_doublefilt, vars.to.regress = c("percent_mito", "nFeature_RNA"), assay = "RNA")
We will now perform the PCA plots for all the three cell types
library(cowplot)
E8_Retina_doublefilt <- RunPCA(E8_Retina_doublefilt, npcs = 50, verbose = F)
png(paste0(path, '/results/E8_PCA.png'), width = 5000, height = 3000, res = 200)
plot_grid(ncol = 2,
DimPlot(E8_Retina_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 1:2),
DimPlot(E8_Retina_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 3:4))
dev.off()
E14_Retina_doublefilt <- RunPCA(E14_Retina_doublefilt, npcs = 50, verbose = F)
png(paste0(path, '/results/E14_PCA.png'), width = 5000, height = 3000, res = 200)
plot_grid(ncol = 2,
DimPlot(E14_Retina_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 1:2),
DimPlot(E14_Retina_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 3:4))
dev.off()
MYCN_cellline_doublefilt <- RunPCA(MYCN_cellline_doublefilt, npcs = 50, verbose = F)
png(paste0(path, '/results/MYCN_PCA.png'), width = 5000, height = 3000, res = 200)
plot_grid(ncol = 2,
DimPlot(MYCN_cellline_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 1:2),
DimPlot(MYCN_cellline_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 3:4))
dev.off()
Now we run tSNE on the three cell types
E8_Retina_doublefilt <- RunTSNE(E8_Retina_doublefilt, reduction = "pca", dims = 1:30,
perplexity=30,
max_iter=1000,
theta=0.5,
eta=200,
num_threads=0 )
png(paste0(path, '/results/E8_tSNE.png'), width = 3000, height = 2000, res = 200)
DimPlot(E8_Retina_doublefilt, reduction = "tsne", group.by = "type")
dev.off()
E14_Retina_doublefilt <- RunTSNE(E14_Retina_doublefilt, reduction = "pca", dims = 1:30,
perplexity=30,
max_iter=1000,
theta=0.5,
eta=200,
num_threads=0 )
png(paste0(path, '/results/E14_tSNE.png'), width = 3000, height = 2000, res = 200)
DimPlot(E14_Retina_doublefilt, reduction = "tsne", group.by = "type")
dev.off()
MYCN_cellline_doublefilt <- RunTSNE(MYCN_cellline_doublefilt, reduction = "pca", dims = 1:30,
perplexity=30,
max_iter=1000,
theta=0.5,
eta=200,
num_threads=0 )
png(paste0(path, '/results/MYCN_tSNE.png'), width = 3000, height = 2000, res = 200)
DimPlot(MYCN_cellline_doublefilt, reduction = "tsne", group.by = "type")
dev.off()
Now we will make a UMAP for the three cell types
E8_Retina_doublefilt <- RunUMAP(E8_Retina_doublefilt, reduction = "pca", dims = 1:30,
n.components=2,
n.neighbors=30,
n.epochs=200,
min.dist=0.3,
learning.rate=1,
spread=1 )
png(paste0(path, '/results/E8_umap_by_group.png'), width = 3000, height = 2000, res = 200)
DimPlot(E8_Retina_doublefilt, reduction = "umap", group.by = "type")
dev.off()
E14_Retina_doublefilt <- RunUMAP(E14_Retina_doublefilt, reduction = "pca", dims = 1:30,
n.components=2,
n.neighbors=30,
n.epochs=200,
min.dist=0.3,
learning.rate=1,
spread=1 )
png(paste0(path, '/results/E14_umap_by_group.png'), width = 3000, height = 2000, res = 200)
DimPlot(E14_Retina_doublefilt, reduction = "umap", group.by = "type")
dev.off()
MYCN_cellline_doublefilt <- RunUMAP(MYCN_cellline_doublefilt, reduction = "pca", dims = 1:30,
n.components=2,
n.neighbors=30,
n.epochs=200,
min.dist=0.3,
learning.rate=1,
spread=1 )
png(paste0(path, '/results/MYCN_umap_by_group.png'), width = 3000, height = 2000, res = 200)
DimPlot(MYCN_cellline_doublefilt, reduction = "umap", group.by = "type")
dev.off()
Correction for batch effect for the three cell types
suppressPackageStartupMessages({
library(Seurat)
library(cowplot)
library(ggplot2)
})
#E8
print(names(E8_Retina_doublefilt@reductions))
alldata.list <- SplitObject(E8_Retina_doublefilt, split.by = "orig.ident")
for (i in 1:length(alldata.list)) {
alldata.list[[i]] <- NormalizeData(alldata.list[[i]], verbose = FALSE)
alldata.list[[i]] <- FindVariableFeatures(alldata.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
}
hvgs_per_dataset <- lapply(alldata.list, function(x) {
x@assays$RNA@var.features
})
temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply(hvgs_per_dataset, function(x) {
temp %in% x
})
pheatmap::pheatmap(t(overlap * 1), cluster_rows = F, color = c("grey90", "grey20"))
#We identify anchors using the FindIntegrationAnchors function, which takes a list of Seurat objects as input.
alldata.anchors <- FindIntegrationAnchors(object.list = alldata.list, dims = 1:30, reduction = "cca")
#Now pass the anchors to the integratedData to create a Seurat object
E8_Retina_doublefilt.int <- IntegrateData(anchorset = alldata.anchors, dims = 1:30, new.assay.name = "CCA")
#Running dimensionality reduction on integrated data
E8_Retina_doublefilt.int <- ScaleData(E8_Retina_doublefilt.int, verbose = FALSE)
E8_Retina_doublefilt.int <- RunPCA(E8_Retina_doublefilt.int, npcs = 30, verbose = FALSE)
E8_Retina_doublefilt.int <- RunUMAP(E8_Retina_doublefilt.int, dims = 1:30)
E8_Retina_doublefilt.int <- RunTSNE(E8_Retina_doublefilt.int, dims = 1:30)
png(paste0(path, '/results/E8_Retina_doublefilt.int_umap_.png'), width = 3000, height = 2000, res = 200)
DimPlot(E8_Retina_doublefilt.int, reduction = "umap", group.by = "type")
dev.off()
#E14
print(names(E14_Retina_doublefilt@reductions))
alldata.list1 <- SplitObject(E14_Retina_doublefilt, split.by = "orig.ident")
for (i in 1:length(alldata.list1)) {
alldata.list1[[i]] <- NormalizeData(alldata.list1[[i]], verbose = FALSE)
alldata.list1[[i]] <- FindVariableFeatures(alldata.list1[[i]], selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
}
hvgs_per_dataset <- lapply(alldata.list1, function(x) {
x@assays$RNA@var.features
})
temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply(hvgs_per_dataset, function(x) {
temp %in% x
})
pheatmap::pheatmap(t(overlap * 1), cluster_rows = F, color = c("grey90", "grey20"))
#We identify anchors using the FindIntegrationAnchors function, which takes a list of Seurat objects as input.
alldata.anchors1 <- FindIntegrationAnchors(object.list = alldata.list1, dims = 1:30, reduction = "cca")
#Now pass the anchors to the integratedData to create a Seurat object
E14_Retina_doublefilt.int <- IntegrateData(anchorset = alldata.anchors1, dims = 1:30, new.assay.name = "CCA")
#Running dimensionality reduction on integrated data
E14_Retina_doublefilt.int <- ScaleData(E14_Retina_doublefilt.int, verbose = FALSE)
E14_Retina_doublefilt.int <- RunPCA(E14_Retina_doublefilt.int, npcs = 30, verbose = FALSE)
E14_Retina_doublefilt.int <- RunUMAP(E14_Retina_doublefilt.int, dims = 1:30)
E14_Retina_doublefilt.int <- RunTSNE(E14_Retina_doublefilt.int, dims = 1:30)
png(paste0(path, '/results/E14_Retina_doublefilt.int_umap_.png'), width = 3000, height = 2000, res = 200)
DimPlot(E14_Retina_doublefilt.int, reduction = "umap", group.by = "type")
dev.off()
#MYCN
print(names(MYCN_cellline_doublefilt@reductions))
alldata.list2 <- SplitObject(MYCN_cellline_doublefilt, split.by = "orig.ident")
for (i in 1:length(alldata.list2)) {
alldata.list2[[i]] <- NormalizeData(alldata.list2[[i]], verbose = FALSE)
alldata.list2[[i]] <- FindVariableFeatures(alldata.list2[[i]], selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
}
hvgs_per_dataset <- lapply(alldata.list2, function(x) {
x@assays$RNA@var.features
})
temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply(hvgs_per_dataset, function(x) {
temp %in% x
})
pheatmap::pheatmap(t(overlap * 1), cluster_rows = F, color = c("grey90", "grey20"))
#We identify anchors using the FindIntegrationAnchors function, which takes a list of Seurat objects as input.
alldata.anchors2 <- FindIntegrationAnchors(object.list = alldata.list2, dims = 1:30, reduction = "cca")
#Now pass the anchors to the integratedData to create a Seurat object
MYCN_cellline_doublefilt.int <- IntegrateData(anchorset = alldata.anchors2, dims = 1:30, new.assay.name = "CCA")
#Running dimensionality reduction on integrated data
MYCN_cellline_doublefilt.int <- ScaleData(MYCN_cellline_doublefilt.int, verbose = FALSE)
MYCN_cellline_doublefilt.int <- RunPCA(MYCN_cellline_doublefilt.int, npcs = 30, verbose = FALSE)
MYCN_cellline_doublefilt.int <- RunUMAP(MYCN_cellline_doublefilt.int, dims = 1:30)
MYCN_cellline_doublefilt.int <- RunTSNE(MYCN_cellline_doublefilt.int, dims = 1:30)
png(paste0(path, '/results/MYCN_cellline_doublefilt.int_umap_.png'), width = 3000, height = 2000, res = 200)
DimPlot(MYCN_cellline_doublefilt.int, reduction = "umap", group.by = "type")
dev.off()
This part of the code is used with all the samples and cell cycle correction is being applied
library(Seurat)
library(ggplot2)
library(ggpubr)
library(purrr)
library(knitr)
library(tidyverse)
data.filt_new <- readRDS('/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/results/scRNA_Seq/02-QC_Post_Mapping/Merged/No_correction_for_Cell_Cycle/data.filt_mito_25_no_ribo_filter.rds')
#path <- '~/Documents/NBIS_projects/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/'
#path1 <- '~/Documents/NBIS_projects/SMS_5933_sc_script/'
samples_info <- read.csv('/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/SampleSheet.csv',skip = 1, header = T)
metrics <- c("Estimated.Number.of.Cells", "Mean.Reads.per.Cell", "Median.Genes.per.Cell", "Number.of.Reads", "Valid.Barcodes", "Sequencing.Saturation", "Q30.Bases.in.Barcode", "Q30.Bases.in.RNA.Read", "Q30.Bases.in.UMI", "Reads.Mapped.to.Genome", "Reads.Mapped.Confidently.to.Genome", "Reads.Mapped.Confidently.to.Intergenic.Regions", "Reads.Mapped.Confidently.to.Intronic.Regions", "Reads.Mapped.Confidently.to.Exonic.Regions", "Reads.Mapped.Confidently.to.Transcriptome", "Reads.Mapped.Antisense.to.Gene", "Fraction.Reads.in.Cells", "Total.Genes.Detected", "Median.UMI.Counts.per.Cell")
metrics_df <- data.frame(matrix(0, nrow = nrow(samples_info), ncol = length(metrics) ))
# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. We can
# segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
# Create our Seurat object and complete the initalization steps
#data.filt_new<- NormalizeData(data.filt_new, normalization.method = "LogNormalize", scale.factor = 1e6)
data.filt_new <- FindVariableFeatures(data.filt_new, selection.method = "vst")
data.filt_new <- ScaleData(data.filt_new, features = rownames(data.filt_new))
data.filt_new <- RunPCA(data.filt_new, features = VariableFeatures(data.filt_new), ndims.print = 6:10, nfeatures.print = 10)
##DimHeatmap
png(file ='/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/figures/DimHeatmap_All.png', width = 5000, height = 3000, res = 200)
DimHeatmap(data.filt_new, dims = c(8, 10))
dev.off()
##Assign Cell-Cycle Scores
data.filt_new <- CellCycleScoring(data.filt_new, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
# view cell cycle scores and phase assignments and wirting to a file
#cellcycle_phases <- (data.filt_new[[]])
# Running a PCA on cell cycle genes reveals, unsurprisingly, that cells separate entirely by phase
data.filt_new <- RunPCA(data.filt_new, features = c(s.genes, g2m.genes))
png(file= '/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/figures/Cell_seperate_by_cellCycle.png', width = 5000, height = 3000, res = 200)
DimPlot(data.filt_new)
dev.off()
#Regress out cell cycle scores during data scaling
data.filt_new_regressed <- ScaleData(data.filt_new, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(data.filt_new))
# Now, a PCA on the variable genes no longer returns components associated with cell cycle
data.filt_new_regressed <- RunPCA(data.filt_new_regressed, features = VariableFeatures(data.filt_new), nfeatures.print = 10)
# When running a PCA on only cell cycle genes, cells no longer separate by cell-cycle phase
data.filt_new_regressed <- RunPCA(data.filt_new_regressed, features = c(s.genes, g2m.genes))
png(file= '/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/figures/Cell_nolonger_seperate_by_cellCycle.png', width = 5000,height = 3000, res = 200)
DimPlot(data.filt_new_regressed)
dev.off()
#This file saved include all the above steps one can direclty load this file
saveRDS(data.filt_new_regressed, file= '/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/data.filt_new_regressed_08_May.rds')