Annotation

We load the annotation

library(biomaRt)
mart <- useMart("ENSEMBL_MART_ENSEMBL")
mart <- useDataset(mart=mart,dataset="ggallus_gene_ensembl")
myattributes <- c("ensembl_gene_id",
                  "entrezgene_id",
                  "external_gene_name",
                  "chromosome_name",
                  "start_position",
                  "end_position",
                  "strand",
                  "gene_biotype",
                  "description")
bdata <- getBM(mart=mart,attributes=myattributes,uniqueRows=T,
               useCache=FALSE)
# remove duplicated gene_ids
annotation <- bdata[!duplicated(bdata$ensembl_gene_id),c('chromosome_name', 'ensembl_gene_id', 'external_gene_name', 'entrezgene_id', 'gene_biotype')]
colnames(annotation) <- c('chromosome', 'gene_id', 'Name', 'entrezgene_id', 'biotype')
annotation[(annotation$Name == ''), 'Name'] <- 'Uncharacterised'

Reading 10x_mappings

Merged we are reading the merged files

library(Seurat)
path <- '~/Desktop/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/'
samples_info <- read.csv(paste0(path,'SampleSheet.csv'), skip = 1, header = T)
samples_info <- samples_info[(samples_info$Lane == 1),]
expression_list <- list()
for(s in 1:nrow(samples_info)){
  sample <- samples_info$Sample_Name[s]
  cat('\r', sample)
  
  tmp <- Seurat::Read10X_h5(
    filename = paste0(path,'Merged/', sample, '/filtered_feature_bc_matrix.h5'),
    use.names = T)
  tmp_list <- list(tmp)
  names(tmp_list) <- gsub(pattern = 'UF-2986-', replacement = '', x = sample)
  expression_list <- append(expression_list, tmp_list)
}
rm(tmp)
rm(tmp_list)

Then we create object and add group information.

samples <- unique(sapply(names(expression_list), function(x) substr(x,1,nchar(x)-2)))
variables <- gsub(x = names(expression_list), pattern = '-', replacement = '_') # Creating variable names to assing objects to them.  

for(s in 1:length(expression_list)){
  tmp <- CreateSeuratObject(expression_list[[s]],  project = names(expression_list[s]))
  tmp$type <- gsub(x = variables[s], pattern = '_rep.*', replacement = '')
  assign(variables[s], tmp)
}

Merged:

all_data <- merge(E8_Retina_rep_1,  c(E8_Retina_rep_2,  E8_Retina_rep_3,    E8_Retina_rep_4,    E14_Retina_rep_1,   E14_Retina_rep_2,   E14_Retina_rep_3,   E14_Retina_rep_4,   MYCN_cellline_rep_1,    MYCN_cellline_rep_2,    MYCN_cellline_rep_3,    MYCN_cellline_rep_4), add.cell.ids = variables ) 
rm(E8_Retina_rep_1, E8_Retina_rep_2,    E8_Retina_rep_3,    E8_Retina_rep_4,    E14_Retina_rep_1,   E14_Retina_rep_2,   E14_Retina_rep_3,   E14_Retina_rep_4,   MYCN_cellline_rep_1,    MYCN_cellline_rep_2,    MYCN_cellline_rep_3,    MYCN_cellline_rep_4)
gc()

Generate QC metrics

Finding expression of MT

mito_genes <- as.vector(unique(annotation[(annotation$chromosome == 'MT'), 'Name'])[-1])
all_data <- PercentageFeatureSet(all_data, features = mito_genes, col.name = "percent_mito")

Finding ribosomal proteins

all_data <- PercentageFeatureSet(all_data, "^RP[SL]", col.name = "percent_ribo")
library(ggplot2)
library(ggpubr)
path <- '~/Desktop/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/'
feats <- c("nFeature_RNA","nCount_RNA","percent_mito","percent_ribo")
png(paste0(path, 'results/Expression_QC_all_replicates.png'), width = 4000, height = 2000, res = 200)
p <- VlnPlot(all_data, group.by= "orig.ident", features = feats, pt.size = 0.1,ncol = 4) + NoLegend()
print(p)
dev.off()

meta_data <- all_data@meta.data
png(paste0(path, 'results/Mitochondrial_Ribosomal_perc.png'), width = 4000, height = 2000, res = 200)
p1 <- ggplot(data = meta_data, aes(x = type, y = percent_mito)) + geom_boxplot() + xlab('') + ylab('Mitochodnrial genes%')
p2 <- ggplot(data = meta_data, aes(x = type, y = percent_ribo)) + geom_boxplot() + xlab('') + ylab('Ribosomal genes%')
ggarrange(p1,p2,nrow = 1,ncol = 2)
dev.off()

We can also visualize two features against each other (e.g. nFeature vs nCount)

FeatureScatter(all_data, "nCount_RNA"  , "nFeature_RNA", group.by = "orig.ident", pt.size = .5)

Filtering

Here we will only consider cells with at least 100 detected genes and genes need to be expressed in at least 3 cells.

selected_c <- WhichCells(all_data, expression = nFeature_RNA > 100)
selected_f <- rownames(all_data)[ Matrix::rowSums(all_data) > 3]

data.filt <- subset(all_data, features=selected_f, cells=selected_c)
dim(data.filt)

Now keep the cells fulfilling following criteria:
- MT% < 25
- Ribo% > 0 #No filtering

selected_mito <- WhichCells(data.filt, expression = percent_mito < 25)
 selected_ribo <- WhichCells(data.filt, expression = percent_ribo >= 0)

# and subset the object to only keep those cells
data.filt <- subset(data.filt, cells = selected_mito)
data.filt <- subset(data.filt, cells = selected_ribo)

dim(data.filt)

as.data.frame(table(data.filt$orig.ident))

saveRDS(data.filt, file= paste0(path, '/results/Alldata_filt.rds'))

Now we are splitting the Seurat object by type ie. E8, E14 and MYCN

obj.list <- SplitObject(data.filt, split.by = "type")

E8_Retina <- obj.list[['E8_Retina']]

E14_Retina <- obj.list[['E14_Retina']]

MYCN_cellline <- obj.list[['MYCN_cellline']]

Finding doublets in each cell type

#E8 only

E8_Retina = NormalizeData(E8_Retina)

# remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
suppressMessages(require(DoubletFinder))

E8_Retina = FindVariableFeatures(E8_Retina, verbose = F) # Finding 2000 most variable genes
gc()
E8_Retina = ScaleData(E8_Retina, vars.to.regress = c("nFeature_RNA", "percent_mito"), verbose = F) #Scaling and ceteringcentring the features in the dataset. The data is regressed against nFeature_RNA and percent_mito resuingresulting into residuals that are scaled and centered.  
gc()
E8_Retina = RunPCA(E8_Retina, verbose = F, npcs = 20) #Running PCA for total 20 PCs to compute
gc()
E8_Retina = RunUMAP(E8_Retina, dims = 1:10, verbose = F) #Running UMAP using 1:10 dimonsions 
gc()

# define the expected number of doublet cellscells.
nExp <- round(ncol(E8_Retina)* 0.04) # expect 4% doublets
E8_Retina <- doubletFinder_v3(E8_Retina, pN=0.25, pK = 0.09, nExp = nExp, PCs = 1:10)

# name of the DF prediction can change, so extract the correct column name.
DF.name = colnames(E8_Retina@meta.data)[grepl("DF.classification", colnames(E8_Retina@meta.data))]


png(paste0(path, 'results/E8_Doublets_clustering.png'), width = 2000, height = 2000, res = 200)
cowplot::plot_grid( ncol = 1,
DimPlot(E8_Retina, group.by = "orig.ident") + NoAxes(),
DimPlot(E8_Retina, group.by = DF.name) + NoAxes()
)
dev.off()

png(paste0(path, 'results/E8_Doublets.png'), width = 2000, height = 2000, res = 200)
VlnPlot(E8_Retina, features = "nFeature_RNA", group.by = DF.name, pt.size = .1)
dev.off()

###E14 only

E14_Retina = NormalizeData(E14_Retina)

E14_Retina = FindVariableFeatures(E14_Retina, verbose = F) # Finding 2000 most variable genes
gc()
E14_Retina = ScaleData(E14_Retina, vars.to.regress = c("nFeature_RNA", "percent_mito"), verbose = F) #Scaling and ceteringcentring the features in the dataset. The data is regressed against nFeature_RNA and percent_mito resuingresulting into residuals that are scaled and centered.  
gc()
E14_Retina = RunPCA(E14_Retina, verbose = F, npcs = 20) #Running PCA for total 20 PCs to compute
gc()
E14_Retina = RunUMAP(E14_Retina, dims = 1:10, verbose = F) #Running UMAP using 1:10 dimonsions 
gc()
# define the expected number of doublet cellscells.
nExp <- round(ncol(E14_Retina)* 0.04) # expect 4% doublets
E14_Retina <- doubletFinder_v3(E14_Retina, pN=0.25, pK = 0.09, nExp = nExp, PCs = 1:10)

# name of the DF prediction can change, so extract the correct column name.
DF.name = colnames(E14_Retina@meta.data)[grepl("DF.classification", colnames(E14_Retina@meta.data))]


png(paste0(path, 'results/E14_Doublets_clustering.png'), width = 2000, height = 2000, res = 200)
cowplot::plot_grid( ncol = 1,
DimPlot(E14_Retina, group.by = "orig.ident") + NoAxes(),
DimPlot(E14_Retina, group.by = DF.name) + NoAxes()
)
dev.off()

png(paste0(path, 'results/E14_Doublets.png'), width = 2000, height = 2000, res = 200)
VlnPlot(E14_Retina, features = "nFeature_RNA", group.by = DF.name, pt.size = .1)
dev.off()


#MYCN only

MYCN_cellline = NormalizeData(MYCN_cellline)

MYCN_cellline = FindVariableFeatures(MYCN_cellline, verbose = F) # Finding 2000 most variable genes
gc()
MYCN_cellline = ScaleData(MYCN_cellline, vars.to.regress = c("nFeature_RNA", "percent_mito"), verbose = F) #Scaling and ceteringcentring the features in the dataset. The data is regressed against nFeature_RNA and percent_mito resuingresulting into residuals that are scaled and centered.  
gc()
MYCN_cellline = RunPCA(MYCN_cellline, verbose = F, npcs = 20) #Running PCA for total 20 PCs to compute
gc()
MYCN_cellline = RunUMAP(MYCN_cellline, dims = 1:10, verbose = F) #Running UMAP using 1:10 dimonsions 
gc()
# define the expected number of doublet cellscells.
nExp <- round(ncol(MYCN_cellline)* 0.04) # expect 4% doublets
MYCN_cellline <- doubletFinder_v3(MYCN_cellline, pN=0.25, pK = 0.09, nExp = nExp, PCs = 1:10)

# name of the DF prediction can change, so extract the correct column name.
DF.name = colnames(MYCN_cellline@meta.data)[grepl("DF.classification", colnames(MYCN_cellline@meta.data))]


png(paste0(path, 'results/MYCN_cellline_clustering.png'), width = 2000, height = 2000, res = 200)
cowplot::plot_grid( ncol = 1,
DimPlot(MYCN_cellline, group.by = "orig.ident") + NoAxes(),
DimPlot(MYCN_cellline, group.by = DF.name) + NoAxes()
)
dev.off()

png(paste0(path, 'results/MYCN_cellline_Doublets.png'), width = 2000, height = 2000, res = 200)
VlnPlot(MYCN_cellline, features = "nFeature_RNA", group.by = DF.name, pt.size = .1)
dev.off()

Now we will filter out the doublets from each cell type

E8_Retina_doublefilt <- subset(E8_Retina, subset = DF.classifications_0.25_0.09_1303 == 'Singlet')
dim(E8_Retina)
dim(E8_Retina_doublefilt)

E14_Retina_doublefilt <- subset(E14_Retina, subset = DF.classifications_0.25_0.09_1371 == 'Singlet')
dim(E14_Retina)
dim(E14_Retina_doublefilt)

MYCN_cellline_doublefilt <- subset(MYCN_cellline, subset = DF.classifications_0.25_0.09_745 == 'Singlet')
dim(MYCN_cellline)
dim(MYCN_cellline_doublefilt)

Reduce dimonsionality Seurat first identify the most variable genes (2000 genes) to use for downstream analysis by FindVariableFeatures. This function calculates the average expression and dispersion for each gene, and then these genes are placed into bins. For dispersioan within each bin a z-score is calculated. By this approach the relationship between average expression and variability is controlled. In another word, we try to identify outliers.

suppressWarnings(suppressMessages(E8_Retina_doublefilt <- FindVariableFeatures(E8_Retina_doublefilt, selection.method = "vst", nfeatures = 2000 ,verbose = FALSE,assay = "RNA")))
top20 <- head(VariableFeatures(E8_Retina_doublefilt), 20)

png(paste0(path, 'results//E8_Top_2000_variable_genes.png'), width = 2000, height = 2000, res = 200)
LabelPoints(plot = VariableFeaturePlot(E8_Retina_doublefilt), points = top20, repel = TRUE)
dev.off()

suppressWarnings(suppressMessages(E14_Retina_doublefilt <- FindVariableFeatures(E14_Retina_doublefilt, selection.method = "vst", nfeatures = 2000 ,verbose = FALSE,assay = "RNA")))
top20 <- head(VariableFeatures(E14_Retina_doublefilt), 20)

png(paste0(path, 'results//E14_Top_2000_variable_genes.png'), width = 2000, height = 2000, res = 200)
LabelPoints(plot = VariableFeaturePlot(E14_Retina_doublefilt), points = top20, repel = TRUE)
dev.off()

suppressWarnings(suppressMessages(MYCN_cellline_doublefilt <- FindVariableFeatures(MYCN_cellline_doublefilt, selection.method = "vst", nfeatures = 2000 ,verbose = FALSE,assay = "RNA")))
top20 <- head(VariableFeatures(MYCN_cellline_doublefilt), 20)

png(paste0(path, 'results//MYCN_Top_2000_variable_genes.png'), width = 2000, height = 2000, res = 200)
LabelPoints(plot = VariableFeaturePlot(MYCN_cellline_doublefilt), points = top20, repel = TRUE)
dev.off()

Then we scale the data to have mean = 0 and SD = 1 by ScaleData where we regress out (e.g. vars.to.regress= “percent_mito”, “nFeature_RNA” ).

E8_Retina_doublefilt <- ScaleData(E8_Retina_doublefilt, vars.to.regress = c("percent_mito", "nFeature_RNA"), assay = "RNA")

E14_Retina_doublefilt <- ScaleData(E14_Retina_doublefilt, vars.to.regress = c("percent_mito", "nFeature_RNA"), assay = "RNA")

MYCN_cellline_doublefilt <- ScaleData(MYCN_cellline_doublefilt, vars.to.regress = c("percent_mito", "nFeature_RNA"), assay = "RNA")

We will now perform the PCA plots for all the three cell types

library(cowplot)
E8_Retina_doublefilt <- RunPCA(E8_Retina_doublefilt, npcs = 50, verbose = F)
png(paste0(path, '/results/E8_PCA.png'), width = 5000, height = 3000, res = 200)
plot_grid(ncol = 2,
  DimPlot(E8_Retina_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 1:2),
  DimPlot(E8_Retina_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 3:4))
dev.off()

E14_Retina_doublefilt <- RunPCA(E14_Retina_doublefilt, npcs = 50, verbose = F)
png(paste0(path, '/results/E14_PCA.png'), width = 5000, height = 3000, res = 200)
plot_grid(ncol = 2,
  DimPlot(E14_Retina_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 1:2),
  DimPlot(E14_Retina_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 3:4))
dev.off()


MYCN_cellline_doublefilt <- RunPCA(MYCN_cellline_doublefilt, npcs = 50, verbose = F)
png(paste0(path, '/results/MYCN_PCA.png'), width = 5000, height = 3000, res = 200)
plot_grid(ncol = 2,
  DimPlot(MYCN_cellline_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 1:2),
  DimPlot(MYCN_cellline_doublefilt, reduction = "pca", group.by = "orig.ident",dims = 3:4))
dev.off()

Now we run tSNE on the three cell types

E8_Retina_doublefilt <- RunTSNE(E8_Retina_doublefilt, reduction = "pca", dims = 1:30,
                   perplexity=30,
                   max_iter=1000,
                   theta=0.5,
                   eta=200,
                   num_threads=0 )

png(paste0(path, '/results/E8_tSNE.png'),  width = 3000, height = 2000, res = 200)
DimPlot(E8_Retina_doublefilt, reduction = "tsne", group.by = "type")
dev.off()

E14_Retina_doublefilt <- RunTSNE(E14_Retina_doublefilt, reduction = "pca", dims = 1:30,
                   perplexity=30,
                   max_iter=1000,
                   theta=0.5,
                   eta=200,
                   num_threads=0 )

png(paste0(path, '/results/E14_tSNE.png'),  width = 3000, height = 2000, res = 200)
DimPlot(E14_Retina_doublefilt, reduction = "tsne", group.by = "type")
dev.off()

MYCN_cellline_doublefilt <- RunTSNE(MYCN_cellline_doublefilt, reduction = "pca", dims = 1:30,
                   perplexity=30,
                   max_iter=1000,
                   theta=0.5,
                   eta=200,
                   num_threads=0 )

png(paste0(path, '/results/MYCN_tSNE.png'),  width = 3000, height = 2000, res = 200)
DimPlot(MYCN_cellline_doublefilt, reduction = "tsne", group.by = "type")
dev.off()

Now we will make a UMAP for the three cell types

E8_Retina_doublefilt <- RunUMAP(E8_Retina_doublefilt, reduction = "pca", dims = 1:30,
                   n.components=2,
                   n.neighbors=30,
                   n.epochs=200,
                   min.dist=0.3,
                   learning.rate=1,
                   spread=1 )
png(paste0(path, '/results/E8_umap_by_group.png'), width = 3000, height = 2000, res = 200)
DimPlot(E8_Retina_doublefilt, reduction = "umap", group.by = "type")
dev.off()

E14_Retina_doublefilt <- RunUMAP(E14_Retina_doublefilt, reduction = "pca", dims = 1:30,
                   n.components=2,
                   n.neighbors=30,
                   n.epochs=200,
                   min.dist=0.3,
                   learning.rate=1,
                   spread=1 )
png(paste0(path, '/results/E14_umap_by_group.png'), width = 3000, height = 2000, res = 200)
DimPlot(E14_Retina_doublefilt, reduction = "umap", group.by = "type")
dev.off()

MYCN_cellline_doublefilt <- RunUMAP(MYCN_cellline_doublefilt, reduction = "pca", dims = 1:30,
                   n.components=2,
                   n.neighbors=30,
                   n.epochs=200,
                   min.dist=0.3,
                   learning.rate=1,
                   spread=1 )
png(paste0(path, '/results/MYCN_umap_by_group.png'), width = 3000, height = 2000, res = 200)
DimPlot(MYCN_cellline_doublefilt, reduction = "umap", group.by = "type")
dev.off()

Correction for batch effect for the three cell types

suppressPackageStartupMessages({
    library(Seurat)
    library(cowplot)
    library(ggplot2)
})

#E8
print(names(E8_Retina_doublefilt@reductions))
alldata.list <- SplitObject(E8_Retina_doublefilt, split.by = "orig.ident")

for (i in 1:length(alldata.list)) {
    alldata.list[[i]] <- NormalizeData(alldata.list[[i]], verbose = FALSE)
    alldata.list[[i]] <- FindVariableFeatures(alldata.list[[i]], selection.method = "vst",
        nfeatures = 2000, verbose = FALSE)
}

hvgs_per_dataset <- lapply(alldata.list, function(x) {
    x@assays$RNA@var.features
})

temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply(hvgs_per_dataset, function(x) {
    temp %in% x
})
pheatmap::pheatmap(t(overlap * 1), cluster_rows = F, color = c("grey90", "grey20"))


#We identify anchors using the FindIntegrationAnchors function, which takes a list of Seurat objects as input.

alldata.anchors <- FindIntegrationAnchors(object.list = alldata.list, dims = 1:30, reduction = "cca")

#Now pass the anchors to the integratedData to create a Seurat object

E8_Retina_doublefilt.int <- IntegrateData(anchorset = alldata.anchors, dims = 1:30, new.assay.name = "CCA")

#Running dimensionality reduction on integrated data
E8_Retina_doublefilt.int <- ScaleData(E8_Retina_doublefilt.int, verbose = FALSE)
E8_Retina_doublefilt.int <- RunPCA(E8_Retina_doublefilt.int, npcs = 30, verbose = FALSE)
E8_Retina_doublefilt.int <- RunUMAP(E8_Retina_doublefilt.int, dims = 1:30)
E8_Retina_doublefilt.int <- RunTSNE(E8_Retina_doublefilt.int, dims = 1:30)

png(paste0(path, '/results/E8_Retina_doublefilt.int_umap_.png'), width = 3000, height = 2000, res = 200)
DimPlot(E8_Retina_doublefilt.int, reduction = "umap", group.by = "type")
dev.off()


#E14
print(names(E14_Retina_doublefilt@reductions))
alldata.list1 <- SplitObject(E14_Retina_doublefilt, split.by = "orig.ident")

for (i in 1:length(alldata.list1)) {
    alldata.list1[[i]] <- NormalizeData(alldata.list1[[i]], verbose = FALSE)
    alldata.list1[[i]] <- FindVariableFeatures(alldata.list1[[i]], selection.method = "vst",
        nfeatures = 2000, verbose = FALSE)
}

hvgs_per_dataset <- lapply(alldata.list1, function(x) {
    x@assays$RNA@var.features
})

temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply(hvgs_per_dataset, function(x) {
    temp %in% x
})
pheatmap::pheatmap(t(overlap * 1), cluster_rows = F, color = c("grey90", "grey20"))

#We identify anchors using the FindIntegrationAnchors function, which takes a list of Seurat objects as input.

alldata.anchors1 <- FindIntegrationAnchors(object.list = alldata.list1, dims = 1:30, reduction = "cca")

#Now pass the anchors to the integratedData to create a Seurat object

E14_Retina_doublefilt.int <- IntegrateData(anchorset = alldata.anchors1, dims = 1:30, new.assay.name = "CCA")

#Running dimensionality reduction on integrated data
E14_Retina_doublefilt.int <- ScaleData(E14_Retina_doublefilt.int, verbose = FALSE)
E14_Retina_doublefilt.int <- RunPCA(E14_Retina_doublefilt.int, npcs = 30, verbose = FALSE)
E14_Retina_doublefilt.int <- RunUMAP(E14_Retina_doublefilt.int, dims = 1:30)
E14_Retina_doublefilt.int <- RunTSNE(E14_Retina_doublefilt.int, dims = 1:30)

png(paste0(path, '/results/E14_Retina_doublefilt.int_umap_.png'), width = 3000, height = 2000, res = 200)
DimPlot(E14_Retina_doublefilt.int, reduction = "umap", group.by = "type")
dev.off()


#MYCN
print(names(MYCN_cellline_doublefilt@reductions))
alldata.list2 <- SplitObject(MYCN_cellline_doublefilt, split.by = "orig.ident")

for (i in 1:length(alldata.list2)) {
    alldata.list2[[i]] <- NormalizeData(alldata.list2[[i]], verbose = FALSE)
    alldata.list2[[i]] <- FindVariableFeatures(alldata.list2[[i]], selection.method = "vst",
        nfeatures = 2000, verbose = FALSE)
}

hvgs_per_dataset <- lapply(alldata.list2, function(x) {
    x@assays$RNA@var.features
})

temp <- unique(unlist(hvgs_per_dataset))
overlap <- sapply(hvgs_per_dataset, function(x) {
    temp %in% x
})
pheatmap::pheatmap(t(overlap * 1), cluster_rows = F, color = c("grey90", "grey20"))

#We identify anchors using the FindIntegrationAnchors function, which takes a list of Seurat objects as input.

alldata.anchors2 <- FindIntegrationAnchors(object.list = alldata.list2, dims = 1:30, reduction = "cca")

#Now pass the anchors to the integratedData to create a Seurat object

MYCN_cellline_doublefilt.int <- IntegrateData(anchorset = alldata.anchors2, dims = 1:30, new.assay.name = "CCA")

#Running dimensionality reduction on integrated data
MYCN_cellline_doublefilt.int <- ScaleData(MYCN_cellline_doublefilt.int, verbose = FALSE)
MYCN_cellline_doublefilt.int <- RunPCA(MYCN_cellline_doublefilt.int, npcs = 30, verbose = FALSE)
MYCN_cellline_doublefilt.int <- RunUMAP(MYCN_cellline_doublefilt.int, dims = 1:30)
MYCN_cellline_doublefilt.int <- RunTSNE(MYCN_cellline_doublefilt.int, dims = 1:30)

png(paste0(path, '/results/MYCN_cellline_doublefilt.int_umap_.png'), width = 3000, height = 2000, res = 200)
DimPlot(MYCN_cellline_doublefilt.int, reduction = "umap", group.by = "type")
dev.off()

This part of the code is used with all the samples and cell cycle correction is being applied

library(Seurat)
library(ggplot2)
library(ggpubr)
library(purrr)
library(knitr)
library(tidyverse)

data.filt_new <- readRDS('/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/results/scRNA_Seq/02-QC_Post_Mapping/Merged/No_correction_for_Cell_Cycle/data.filt_mito_25_no_ribo_filter.rds')
#path <- '~/Documents/NBIS_projects/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/'
#path1 <- '~/Documents/NBIS_projects/SMS_5933_sc_script/'
samples_info <- read.csv('/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/SampleSheet.csv',skip = 1, header = T)
metrics <- c("Estimated.Number.of.Cells",   "Mean.Reads.per.Cell",  "Median.Genes.per.Cell",    "Number.of.Reads",  "Valid.Barcodes",   "Sequencing.Saturation",    "Q30.Bases.in.Barcode", "Q30.Bases.in.RNA.Read",    "Q30.Bases.in.UMI", "Reads.Mapped.to.Genome",   "Reads.Mapped.Confidently.to.Genome",   "Reads.Mapped.Confidently.to.Intergenic.Regions",   "Reads.Mapped.Confidently.to.Intronic.Regions", "Reads.Mapped.Confidently.to.Exonic.Regions",   "Reads.Mapped.Confidently.to.Transcriptome",    "Reads.Mapped.Antisense.to.Gene",   "Fraction.Reads.in.Cells",  "Total.Genes.Detected", "Median.UMI.Counts.per.Cell")
metrics_df <- data.frame(matrix(0, nrow = nrow(samples_info), ncol = length(metrics) ))

# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat.  We can
# segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

# Create our Seurat object and complete the initalization steps
#data.filt_new<- NormalizeData(data.filt_new, normalization.method = "LogNormalize", scale.factor = 1e6)
data.filt_new <- FindVariableFeatures(data.filt_new, selection.method = "vst")
data.filt_new <- ScaleData(data.filt_new, features = rownames(data.filt_new))

data.filt_new <- RunPCA(data.filt_new, features = VariableFeatures(data.filt_new), ndims.print = 6:10, nfeatures.print = 10)
##DimHeatmap
png(file ='/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/figures/DimHeatmap_All.png', width = 5000, height = 3000, res = 200)
DimHeatmap(data.filt_new, dims = c(8, 10))
dev.off()

##Assign Cell-Cycle Scores
data.filt_new <- CellCycleScoring(data.filt_new, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)

# view cell cycle scores and phase assignments and wirting to a file
#cellcycle_phases <- (data.filt_new[[]])

# Running a PCA on cell cycle genes reveals, unsurprisingly, that cells separate entirely by phase
data.filt_new <- RunPCA(data.filt_new, features = c(s.genes, g2m.genes))
png(file= '/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/figures/Cell_seperate_by_cellCycle.png', width = 5000, height = 3000, res = 200)
DimPlot(data.filt_new)
dev.off()

#Regress out cell cycle scores during data scaling
data.filt_new_regressed <- ScaleData(data.filt_new, vars.to.regress = c("S.Score", "G2M.Score"), features = rownames(data.filt_new))

# Now, a PCA on the variable genes no longer returns components associated with cell cycle
data.filt_new_regressed <- RunPCA(data.filt_new_regressed, features = VariableFeatures(data.filt_new), nfeatures.print = 10)

# When running a PCA on only cell cycle genes, cells no longer separate by cell-cycle phase
data.filt_new_regressed <- RunPCA(data.filt_new_regressed, features = c(s.genes, g2m.genes))
png(file= '/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/figures/Cell_nolonger_seperate_by_cellCycle.png', width = 5000,height = 3000, res = 200)
DimPlot(data.filt_new_regressed)
dev.off()


#This file saved include all the above steps one can direclty load this file
saveRDS(data.filt_new_regressed, file= '/crex/proj/snic2021-23-425/private/SMS_5933_21_MYCN_bulk_scRNASeq_chicken_retina/code/scRNA_Seq/data.filt_new_regressed_08_May.rds')