#Loading the Data
library(Seurat)
## Attaching SeuratObject
library(sctransform)
library(MAST)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.2 ✔ purrr 1.0.1
## ✔ tibble 3.2.1 ✔ dplyr 1.1.2
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
library(rcna)
setwd('/hpc/group/patellab/aam131/UW44SliceCultureNew/UW44scx')
cell.calls<-load('cells.class (1).robj')
UW44.control.calls<-as.data.frame(Cells.class["UW44scx01"])
UW44.ivosidenib.calls<-as.data.frame(Cells.class["UW44scx02"])
colnames(UW44.control.calls)<-NULL
colnames(UW44.ivosidenib.calls)<-NULL
#Making Control and Treatment Objects and Making QC Plots for Objects
sample_name='UW44scx_control'
data_path='Control/filtered_feature_bc_matrix/'
min.cells=10
min.features=200
sample.data<-Read10X(data_path)
cell.names<-colnames(sample.data)
cell.names<-sub("-1", "", cell.names)
colnames(sample.data)<-cell.names
seurat_obj<-CreateSeuratObject(sample.data,project=sample_name,min.cells=min.cells, min.features=min.features)
seurat_obj<-PercentageFeatureSet(seurat_obj, pattern = "^MT-", col.name = "percent.mt")
plot1<-VlnPlot(seurat_obj,features=c('nCount_RNA','nFeature_RNA','percent.mt'))
plot1
seurat_obj<- subset(seurat_obj, subset = nFeature_RNA > 500 & nFeature_RNA < 8000 & percent.mt < 10 & nCount_RNA >0 & nCount_RNA <50000)
seurat_obj$cell.calls<-UW44.control.calls
table(seurat_obj$cell.calls)
##
## Doublet Negative UW44_D0-TTCCTGCCATTACTA
## 2282 817 804
## UW44_D10-ATGATGAACAGCCAG UW44_D14-CTCGAACGCTTATCG UW44_D2-CCGTACCTCATTGTT
## 588 637 1425
## UW44_D5-GGTAGATGTCCTCAG UW44_D7-TGGTGTCATTCTTGA
## 1162 618
Idents(seurat_obj)<-'cell.calls'
seurat_obj$NA.calls<-is.na(seurat_obj$cell.calls)
seurat_obj<-subset(seurat_obj, NA.calls=='FALSE')
seurat_obj<-RenameIdents(seurat_obj, 'UW44_D0-TTCCTGCCATTACTA'='ctrlD0','UW44_D10-ATGATGAACAGCCAG'='ctrlD10','UW44_D14-CTCGAACGCTTATCG'='ctrlD14','UW44_D2-CCGTACCTCATTGTT'='ctrlD2','UW44_D5-GGTAGATGTCCTCAG'='ctrlD5','UW44_D7-TGGTGTCATTCTTGA'='ctrlD7')
seurat_obj$cell.calls<-Idents(seurat_obj)
UW44scx_control<-seurat_obj
sample_name='UW44scx_trt'
data_path='Treatment/filtered_feature_bc_matrix/'
min.cells=10
min.features=200
sample.data<-Read10X(data_path)
cell.names<-colnames(sample.data)
cell.names<-sub("-1", "", cell.names)
colnames(sample.data)<-cell.names
seurat_obj<-CreateSeuratObject(sample.data,project=sample_name,min.cells=min.cells, min.features=min.features)
seurat_obj<-PercentageFeatureSet(seurat_obj, pattern = "^MT-", col.name = "percent.mt")
plot2<-VlnPlot(seurat_obj,features=c('nCount_RNA','nFeature_RNA','percent.mt'))
plot2
seurat_obj<- subset(seurat_obj, subset = nFeature_RNA > 500 & nFeature_RNA < 8000 & percent.mt < 10 & nCount_RNA >0 & nCount_RNA <50000)
seurat_obj$cell.calls<-UW44.ivosidenib.calls
table(seurat_obj$cell.calls)
##
## Doublet
## 1888
## Negative
## 3257
## UW44_02scxd10_ivosidinibd8-ATGATGAACAGCCAG
## 1633
## UW44_02scxd10_tmxxrt_4dpostTrT-GGTAGATGTCCTCAG
## 1536
## UW44_02scxd14_ivosidinibd12-CTCGAACGCTTATCG
## 1300
## UW44_02scxd4_tmzxrt_d2-TTCCTGCCATTACTA
## 2951
## UW44_02scxd7_ivosidinibd5-TGGTGTCATTCTTGA
## 1771
## UW44_02scxd7_tmzxrt_1d_postTrT-CCGTACCTCATTGTT
## 1797
Idents(seurat_obj)<-'cell.calls'
seurat_obj$NA.calls<-is.na(seurat_obj$cell.calls)
seurat_obj<-subset(seurat_obj, NA.calls=='FALSE')
seurat_obj<-RenameIdents(seurat_obj, 'UW44_02scxd10_ivosidinibd8-ATGATGAACAGCCAG'='ivoD10','UW44_02scxd10_tmxxrt_4dpostTrT-GGTAGATGTCCTCAG'='socD10','UW44_02scxd14_ivosidinibd12-CTCGAACGCTTATCG'='ivoD14','UW44_02scxd4_tmzxrt_d2-TTCCTGCCATTACTA'='socD5','UW44_02scxd7_ivosidinibd5-TGGTGTCATTCTTGA'='ivoD7','UW44_02scxd7_tmzxrt_1d_postTrT-CCGTACCTCATTGTT'='socD7')
seurat_obj$cell.calls<-Idents(seurat_obj)
UW44scx_trt<-seurat_obj
#Merging the Objects
UW44scx.start<-merge(UW44scx_control,UW44scx_trt)
## Warning in CheckDuplicateCellNames(object.list = objects): Some cell names are
## duplicated across objects provided. Renaming to enforce unique cell names.
seurat.singlet <- subset(UW44scx.start, idents = c("Negative","Doublet"), invert = TRUE)
Idents(seurat.singlet)<-'cell.calls'
seurat.singlet<-RenameIdents(seurat.singlet,'ctrlD0'='Ctrl','ctrlD10'='Ctrl','ctrlD14'='Ctrl','ctrlD2'='Ctrl','ctrlD5'='Ctrl','ctrlD7'='Ctrl','ivoD10'='Ivosidenib','ivoD7'='Ivosidenib','ivoD14'='Ivosidenib','socD10'='XRT/TMZ','socD5'='XRT/TMZ','socD7'='XRT/TMZ')
seurat.singlet$Condition<-Idents(seurat.singlet)
Idents(seurat.singlet)<-'cell.calls'
seurat.singlet<-RenameIdents(seurat.singlet,'ctrlD0'='D0','ctrlD10'='D10','ctrlD14'='D14','ctrlD2'='D2','ctrlD5'='D5','ctrlD7'='D7','ivoD10'='D10','ivoD7'='D7','ivoD14'='D14','socD10'='D10','socD5'='D5','socD7'='D7')
seurat.singlet$Time<-Idents(seurat.singlet)
library(scRNAseq)
## Warning: package 'scRNAseq' was built under R version 4.2.3
library(harmony)
## Loading required package: Rcpp
VlnPlot(seurat.singlet,features=c("nCount_RNA","nFeature_RNA","percent.mt"))
FeatureScatter(seurat.singlet, "nCount_RNA", "nFeature_RNA", group.by = "Condition", pt.size = 0.5)
seurat.singlet<-SCTransform(seurat.singlet,vars.to.regress=c('percent.mt','nCount_RNA'),vst.flavor='v2')
## vst.flavor='v2' set, setting model to use fixed slope and exclude poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Total Step 1 genes: 27242
## Total overdispersed genes: 25281
## Excluding 1961 genes from Step 1 because they are not overdispersed.
## Variance stabilizing transformation of count matrix of size 29952 by 16222
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
## Setting estimate of 157 genes to inf as theta_mm/theta_mle < 1e-3
## # of step1 poisson genes (variance < mean): 0
## # of low mean genes (mean < 0.001): 2837
## Total # of Step1 poisson genes (theta=Inf; variance < mean): 201
## Total # of poisson genes (theta=Inf; variance < mean): 4583
## Calling offset model for all 4583 poisson genes
## Found 160 outliers - those will be ignored in fitting/regularization step
## Ignoring theta inf genes
## Replacing fit params for 4583 poisson genes by theta=Inf
## Setting min_variance based on median UMI: 0.04
## Second step: Get residuals using fitted parameters for 29952 genes
##
|
| | 0%
|
|= | 2%
|
|== | 3%
|
|==== | 5%
|
|===== | 7%
|
|====== | 8%
|
|======= | 10%
|
|======== | 12%
|
|========= | 13%
|
|========== | 15%
|
|============ | 17%
|
|============= | 18%
|
|============== | 20%
|
|=============== | 22%
|
|================ | 23%
|
|================== | 25%
|
|=================== | 27%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 35%
|
|========================== | 37%
|
|=========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|================================ | 45%
|
|================================= | 47%
|
|================================== | 48%
|
|=================================== | 50%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================== | 60%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================== | 65%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 75%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================== | 80%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|============================================================ | 85%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|=============================================================== | 90%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 95%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|======================================================================| 100%
## Computing corrected count matrix for 29952 genes
##
|
| | 0%
|
|= | 2%
|
|== | 3%
|
|==== | 5%
|
|===== | 7%
|
|====== | 8%
|
|======= | 10%
|
|======== | 12%
|
|========= | 13%
|
|========== | 15%
|
|============ | 17%
|
|============= | 18%
|
|============== | 20%
|
|=============== | 22%
|
|================ | 23%
|
|================== | 25%
|
|=================== | 27%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 35%
|
|========================== | 37%
|
|=========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|================================ | 45%
|
|================================= | 47%
|
|================================== | 48%
|
|=================================== | 50%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================== | 60%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================== | 65%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 75%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================== | 80%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|============================================================ | 85%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|=============================================================== | 90%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 95%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|======================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 1.513649 mins
## Determine variable features
## Place corrected count matrix in counts slot
## Regressing out percent.mt, nCount_RNA
## Centering data matrix
## Set default assay to SCT
seurat.singlet<-RunPCA(seurat.singlet)
## PC_ 1
## Positive: LRMDA, FRMD4A, RBM47, MSR1, FMN1, SLC11A1, LNCAROD, MERTK, LYN, DOCK8
## CTSB, PLXDC2, SAT1, APBB1IP, NEAT1, DOCK4, EPB41L3, KYNU, TNS3, PIK3R5
## CPQ, KCNMA1, STEAP1B, ATP8B4, FYB1, KCNK13, SAMSN1, PTPRC, PIK3AP1, ADAM28
## Negative: LHFPL3, LRP1B, CNTNAP2, NPAS3, KCND2, LRRC4C, SNTG1, PCDH15, DLGAP1, FGF14
## LSAMP, PTPRZ1, NLGN1, CSMD1, NXPH1, MMP16, NRG3, NRCAM, NRXN1, CADM2
## GPC6, DSCAM, GRID2, FAM155A, DPP6, ADGRL3, NRXN3, ANKS1B, CADPS, SOX2-OT
## PC_ 2
## Positive: FGF14, LRRC4C, CNTNAP2, PTPRZ1, MMP16, SNTG1, GPC6, RASGEF1B, PCDH15, NXPH1
## LHFPL3, KCND2, DLGAP1, NPAS3, LRMDA, LRP1B, FAM155A, NRG3, GRID2, LRFN5
## SLC8A1, SORBS1, NRCAM, RBM47, PLXDC2, CADPS, FRMD4A, GAP43, CSMD1, DPP6
## Negative: NKAIN2, CNTNAP4, RNF220, MAP7, ST18, PCDH9, ZNF536, DLC1, SPOCK3, TMTC2
## C10orf90, SLCO3A1, PCSK6, PRUNE2, KIRREL3, PDE1A, UNC5C, SLC24A2, SLC7A14-AS1, BCAS1
## ELMO1, RAPGEF5, CTNNA3, PPP2R2B, ENPP2, MBP, ANK3, PDE1C, CDH19, ANO4
## PC_ 3
## Positive: CNTNAP4, NKAIN2, ST18, MAP7, ZNF536, PCDH9, CTNNA3, SLC24A2, C10orf90, TMTC2
## MBP, SPOCK3, CADM2, LHFPL3, PRUNE2, KIRREL3, IL1RAPL1, PCSK6, ANK3, PEX5L
## BCAS1, SLC7A14-AS1, UNC5C, ANO4, PCDH15, TMEM144, LINC00609, SHTN1, DSCAM, ENPP2
## Negative: MECOM, FLT1, ADGRL4, ADGRF5, LAMA4, PGF, COL4A1, PTPRM, AFAP1L1, PECAM1
## EGFL7, ADGRL2, COL4A2, CSGALNACT1, ARHGAP29, ERG, PLEKHG1, NOTCH4, THSD4, MYO10
## STC1, RAPGEF5, EMCN, MAST4, HSPG2, DIPK2B, ITGA1, GALNT18, MYO1B, COBLL1
## PC_ 4
## Positive: CNTNAP2, LRRC4C, FGF14, MECOM, RAPGEF5, RASGEF1B, GPC6, FLT1, ANKS1B, ADGRL4
## TSHZ2, AFAP1L1, PECAM1, ADGRF5, EGFL7, ELMO1, ARHGAP29, PGF, PDE4D, ADGRL2
## ERG, DOCK9, NPAS3, RNF220, MAML3, NRXN3, DCBLD2, NOTCH4, EMCN, SOX2-OT
## Negative: OBI1-AS1, HPSE2, NTRK2, ADGRV1, SLC1A3, SLC4A4, NKAIN3, SPARCL1, KCNN3, LINC01088
## PLCG2, ADCY2, BMPR1B, DCLK1, TNC, MGAT4C, FAM189A2, HIF3A, ATP1A2, GPC5
## DPP10, CLU, LIFR, HSPA1A, ASTN2, HSP90AA1, GLIS3, ARHGEF4, ETNPPL, SLC1A2
## PC_ 5
## Positive: DSCAM, SMOC1, LHFPL3, PCDH15, CADM2, KCND2, OPCML, TNR, NRXN1, LINC00609
## SNTG1, DLGAP1, AC013265.1, SEZ6L, GRID2, SGCZ, CSMD3, DGKB, AC002069.2, RAPGEF4
## ARPP21, GALNT13, CSMD1, NXPH1, PHYHIPL, GLCCI1, AL445250.1, PID1, PTPRT, GRIA2
## Negative: RASGEF1B, GPC6, TSHZ2, NEAT1, DPYD, IL33, LRFN5, NAMPT, CHI3L1, TNC
## ADAMTSL1, HMGA2, DCBLD2, LHFPL6, DEC1, AKAP6, KIF26B, FAT1, SLIT2, MGST1
## GPR39, AC083837.1, ACTN2, PSD3, CADPS, TENM3, UPP1, NIBAN1, AL078604.4, NRP2
seurat.singlet<-RunHarmony(seurat.singlet,assay.use='SCT',reduction='pca',dims.use=1:50,group.by.vars='orig.ident',plot.convergence=T)
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony 6/10
## Harmony 7/10
## Harmony 8/10
## Harmony 9/10
## Harmony converged after 9 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.SCT.harmony; see ?make.names for more details
## on syntax validity
seurat.singlet<-FindNeighbors(seurat.singlet,reduction='harmony')
## Computing nearest neighbor graph
## Computing SNN
seurat.singlet<-FindClusters(seurat.singlet,resolution=0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 16222
## Number of edges: 523475
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8843
## Number of communities: 22
## Elapsed time: 1 seconds
seurat.singlet<-RunUMAP(seurat.singlet,dims=1:50,reduction='harmony')
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 22:38:11 UMAP embedding parameters a = 0.9922 b = 1.112
## 22:38:11 Read 16222 rows and found 50 numeric columns
## 22:38:11 Using Annoy for neighbor search, n_neighbors = 30
## 22:38:12 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 22:38:14 Writing NN index file to temp file /tmp/RtmpybuRkm/file196c256f3a4bcc
## 22:38:14 Searching Annoy index using 1 thread, search_k = 3000
## 22:38:19 Annoy recall = 100%
## 22:38:21 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 22:38:23 Initializing from normalized Laplacian + noise (using irlba)
## 22:38:24 Commencing optimization for 200 epochs, with 722794 positive edges
## 22:38:31 Optimization finished
DimPlot(seurat.singlet)
slice.markers <- FindAllMarkers(seurat.singlet, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 17
## Calculating cluster 18
## Calculating cluster 19
## Calculating cluster 20
## Calculating cluster 21
slice.markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)
slice.markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(seurat.singlet, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(seurat.singlet, features = top10$gene): The following
## features were omitted as they were not found in the scale.data slot for the SCT
## assay: ARGLU1, BDP1, AHI1, RBM25
_______ #Remove the Mt. Enchiched Clusters and Generate DimPlot See
additonal images of Notion page where the Mt. enriched clusters are
shown in red. Based on this Clusters 3,4,7,12,17 are considered to be
enriched for dead cells.
seurat.singlet <- subset(seurat.singlet, idents = c(3,4,7,12,17), invert = TRUE)
DimPlot(seurat.singlet, label=TRUE,pt.size = 0.7)
Of the remaining clusters, we are now using known markers of commonly detected cells to identify particular cell types
features <- c("SOX2", "PTPRZ1", "EGFR", "PTPRC", "CD163", "IL1B", "GABBR2", "GABRB3",
"RBFOX3", "GAD2", "GRIP1", "MBP", "ST18","MOG","MYO1B","PDGFRB","LAMC3",
"PECAM1", "VWF", "ANGPT2","ALDH1L1","NDRG2","S100B","GFAP")
VlnPlot(seurat.singlet, features, stack = TRUE, sort = FALSE, flip = TRUE) +
theme(legend.position = "none") + ggtitle("Cell Type Markers")
Based on these plots we can clearly identify clusters 8, 10, and 16 as being immune cells. We will used these cells as reference for future CNV analysis.
#Running InferCNV on the Object
library(infercnv)
## Warning: package 'infercnv' was built under R version 4.2.3
x <- as.data.frame(Cells(seurat.singlet))
x[2]<-seurat.singlet@active.ident
write.table(x, file = "mtcars.txt", sep = "\t",
row.names = FALSE, col.names = FALSE)
infercnv_obj = CreateInfercnvObject(raw_counts_matrix=GetAssayData(seurat.singlet, slot = "counts"),
annotations_file="mtcars.txt",
delim="\t",
gene_order_file="gencode_v38_gene_pos.txt",
ref_group_names=c("8","10","16"))
## INFO [2023-06-25 22:48:57] Parsing gene order file: gencode_v38_gene_pos.txt
## INFO [2023-06-25 22:48:58] Parsing cell annotations file: mtcars.txt
## INFO [2023-06-25 22:48:58] ::order_reduce:Start.
## INFO [2023-06-25 22:48:58] .order_reduce(): expr and order match.
## INFO [2023-06-25 22:49:01] ::process_data:order_reduce:Reduction from positional data, new dimensions (r,c) = 29952,11921 Total=72963619 Min=0 Max=2157.
## INFO [2023-06-25 22:49:01] num genes removed taking into account provided gene ordering list: 10549 = 35.2196848290598% removed.
## INFO [2023-06-25 22:49:01] -filtering out cells < 100 or > Inf, removing 0 % of cells
## WARN [2023-06-25 22:49:01] Please use "options(scipen = 100)" before running infercnv if you are using the analysis_mode="subclusters" option or you may encounter an error while the hclust is being generated.
## INFO [2023-06-25 22:49:03] validating infercnv_obj
cutoff=0.1
infercnv_obj <- infercnv:::require_above_min_mean_expr_cutoff(infercnv_obj, cutoff)
## INFO [2023-06-25 22:49:03] ::above_min_mean_expr_cutoff:Start
## INFO [2023-06-25 22:49:03] Removing 11141 genes from matrix as below mean expr threshold: 0.1
## INFO [2023-06-25 22:49:03] validating infercnv_obj
## INFO [2023-06-25 22:49:03] There are 8262 genes and 11921 cells remaining in the expr matrix.
infercnv_obj <- infercnv:::normalize_counts_by_seq_depth(infercnv_obj)
## INFO [2023-06-25 22:49:03] normalizing counts matrix by depth
## INFO [2023-06-25 22:49:07] Computed total sum normalization factor as median libsize: 5529.000000
infercnv_obj <- infercnv:::anscombe_transform(infercnv_obj)
infercnv_obj <- infercnv:::log2xplus1(infercnv_obj)
## INFO [2023-06-25 22:49:10] transforming log2xplus1()
threshold = mean(abs(infercnv:::get_average_bounds(infercnv_obj)))
infercnv_obj <- infercnv:::apply_max_threshold_bounds(infercnv_obj, threshold=threshold)
## INFO [2023-06-25 22:49:29] ::process_data:setting max centered expr, threshold set to: +/-: 3.29271722914226
infercnv_obj = infercnv:::smooth_by_chromosome(infercnv_obj, window_length=101, smooth_ends=TRUE)
## INFO [2023-06-25 22:49:31] smooth_by_chromosome: chr: chr1
## INFO [2023-06-25 22:49:43] smooth_by_chromosome: chr: chr2
## INFO [2023-06-25 22:49:54] smooth_by_chromosome: chr: chr3
## INFO [2023-06-25 22:50:06] smooth_by_chromosome: chr: chr4
## INFO [2023-06-25 22:50:16] smooth_by_chromosome: chr: chr5
## INFO [2023-06-25 22:50:27] smooth_by_chromosome: chr: chr6
## INFO [2023-06-25 22:50:37] smooth_by_chromosome: chr: chr7
## INFO [2023-06-25 22:50:48] smooth_by_chromosome: chr: chr8
## INFO [2023-06-25 22:50:59] smooth_by_chromosome: chr: chr9
## INFO [2023-06-25 22:51:10] smooth_by_chromosome: chr: chr10
## INFO [2023-06-25 22:51:20] smooth_by_chromosome: chr: chr11
## INFO [2023-06-25 22:51:31] smooth_by_chromosome: chr: chr12
## INFO [2023-06-25 22:51:42] smooth_by_chromosome: chr: chr13
## INFO [2023-06-25 22:51:52] smooth_by_chromosome: chr: chr14
## INFO [2023-06-25 22:52:04] smooth_by_chromosome: chr: chr15
## INFO [2023-06-25 22:52:14] smooth_by_chromosome: chr: chr16
## INFO [2023-06-25 22:52:24] smooth_by_chromosome: chr: chr17
## INFO [2023-06-25 22:52:35] smooth_by_chromosome: chr: chr18
## INFO [2023-06-25 22:52:45] smooth_by_chromosome: chr: chr19
## INFO [2023-06-25 22:52:55] smooth_by_chromosome: chr: chr20
## INFO [2023-06-25 22:53:05] smooth_by_chromosome: chr: chr21
## INFO [2023-06-25 22:53:13] smooth_by_chromosome: chr: chr22
infercnv_obj <- infercnv:::center_cell_expr_across_chromosome(infercnv_obj, method = "median")
## INFO [2023-06-25 22:53:23] ::center_smooth across chromosomes per cell
plot_cnv(infercnv_obj, out_dir = "/hpc/group/patellab/aam131/UW44SliceCultureNew/UW44scx", output_filename='infercnv.chr_smoothed', x.range="auto", title = "chr smoothed")
## INFO [2023-06-25 22:53:36] ::plot_cnv:Start
## INFO [2023-06-25 22:53:36] ::plot_cnv:Current data dimensions (r,c)=8262,11921 Total=429909.630736514 Min=-0.283142750228635 Max=1.06859317854554.
## INFO [2023-06-25 22:53:36] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2023-06-25 22:53:38] plot_cnv(): auto thresholding at: (-0.191953 , 0.200683)
## INFO [2023-06-25 22:53:40] plot_cnv_observation:Start
## INFO [2023-06-25 22:53:40] Observation data size: Cells= 10360 Genes= 8262
## INFO [2023-06-25 22:53:40] clustering observations via method: ward.D
## INFO [2023-06-25 22:53:40] Number of cells in group(1) is 2309
## INFO [2023-06-25 22:53:40] group size being clustered: 2309,8262
## INFO [2023-06-25 23:03:35] Number of cells in group(2) is 1826
## INFO [2023-06-25 23:03:35] group size being clustered: 1826,8262
## INFO [2023-06-25 23:09:08] Number of cells in group(3) is 554
## INFO [2023-06-25 23:09:08] group size being clustered: 554,8262
## INFO [2023-06-25 23:09:34] Number of cells in group(4) is 402
## INFO [2023-06-25 23:09:34] group size being clustered: 402,8262
## INFO [2023-06-25 23:09:46] Number of cells in group(5) is 272
## INFO [2023-06-25 23:09:46] group size being clustered: 272,8262
## INFO [2023-06-25 23:09:50] Number of cells in group(6) is 271
## INFO [2023-06-25 23:09:50] group size being clustered: 271,8262
## INFO [2023-06-25 23:09:54] Number of cells in group(7) is 151
## INFO [2023-06-25 23:09:54] group size being clustered: 151,8262
## INFO [2023-06-25 23:09:55] Number of cells in group(8) is 109
## INFO [2023-06-25 23:09:55] group size being clustered: 109,8262
## INFO [2023-06-25 23:09:55] Number of cells in group(9) is 1412
## INFO [2023-06-25 23:09:55] group size being clustered: 1412,8262
## INFO [2023-06-25 23:13:07] Number of cells in group(10) is 44
## INFO [2023-06-25 23:13:07] group size being clustered: 44,8262
## INFO [2023-06-25 23:13:08] Number of cells in group(11) is 17
## INFO [2023-06-25 23:13:08] group size being clustered: 17,8262
## INFO [2023-06-25 23:13:08] Number of cells in group(12) is 1232
## INFO [2023-06-25 23:13:08] group size being clustered: 1232,8262
## INFO [2023-06-25 23:15:30] Number of cells in group(13) is 951
## INFO [2023-06-25 23:15:30] group size being clustered: 951,8262
## INFO [2023-06-25 23:16:53] Number of cells in group(14) is 810
## INFO [2023-06-25 23:16:53] group size being clustered: 810,8262
## INFO [2023-06-25 23:17:50] plot_cnv_observation:Writing observation groupings/color.
## INFO [2023-06-25 23:17:50] plot_cnv_observation:Done writing observation groupings/color.
## INFO [2023-06-25 23:17:51] plot_cnv_observation:Writing observation heatmap thresholds.
## INFO [2023-06-25 23:17:51] plot_cnv_observation:Done writing observation heatmap thresholds.
## INFO [2023-06-25 23:18:05] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2023-06-25 23:18:05] Quantiles of plotted data range: -0.19195268097594,-0.0445440696867155,0,0.0479066962157194,0.200682581424135
## INFO [2023-06-25 23:18:14] plot_cnv_references:Start
## INFO [2023-06-25 23:18:14] Reference data size: Cells= 1561 Genes= 8262
## INFO [2023-06-25 23:20:02] plot_cnv_references:Number reference groups= 3
## INFO [2023-06-25 23:20:02] plot_cnv_references:Plotting heatmap.
## INFO [2023-06-25 23:20:04] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2023-06-25 23:20:04] Quantiles of plotted data range: -0.19195268097594,-0.0404382567569638,0,0.0439141901578076,0.200682581424135
## $cluster_by_groups
## [1] TRUE
##
## $k_obs_groups
## [1] 3
##
## $contig_cex
## [1] 1
##
## $x.center
## [1] 0.00436495
##
## $x.range
## [1] -0.1919527 0.2006826
##
## $hclust_method
## [1] "ward.D"
##
## $color_safe_pal
## [1] FALSE
##
## $output_format
## [1] "png"
##
## $png_res
## [1] 300
##
## $dynamic_resize
## [1] 0
infercnv_obj <- infercnv:::subtract_ref_expr_from_obs(infercnv_obj)
## INFO [2023-06-25 23:20:05] ::subtract_ref_expr_from_obs:Start inv_log=FALSE, use_bounds=TRUE
## INFO [2023-06-25 23:20:05] subtracting mean(normal) per gene per cell across all data
## INFO [2023-06-25 23:20:12] -subtracting expr per gene, use_bounds=TRUE
infercnv_obj <- infercnv:::invert_log2(infercnv_obj)
## INFO [2023-06-25 23:20:18] invert_log2(), computing 2^x
infercnv_obj <- infercnv:::clear_noise_via_ref_mean_sd(infercnv_obj, sd_amplifier = 1.5)
## INFO [2023-06-25 23:20:20] denoising using mean(normal) +- sd_amplifier * sd(normal) per gene per cell across all data
## INFO [2023-06-25 23:20:20] :: **** clear_noise_via_ref_quantiles **** : removing noise between bounds: 0.960187462656767 - 1.04089391383297
plot_cnv(infercnv_obj, out_dir = "/hpc/group/patellab/aam131/UW44SliceCultureNew/UW44scx", output_filename='infercnv.denoised', x.range="auto", title = "denoised")
## INFO [2023-06-25 23:20:21] ::plot_cnv:Start
## INFO [2023-06-25 23:20:21] ::plot_cnv:Current data dimensions (r,c)=8262,11921 Total=98711829.788204 Min=0.75211963032104 Max=2.06471329603865.
## INFO [2023-06-25 23:20:22] ::plot_cnv:Depending on the size of the matrix this may take a moment.
## INFO [2023-06-25 23:20:24] plot_cnv(): auto thresholding at: (0.887469 , 1.117009)
## INFO [2023-06-25 23:20:27] plot_cnv_observation:Start
## INFO [2023-06-25 23:20:27] Observation data size: Cells= 10360 Genes= 8262
## INFO [2023-06-25 23:20:27] clustering observations via method: ward.D
## INFO [2023-06-25 23:20:27] Number of cells in group(1) is 2309
## INFO [2023-06-25 23:20:27] group size being clustered: 2309,8262
## INFO [2023-06-25 23:34:24] Number of cells in group(2) is 1826
## INFO [2023-06-25 23:34:24] group size being clustered: 1826,8262
## INFO [2023-06-25 23:42:04] Number of cells in group(3) is 554
## INFO [2023-06-25 23:42:04] group size being clustered: 554,8262
## INFO [2023-06-25 23:42:26] Number of cells in group(4) is 402
## INFO [2023-06-25 23:42:26] group size being clustered: 402,8262
## INFO [2023-06-25 23:42:35] Number of cells in group(5) is 272
## INFO [2023-06-25 23:42:35] group size being clustered: 272,8262
## INFO [2023-06-25 23:42:41] Number of cells in group(6) is 271
## INFO [2023-06-25 23:42:41] group size being clustered: 271,8262
## INFO [2023-06-25 23:42:48] Number of cells in group(7) is 151
## INFO [2023-06-25 23:42:48] group size being clustered: 151,8262
## INFO [2023-06-25 23:42:50] Number of cells in group(8) is 109
## INFO [2023-06-25 23:42:50] group size being clustered: 109,8262
## INFO [2023-06-25 23:42:50] Number of cells in group(9) is 1412
## INFO [2023-06-25 23:42:51] group size being clustered: 1412,8262
## INFO [2023-06-25 23:47:09] Number of cells in group(10) is 44
## INFO [2023-06-25 23:47:09] group size being clustered: 44,8262
## INFO [2023-06-25 23:47:09] Number of cells in group(11) is 17
## INFO [2023-06-25 23:47:09] group size being clustered: 17,8262
## INFO [2023-06-25 23:47:09] Number of cells in group(12) is 1232
## INFO [2023-06-25 23:47:10] group size being clustered: 1232,8262
## INFO [2023-06-25 23:50:18] Number of cells in group(13) is 951
## INFO [2023-06-25 23:50:18] group size being clustered: 951,8262
## INFO [2023-06-25 23:52:23] Number of cells in group(14) is 810
## INFO [2023-06-25 23:52:24] group size being clustered: 810,8262
## INFO [2023-06-25 23:53:46] plot_cnv_observation:Writing observation groupings/color.
## INFO [2023-06-25 23:53:46] plot_cnv_observation:Done writing observation groupings/color.
## INFO [2023-06-25 23:53:46] plot_cnv_observation:Writing observation heatmap thresholds.
## INFO [2023-06-25 23:53:46] plot_cnv_observation:Done writing observation heatmap thresholds.
## INFO [2023-06-25 23:54:01] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2023-06-25 23:54:01] Quantiles of plotted data range: 0.887469351559575,1.00054068824487,1.00054068824487,1.00054068824487,1.11700876546505
## INFO [2023-06-25 23:54:09] plot_cnv_references:Start
## INFO [2023-06-25 23:54:09] Reference data size: Cells= 1561 Genes= 8262
## INFO [2023-06-25 23:55:58] plot_cnv_references:Number reference groups= 3
## INFO [2023-06-25 23:55:58] plot_cnv_references:Plotting heatmap.
## INFO [2023-06-25 23:56:00] Colors for breaks: #00008B,#24249B,#4848AB,#6D6DBC,#9191CC,#B6B6DD,#DADAEE,#FFFFFF,#EEDADA,#DDB6B6,#CC9191,#BC6D6D,#AB4848,#9B2424,#8B0000
## INFO [2023-06-25 23:56:00] Quantiles of plotted data range: 0.887469351559575,1.00054068824487,1.00054068824487,1.00054068824487,1.11700876546505
## $cluster_by_groups
## [1] TRUE
##
## $k_obs_groups
## [1] 3
##
## $contig_cex
## [1] 1
##
## $x.center
## [1] 1.002239
##
## $x.range
## [1] 0.8874694 1.1170088
##
## $hclust_method
## [1] "ward.D"
##
## $color_safe_pal
## [1] FALSE
##
## $output_format
## [1] "png"
##
## $png_res
## [1] 300
##
## $dynamic_resize
## [1] 0
Based on this we can clearly see that cluster 0, 1 , 2, and 6 are Tumor cells. Of note, Cluster 11 and 5 have similar chromosomal aberrations and might be a progenitor population that gives rise to tumor cells.
library(dplyr)
library(openxlsx)
library(HGNChelper)
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/gene_sets_prepare.R")
source("https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/R/sctype_score_.R")
db_ = "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx"; tissue = c("Brain")
#db_ = "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx"; tissue = c("Brain", "Immune System")
gs_list = gene_sets_prepare(db_, tissue)
es.max = sctype_score(scRNAseqData = seurat.singlet[["SCT"]]@scale.data, scaled = TRUE, gs = gs_list$gs_positive, gs2 = gs_list$gs_negative)
cL_resutls = do.call("rbind", lapply(unique(seurat.singlet@meta.data$seurat_clusters), function(cl){
es.max.cl = sort(rowSums(es.max[ ,rownames(seurat.singlet@meta.data[seurat.singlet@meta.data$seurat_clusters==cl, ])]), decreasing = !0)
head(data.frame(cluster = cl, type = names(es.max.cl), scores = es.max.cl, ncells = sum(seurat.singlet@meta.data$seurat_clusters==cl)), 10)
}))
sctype_scores = cL_resutls %>% group_by(cluster) %>% top_n(n = 1, wt = scores)
sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
print(sctype_scores[,1:3])
## # A tibble: 17 × 3
## # Groups: cluster [17]
## cluster type scores
## <fct> <chr> <dbl>
## 1 11 Neuroepithelial cells 408.
## 2 5 Radial glial cells 1490.
## 3 18 Endothelial cells 229.
## 4 9 Oligodendrocytes 1571.
## 5 15 Endothelial cells 4501.
## 6 8 Microglial cells 2215.
## 7 2 Oligodendrocyte precursor cells 2905.
## 8 1 Schwann precursor cells 1156.
## 9 6 Oligodendrocyte precursor cells 5676.
## 10 0 Schwann precursor cells 3593.
## 11 19 Glutamatergic neurons 665.
## 12 14 Oligodendrocytes 3801.
## 13 20 Astrocytes 1084.
## 14 13 Astrocytes 1092.
## 15 16 Microglial cells 1349.
## 16 10 Microglial cells 2741.
## 17 21 Glutamatergic neurons 14.4
slice.markers <- FindAllMarkers(seurat.singlet, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 13
## Calculating cluster 14
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 18
## Calculating cluster 19
## Calculating cluster 20
## Calculating cluster 21
slice.markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)
slice.markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(seurat.singlet, features = top10$gene) + NoLegend()
## Warning in DoHeatmap(seurat.singlet, features = top10$gene): The following
## features were omitted as they were not found in the scale.data slot for the SCT
## assay: ARGLU1, AHI1, BDP1, RBM25
Based on ScType, Matrix Plots, and DEG plots, all the annotations are
clear.
They are: - Cluster 0: Tumor c1 - Cluster 1: Tumor c2 - Cluster 2: Tumor c3 - Cluster 5: Radial Glial Cells - Cluster 6: Tumor c4 - Cluster 8: Microglia c1 - Cluster 9: Oligodendrocytes c1 - Cluster 10: Microglia c2 - Cluster 11: Neuroepithelial Cells - Cluster 13: Astrocytes c1 - Cluster 14: Oligodendrocytes c2 - Cluster 15: Pericytes - Cluster 16: Migroglia c3 - Cluster 18: Endothelial Cells - Cluster 19: Excitatory Neurons - Cluster 20: Astrocytes c2 - Cluster 21: Inhibitory Neurons
#Label the Clusters and Generate a Working Object For Future Analyses
new.cluster.ids <- c("Tumor c1","Tumor c2","Tumor c3", "Radial Glial Cells","Tumor c4", "Microglia c1","Oligodendrocytes c1","Microglia c2", "Neuroepithelial Cells","Astrocytes c1","Oligodendrocytes c2","Pericytes","Microglia c3","Endothelial Cells","Excitatory Neurons","Astrocytes c2","Inhibitory Neurons")
names(new.cluster.ids) <- levels(seurat.singlet)
seurat.singlet <- RenameIdents(seurat.singlet, new.cluster.ids)
seurat.singlet$clusterlabels<-Idents(seurat.singlet)
DimPlot(seurat.singlet, label=TRUE,pt.size = 0.7)
saveRDS(seurat.singlet, file = "annotatedUW44Data.RDS")