suppressPackageStartupMessages({
library(BiocFileCache)
library(scran)
library(scater)
library(tidyverse)
library(SingleCellExperiment)
library(DropletUtils)
library(ggplot2)
library(cowplot)
library(Seurat)
library(monocle3)
library(SeuratObject)
})
####### SNL CCSP
data_dir <- "~/SNL MEX/17681X1/MEX"
list.files(data_dir)
## [1] "barcodes.tsv.gz" "features.tsv.gz" "matrix.mtx.gz"
SNL_CCSP <- Read10X(data_dir)
SNL_CCSP <- CreateSeuratObject(counts = SNL_CCSP,project="SNL_CCSP")
label<-"CCSP"
tx<-"Control"
SNL_CCSP@meta.data$Cre<-label
SNL_CCSP@meta.data$tx<-tx
SNL_CCSP@meta.data$n<-"n=1"
SNL_CCSP@meta.data$gnomex<-"17681X1"
head(SNL_CCSP@meta.data)
## orig.ident nCount_RNA nFeature_RNA Cre tx n gnomex
## AAACCCAAGTAGAATC-1 SNL_CCSP 1837 660 CCSP Control n=1 17681X1
## AAACCCATCAGGAACG-1 SNL_CCSP 30130 5068 CCSP Control n=1 17681X1
## AAACCCATCTCAAAGC-1 SNL_CCSP 4218 983 CCSP Control n=1 17681X1
## AAACCCATCTGCACCT-1 SNL_CCSP 10055 1569 CCSP Control n=1 17681X1
## AAACCCATCTTGCAGA-1 SNL_CCSP 8658 564 CCSP Control n=1 17681X1
## AAACGAAAGCCATCCG-1 SNL_CCSP 33399 5967 CCSP Control n=1 17681X1
####### SNL CCSP + CXCR2i ####
data_dir <- "~/SNL/MEX/17681X2 AZD CCSP"
list.files(data_dir)
## [1] "barcodes.tsv.gz" "features.tsv.gz" "matrix.mtx.gz"
SNL_CCSP_Tx <- Read10X(data_dir)
SNL_CCSP_Tx <- CreateSeuratObject(counts = SNL_CCSP_Tx,projec="SNL_CCSP_Tx")
label<-"CCSP"
tx<-"CXCR2i"
SNL_CCSP_Tx@meta.data$Cre<-label
SNL_CCSP_Tx@meta.data$tx<-tx
SNL_CCSP_Tx@meta.data$n<-"n=1"
SNL_CCSP_Tx@meta.data$gnomex<-"17681X2"
head(SNL_CCSP_Tx@meta.data)
## orig.ident nCount_RNA nFeature_RNA Cre tx n gnomex
## AAACCCAAGCAGCACA-1 SNL_CCSP_Tx 9301 2534 CCSP CXCR2i n=1 17681X2
## AAACCCAAGGGATCAC-1 SNL_CCSP_Tx 2701 858 CCSP CXCR2i n=1 17681X2
## AAACCCACACATACGT-1 SNL_CCSP_Tx 8817 1630 CCSP CXCR2i n=1 17681X2
## AAACCCACATCATCCC-1 SNL_CCSP_Tx 1059 616 CCSP CXCR2i n=1 17681X2
## AAACCCAGTCCAGTTA-1 SNL_CCSP_Tx 14102 1933 CCSP CXCR2i n=1 17681X2
## AAACCCAGTCTTTCAT-1 SNL_CCSP_Tx 9579 1885 CCSP CXCR2i n=1 17681X2
####### SNL CMV 1 (15242X1) ####
data_dir <- "~/SNL MEX/15242X1/MEX"
list.files(data_dir)
## [1] "barcodes.tsv" "genes.tsv" "matrix.mtx"
SNL_CMV1 <- Read10X(data_dir)
SNL_CMV1 <- CreateSeuratObject(counts = SNL_CMV1,project="SNL_CMV1")
label<-"CMV"
SNL_CMV1@meta.data$Cre<-label
SNL_CMV1@meta.data$tx<-"Control"
SNL_CMV1@meta.data$n<-"n=1"
SNL_CMV1@meta.data$gnomex<-"15242X1"
head(SNL_CMV1@meta.data)
## orig.ident nCount_RNA nFeature_RNA Cre tx n gnomex
## AAACCTGAGATAGCAT-1 SNL_CMV1 3247 1220 CMV Control n=1 15242X1
## AAACCTGCAATGGATA-1 SNL_CMV1 11893 3269 CMV Control n=1 15242X1
## AAACCTGCAATGTAAG-1 SNL_CMV1 6207 1908 CMV Control n=1 15242X1
## AAACCTGCAGAAGCAC-1 SNL_CMV1 9743 2638 CMV Control n=1 15242X1
## AAACCTGCAGCTCCGA-1 SNL_CMV1 4977 1701 CMV Control n=1 15242X1
## AAACCTGGTAGAGCTG-1 SNL_CMV1 12015 2934 CMV Control n=1 15242X1
####### SNL CMV CXCR2i (15242X2) ####
data_dir <- "~/SNL/SNL AZD treated 10x data"
list.files(data_dir)
## [1] "barcodes.tsv" "genes.tsv" "matrix.mtx"
SNL_CMV_Tx <- Read10X(data_dir)
SNL_CMV_Tx <- CreateSeuratObject(counts = SNL_CMV_Tx,project="SNL_CMV_Tx")
label<-"CMV"
SNL_CMV_Tx@meta.data$Cre<-label
SNL_CMV_Tx@meta.data$tx<-"CXCR2i"
SNL_CMV_Tx@meta.data$n<-"n=1"
SNL_CMV_Tx@meta.data$gnomex<-"15242X2"
head(SNL_CMV_Tx@meta.data)
## orig.ident nCount_RNA nFeature_RNA Cre tx n gnomex
## AAACCTGAGACATAAC-1 SNL_CMV_Tx 3157 1654 CMV CXCR2i n=1 15242X2
## AAACCTGAGCGCTCCA-1 SNL_CMV_Tx 3730 1539 CMV CXCR2i n=1 15242X2
## AAACCTGCAAGCGCTC-1 SNL_CMV_Tx 4510 1714 CMV CXCR2i n=1 15242X2
## AAACCTGGTAGTACCT-1 SNL_CMV_Tx 7010 2052 CMV CXCR2i n=1 15242X2
## AAACCTGTCACCATAG-1 SNL_CMV_Tx 2421 1085 CMV CXCR2i n=1 15242X2
## AAACCTGTCAGTGTTG-1 SNL_CMV_Tx 6889 2242 CMV CXCR2i n=1 15242X2
####### SNL CMV 2 (15739X2) ####
data_dir <- "~/SNL MEX/15739X2/MEX"
list.files(data_dir)
## [1] "barcodes.tsv.gz" "features.tsv.gz" "matrix.mtx.gz"
SNL_CMV2 <- Read10X(data_dir)
SNL_CMV2 <- CreateSeuratObject(counts = SNL_CMV2,project="SNL_CMV2")
label<-"CMV"
SNL_CMV2@meta.data$Cre<-label
SNL_CMV2@meta.data$tx<-"Control"
SNL_CMV2@meta.data$n<-"n=2"
SNL_CMV2@meta.data$gnomex<-"15739X2"
head(SNL_CMV2@meta.data)
## orig.ident nCount_RNA nFeature_RNA Cre tx n gnomex
## AAACCTGCACATTTCT-1 SNL_CMV2 6886 1808 CMV Control n=2 15739X2
## AAACCTGGTACCAGTT-1 SNL_CMV2 6805 1746 CMV Control n=2 15739X2
## AAACCTGGTAGAGTGC-1 SNL_CMV2 8217 2530 CMV Control n=2 15739X2
## AAACCTGGTCGCTTCT-1 SNL_CMV2 17570 3915 CMV Control n=2 15739X2
## AAACCTGGTCTTCTCG-1 SNL_CMV2 15449 2881 CMV Control n=2 15739X2
## AAACCTGGTGGTACAG-1 SNL_CMV2 565 294 CMV Control n=2 15739X2
####### SNL CMV 3 (15739X3) ####
data_dir <- "~/SNL MEX/15739X3/MEX"
list.files(data_dir)
## [1] "barcodes.tsv.gz" "features.tsv.gz" "matrix.mtx.gz"
SNL_CMV3 <- Read10X(data_dir)
SNL_CMV3 <- CreateSeuratObject(counts = SNL_CMV3,project="SNL_CMV3")
label<-"CMV"
SNL_CMV3@meta.data$Cre<-label
SNL_CMV3@meta.data$tx<-"Control"
SNL_CMV3@meta.data$n<-"n=3"
SNL_CMV3@meta.data$gnomex<-"15739X3"
head(SNL_CMV3@meta.data)
## orig.ident nCount_RNA nFeature_RNA Cre tx n gnomex
## AAACCTGAGCTGTTCA-1 SNL_CMV3 4753 1500 CMV Control n=3 15739X3
## AAACCTGCAAGCGATG-1 SNL_CMV3 16781 3515 CMV Control n=3 15739X3
## AAACCTGGTTGGGACA-1 SNL_CMV3 13883 2515 CMV Control n=3 15739X3
## AAACGGGAGGCTAGGT-1 SNL_CMV3 9831 2377 CMV Control n=3 15739X3
## AAACGGGGTCAACTGT-1 SNL_CMV3 7756 1734 CMV Control n=3 15739X3
## AAAGATGAGAAACCGC-1 SNL_CMV3 9215 1991 CMV Control n=3 15739X3
SNL_All<-merge(SNL_CCSP, c(SNL_CCSP_Tx, SNL_CMV1, SNL_CMV2, SNL_CMV3, SNL_CMV_Tx))
## Warning in CheckDuplicateCellNames(object.list = objects): Some cell names are
## duplicated across objects provided. Renaming to enforce unique cell names.
SNL_All
## An object of class Seurat
## 32454 features across 24380 samples within 1 assay
## Active assay: RNA (32454 features, 0 variable features)
# Check cell counts per cell of origin infection.
table(SNL_All@meta.data$Cre)
##
## CCSP CMV
## 11690 12690
# Check cell counts per treatment.
table(SNL_All@meta.data$tx)
##
## Control CXCR2i
## 14635 9745
SNL_All[["percent.mt"]] <- PercentageFeatureSet(SNL_All, pattern = "^mt-")
VlnPlot(SNL_All, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3,group.by="Cre", pt.size=0.2)
FeatureScatter(SNL_All, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by="Cre")
# Subsetting quality cells
SNL_All<- subset(SNL_All, subset = nFeature_RNA > 500 & nFeature_RNA < 5000 & percent.mt < 20)
# Check new cell counts post QC.
table(SNL_All@meta.data$Cre)
##
## CCSP CMV
## 8704 11548
table(SNL_All@meta.data$tx)
##
## Control CXCR2i
## 11982 8270
SNL_All
## An object of class Seurat
## 32454 features across 20252 samples within 1 assay
## Active assay: RNA (32454 features, 0 variable features)
SNL_CCSP_1<-subset(SNL_All, idents=c("SNL_CCSP","SNL_CCSP_Tx"))
# Check cell numbers now.
table(SNL_CCSP_1@meta.data$tx)
##
## Control CXCR2i
## 3442 5262
#Ensure subsetting got rid of all CMV samples.
table(SNL_CCSP_1@meta.data$Cre)
##
## CCSP
## 8704
SNL_CCSP_1 <- NormalizeData(SNL_CCSP_1, normalization.method = "LogNormalize", scale.factor = 10000)
SNL_CCSP_1
## An object of class Seurat
## 32454 features across 8704 samples within 1 assay
## Active assay: RNA (32454 features, 0 variable features)
SNL_CCSP_1 <- FindVariableFeatures(SNL_CCSP_1, selection.method = "vst", nfeatures = 2000)
Identify the 10 most highly variable genes features that are highly expressed in some cells, and lowly expressed in others.. often carrying biological significance in R
top10 <- head(VariableFeatures(SNL_CCSP_1), 50)
top10
## [1] "Sftpc" "Sftpa1" "Lyz2" "Scd1" "Sftpb" "Cxcl15"
## [7] "Reg3g" "Tff1" "Chil1" "Cd74" "Slc34a2" "Krt15"
## [13] "Lcn2" "S100a9" "Chil4" "Napsa" "Npc2" "Hp"
## [19] "Dmbt1" "S100a8" "Muc5b" "Hc" "Tff2" "Ppbp"
## [25] "Krt14" "Tm4sf1" "AW112010" "Crct1" "Cxcl2" "Gkn1"
## [31] "Sprr2h" "Krt17" "Reg1" "Apoe" "Scgb3a1" "Ifitm1"
## [37] "Tppp3" "Lyz1" "Krt5" "Lamp3" "Agr2" "Sprr1a"
## [43] "Spp1" "Krtap31-2" "Cxcl3" "Cyp2a5" "Itih4" "Scgb1a1"
## [49] "Fabp5" "Lpcat1"
Shifts the expression of each gene, so that the mean expression across cells is 0. Scales the expression of each gene, so that the variance across cells is 1. This step gives equal weight in downstream analyses, so that highly-expressed genes do not dominate.
all.genes <- rownames(SNL_CCSP_1)
SNL_CCSP_1 <- ScaleData(SNL_CCSP_1, features = all.genes)
## Centering and scaling data matrix
Here we can see top genes driving principal components/data variability in first 5 PCs.
SNL_CCSP_1 <- RunPCA(SNL_CCSP_1, features = VariableFeatures(object = SNL_CCSP_1))
## PC_ 1
## Positive: 2300002M23Rik, Glrx, Cnfn, Gadd45b, Isg15, Csta1, Slc6a14, Fam84a, Ndrg1, Cysrt1
## Ifit1bl1, Arg1, Klk13, Duoxa2, S100a10, Upk1b, Il1a, Gbp4, Jdp2, Spink5
## Herpud1, Plat, Hist1h1c, Il1r2, Pbp2, Prss22, Gm44275, Calml3, Hist1h2ac, Dusp14
## Negative: Ptma, Cd63, Krt8, Rpsa, Rpl3, Ppia, Rps18, Dbi, Eef1a1, Fabp5
## Rps2, Atp1a1, Tagln2, Lgals4, Rps8, Rpl12, Oat, Prdx1, Rpl5, Rps19
## Rpl8, Rack1, Rplp1, Prom1, Rpl13, Rpl14, Rps21, Hsp90ab1, Rpl36a, Rps3a1
## PC_ 2
## Positive: Slc34a2, Lamp3, Napsa, Hc, Sftpb, Cxcl15, Chil1, Lpcat1, Dram1, Cd74
## Scd1, Sftpa1, Lgi3, H2-Aa, Lyz2, Lrp2, Ager, Sfta2, Egfl6, H2-Eb1
## H2-Ab1, Itih4, Ptgs1, Mme, Mlc1, Lrrk2, Fasn, S100g, Sftpc, Ppp1r14c
## Negative: Lgals4, Oit1, Clu, Ctse, Prom1, Agr2, Anxa10, Tff2, Dpcr1, Wfdc1
## Vsig1, Rgs17, Il18, Paqr5, Tff1, 2210407C18Rik, Krt19, Fermt1, Mgst2, Tmsb10
## Prss32, Ces1f, Gsta4, Krt8, Basp1, Fut2, Muc16, Scara3, Pla2g10, Gm3336
## PC_ 3
## Positive: Tpm2, Cldn3, Tnfrsf11b, Cldn4, Tacstd2, Srd5a1, Gapdh, Anxa8, Nupr1, Plat
## Serpinb6b, Akr1b8, Cwh43, Basp1, Srgn, Tnip3, H2afz, Marcksl1, Hk2, Arg1
## Ctsl, Myo1b, Hilpda, Junb, Aldh1a3, Msn, Stom, Calm1, Phlda1, Atox1
## Negative: Tff2, Ern2, Selenop, Agr2, Ces1f, Prom1, Lgals4, Xist, Ctse, Cfi
## Vsig1, 5330417C22Rik, Galnt6, Muc16, Zfp704, Paqr5, Fabp5, Tff1, Smim6, Reg3g
## Ptprn2, Glrx, Gstm1, Dmbt1, Oat, Rgs17, Sult1c2, Scara3, Car2, Itih2
## PC_ 4
## Positive: Napsa, Lamp3, Slc34a2, Cldn18, Hc, Lgi3, Fabp5, S100g, Ctse, Tff1
## Lrp2, Cxcl15, Lyz2, Dpcr1, Scd1, Itih4, Il33, Chil1, Egfl6, Ager
## Apoe, Mlc1, Anxa10, Clu, Mme, Dram1, Fasn, Sftpb, Lpcat1, Sgpp2
## Negative: Ccdc153, Tppp3, Tmem212, Enkur, 1110017D15Rik, Rsph1, Gm19935, Dnah12, Mlf1, Tuba1a
## Cfap206, Cfap126, Mapk15, Mns1, Dnah9, Sec14l3, Gm867, Ccdc113, Lrrc10b, Foxj1
## Ccdc39, Iqcg, 1700007K13Rik, Riiad1, Dnah6, Fam92b, Ccdc17, 4833427G06Rik, Fam183b, Drc3
## PC_ 5
## Positive: Hdc, Chchd10, Cldn3, Emb, Sftpd, Ppp1r14c, Sftpb, Cbr2, Slc34a2, Chil1
## Napsa, Hc, Nupr1, Lamp3, Vpreb3, Wfdc2, Aldh1a1, Sec14l3, Dram1, Tmem212
## Cxcl15, Arg1, Acot1, Krt19, Fasn, Lgi3, Cfap126, Lrp2, Capsl, Lrrc10b
## Negative: Calcrl, Egfl7, Cldn5, Ptprb, Pecam1, Ramp2, Tmem100, Ace, Cav1, Itga1
## Cyyr1, Clec14a, Ctla2a, Acvrl1, Hpgd, Scn7a, Cd36, Cavin2, Cdh5, BC028528
## Aqp1, Thbd, Adgrl4, Gpihbp1, Cd93, S1pr1, Clec1a, Icam2, Tspan7, Klf2
DimPlot(SNL_CCSP_1, reduction = "pca", group.by='tx')+ggtitle("PCA of all cells captured")
Here we see some outlying cells that are likely non-tumor.
Visualize dimensions and choose elbow that captures majority of variance.
ElbowPlot(SNL_CCSP_1, ndims=50)
Choosing 25 dimensions
SNL_CCSP_1 <- FindNeighbors(SNL_CCSP_1, dims = 1:25)
## Computing nearest neighbor graph
## Computing SNN
SNL_CCSP_1<- FindClusters(SNL_CCSP_1, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8704
## Number of edges: 282351
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8395
## Number of communities: 10
## Elapsed time: 1 seconds
SNL_CCSP_1 <- RunUMAP(SNL_CCSP_1, dims = 1:25)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:37:46 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:37:46 Read 8704 rows and found 25 numeric columns
## 16:37:46 Using Annoy for neighbor search, n_neighbors = 30
## 16:37:46 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:37:48 Writing NN index file to temp file /tmp/Rtmpyyhzg4/filea5cc4a2a9832
## 16:37:48 Searching Annoy index using 1 thread, search_k = 3000
## 16:37:51 Annoy recall = 100%
## 16:37:52 Commencing smooth kNN distance calibration using 1 thread
## 16:37:54 Initializing from normalized Laplacian + noise
## 16:37:54 Commencing optimization for 500 epochs, with 367956 positive edges
## 16:38:26 Optimization finished
x<-DimPlot(SNL_CCSP_1, reduction = "umap",label=TRUE,pt.size=.1)+ggtitle("All cells by Cluster ID")
y<-DimPlot(SNL_CCSP_1, reduction = "umap", group.by = "tx",label=FALSE,pt.size=0.1)+ggtitle("All cells by treatment")
multiplot(x,y, cols=2)
Looks like we have some outlying populations, I am going to check cell types with our markers.
more_types<-c("GFP","Sox2", "Nkx2-1", "Trp63", "Krt5","Krt13","Krt4",
"Myc","Yap1","Sox9","Runx2","Sox6","Sp7",
"Vcam1","Fgf10","Dcn","Clec3b","Fbln1", # rpma
"Col14a1", # matrix fibroblast/osteoclastic
"Acta2","Myh11","Tagln","Mustn1", #myofibroblast
"Lpl","Lipa","Pparg","Plin2","Ear1","Fabp1","Spp1", #lipofibroblast/osteoblastic
"Mertk","Marco","Mrc1","Ly75","Adgre1","Itgax","Cd68","Csf1r","Mafb","Msr1","Arg1","Adgre4","Clec4a1", #Macs/Myeloid
"Cx3cr1","Itgam","Cd14", #Monocytes
"S100a9","S100a8","Mmp9","Csf3r","Cxcr2","Ly6g", #Neuts
"Batf3","Xcr1","Clec9a","Ccl17","Ccl22", #DC
"Cd3d","Cd3e","Cd3g","Cd28","Cd8a","Cd4","Foxp3", # Tcell
"Gzma","Ncr1","Gzmb", #NK
"Fcmr","Cd19","Fcer2a","Pax5","Cd22","Cd79b","Cd79a", #B cells
"Slamf7", "Prdm1","Mzb1", #Plasma
"Mcam","Pecam1","Icam2","Cd36","Cd93", #Endothelial
"Scd1","Lamp3","Sftpb","Sftpa1","Aqp5", #Alveolar
"Scgb1a1","Cyp2f2","Scgb3a2","Scgb3a1","Lypd2", #club
"Foxj1","Rfx2","Rfx3","Trp73", #ciliated
"Muc5b","Muc5ac", #goblet
"Krt17","Perp", #basal
"Pou2f3","Ascl2","Rgs13","Il25","Tslp", #tuft
"Foxi1","Cftr","Ascl3", #ionocyte
"Epgn","Slc16a14","Grhl1", #Eosinophils
"Cpa3","Fcer1a","Ms4a2", #Basophils
"Cma1","Tpsb2", #Mast
"Cd200r3" #Combined with Mast, Baso, Eosinophil
)
DotPlot(SNL_CCSP_1,features = more_types, group.by="seurat_clusters")+theme(axis.text.x=element_text(angle=90, hjust=1,size=8), axis.text.y=element_text(hjust=1,size=10))
Looks like cluster 8 is AT2/Club cells/healthy lung. Cluster 9 is enriched for endothelial cell markers like PECAM1, MCAM.
Re-cluster tumor cells without 8 or 9
SNL_CCSP_tumor<-subset(SNL_CCSP_1, idents=c("0","1","2","3","4","5","6", "7"))
table(SNL_CCSP_tumor@meta.data$tx)
##
## Control CXCR2i
## 3349 5201
all.genes <- rownames(SNL_CCSP_tumor)
SNL_CCSP_tumor <- ScaleData(SNL_CCSP_tumor, features = all.genes)
## Centering and scaling data matrix
SNL_CCSP_tumor <- RunPCA(SNL_CCSP_tumor, features = VariableFeatures(object = SNL_CCSP_tumor))
## PC_ 1
## Positive: 2300002M23Rik, Glrx, Cnfn, Gadd45b, Isg15, Csta1, Slc6a14, Fam84a, Ndrg1, S100a10
## Cysrt1, Klk13, Ifit1bl1, Arg1, Duoxa2, Upk1b, Gbp4, Spink5, Il1a, Jdp2
## Herpud1, Il1r2, Pbp2, Hist1h1c, Gm44275, Plat, Prss22, Calml3, Hist1h2ac, Dusp14
## Negative: Krt8, Ptma, Cd63, Rpsa, Rpl3, Ppia, Lgals4, Rps18, Eef1a1, Dbi
## Fabp5, Prom1, Atp1a1, Rps2, Tagln2, Rpl12, Oat, Rps8, Agr2, Prdx1
## Rpl5, Rack1, Rpl8, Rplp1, Hsp90ab1, Cxcl17, Rps19, Rpl13, Rpl14, Ctse
## PC_ 2
## Positive: Tff2, Selenop, Ern2, Agr2, Prom1, Ces1f, Lgals4, Ctse, Xist, Vsig1
## Cfi, Galnt6, Muc16, Paqr5, 5330417C22Rik, Zfp704, Tff1, Gstm1, Fabp5, Reg3g
## Ptprn2, Smim6, Kitl, Glrx, Tcf4, Rgs17, Dmbt1, Sult1c2, Scara3, Oat
## Negative: Tpm2, Cldn3, Tnfrsf11b, Tacstd2, Cldn4, Srd5a1, Gapdh, Anxa8, Nupr1, Plat
## Serpinb6b, Akr1b8, Cwh43, H2afz, Tnip3, Hk2, Srgn, Basp1, Arg1, Marcksl1
## Msn, Ctsl, Myo1b, Junb, Hilpda, Aldh1a3, Atox1, Mif, Calm1, Stom
## PC_ 3
## Positive: Ccdc153, Sec14l3, Tmem212, Enkur, 1110017D15Rik, Lrrc10b, Rsph1, Dnah12, Gm19935, Cfap126
## Cfap206, Mlf1, Mns1, Dnah9, Mapk15, Ccdc113, Tppp3, Gm867, Foxj1, Ccdc39
## Iqcg, 1700007K13Rik, Dynlrb2, Riiad1, Dnah6, 4833427G06Rik, Fam92b, Fam183b, Drc3, Ccdc17
## Negative: Tff1, Ctse, Dpcr1, Anxa10, Lgals4, Clu, Rgs17, Vsig1, Fabp5, Cldn18
## Fut2, Vsig2, Ptprn2, Phgr1, Prss32, Ces1f, Tesc, Paqr5, Oit1, Pla2g10
## Ldhd, Tff2, Ern2, Wtip, Kit, Sytl2, B4galt6, Zfpm1, Lypd8, Gm3336
## PC_ 4
## Positive: Cnfn, Cysrt1, Krt4, Klk13, Gadd45b, Crct1, Krt14, Klk10, Krt13, Csta1
## Ly6g6c, S100a10, Klk14, Dmkn, S100a7a, 2300002M23Rik, 2610528A11Rik, Krt5, Dusp14, Rplp0
## Upk1b, Glrx, Pbp2, Lgals7, Gm44275, Aqp3, Eef1b2, Dapl1, Ndrg1, Cyp2f2
## Negative: Srd5a1, Muc4, Myo1b, Basp1, Tnfrsf11b, Plat, Btg2, Marcksl1, Vtcn1, Aplp2
## Cldn3, Hk2, Clu, Cxcl3, Ifit3, Ifit3b, Cd14, Cxcl5, Slc26a4, Akr1b8
## Sorl1, Scin, Cdh1, Tnip3, Cwh43, Fam46a, Sprr2f, Prss22, Ces1f, Nupr1
## PC_ 5
## Positive: Cbr2, Ces1d, Tmem176b, Mgst1, Tmem176a, Cyp2f2, Chad, Hp, Ldhb, Cp
## Lrg1, Gsta3, 5330417C22Rik, Selenbp1, Ifitm3, Rassf9, Car8, Gas6, Gabrp, Wnt4
## Dcxr, Aldh2, Them5, Adh7, Acsl1, Ifitm1, C3, Anpep, Foxp2, Lbp
## Negative: Dpcr1, Gldc, Tff1, Cldn18, Il33, Ndnf, Cftr, B4galt6, Fst, Avil
## Areg, Kit, Anxa10, Bok, Emp3, Ereg, Bmyc, Krt20, Gcnt1, Car9
## Arhgdig, Nedd9, Asns, Cela1, Samd5, Otop3, Cldn2, Prss12, Nptx2, Pkib
DimPlot(SNL_CCSP_tumor, reduction = "pca", group.by='tx')+ggtitle("PCA of Tumor Cells Only")
Looks much better, more continous changes.
ElbowPlot(SNL_CCSP_tumor, ndims=50)
Choosing ~15 dims which captures majority of variance
SNL_CCSP_tumor <- FindNeighbors(SNL_CCSP_tumor, dims = 1:15)
## Computing nearest neighbor graph
## Computing SNN
SNL_CCSP_tumor<- FindClusters(SNL_CCSP_tumor, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8550
## Number of edges: 269355
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8397
## Number of communities: 9
## Elapsed time: 1 seconds
SNL_CCSP_tumor <- RunUMAP(SNL_CCSP_tumor, dims = 1:15)
## 16:39:09 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:39:09 Read 8550 rows and found 15 numeric columns
## 16:39:09 Using Annoy for neighbor search, n_neighbors = 30
## 16:39:09 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:39:12 Writing NN index file to temp file /tmp/Rtmpyyhzg4/filea5cc7b626d52
## 16:39:12 Searching Annoy index using 1 thread, search_k = 3000
## 16:39:15 Annoy recall = 100%
## 16:39:16 Commencing smooth kNN distance calibration using 1 thread
## 16:39:18 Initializing from normalized Laplacian + noise
## 16:39:18 Commencing optimization for 500 epochs, with 348698 positive edges
## 16:39:49 Optimization finished
x<-DimPlot(SNL_CCSP_tumor, reduction = "umap",label=TRUE,pt.size=.1)+ggtitle("Tumor cells by Cluster ID")
y<-DimPlot(SNL_CCSP_tumor, reduction = "umap", group.by = "tx",label=FALSE,pt.size=0.1)+ggtitle("Tumor cells by treatment")
multiplot(x, y, cols=2)
DotPlot(SNL_CCSP_tumor,features = more_types, group.by="seurat_clusters")+ theme(axis.text.x=element_text(angle=90, hjust=1,size=8), axis.text.y=element_text(hjust=1,size=10))
Looks good for now. Moving forward.
library(hciR)
library(hciRdata)
s.genes<-cc.genes$s.genes
g2m.genes<-cc.genes$g2m.genes
s.genes.list<-subset(mouse94, mouse94$human_homolog %in% s.genes)
s.genes.mouse<-(s.genes.list$gene_name)
s.genes.mouse
## [1] "Cdc45" "Uhrf1" "Mcm2" "Slbp" "Mcm5" "Pola1"
## [7] "Gmnn" "Cdc6" "Rrm2" "Atad2" "Dscc1" "Mcm4"
## [13] "Chaf1b" "Rfc2" "Msh2" "Fen1" "Hells" "Prim1"
## [19] "Tyms" "Mcm6" "Wdr76" "Rad51" "Pcna" "Ccne2"
## [25] "Casp8ap2" "Usp1" "Nasp" "Rpa2" "Ung" "Rad51ap1"
## [31] "Blm" "Pold3" "Rrm1" "Gins2" "Tipin" "Brip1"
## [37] "Dtl" "Exo1" "Ubr7" "Clspn" "E2f8" "Cdca7"
g2m.genes.list<-subset(mouse94, mouse94$human_homolog %in% g2m.genes)
g2m.genes.mouse<-(g2m.genes.list$gene_name)
g2m.genes.mouse
## [1] "Ube2c" "Lbr" "Ctcf" "Cdc20" "Cbx5" "Kif11" "Anp32e"
## [8] "Birc5" "Cdk1" "Tmpo" "Hmmr" "Aurkb" "Top2a" "Gtse1"
## [15] "Rangap1" "Cdca3" "Ndc80" "Kif20b" "Cenpf" "Nek2" "Nuf2"
## [22] "Nusap1" "Bub1" "Tpx2" "Aurka" "Ect2" "Cks1b" "Kif2c"
## [29] "Cdca8" "Cenpa" "Mki67" "Ccnb2" "Kif23" "Smc4" "G2e3"
## [36] "Tubb4b" "Anln" "Tacc3" "Dlgap5" "Ckap2" "Ncapd2" "Ttk"
## [43] "Ckap5" "Cdc25c" "Hjurp" "Cenpe" "Ckap2l" "Cdca2" "Hmgb2"
## [50] "Cks2" "Psrc1" "Gas2l3"
SNL_CCSP_tumor <- CellCycleScoring(SNL_CCSP_tumor, s.features = s.genes.mouse, g2m.features = g2m.genes.mouse)
DimPlot(SNL_CCSP_tumor, reduction = "umap", group.by = "Phase", pt.size=.2)+ggtitle("Tumor cells by Cell Cycle Phase")
Does not look like it. Let’s take a more quantitative look…
proportions <- as.data.frame(100*prop.table(x = table(SNL_CCSP_tumor$seurat_clusters, SNL_CCSP_tumor$Phase), margin = 2))
colnames(proportions)<-c("Cluster", "Sample", "Frequency")
library(ggpubr)
ggbarplot(proportions, x="Sample", y="Frequency", fill = "Sample", color="black",
group = "Sample", ylab = "Frequency (percent)", xlab="",
# palette = sample_cols
)+
theme_bw()+
facet_wrap(facets = "Cluster", scales="free_y", ncol =4)+
theme(axis.text.y = element_text(size=12)) +
rotate_x_text(angle = 45)
Clusters aren’t being determined by cell cycle phase. Moving on to DEG analyses.
proportions <- as.data.frame(100*prop.table(x = table(SNL_CCSP_tumor$seurat_clusters, SNL_CCSP_tumor$tx), margin = 2))
colnames(proportions)<-c("Cluster", "Sample", "Frequency")
library(ggpubr)
ggbarplot(proportions, x="Sample", y="Frequency", fill = "Sample", color="black",
group = "Sample", ylab = "Frequency (percent)", xlab="",
# palette = sample_cols
)+
theme_bw()+
facet_wrap(facets = "Cluster", scales="free_y", ncol =4)+
theme(axis.text.y = element_text(size=12)) +
rotate_x_text(angle = 45)
Downsampling to 100 cells per cluster, and top 50 genes per cluster. To see all, see excel file.
Cluster.Markers <- FindAllMarkers(SNL_CCSP_tumor, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
test<-as.data.frame(Cluster.Markers)
write.csv(test, "~/SNL CCSP Tumor Cluster Marker Genes.csv")
top100 <- Cluster.Markers %>% group_by(cluster) %>% top_n(n = 50, wt = avg_log2FC)
top100_names<-top100$gene
DoHeatmap(subset(SNL_CCSP_tumor, downsample = 100), features = top100_names, size = 3) + NoLegend()
DT::datatable(
Cluster.Markers %>%
mutate(gene=rownames(.)) %>%
group_by(cluster) %>%
top_n(n = 50, wt = avg_log2FC) %>%
select(gene, everything())
)
Downsampling to 2000 cells per cluster, and top 100 genes per cluster. To see all, see excel file.
Idents(SNL_CCSP_tumor)<-"tx"
Tx.Markers <- FindAllMarkers(SNL_CCSP_tumor, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster Control
## Calculating cluster CXCR2i
test<-as.data.frame(Tx.Markers)
write.csv(test, "~/Tx.Markers SNL CCSP Tumor cells.csv")
top100 <- Tx.Markers %>% group_by(cluster) %>% top_n(n = 100, wt = avg_log2FC)
top100_names<-top100$gene
DoHeatmap(subset(SNL_CCSP_tumor, downsample = 2000), features = top100_names, size = 3, group.colors=c("orchid4","orange"))
DT::datatable(
Tx.Markers %>%
mutate(gene=rownames(.)) %>%
group_by(cluster) %>%
top_n(n = 100, wt = avg_log2FC) %>%
select(gene, everything())
)
saveRDS(SNL_CCSP_tumor,“/scratch/Aireland/SNL for R01 Analysis/full_processed_SNL_CCSP_tumor.rds”)