Load packages

suppressPackageStartupMessages({ 
  library(BiocFileCache)
  library(scran)
  library(scater)
  library(tidyverse)
  library(SingleCellExperiment)
  library(DropletUtils)
  library(ggplot2)
  library(cowplot)
  library(Seurat)
  library(monocle3)
  library(SeuratObject)
})

Read in all tumor samples, combined from previous analysis.

SNL_All<-readRDS("/scratch/Aireland/SNL for R01 Analysis/SNL_All tumor seurat object QC.rds")

Subset because 15739 experiment is outlier

SNL_Tumor<-subset(SNL_All,idents=c("SNL_CCSP","SNL_CCSP_Tx","SNL_CMV_Tx","SNL_CMV1"))
table(SNL_Tumor@meta.data$Cre)
## 
## CCSP  CMV 
## 8704 6901
table(SNL_Tumor@meta.data$tx)
## 
## Control  CXCR2i 
##    7335    8270

Normalize data

SNL_Tumor <- NormalizeData(SNL_Tumor, normalization.method = "LogNormalize", scale.factor = 10000)
SNL_Tumor
## An object of class Seurat 
## 32454 features across 15605 samples within 1 assay 
## Active assay: RNA (32454 features, 0 variable features)

Identify Highly variable features

SNL_Tumor <- FindVariableFeatures(SNL_Tumor, selection.method = "vst", nfeatures = 2000)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(SNL_Tumor), 50)
top10
##  [1] "Sftpc"         "Sftpa1"        "Lyz2"          "Scd1"         
##  [5] "Sftpb"         "Gkn2"          "Gkn1"          "Cxcl15"       
##  [9] "Chil3"         "S100a9"        "Krt15"         "Lcn2"         
## [13] "Slc34a2"       "Wfdc17"        "Spp1"          "Chil1"        
## [17] "Chil4"         "Ccl6"          "Reg3g"         "Ppbp"         
## [21] "S100a8"        "Crct1"         "Cd74"          "Napsa"        
## [25] "Tyrobp"        "Krt14"         "Sprr2h"        "Apoe"         
## [29] "Scgb1a1"       "Scgb3a1"       "Prap1"         "Hp"           
## [33] "Ctss"          "Cym"           "Hc"            "Fcer1g"       
## [37] "Lyz1"          "9130204L05Rik" "Krt17"         "Cxcl3"        
## [41] "AU040972"      "Wfdc12"        "Cxcl2"         "Sprr1a"       
## [45] "Ifitm1"        "Ambp"          "Krtap3-2"      "Rgs13"        
## [49] "AW112010"      "Reg1"

Scale data

all.genes <- rownames(SNL_Tumor)
SNL_Tumor <- ScaleData(SNL_Tumor, features = all.genes)
## Centering and scaling data matrix

Perform linear dimension reduction/ PCA

SNL_Tumor <- RunPCA(SNL_Tumor, features = VariableFeatures(object = SNL_Tumor))
## PC_ 1 
## Positive:  Ptma, Krt8, Lgals4, Fabp5, Tagln2, Tff2, Wfdc2, Agr2, Gnb2l1, Gsta4 
##     Crip1, Gstm1, Prom1, Ftl1, Sepp1, Krt18, Cldn18, Car2, Tff1, Dpcr1 
##     Prdx6, Oat, Ctse, Sftpd, Anxa10, Wbp5, Tmem176b, Il18, Chchd10, Gpx2 
## Negative:  Clca3b, Psca, Gm9573, S100a14, Prss27, Cited2, Plpp1, Nccrp1, Mxd1, 2300002M23Rik 
##     Emp1, Tmprss11g, Ctsb, Irf7, Cdkn2b, Lpin2, Plet1, Arg1, Lgals3, Map2k3 
##     Mal, Gm42418, Slc6a14, Mall, Tcp11l2, Cnfn, Itgam, Ifit1bl1, Isg15, Ppp1r15a 
## PC_ 2 
## Positive:  Dpcr1, Lgals4, Tff1, Anxa10, Agr2, Ctse, 2210407C18Rik, Tff2, Gnb2l1, Phgr1 
##     Il18, Gsta4, Sult1c2, Rgs17, Gsta1, Gm3336, Sptssb, Sepp1, Car2, Krt18 
##     Pglyrp1, Spink4, Csrp2, Prom1, Ccnd2, Hn1, Krt8, Iyd, Aldh3a1, Gmds 
## Negative:  Slc34a2, Sftpb, Napsa, Sftpa1, Lamp3, Lyz2, Hc, Dram1, H2-Aa, Scd1 
##     Lpcat1, Cxcl15, Chil1, Ager, Sftpc, H2-Ab1, Lgi3, Cd74, H2-Eb1, Apoe 
##     Egfl6, Lrp2, Ppp1r14c, Ptgs1, Itih4, Lrrk2, Lcn2, Sfta2, Mme, Mlc1 
## PC_ 3 
## Positive:  Basp1, Tpm2, Tnfrsf11b, Nupr1, Gapdh, Tacstd2, Anxa8, Cldn3, Cldn4, Pmepa1 
##     Srd5a1, Krt20, Serpinb6b, Jpt1, Phlda1, Prss12, Muc4, Cwh43, Fam213a, Rack1 
##     Junb, H2afz, Plat, Tnip3, Adam28, Akr1b8, Dhrs1, Urah, Nectin2, Hk2 
## Negative:  Sepp1, Tcp11l2, Glrx, Lrg1, Gas6, Tff2, Chil1, Dclk1, Klk14, Cxcl15 
##     Car2, Nkd1, Pla2g1b, Rgs13, Cnfn, Ly6g6f, Selm, Napsa, Cfi, Sftpb 
##     Sox9, Espn, Sh2d6, 2300002M23Rik, Hepacam2, Slc34a2, Lamp3, Hc, AU021092, Aqp5 
## PC_ 4 
## Positive:  Slc34a2, Sftpb, Napsa, Lamp3, Cxcl15, Hc, Sftpa1, Sftpc, Lgi3, Ager 
##     Sfta2, Sftpd, Lpcat1, Egfl6, Scd1, Lrp2, Itih4, Mme, Fasn, Wfdc2 
##     Mlc1, Rbpjl, Tspan11, Dram1, Lcn2, Ppp1r14c, Clip4, Etv5, Apoc1, Enpep 
## Negative:  Alox5ap, Ltc4s, Rgs13, Dclk1, Fyb, Ly6g6f, Hck, Sh2d6, Hepacam2, Rac2 
##     Gng13, Rgs2, Cd300lf, Sh2d7, Trpm5, Matk, Pik3r5, Selm, Kctd12, Lrmp 
##     Hpgds, Vav1, Espn, Nrgn, Hmx2, Dpysl2, Ptpn6, Mctp1, Etv1, Bmx 
## PC_ 5 
## Positive:  Lyz2, Fxyd5, Tyrobp, Lgals1, Fcer1g, Ctss, Cd302, Wfdc17, Cd53, Cybb 
##     Vim, Atp6v0d2, Gngt2, Ccl6, Bcl2a1b, Mpeg1, Emp3, Gpnmb, Chil3, Trem2 
##     Lilrb4a, Fabp4, Plek, Apoe, Cd52, Mcemp1, Hebp1, Cd84, Sirpa, Dab2 
## Negative:  Ccdc153, 1110017D15Rik, Dynlrb2, AU040972, Tmem212, Cfap126, Fam183b, Mlf1, Sec14l3, Sntn 
##     1700016K19Rik, Dnah12, 1700007K13Rik, Rsph1, Ccdc113, Riiad1, Calml4, Tctex1d4, Meig1, Tuba1a 
##     Gm867, Efcab10, Arhgdig, Foxj1, 1700001C02Rik, Dnah5, Prr29, Nme5, Cfap45, Ccdc78
# PCA plot of PC1, PC2 
DimPlot(SNL_Tumor, reduction = "pca", group.by='tx')+ggtitle("PCA of all CCSP and CMV cells captured")

DimPlot(SNL_Tumor, reduction = "pca", group.by='Cre')+ggtitle("PCA of all CCSP and CMV cells captured by Cre")

DimPlot(SNL_Tumor, reduction = "pca", group.by='gnomex')+ggtitle("PCA of all CCSP and CMV cells captured by experiment")

Determine the dimensionality for clustering

# Use elbow plot to visualize dimensions that encompass most of variance
ElbowPlot(SNL_Tumor, ndims=50)

# Choosing 25 PCs for dimensionality
SNL_Tumor <- FindNeighbors(SNL_Tumor, dims = 1:25)
## Computing nearest neighbor graph
## Computing SNN
SNL_Tumor<- FindClusters(SNL_Tumor, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 15605
## Number of edges: 513661
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8882
## Number of communities: 15
## Elapsed time: 2 seconds

Run Non-Linear Dimension reduction (tSNE/UMAP)

SNL_Tumor <- RunUMAP(SNL_Tumor, dims = 1:25)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 15:32:24 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:32:24 Read 15605 rows and found 25 numeric columns
## 15:32:24 Using Annoy for neighbor search, n_neighbors = 30
## 15:32:24 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:32:27 Writing NN index file to temp file /tmp/RtmpCk0sii/file902035e77a40
## 15:32:27 Searching Annoy index using 1 thread, search_k = 3000
## 15:32:33 Annoy recall = 100%
## 15:32:34 Commencing smooth kNN distance calibration using 1 thread
## 15:32:36 Initializing from normalized Laplacian + noise
## 15:32:37 Commencing optimization for 200 epochs, with 642628 positive edges
## 15:33:00 Optimization finished
# Plot UMAP
DimPlot(SNL_Tumor, reduction = "umap",label=TRUE,pt.size=.1)+ggtitle("All cells by cluster ID")

DimPlot(SNL_Tumor, reduction = "umap", group.by = "tx",label=FALSE,pt.size=0.1)+ggtitle("All cells by treatment")

DimPlot(SNL_Tumor, reduction = "umap", group.by = "Cre",label=FALSE,pt.size=0.1)+ggtitle("All cells by Cre")

DimPlot(SNL_Tumor, reduction = "umap", group.by = "gnomex",label=FALSE,pt.size=0.1)+ggtitle("All cells by experiment")

Many outlying clusters, lets do some cell type ID.

Cell type ID with Dotplot

### Assign cell types per cluster using Dotplots of common cell markers

more_types<-c("GFP","Sox2", "Nkx2-1", "Trp63", "Krt5","Krt13","Krt4",
              "Myc","Yap1","Sox9","Runx2","Sox6","Sp7",
              "Vcam1","Fgf10","Dcn","Clec3b","Fbln1",  # rpma
              "Col14a1", # matrix fibroblast/osteoclastic
              
              "Acta2","Myh11","Tagln","Mustn1", #myofibroblast
              "Lpl","Lipa","Pparg","Plin2","Ear1","Fabp1","Spp1", #lipofibroblast/osteoblastic
              
              "Mertk","Marco","Mrc1","Ly75","Adgre1","Itgax","Cd68","Csf1r","Mafb","Msr1","Arg1","Adgre4","Clec4a1", #Macs/Myeloid
              "Cx3cr1","Itgam","Cd14", #Monocytes
              "S100a9","S100a8","Mmp9","Csf3r","Cxcr2","Ly6g", #Neuts
              "Batf3","Xcr1","Clec9a","Ccl17","Ccl22", #DC
              
              
              "Cd3d","Cd3e","Cd3g","Cd28","Cd8a","Cd4","Foxp3", # Tcell
              
              "Gzma","Ncr1","Gzmb", #NK
              
              "Fcmr","Cd19","Fcer2a","Pax5","Cd22","Cd79b","Cd79a", #B cells
              "Slamf7", "Prdm1","Mzb1", #Plasma
              
              "Mcam","Pecam1","Icam2","Cd36","Cd93", #Endothelial
              
              "Scd1","Lamp3","Sftpb","Sftpa1","Aqp5", #Alveolar
              "Scgb1a1","Cyp2f2","Scgb3a2","Scgb3a1","Lypd2", #club
              "Foxj1","Rfx2","Rfx3","Trp73", #ciliated
              "Muc5b","Muc5ac", #goblet
              "Krt17","Perp", #basal
              "Pou2f3","Ascl2","Rgs13","Il25","Tslp", #tuft
              "Foxi1","Cftr","Ascl3", #ionocyte
              "Epgn","Slc16a14","Grhl1", #Eosinophils
              "Cpa3","Fcer1a","Ms4a2", #Basophils
              "Cma1","Tpsb2", #Mast
              "Cd200r3" #Combined with Mast, Baso, Eosinophil
)

DotPlot(SNL_Tumor,features = more_types, group.by="seurat_clusters")+theme(axis.text.x=element_text(angle=90, hjust=1,size=8), axis.text.y=element_text(hjust=1,size=10))

Cluster 9 is tuft

Cluster 10 is normal AT2/Club cells

Cluster 12 is endothelial cells

Cluster 13 is myeloid/macs

Cluster 14 is ciliated

Subset down to just tumor cell clusters

SNL_Tumor<-subset(SNL_Tumor, idents=c("0","1","2","3","4","5","6", "7","8","11"))

#Check new cell numbers
table(SNL_Tumor@meta.data$tx)
## 
## Control  CXCR2i 
##    7050    8014
table(SNL_Tumor@meta.data$Cre)
## 
## CCSP  CMV 
## 8516 6548

Re-clustering, PCA, UMAP

#Perform linear dimension reduction/ PCA
SNL_Tumor <- RunPCA(SNL_Tumor, features = VariableFeatures(object = SNL_Tumor))
## Warning in PrepDR(object = object, features = features, verbose = verbose): The
## following 86 features requested have zero variance (running reduction without
## them): Osm, Gm21149, Foxf1, Il7r, C3ar1, Clec4a2, Penk, Ccdc121, Sirpb1c, Dpt,
## 1700012B09Rik, Drc7, Rgs22, Jam3, Slamf7, Loxl1, Elmod1, Speer4c, Gm34059,
## Gm21541, Ear6, Gm281, Fcgr4, Cd200r4, Ccr1, Slc36a2, Nek10, Fpr1, Ccdc13, Wif1,
## Dthd1, Fendrr, Zfp474, 4933406B17Rik, Pdgfrb, Mlana, Ptx3, Ms4a4b, Dnah11,
## Col5a2, Tfec, Gm47969, Cd300ld, Gm14461, Syt10, T2, Lrrc43, Fpr2, Gm5483, Wdr95,
## Frmpd2, Zbbx, D830026I12Rik, Cd3d, Gpm6a, Dydc2, Gm13571, AC126280.1, Nkg7,
## 5330413P13Rik, Gm7361, Speer4b, Tbx3os1, Gm28729, Ptpn7, Ptger3, Adcy4, Fam150a,
## Siglecf, Tmem252, Msr1, Scimp, Hgf, Irg1, F630028O10Rik, Gm9733, Klra3, Xrra1,
## Spag11b, Zfp366, Adamts20, Ccbe1, Ankub1, Pramef12, H2-Eb2, Fam181a
## PC_ 1 
## Positive:  Clca3b, Psca, Gm9573, S100a14, Prss27, Cited2, Plpp1, Nccrp1, Mxd1, 2300002M23Rik 
##     Tmprss11g, Emp1, Ctsb, Irf7, Lpin2, Cdkn2b, Plet1, Arg1, Gm42418, Map2k3 
##     Lgals3, Mal, Slc6a14, Mall, Tcp11l2, Itgam, Cnfn, Ifit1bl1, Isg15, Ppp1r15a 
## Negative:  Ptma, Lgals4, Krt8, Fabp5, Tff2, Tagln2, Wfdc2, Agr2, Gsta4, Gnb2l1 
##     Prom1, Gstm1, Krt18, Dpcr1, Tff1, Sepp1, Crip1, Car2, Ctse, Cldn18 
##     Ftl1, Oat, Anxa10, Prdx6, Sftpd, Wbp5, Il18, Gpx2, 2210407C18Rik, Ccnd2 
## PC_ 2 
## Positive:  Basp1, Tpm2, Tnfrsf11b, Cldn3, Nupr1, Gapdh, Cldn4, Pmepa1, Tacstd2, Srd5a1 
##     Rack1, Jpt1, Anxa8, Muc4, Serpinb6b, Nectin2, Prss12, Cwh43, Phlda1, Selenof 
##     Krt20, Tnip3, Plat, Cebpd, Junb, Hk2, Urah, Rtraf, Adam28, Rex1bd 
## Negative:  Sepp1, Tcp11l2, Glrx, Tff2, Car2, Cnfn, Klk14, Gas6, Gstm1, 2300002M23Rik 
##     Cfi, Plpp1, Pbp2, Lrg1, AU021092, Gm44275, Dusp14, Sult1c2, Csta1, Gbp4 
##     Crybg2, Cnst, Pla2g1b, Spns3, Gm3336, Nccrp1, Fam84a, Ihh, Klk13, Dmbt1 
## PC_ 3 
## Positive:  Birc5, Ube2c, Cenpa, 2810417H13Rik, Cdca3, Mki67, Cenpf, Ccna2, Cdca8, Tpx2 
##     Ccnb2, Stmn1, Cdc20, Knstrn, Hmmr, Pbk, Racgap1, Cenpe, Cks2, Top2a 
##     Ccnb1, Lockd, Cdkn3, Prc1, Cdk1, Spc24, Nusap1, Spc25, Ckap2, Cenpm 
## Negative:  5330417C22Rik, Cbr2, mt-Nd2, AC149090.1, Btg2, Rack1, Lrg1, Pmepa1, Selenop, Ces1d 
##     Cfi, Chad, Igfbp5, Gda, Ces1f, Tff2, Cebpd, Muc4, Txnip, Gas6 
##     Lars2, Car8, Selenof, Vegfa, Nectin2, Mettl26, mt-Nd3, Sox2, Mfsd4a, Pla2g1b 
## PC_ 4 
## Positive:  Pkib, Tm4sf1, Eif4ebp1, Mthfd2, Areg, Tnfrsf12a, Fst, Serpinb9, Il33, Rhoc 
##     Ffar4, Phgdh, Asns, Ly6g6c, Plaur, Gldc, Zfos1, Ptrh1, 8430408G22Rik, Dhrs1 
##     Msln, Anxa10, F3, Ugp2, Tgif1, H2afz, Serpinb6b, Upk3bl, Krt7, 1810011O10Rik 
## Negative:  Birc5, Mki67, Cenpf, Ccna2, Cenpa, Tpx2, Selenop, Ube2c, 2810417H13Rik, Top2a 
##     Cenpe, Rack1, AC149090.1, Gm42418, Cdc20, Racgap1, Cdca8, Cdca3, Hmmr, Pbk 
##     mt-Nd2, Txnip, Ccnb1, Lars2, Nusap1, 5330417C22Rik, mt-Nd3, Atp5o.1, Prc1, Selenof 
## PC_ 5 
## Positive:  Rack1, Selenop, Gm47283, Mettl26, AC149090.1, Tceal9, Selenow, Gm42418, Pdia4, Cenpx 
##     Cavin1, Rflnb, Lars2, Atp5o.1, Selenos, Tcim, Selenof, Mrpl58, Kmt5a, Gm26917 
##     AY036118, Erg28, Bex3, Wtip, Mfsd4a, Sqor, Kit, Fam241a, Selenoh, Myof 
## Negative:  Sepp1, Srd5a1, Wbp5, Gnb2l1, Ifit3, Plat, Plet1, Ifit3b, Pla2g1b, Marcksl1 
##     Cxcl3, Akr1b8, Urah, Prss22, Tacstd2, Duoxa2, Vnn1, Ttr, Cd14, Ifi204 
##     Hn1, Sprr2f, Trim15, Ifit1, Ifit1bl1, Tnfrsf11b, Hilpda, Jun, Slfn2, Junb
# PCA plot of PC1, PC2 

DimPlot(SNL_Tumor, reduction = "pca", group.by='tx')+ggtitle("PCA of tumor cells by treatment")

DimPlot(SNL_Tumor, reduction = "pca", group.by='Cre')+ggtitle("PCA of tumor cells by Cre")

# Determine the dimensionality for clustering

# Use elbow plot to visualize dimensions that encompass most of variance
ElbowPlot(SNL_Tumor, ndims=50)

# shows ~15 dims captures most of variance

Run Non-Linear Dimension reduction (tSNE/UMAP)

SNL_Tumor <- FindNeighbors(SNL_Tumor, dims = 1:15)
## Computing nearest neighbor graph
## Computing SNN
SNL_Tumor<- FindClusters(SNL_Tumor, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 15064
## Number of edges: 482325
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8790
## Number of communities: 11
## Elapsed time: 2 seconds
SNL_Tumor <- RunUMAP(SNL_Tumor, dims = 1:15)
## 15:33:39 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:33:39 Read 15064 rows and found 15 numeric columns
## 15:33:39 Using Annoy for neighbor search, n_neighbors = 30
## 15:33:39 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:33:41 Writing NN index file to temp file /tmp/RtmpCk0sii/file902015c2f8a4
## 15:33:41 Searching Annoy index using 1 thread, search_k = 3000
## 15:33:48 Annoy recall = 100%
## 15:33:49 Commencing smooth kNN distance calibration using 1 thread
## 15:33:52 Initializing from normalized Laplacian + noise
## 15:33:52 Commencing optimization for 200 epochs, with 608568 positive edges
## 15:34:16 Optimization finished
# Plot UMAP
x<-DimPlot(SNL_Tumor, reduction = "umap",label=TRUE,pt.size=.1)+ggtitle("Tumor cells by cluster ID")
y<-DimPlot(SNL_Tumor, reduction = "umap", group.by = "tx",label=FALSE,pt.size=0.1)+ggtitle("Tumor cells by treatment")
z<-DimPlot(SNL_Tumor, reduction = "umap", group.by = "Cre",label=FALSE,pt.size=0.1)+ggtitle("Tumor cells by Cre")
multiplot(x,y,z,cols=3)

Differences are mainly driven by Cre.

Double check cell type ID

DotPlot(SNL_Tumor,features = more_types, group.by="seurat_clusters")+ theme(axis.text.x=element_text(angle=90, hjust=1,size=8), axis.text.y=element_text(hjust=1,size=10))

Looks good, moving on. Cell of origin make it quite different!

What are DEGs between CMV vs CCSP-Initiated?

Do they make sense biologically?

DEGs between CMV and CCSP

Idents(SNL_Tumor)<-"Cre"
Cre.Markers <- FindAllMarkers(SNL_Tumor, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster CCSP
## Calculating cluster CMV
test<-as.data.frame(Cre.Markers)
write.csv(test, "~/Cre.Markers SNL All Tumor cells.csv")

top100 <- Cre.Markers  %>% group_by(cluster) %>% top_n(n = 100, wt = avg_log2FC)
top100_names<-top100$gene
top100_names
##   [1] "Gm42418"       "Clca3b"        "Ecm1"          "Sprr2d"       
##   [5] "Ltf"           "2300002M23Rik" "Ly6a"          "Gm9573"       
##   [9] "Sprr2f"        "Psca"          "Arg1"          "Fth1"         
##  [13] "Cnfn"          "Emp1"          "Slc6a14"       "AY036118"     
##  [17] "Lars2"         "Prss27"        "Ifit1bl1"      "Trp53inp2"    
##  [21] "Cited2"        "Selenok"       "Hopx"          "Nccrp1"       
##  [25] "AA467197"      "S100a14"       "Slc39a4"       "Mxd1"         
##  [29] "Elob"          "Spns2"         "Plat"          "Tmprss11g"    
##  [33] "Krt13"         "Plpp1"         "Lpin2"         "Mal"          
##  [37] "Csta1"         "Glrx"          "Ctsb"          "Prss22"       
##  [41] "Lgals3"        "Srgn"          "Cdkn2b"        "Isg15"        
##  [45] "Plet1"         "AI661453"      "Rack1"         "Map1lc3b"     
##  [49] "Zfand5"        "Upk1b"         "Irf7"          "Map2k3"       
##  [53] "Gadd45b"       "Malat1"        "Klk13"         "Duoxa2"       
##  [57] "Gsto1"         "Malt1"         "Pglyrp1"       "Neat1"        
##  [61] "Trim25"        "Cysrt1"        "Tcp11l2"       "Krt7"         
##  [65] "Timp2"         "Krt80"         "Dusp5"         "Krt6a"        
##  [69] "Mall"          "Calml3"        "Rdh10"         "Slc16a11"     
##  [73] "Arid5b"        "Gbp4"          "Herpud1"       "Epha2"        
##  [77] "Prss8"         "Hist1h2bc"     "Tuba1c"        "Itgam"        
##  [81] "Txnrd1"        "Irf1"          "Tspan1"        "Fam84a"       
##  [85] "Dap"           "Ndrg1"         "Mbp"           "Prdx5"        
##  [89] "Elf3"          "Spns3"         "Sem1"          "Gde1"         
##  [93] "Dusp1"         "Ppp1r15a"      "Pmaip1"        "Tmbim4"       
##  [97] "Jdp2"          "Ttc9"          "Hist1h1c"      "S100a10"      
## [101] "Rpl13a"        "Tff1"          "Sepp1"         "Gkn1"         
## [105] "Tff2"          "Gnb2l1"        "Gkn2"          "Agr2"         
## [109] "Rps6"          "Rps19"         "2210407C18Rik" "Fabp5"        
## [113] "Rpl3"          "Rps18"         "Cldn18"        "Tceb2"        
## [117] "Rpl14"         "Prdx1"         "Crip1"         "Rps17"        
## [121] "Dpcr1"         "Cd63"          "H2afz"         "Rpl4"         
## [125] "Ppia"          "Smim6"         "Rpl31"         "Clu"          
## [129] "Shfm1"         "Ptma"          "Car2"          "Tmsb10"       
## [133] "Lgals4"        "Rps4x"         "Rpl36a"        "Wfdc1"        
## [137] "Rps5"          "Rpl32"         "Rpl35"         "Tm4sf1"       
## [141] "Selk"          "Rps15a"        "Gstm1"         "Rpl18a"       
## [145] "Rps23"         "Rps25"         "Rpl10"         "Tagln2"       
## [149] "Rpl36al"       "Spink4"        "Gsta4"         "Rps14"        
## [153] "Cxcl17"        "Rps15"         "Rpl22l1"       "Wbp5"         
## [157] "Rpl37a"        "Rps11"         "Rplp2"         "Rps3"         
## [161] "Dbi"           "Rps2"          "Il33"          "Atp5o"        
## [165] "Rpl39"         "Rpl8"          "Rps16"         "Eef1a1"       
## [169] "Anxa10"        "Sep15"         "Rpl13"         "Wfdc2"        
## [173] "Rpl23a"        "Eef1g"         "Krt8"          "Xist"         
## [177] "Rpl26"         "Rps8"          "Cox6a1"        "Slc25a5"      
## [181] "Uqcrb"         "Rps3a1"        "Lrg1"          "Rpl10a"       
## [185] "Rps27l"        "Gsta1"         "Rps27"         "Rps27a"       
## [189] "Aqp5"          "Rpl21"         "Rpl37"         "Rps28"        
## [193] "Rpl35a"        "Rpl11"         "Gas6"          "Krt18"        
## [197] "Dynll1"        "Gltscr2"       "Phgr1"         "Tspan8"
DoHeatmap(subset(SNL_Tumor, downsample = 3000), features = top100_names, size = 3)

Visualize top 100 Marker genes in CMV vs CCSP initated tumor cells

DT::datatable(
  Cre.Markers %>%
    mutate(gene=rownames(.)) %>%
    group_by(cluster) %>%
    top_n(n = 100, wt = avg_log2FC) %>%
    select(gene, everything())
  
)

Quick save to return

saveRDS(SNL_Tumor,“/scratch/Aireland/SNL for R01 Analysis/SNL_All_Tumor processed seurat.rds”)

Now visualize key tumor markers and hillock signatures

library(viridis)
## Loading required package: viridisLite
SNL_tumor<-SNL_Tumor

Add adeno vs mucinous adeno vs squamous lung cancer cell signatures

nsclc <-read.csv(file="~/NSCLC_sigs.csv")
muc_adeno<-nsclc$Mucinous.Adeno

library(hciR)
library(hciRdata)
muc_adeno_mus<-subset(mouse94, mouse94$human_homolog %in% muc_adeno)
muc_adeno_mus<-(muc_adeno_mus$gene_name)
muc_adeno_mus
##  [1] "Mmp11"   "Mnx1"    "Hnf4a"   "Barx1"   "Pitx1"   "Tff1"    "Spdef"  
##  [8] "Col17a1" "Slc15a1" "Tm4sf4"  "Car9"    "Spp1"    "Ern2"    "B3gnt3" 
## [15] "Barx2"   "Tmprss4" "Gcnt3"   "Rhov"    "Krt17"   "Entpd8"  "Muc5ac" 
## [22] "Dnajc22" "Col10a1" "Foxa3"   "Mmp1b"   "Gpx2"    "Mmp1a"   "Bcl2l15"
## [29] "Gjb2"    "C2cd4a"  "Mmp12"   "Fam83a"  "Atp10b"  "Fut2"    "Muc5b"  
## [36] "Tmem28"  "B3gnt6"  "Grem1"
#
SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(muc_adeno_mus),
  name = 'Mucinous_Adeno_Signature')

luad<-nsclc$Adeno[1:10]
luad_mus<-subset(mouse94, mouse94$human_homolog %in% luad)
luad_mus<-(luad_mus$gene_name)
luad_mus
## [1] "Nkx2-1"  "Gpr39"   "Rorc"    "Tmc5"    "Abcc6"   "Atp11a"  "Limch1" 
## [8] "Tmem125" "Sfta2"
lscc<-nsclc$Squamous[1:10]
lscc_mus<-subset(mouse94, mouse94$human_homolog %in% lscc)
lscc_mus<-(lscc_mus$gene_name)
lscc_mus
## [1] "Trp63"     "Dst"       "Clca2"     "Serpinb13" "Dsg3"      "Dsc3"     
## [7] "Krt5"      "Serpinb5"
#
SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(muc_adeno_mus),
  name = 'Mucinous_Adeno_Signature')

SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(luad_mus),
  name = 'LUAD_Signature')

SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(lscc_mus),
  name = 'LSCC_Signature')

Plot NSCLC signatures

x<-FeaturePlot(SNL_tumor, features = c("Mucinous_Adeno_Signature1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Mucinous Adeno Signature")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
y<-FeaturePlot(SNL_tumor, features = c("LUAD_Signature1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("LUAD Signature")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
z<-FeaturePlot(SNL_tumor, features = c("LSCC_Signature1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Squamous Cell Signature")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
multiplot(x,y,z,cols=3)

The Mucinous adeno signature is great. Ref is Guo et al., EMBO Mol Med, 2017. “Gene signature driving invasive mucinous adenocarcinoma of the lung”, Corresponding author = Yutaka Maeda (includes Snyder mucinoous adeno signature among others).

Need better signatures for LUAD and LSCC (work in progress).

Assign Hillock gene signatures

hillock <-read.csv(file="~/hillock_sigs_011021.csv")
hillock_montoro<-hillock$Hillock.Montoro.Mouse
hillock_plasschaert<-hillock$Hillock.Plasschaert.Mouse
hillock_combined<-hillock$Overlap.Mouse

hillock_combined<-hillock_combined[1:65]
hillock_plasschaert<-hillock_plasschaert[1:139]
hillock_combined
##  [1] "2200002D01Rik" "2300002M23Rik" "Adh7"          "Ahnak"        
##  [5] "Alcam"         "Alox12e"       "Alox15"        "Anxa1"        
##  [9] "Anxa2"         "Anxa3"         "Aqp5"          "Calml3"       
## [13] "Capg"          "Cav1"          "Cd44"          "Ces1d"        
## [17] "Ces1h"         "Clca3b"        "Cldn3"         "Clic3"        
## [21] "Cwh43"         "Ecm1"          "Esd"           "Fam129b"      
## [25] "Ffar4"         "Foxq1"         "Gabrp"         "Gprc5a"       
## [29] "Gsta4"         "Kctd14"        "Krt13"         "Krt15"        
## [33] "Krt19"         "Krt4"          "Krt7"          "Lgals3"       
## [37] "Lmo7"          "Ltf"           "Ly6g6c"        "Ly6g6e"       
## [41] "Mal"           "Mall"          "Mfge8"         "Muc4"         
## [45] "Pdlim1"        "Pdlim2"        "Pdzk1ip1"      "Pllp"         
## [49] "Pmm1"          "Pmp22"         "Porcn"         "Ppap2c"       
## [53] "Ptgr1"         "Ptgs2"         "S100a14"       "S100a6"       
## [57] "S100g"         "Serpinb2"      "Socs2"         "St3gal4"      
## [61] "Tacstd2"       "Tspo"          "Ttc36"         "Upk1b"        
## [65] "Upk3bl"
#
SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(hillock_montoro),
  name = 'hillock_montoro')

SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(hillock_plasschaert),
  name = 'hillock_plasschaert')

SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(hillock_combined),
  name = 'hillock_combined')

Plasschaert and Montoro signatures overlap by ~48% or 65 genes. Use of the overlapping/conserved hillock gene marker signature is justified.

Plot hillock signatures

x<-FeaturePlot(SNL_tumor, features = c("hillock_montoro1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Hillock Montoro et al")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
y<-FeaturePlot(SNL_tumor, features = c("hillock_plasschaert1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Hillock Plasschaert et al")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
z<-FeaturePlot(SNL_tumor, features = c("hillock_combined1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Hillock Overlapping Sig")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
multiplot(x,y,z,cols=3)

Save for quick restore..

saveRDS(SNL_Tumor,“/scratch/Aireland/SNL for R01 Analysis/SNL_All_Tumor processed seurat.rds”)

Here is where we stop for now…