Load packages

suppressPackageStartupMessages({ 
  library(BiocFileCache)
  library(scran)
  library(scater)
  library(tidyverse)
  library(SingleCellExperiment)
  library(DropletUtils)
  library(ggplot2)
  library(cowplot)
  library(Seurat)
  library(monocle3)
  library(SeuratObject)
})

Read in all tumor samples, combined from previous analysis.

SNL_All<-readRDS("/scratch/Aireland/SNL for R01 Analysis/SNL_All tumor seurat object QC.rds")

Subset because 15739 experiment is outlier

SNL_Tumor<-subset(SNL_All,idents=c("SNL_CCSP","SNL_CCSP_Tx","SNL_CMV_Tx","SNL_CMV1"))
table(SNL_Tumor@meta.data$Cre)

## 
## CCSP  CMV 
## 8704 6901

table(SNL_Tumor@meta.data$tx)

## 
## Control  CXCR2i 
##    7335    8270

Normalize data

SNL_Tumor <- NormalizeData(SNL_Tumor, normalization.method = "LogNormalize", scale.factor = 10000)
SNL_Tumor

## An object of class Seurat 
## 32454 features across 15605 samples within 1 assay 
## Active assay: RNA (32454 features, 0 variable features)

Identify Highly variable features

SNL_Tumor <- FindVariableFeatures(SNL_Tumor, selection.method = "vst", nfeatures = 2000)

# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(SNL_Tumor), 50)
top10

##  [1] "Sftpc"         "Sftpa1"        "Lyz2"          "Scd1"         
##  [5] "Sftpb"         "Gkn2"          "Gkn1"          "Cxcl15"       
##  [9] "Chil3"         "S100a9"        "Krt15"         "Lcn2"         
## [13] "Slc34a2"       "Wfdc17"        "Spp1"          "Chil1"        
## [17] "Chil4"         "Ccl6"          "Reg3g"         "Ppbp"         
## [21] "S100a8"        "Crct1"         "Cd74"          "Napsa"        
## [25] "Tyrobp"        "Krt14"         "Sprr2h"        "Apoe"         
## [29] "Scgb1a1"       "Scgb3a1"       "Prap1"         "Hp"           
## [33] "Ctss"          "Cym"           "Hc"            "Fcer1g"       
## [37] "Lyz1"          "9130204L05Rik" "Krt17"         "Cxcl3"        
## [41] "AU040972"      "Wfdc12"        "Cxcl2"         "Sprr1a"       
## [45] "Ifitm1"        "Ambp"          "Krtap3-2"      "Rgs13"        
## [49] "AW112010"      "Reg1"

Scale data

all.genes <- rownames(SNL_Tumor)
SNL_Tumor <- ScaleData(SNL_Tumor, features = all.genes)

## Centering and scaling data matrix

Perform linear dimension reduction/ PCA

SNL_Tumor <- RunPCA(SNL_Tumor, features = VariableFeatures(object = SNL_Tumor))

## PC_ 1 
## Positive:  Ptma, Krt8, Lgals4, Fabp5, Tagln2, Tff2, Wfdc2, Agr2, Gnb2l1, Gsta4 
##     Crip1, Gstm1, Prom1, Ftl1, Sepp1, Krt18, Cldn18, Car2, Tff1, Dpcr1 
##     Prdx6, Oat, Ctse, Sftpd, Anxa10, Wbp5, Tmem176b, Il18, Chchd10, Gpx2 
## Negative:  Clca3b, Psca, Gm9573, S100a14, Prss27, Cited2, Plpp1, Nccrp1, Mxd1, 2300002M23Rik 
##     Emp1, Tmprss11g, Ctsb, Irf7, Cdkn2b, Lpin2, Plet1, Arg1, Lgals3, Map2k3 
##     Mal, Gm42418, Slc6a14, Mall, Tcp11l2, Cnfn, Itgam, Ifit1bl1, Isg15, Ppp1r15a 
## PC_ 2 
## Positive:  Dpcr1, Lgals4, Tff1, Anxa10, Agr2, Ctse, 2210407C18Rik, Tff2, Gnb2l1, Phgr1 
##     Il18, Gsta4, Sult1c2, Rgs17, Gsta1, Gm3336, Sptssb, Sepp1, Car2, Krt18 
##     Pglyrp1, Spink4, Csrp2, Prom1, Ccnd2, Hn1, Krt8, Iyd, Aldh3a1, Gmds 
## Negative:  Slc34a2, Sftpb, Napsa, Sftpa1, Lamp3, Lyz2, Hc, Dram1, H2-Aa, Scd1 
##     Lpcat1, Cxcl15, Chil1, Ager, Sftpc, H2-Ab1, Lgi3, Cd74, H2-Eb1, Apoe 
##     Egfl6, Lrp2, Ppp1r14c, Ptgs1, Itih4, Lrrk2, Lcn2, Sfta2, Mme, Mlc1 
## PC_ 3 
## Positive:  Basp1, Tpm2, Tnfrsf11b, Nupr1, Gapdh, Tacstd2, Anxa8, Cldn3, Cldn4, Pmepa1 
##     Srd5a1, Krt20, Serpinb6b, Jpt1, Phlda1, Prss12, Muc4, Cwh43, Fam213a, Rack1 
##     Junb, H2afz, Plat, Tnip3, Adam28, Akr1b8, Dhrs1, Urah, Nectin2, Hk2 
## Negative:  Sepp1, Tcp11l2, Glrx, Lrg1, Gas6, Tff2, Chil1, Dclk1, Klk14, Cxcl15 
##     Car2, Nkd1, Pla2g1b, Rgs13, Cnfn, Ly6g6f, Selm, Napsa, Cfi, Sftpb 
##     Sox9, Espn, Sh2d6, 2300002M23Rik, Hepacam2, Slc34a2, Lamp3, Hc, AU021092, Aqp5 
## PC_ 4 
## Positive:  Slc34a2, Sftpb, Napsa, Lamp3, Cxcl15, Hc, Sftpa1, Sftpc, Lgi3, Ager 
##     Sfta2, Sftpd, Lpcat1, Egfl6, Scd1, Lrp2, Itih4, Mme, Fasn, Wfdc2 
##     Mlc1, Rbpjl, Tspan11, Dram1, Lcn2, Ppp1r14c, Clip4, Etv5, Apoc1, Enpep 
## Negative:  Alox5ap, Ltc4s, Rgs13, Dclk1, Fyb, Ly6g6f, Hck, Sh2d6, Hepacam2, Rac2 
##     Gng13, Rgs2, Cd300lf, Sh2d7, Trpm5, Matk, Pik3r5, Selm, Kctd12, Lrmp 
##     Hpgds, Vav1, Espn, Nrgn, Hmx2, Dpysl2, Ptpn6, Mctp1, Etv1, Bmx 
## PC_ 5 
## Positive:  Lyz2, Fxyd5, Tyrobp, Lgals1, Fcer1g, Ctss, Cd302, Wfdc17, Cd53, Cybb 
##     Vim, Atp6v0d2, Gngt2, Ccl6, Bcl2a1b, Mpeg1, Emp3, Gpnmb, Chil3, Trem2 
##     Lilrb4a, Fabp4, Plek, Apoe, Cd52, Mcemp1, Hebp1, Cd84, Sirpa, Dab2 
## Negative:  Ccdc153, 1110017D15Rik, Dynlrb2, AU040972, Tmem212, Cfap126, Fam183b, Mlf1, Sec14l3, Sntn 
##     1700016K19Rik, Dnah12, 1700007K13Rik, Rsph1, Ccdc113, Riiad1, Calml4, Tctex1d4, Meig1, Tuba1a 
##     Gm867, Efcab10, Arhgdig, Foxj1, 1700001C02Rik, Dnah5, Prr29, Nme5, Cfap45, Ccdc78

# PCA plot of PC1, PC2 
DimPlot(SNL_Tumor, reduction = "pca", group.by='tx')+ggtitle("PCA of all CCSP and CMV cells captured")

DimPlot(SNL_Tumor, reduction = "pca", group.by='Cre')+ggtitle("PCA of all CCSP and CMV cells captured by Cre")

DimPlot(SNL_Tumor, reduction = "pca", group.by='gnomex')+ggtitle("PCA of all CCSP and CMV cells captured by experiment")

Determine the dimensionality for clustering

# Use elbow plot to visualize dimensions that encompass most of variance
ElbowPlot(SNL_Tumor, ndims=50)

# Choosing 25 PCs for dimensionality
SNL_Tumor <- FindNeighbors(SNL_Tumor, dims = 1:25)

## Computing nearest neighbor graph

## Computing SNN

SNL_Tumor<- FindClusters(SNL_Tumor, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 15605
## Number of edges: 513661
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8882
## Number of communities: 15
## Elapsed time: 2 seconds

Run Non-Linear Dimension reduction (tSNE/UMAP)

SNL_Tumor <- RunUMAP(SNL_Tumor, dims = 1:25)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 15:32:24 UMAP embedding parameters a = 0.9922 b = 1.112

## 15:32:24 Read 15605 rows and found 25 numeric columns

## 15:32:24 Using Annoy for neighbor search, n_neighbors = 30

## 15:32:24 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 15:32:27 Writing NN index file to temp file /tmp/RtmpCk0sii/file902035e77a40
## 15:32:27 Searching Annoy index using 1 thread, search_k = 3000
## 15:32:33 Annoy recall = 100%
## 15:32:34 Commencing smooth kNN distance calibration using 1 thread
## 15:32:36 Initializing from normalized Laplacian + noise
## 15:32:37 Commencing optimization for 200 epochs, with 642628 positive edges
## 15:33:00 Optimization finished

# Plot UMAP
DimPlot(SNL_Tumor, reduction = "umap",label=TRUE,pt.size=.1)+ggtitle("All cells by cluster ID")

DimPlot(SNL_Tumor, reduction = "umap", group.by = "tx",label=FALSE,pt.size=0.1)+ggtitle("All cells by treatment")

DimPlot(SNL_Tumor, reduction = "umap", group.by = "Cre",label=FALSE,pt.size=0.1)+ggtitle("All cells by Cre")

DimPlot(SNL_Tumor, reduction = "umap", group.by = "gnomex",label=FALSE,pt.size=0.1)+ggtitle("All cells by experiment")

Many outlying clusters, lets do some cell type ID.

Cell type ID with Dotplot

### Assign cell types per cluster using Dotplots of common cell markers

more_types<-c("GFP","Sox2", "Nkx2-1", "Trp63", "Krt5","Krt13","Krt4",
              "Myc","Yap1","Sox9","Runx2","Sox6","Sp7",
              "Vcam1","Fgf10","Dcn","Clec3b","Fbln1",  # rpma
              "Col14a1", # matrix fibroblast/osteoclastic
              
              "Acta2","Myh11","Tagln","Mustn1", #myofibroblast
              "Lpl","Lipa","Pparg","Plin2","Ear1","Fabp1","Spp1", #lipofibroblast/osteoblastic
              
              "Mertk","Marco","Mrc1","Ly75","Adgre1","Itgax","Cd68","Csf1r","Mafb","Msr1","Arg1","Adgre4","Clec4a1", #Macs/Myeloid
              "Cx3cr1","Itgam","Cd14", #Monocytes
              "S100a9","S100a8","Mmp9","Csf3r","Cxcr2","Ly6g", #Neuts
              "Batf3","Xcr1","Clec9a","Ccl17","Ccl22", #DC
              
              
              "Cd3d","Cd3e","Cd3g","Cd28","Cd8a","Cd4","Foxp3", # Tcell
              
              "Gzma","Ncr1","Gzmb", #NK
              
              "Fcmr","Cd19","Fcer2a","Pax5","Cd22","Cd79b","Cd79a", #B cells
              "Slamf7", "Prdm1","Mzb1", #Plasma
              
              "Mcam","Pecam1","Icam2","Cd36","Cd93", #Endothelial
              
              "Scd1","Lamp3","Sftpb","Sftpa1","Aqp5", #Alveolar
              "Scgb1a1","Cyp2f2","Scgb3a2","Scgb3a1","Lypd2", #club
              "Foxj1","Rfx2","Rfx3","Trp73", #ciliated
              "Muc5b","Muc5ac", #goblet
              "Krt17","Perp", #basal
              "Pou2f3","Ascl2","Rgs13","Il25","Tslp", #tuft
              "Foxi1","Cftr","Ascl3", #ionocyte
              "Epgn","Slc16a14","Grhl1", #Eosinophils
              "Cpa3","Fcer1a","Ms4a2", #Basophils
              "Cma1","Tpsb2", #Mast
              "Cd200r3" #Combined with Mast, Baso, Eosinophil
)

DotPlot(SNL_Tumor,features = more_types, group.by="seurat_clusters")+theme(axis.text.x=element_text(angle=90, hjust=1,size=8), axis.text.y=element_text(hjust=1,size=10))

Cluster 9 is tuft

Cluster 10 is normal AT2/Club cells

Cluster 12 is endothelial cells

Cluster 13 is myeloid/macs

Cluster 14 is ciliated

Subset down to just tumor cell clusters

SNL_Tumor<-subset(SNL_Tumor, idents=c("0","1","2","3","4","5","6", "7","8","11"))

#Check new cell numbers
table(SNL_Tumor@meta.data$tx)

## 
## Control  CXCR2i 
##    7050    8014

table(SNL_Tumor@meta.data$Cre)

## 
## CCSP  CMV 
## 8516 6548

Re-clustering, PCA, UMAP

#Perform linear dimension reduction/ PCA
SNL_Tumor <- RunPCA(SNL_Tumor, features = VariableFeatures(object = SNL_Tumor))

## Warning in PrepDR(object = object, features = features, verbose = verbose): The
## following 86 features requested have zero variance (running reduction without
## them): Osm, Gm21149, Foxf1, Il7r, C3ar1, Clec4a2, Penk, Ccdc121, Sirpb1c, Dpt,
## 1700012B09Rik, Drc7, Rgs22, Jam3, Slamf7, Loxl1, Elmod1, Speer4c, Gm34059,
## Gm21541, Ear6, Gm281, Fcgr4, Cd200r4, Ccr1, Slc36a2, Nek10, Fpr1, Ccdc13, Wif1,
## Dthd1, Fendrr, Zfp474, 4933406B17Rik, Pdgfrb, Mlana, Ptx3, Ms4a4b, Dnah11,
## Col5a2, Tfec, Gm47969, Cd300ld, Gm14461, Syt10, T2, Lrrc43, Fpr2, Gm5483, Wdr95,
## Frmpd2, Zbbx, D830026I12Rik, Cd3d, Gpm6a, Dydc2, Gm13571, AC126280.1, Nkg7,
## 5330413P13Rik, Gm7361, Speer4b, Tbx3os1, Gm28729, Ptpn7, Ptger3, Adcy4, Fam150a,
## Siglecf, Tmem252, Msr1, Scimp, Hgf, Irg1, F630028O10Rik, Gm9733, Klra3, Xrra1,
## Spag11b, Zfp366, Adamts20, Ccbe1, Ankub1, Pramef12, H2-Eb2, Fam181a

## PC_ 1 
## Positive:  Clca3b, Psca, Gm9573, S100a14, Prss27, Cited2, Plpp1, Nccrp1, Mxd1, 2300002M23Rik 
##     Tmprss11g, Emp1, Ctsb, Irf7, Lpin2, Cdkn2b, Plet1, Arg1, Gm42418, Map2k3 
##     Lgals3, Mal, Slc6a14, Mall, Tcp11l2, Itgam, Cnfn, Ifit1bl1, Isg15, Ppp1r15a 
## Negative:  Ptma, Lgals4, Krt8, Fabp5, Tff2, Tagln2, Wfdc2, Agr2, Gsta4, Gnb2l1 
##     Prom1, Gstm1, Krt18, Dpcr1, Tff1, Sepp1, Crip1, Car2, Ctse, Cldn18 
##     Ftl1, Oat, Anxa10, Prdx6, Sftpd, Wbp5, Il18, Gpx2, 2210407C18Rik, Ccnd2 
## PC_ 2 
## Positive:  Basp1, Tpm2, Tnfrsf11b, Cldn3, Nupr1, Gapdh, Cldn4, Pmepa1, Tacstd2, Srd5a1 
##     Rack1, Jpt1, Anxa8, Muc4, Serpinb6b, Nectin2, Prss12, Cwh43, Phlda1, Selenof 
##     Krt20, Tnip3, Plat, Cebpd, Junb, Hk2, Urah, Rtraf, Adam28, Rex1bd 
## Negative:  Sepp1, Tcp11l2, Glrx, Tff2, Car2, Cnfn, Klk14, Gas6, Gstm1, 2300002M23Rik 
##     Cfi, Plpp1, Pbp2, Lrg1, AU021092, Gm44275, Dusp14, Sult1c2, Csta1, Gbp4 
##     Crybg2, Cnst, Pla2g1b, Spns3, Gm3336, Nccrp1, Fam84a, Ihh, Klk13, Dmbt1 
## PC_ 3 
## Positive:  Birc5, Ube2c, Cenpa, 2810417H13Rik, Cdca3, Mki67, Cenpf, Ccna2, Cdca8, Tpx2 
##     Ccnb2, Stmn1, Cdc20, Knstrn, Hmmr, Pbk, Racgap1, Cenpe, Cks2, Top2a 
##     Ccnb1, Lockd, Cdkn3, Prc1, Cdk1, Spc24, Nusap1, Spc25, Ckap2, Cenpm 
## Negative:  5330417C22Rik, Cbr2, mt-Nd2, AC149090.1, Btg2, Rack1, Lrg1, Pmepa1, Selenop, Ces1d 
##     Cfi, Chad, Igfbp5, Gda, Ces1f, Tff2, Cebpd, Muc4, Txnip, Gas6 
##     Lars2, Car8, Selenof, Vegfa, Nectin2, Mettl26, mt-Nd3, Sox2, Mfsd4a, Pla2g1b 
## PC_ 4 
## Positive:  Pkib, Tm4sf1, Eif4ebp1, Mthfd2, Areg, Tnfrsf12a, Fst, Serpinb9, Il33, Rhoc 
##     Ffar4, Phgdh, Asns, Ly6g6c, Plaur, Gldc, Zfos1, Ptrh1, 8430408G22Rik, Dhrs1 
##     Msln, Anxa10, F3, Ugp2, Tgif1, H2afz, Serpinb6b, Upk3bl, Krt7, 1810011O10Rik 
## Negative:  Birc5, Mki67, Cenpf, Ccna2, Cenpa, Tpx2, Selenop, Ube2c, 2810417H13Rik, Top2a 
##     Cenpe, Rack1, AC149090.1, Gm42418, Cdc20, Racgap1, Cdca8, Cdca3, Hmmr, Pbk 
##     mt-Nd2, Txnip, Ccnb1, Lars2, Nusap1, 5330417C22Rik, mt-Nd3, Atp5o.1, Prc1, Selenof 
## PC_ 5 
## Positive:  Rack1, Selenop, Gm47283, Mettl26, AC149090.1, Tceal9, Selenow, Gm42418, Pdia4, Cenpx 
##     Cavin1, Rflnb, Lars2, Atp5o.1, Selenos, Tcim, Selenof, Mrpl58, Kmt5a, Gm26917 
##     AY036118, Erg28, Bex3, Wtip, Mfsd4a, Sqor, Kit, Fam241a, Selenoh, Myof 
## Negative:  Sepp1, Srd5a1, Wbp5, Gnb2l1, Ifit3, Plat, Plet1, Ifit3b, Pla2g1b, Marcksl1 
##     Cxcl3, Akr1b8, Urah, Prss22, Tacstd2, Duoxa2, Vnn1, Ttr, Cd14, Ifi204 
##     Hn1, Sprr2f, Trim15, Ifit1, Ifit1bl1, Tnfrsf11b, Hilpda, Jun, Slfn2, Junb

# PCA plot of PC1, PC2 

DimPlot(SNL_Tumor, reduction = "pca", group.by='tx')+ggtitle("PCA of tumor cells by treatment")

DimPlot(SNL_Tumor, reduction = "pca", group.by='Cre')+ggtitle("PCA of tumor cells by Cre")

# Determine the dimensionality for clustering

# Use elbow plot to visualize dimensions that encompass most of variance
ElbowPlot(SNL_Tumor, ndims=50)

# shows ~15 dims captures most of variance

Run Non-Linear Dimension reduction (tSNE/UMAP)

SNL_Tumor <- FindNeighbors(SNL_Tumor, dims = 1:15)

## Computing nearest neighbor graph

## Computing SNN

SNL_Tumor<- FindClusters(SNL_Tumor, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 15064
## Number of edges: 482325
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8790
## Number of communities: 11
## Elapsed time: 2 seconds

SNL_Tumor <- RunUMAP(SNL_Tumor, dims = 1:15)

## 15:33:39 UMAP embedding parameters a = 0.9922 b = 1.112

## 15:33:39 Read 15064 rows and found 15 numeric columns

## 15:33:39 Using Annoy for neighbor search, n_neighbors = 30

## 15:33:39 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 15:33:41 Writing NN index file to temp file /tmp/RtmpCk0sii/file902015c2f8a4
## 15:33:41 Searching Annoy index using 1 thread, search_k = 3000
## 15:33:48 Annoy recall = 100%
## 15:33:49 Commencing smooth kNN distance calibration using 1 thread
## 15:33:52 Initializing from normalized Laplacian + noise
## 15:33:52 Commencing optimization for 200 epochs, with 608568 positive edges
## 15:34:16 Optimization finished

# Plot UMAP
x<-DimPlot(SNL_Tumor, reduction = "umap",label=TRUE,pt.size=.1)+ggtitle("Tumor cells by cluster ID")
y<-DimPlot(SNL_Tumor, reduction = "umap", group.by = "tx",label=FALSE,pt.size=0.1)+ggtitle("Tumor cells by treatment")
z<-DimPlot(SNL_Tumor, reduction = "umap", group.by = "Cre",label=FALSE,pt.size=0.1)+ggtitle("Tumor cells by Cre")
multiplot(x,y,z,cols=3)

Differences are mainly driven by Cre.

Double check cell type ID

DotPlot(SNL_Tumor,features = more_types, group.by="seurat_clusters")+ theme(axis.text.x=element_text(angle=90, hjust=1,size=8), axis.text.y=element_text(hjust=1,size=10))

Looks good, moving on. Cell of origin make it quite different!

What are DEGs between CMV vs CCSP-Initiated?

Do they make sense biologically?

DEGs between CMV and CCSP

Idents(SNL_Tumor)<-"Cre"
Cre.Markers <- FindAllMarkers(SNL_Tumor, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)

## Calculating cluster CCSP

## Calculating cluster CMV

test<-as.data.frame(Cre.Markers)
write.csv(test, "~/Cre.Markers SNL All Tumor cells.csv")

top100 <- Cre.Markers  %>% group_by(cluster) %>% top_n(n = 100, wt = avg_log2FC)
top100_names<-top100$gene
top100_names

##   [1] "Gm42418"       "Clca3b"        "Ecm1"          "Sprr2d"       
##   [5] "Ltf"           "2300002M23Rik" "Ly6a"          "Gm9573"       
##   [9] "Sprr2f"        "Psca"          "Arg1"          "Fth1"         
##  [13] "Cnfn"          "Emp1"          "Slc6a14"       "AY036118"     
##  [17] "Lars2"         "Prss27"        "Ifit1bl1"      "Trp53inp2"    
##  [21] "Cited2"        "Selenok"       "Hopx"          "Nccrp1"       
##  [25] "AA467197"      "S100a14"       "Slc39a4"       "Mxd1"         
##  [29] "Elob"          "Spns2"         "Plat"          "Tmprss11g"    
##  [33] "Krt13"         "Plpp1"         "Lpin2"         "Mal"          
##  [37] "Csta1"         "Glrx"          "Ctsb"          "Prss22"       
##  [41] "Lgals3"        "Srgn"          "Cdkn2b"        "Isg15"        
##  [45] "Plet1"         "AI661453"      "Rack1"         "Map1lc3b"     
##  [49] "Zfand5"        "Upk1b"         "Irf7"          "Map2k3"       
##  [53] "Gadd45b"       "Malat1"        "Klk13"         "Duoxa2"       
##  [57] "Gsto1"         "Malt1"         "Pglyrp1"       "Neat1"        
##  [61] "Trim25"        "Cysrt1"        "Tcp11l2"       "Krt7"         
##  [65] "Timp2"         "Krt80"         "Dusp5"         "Krt6a"        
##  [69] "Mall"          "Calml3"        "Rdh10"         "Slc16a11"     
##  [73] "Arid5b"        "Gbp4"          "Herpud1"       "Epha2"        
##  [77] "Prss8"         "Hist1h2bc"     "Tuba1c"        "Itgam"        
##  [81] "Txnrd1"        "Irf1"          "Tspan1"        "Fam84a"       
##  [85] "Dap"           "Ndrg1"         "Mbp"           "Prdx5"        
##  [89] "Elf3"          "Spns3"         "Sem1"          "Gde1"         
##  [93] "Dusp1"         "Ppp1r15a"      "Pmaip1"        "Tmbim4"       
##  [97] "Jdp2"          "Ttc9"          "Hist1h1c"      "S100a10"      
## [101] "Rpl13a"        "Tff1"          "Sepp1"         "Gkn1"         
## [105] "Tff2"          "Gnb2l1"        "Gkn2"          "Agr2"         
## [109] "Rps6"          "Rps19"         "2210407C18Rik" "Fabp5"        
## [113] "Rpl3"          "Rps18"         "Cldn18"        "Tceb2"        
## [117] "Rpl14"         "Prdx1"         "Crip1"         "Rps17"        
## [121] "Dpcr1"         "Cd63"          "H2afz"         "Rpl4"         
## [125] "Ppia"          "Smim6"         "Rpl31"         "Clu"          
## [129] "Shfm1"         "Ptma"          "Car2"          "Tmsb10"       
## [133] "Lgals4"        "Rps4x"         "Rpl36a"        "Wfdc1"        
## [137] "Rps5"          "Rpl32"         "Rpl35"         "Tm4sf1"       
## [141] "Selk"          "Rps15a"        "Gstm1"         "Rpl18a"       
## [145] "Rps23"         "Rps25"         "Rpl10"         "Tagln2"       
## [149] "Rpl36al"       "Spink4"        "Gsta4"         "Rps14"        
## [153] "Cxcl17"        "Rps15"         "Rpl22l1"       "Wbp5"         
## [157] "Rpl37a"        "Rps11"         "Rplp2"         "Rps3"         
## [161] "Dbi"           "Rps2"          "Il33"          "Atp5o"        
## [165] "Rpl39"         "Rpl8"          "Rps16"         "Eef1a1"       
## [169] "Anxa10"        "Sep15"         "Rpl13"         "Wfdc2"        
## [173] "Rpl23a"        "Eef1g"         "Krt8"          "Xist"         
## [177] "Rpl26"         "Rps8"          "Cox6a1"        "Slc25a5"      
## [181] "Uqcrb"         "Rps3a1"        "Lrg1"          "Rpl10a"       
## [185] "Rps27l"        "Gsta1"         "Rps27"         "Rps27a"       
## [189] "Aqp5"          "Rpl21"         "Rpl37"         "Rps28"        
## [193] "Rpl35a"        "Rpl11"         "Gas6"          "Krt18"        
## [197] "Dynll1"        "Gltscr2"       "Phgr1"         "Tspan8"

DoHeatmap(subset(SNL_Tumor, downsample = 3000), features = top100_names, size = 3)

Visualize top 100 Marker genes in CMV vs CCSP initated tumor cells

DT::datatable(
  Cre.Markers %>%
    mutate(gene=rownames(.)) %>%
    group_by(cluster) %>%
    top_n(n = 100, wt = avg_log2FC) %>%
    select(gene, everything())
  
)

Quick save to return

saveRDS(SNL_Tumor,“/scratch/Aireland/SNL for R01 Analysis/SNL_All_Tumor processed seurat.rds”)

Now visualize key tumor markers and hillock signatures

library(viridis)

## Loading required package: viridisLite

SNL_tumor<-SNL_Tumor

Add adeno vs mucinous adeno vs squamous lung cancer cell signatures

nsclc <-read.csv(file="~/NSCLC_sigs.csv")
muc_adeno<-nsclc$Mucinous.Adeno

library(hciR)
library(hciRdata)
muc_adeno_mus<-subset(mouse94, mouse94$human_homolog %in% muc_adeno)
muc_adeno_mus<-(muc_adeno_mus$gene_name)
muc_adeno_mus

##  [1] "Mmp11"   "Mnx1"    "Hnf4a"   "Barx1"   "Pitx1"   "Tff1"    "Spdef"  
##  [8] "Col17a1" "Slc15a1" "Tm4sf4"  "Car9"    "Spp1"    "Ern2"    "B3gnt3" 
## [15] "Barx2"   "Tmprss4" "Gcnt3"   "Rhov"    "Krt17"   "Entpd8"  "Muc5ac" 
## [22] "Dnajc22" "Col10a1" "Foxa3"   "Mmp1b"   "Gpx2"    "Mmp1a"   "Bcl2l15"
## [29] "Gjb2"    "C2cd4a"  "Mmp12"   "Fam83a"  "Atp10b"  "Fut2"    "Muc5b"  
## [36] "Tmem28"  "B3gnt6"  "Grem1"

#
SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(muc_adeno_mus),
  name = 'Mucinous_Adeno_Signature')

luad<-nsclc$Adeno[1:10]
luad_mus<-subset(mouse94, mouse94$human_homolog %in% luad)
luad_mus<-(luad_mus$gene_name)
luad_mus

## [1] "Nkx2-1"  "Gpr39"   "Rorc"    "Tmc5"    "Abcc6"   "Atp11a"  "Limch1" 
## [8] "Tmem125" "Sfta2"

lscc<-nsclc$Squamous[1:10]
lscc_mus<-subset(mouse94, mouse94$human_homolog %in% lscc)
lscc_mus<-(lscc_mus$gene_name)
lscc_mus

## [1] "Trp63"     "Dst"       "Clca2"     "Serpinb13" "Dsg3"      "Dsc3"     
## [7] "Krt5"      "Serpinb5"

#
SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(muc_adeno_mus),
  name = 'Mucinous_Adeno_Signature')

SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(luad_mus),
  name = 'LUAD_Signature')

SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(lscc_mus),
  name = 'LSCC_Signature')

Plot NSCLC signatures

x<-FeaturePlot(SNL_tumor, features = c("Mucinous_Adeno_Signature1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Mucinous Adeno Signature")

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

y<-FeaturePlot(SNL_tumor, features = c("LUAD_Signature1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("LUAD Signature")

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

z<-FeaturePlot(SNL_tumor, features = c("LSCC_Signature1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Squamous Cell Signature")

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

multiplot(x,y,z,cols=3)

The Mucinous adeno signature is great. Ref is Guo et al., EMBO Mol Med, 2017. “Gene signature driving invasive mucinous adenocarcinoma of the lung”, Corresponding author = Yutaka Maeda (includes Snyder mucinoous adeno signature among others).

Need better signatures for LUAD and LSCC (work in progress).

Assign Hillock gene signatures

hillock <-read.csv(file="~/hillock_sigs_011021.csv")
hillock_montoro<-hillock$Hillock.Montoro.Mouse
hillock_plasschaert<-hillock$Hillock.Plasschaert.Mouse
hillock_combined<-hillock$Overlap.Mouse

hillock_combined<-hillock_combined[1:65]
hillock_plasschaert<-hillock_plasschaert[1:139]
hillock_combined

##  [1] "2200002D01Rik" "2300002M23Rik" "Adh7"          "Ahnak"        
##  [5] "Alcam"         "Alox12e"       "Alox15"        "Anxa1"        
##  [9] "Anxa2"         "Anxa3"         "Aqp5"          "Calml3"       
## [13] "Capg"          "Cav1"          "Cd44"          "Ces1d"        
## [17] "Ces1h"         "Clca3b"        "Cldn3"         "Clic3"        
## [21] "Cwh43"         "Ecm1"          "Esd"           "Fam129b"      
## [25] "Ffar4"         "Foxq1"         "Gabrp"         "Gprc5a"       
## [29] "Gsta4"         "Kctd14"        "Krt13"         "Krt15"        
## [33] "Krt19"         "Krt4"          "Krt7"          "Lgals3"       
## [37] "Lmo7"          "Ltf"           "Ly6g6c"        "Ly6g6e"       
## [41] "Mal"           "Mall"          "Mfge8"         "Muc4"         
## [45] "Pdlim1"        "Pdlim2"        "Pdzk1ip1"      "Pllp"         
## [49] "Pmm1"          "Pmp22"         "Porcn"         "Ppap2c"       
## [53] "Ptgr1"         "Ptgs2"         "S100a14"       "S100a6"       
## [57] "S100g"         "Serpinb2"      "Socs2"         "St3gal4"      
## [61] "Tacstd2"       "Tspo"          "Ttc36"         "Upk1b"        
## [65] "Upk3bl"

#
SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(hillock_montoro),
  name = 'hillock_montoro')

SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(hillock_plasschaert),
  name = 'hillock_plasschaert')

SNL_tumor<-AddModuleScore(
  object = SNL_tumor,
  features = list(hillock_combined),
  name = 'hillock_combined')

Plasschaert and Montoro signatures overlap by ~48% or 65 genes. Use of the overlapping/conserved hillock gene marker signature is justified.

Plot hillock signatures

x<-FeaturePlot(SNL_tumor, features = c("hillock_montoro1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Hillock Montoro et al")

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

y<-FeaturePlot(SNL_tumor, features = c("hillock_plasschaert1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Hillock Plasschaert et al")

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

z<-FeaturePlot(SNL_tumor, features = c("hillock_combined1"), pt.size=1)+scale_color_viridis(option="magma",alpha=0.5)+ggtitle("Hillock Overlapping Sig")

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

multiplot(x,y,z,cols=3)

Save for quick restore..

saveRDS(SNL_Tumor,“/scratch/Aireland/SNL for R01 Analysis/SNL_All_Tumor processed seurat.rds”)

010721 All SNL Tumor CXCR2 comparison

Abbie Ireland

1/7/2022

Load packages

Read in all tumor samples, combined from previous analysis.

Subset because 15739 experiment is outlier

Normalize data

Identify Highly variable features

Scale data

Perform linear dimension reduction/ PCA

Determine the dimensionality for clustering

Run Non-Linear Dimension reduction (tSNE/UMAP)

Cell type ID with Dotplot

Subset down to just tumor cell clusters

Re-clustering, PCA, UMAP

Run Non-Linear Dimension reduction (tSNE/UMAP)

Double check cell type ID

What are DEGs between CMV vs CCSP-Initiated?

Do they make sense biologically?

DEGs between CMV and CCSP

Visualize top 100 Marker genes in CMV vs CCSP initated tumor cells

Quick save to return

Now visualize key tumor markers and hillock signatures

Add adeno vs mucinous adeno vs squamous lung cancer cell signatures

Plot NSCLC signatures

Assign Hillock gene signatures

Plot hillock signatures

Save for quick restore..

Here is where we stop for now…