Load packages

suppressPackageStartupMessages({ 
  library(BiocFileCache)
  library(scran)
  library(scater)
  library(tidyverse)
  library(SingleCellExperiment)
  library(DropletUtils)
  library(ggplot2)
  library(cowplot)
  library(Seurat)
  library(monocle3)
  library(SeuratObject)
  library(viridis)
})

Read in previously processed SO of collected CCSP tumor cells.

SNL_CCSP_tumor<-readRDS("/scratch/Aireland/SNL for R01 Analysis/full_processed_SNL_CCSP_tumor.rds")

Add individual mucinous adeno vs hillock signatures for downstream purposes…

### Add hillock cell signature
hillock <-read.csv(file="~/hillock_sigs_011021.csv")
hillock_combined<-hillock$Overlap.Mouse

hillock_combined<-hillock_combined[1:65]
hillock_combined
##  [1] "2200002D01Rik" "2300002M23Rik" "Adh7"          "Ahnak"        
##  [5] "Alcam"         "Alox12e"       "Alox15"        "Anxa1"        
##  [9] "Anxa2"         "Anxa3"         "Aqp5"          "Calml3"       
## [13] "Capg"          "Cav1"          "Cd44"          "Ces1d"        
## [17] "Ces1h"         "Clca3b"        "Cldn3"         "Clic3"        
## [21] "Cwh43"         "Ecm1"          "Esd"           "Fam129b"      
## [25] "Ffar4"         "Foxq1"         "Gabrp"         "Gprc5a"       
## [29] "Gsta4"         "Kctd14"        "Krt13"         "Krt15"        
## [33] "Krt19"         "Krt4"          "Krt7"          "Lgals3"       
## [37] "Lmo7"          "Ltf"           "Ly6g6c"        "Ly6g6e"       
## [41] "Mal"           "Mall"          "Mfge8"         "Muc4"         
## [45] "Pdlim1"        "Pdlim2"        "Pdzk1ip1"      "Pllp"         
## [49] "Pmm1"          "Pmp22"         "Porcn"         "Ppap2c"       
## [53] "Ptgr1"         "Ptgs2"         "S100a14"       "S100a6"       
## [57] "S100g"         "Serpinb2"      "Socs2"         "St3gal4"      
## [61] "Tacstd2"       "Tspo"          "Ttc36"         "Upk1b"        
## [65] "Upk3bl"
SNL_CCSP_tumor<-AddModuleScore(
  object = SNL_CCSP_tumor,
  features = list(hillock_combined),
  name = 'hillock_combined')

# Add mucinous adeno signature
nsclc <-read.csv(file="~/NSCLC_sigs.csv")
muc_adeno<-nsclc$Mucinous.Adeno

library(hciR)
library(hciRdata)
muc_adeno_mus<-subset(mouse94, mouse94$human_homolog %in% muc_adeno)
muc_adeno_mus<-(muc_adeno_mus$gene_name)
muc_adeno_mus
##  [1] "Mmp11"   "Mnx1"    "Hnf4a"   "Barx1"   "Pitx1"   "Tff1"    "Spdef"  
##  [8] "Col17a1" "Slc15a1" "Tm4sf4"  "Car9"    "Spp1"    "Ern2"    "B3gnt3" 
## [15] "Barx2"   "Tmprss4" "Gcnt3"   "Rhov"    "Krt17"   "Entpd8"  "Muc5ac" 
## [22] "Dnajc22" "Col10a1" "Foxa3"   "Mmp1b"   "Gpx2"    "Mmp1a"   "Bcl2l15"
## [29] "Gjb2"    "C2cd4a"  "Mmp12"   "Fam83a"  "Atp10b"  "Fut2"    "Muc5b"  
## [36] "Tmem28"  "B3gnt6"  "Grem1"
#
SNL_CCSP_tumor<-AddModuleScore(
  object = SNL_CCSP_tumor,
  features = list(muc_adeno_mus),
  name = 'Mucinous_Adeno_Signature')


Idents(SNL_CCSP_tumor)<-"seurat_clusters"

x<-DimPlot(SNL_CCSP_tumor, reduction = "umap",label=TRUE,pt.size=.1)+ggtitle("CCSP Tumor Cells by Cluster")
y<-DimPlot(SNL_CCSP_tumor, reduction = "umap", group.by = "tx",label=FALSE,pt.size=0.1)+ggtitle("CCSP Tumor Cells by Treatment")

multiplot(x,y,cols=2)

Convert Seurat object to Monocle3 CellDataSet (CDS)

my.so<-SNL_CCSP_tumor

# Project PC dimensions to whole data set
my.so <- ProjectDim(my.so, reduction = "pca")
## PC_ 1 
## Positive:  Ecm1, Clca3b, Fth1, Ly6a, Ltf, Psca, Gsto1, Map1lc3b, Gm9573, Cited2 
##     Slc39a4, Trp53inp2, Spns2, Ctsb, Plpp1, S100a14, AA467197, Gde1, Prss8, AI661453 
## Negative:  Krt8, Ptma, Cd63, Rpsa, Rpl3, Ppia, Lgals4, Rps18, Eef1a1, Dbi 
##     Fabp5, Prom1, Atp1a1, Rps2, Tagln2, Rpl12, Oat, Rps8, Agr2, Prdx1 
## PC_ 2 
## Positive:  Tff2, Selenop, Ern2, Agr2, Prom1, Ces1f, Lgals4, Ctse, Xist, Vsig1 
##     Cfi, Galnt6, Muc16, Paqr5, 5330417C22Rik, Zfp704, Tff1, Gstm1, Fabp5, Reg3g 
## Negative:  Tpm2, Cldn3, Tnfrsf11b, Tacstd2, Cldn4, Srd5a1, Gapdh, Urah, Cldn7, Anxa8 
##     Nupr1, Capg, Plat, Serpinb6b, Akr1b8, Arpc1b, Cwh43, H2afz, Aldoa, Tnip3 
## PC_ 3 
## Positive:  Ccdc153, Sec14l3, Tmem212, Enkur, 1110017D15Rik, Lrrc10b, Rsph1, Dnah12, Gm19935, Cfap126 
##     Cfap206, Mlf1, Mns1, Dnah9, Mapk15, Ccdc113, Tppp3, Gm867, Foxj1, Ccdc39 
## Negative:  Tff1, Ctse, Dpcr1, Anxa10, Lgals4, Clu, Rgs17, Vsig1, Fabp5, Cldn18 
##     Fut2, Vsig2, Ptprn2, Phgr1, Prss32, Ces1f, Tesc, Paqr5, Oit1, Pla2g10 
## PC_ 4 
## Positive:  Lgals3, Cnfn, Cysrt1, Krt4, Klk13, Gadd45b, Crct1, Krt14, Krt7, Klk10 
##     Krt13, Csta1, Ly6g6c, S100a10, Klk14, S100a14, Dmkn, Tcp11l2, S100a7a, 2300002M23Rik 
## Negative:  Pigr, Srd5a1, Muc4, Muc1, Myo1b, Basp1, Tnfrsf11b, Tmc5, Plat, Cd47 
##     Btg2, Pmepa1, Marcksl1, Gfpt1, Vtcn1, Rrbp1, Aplp2, Cyba, Cldn3, Galnt7 
## PC_ 5 
## Positive:  Cbr2, Ces1d, Tmem176b, Mgst1, Tmem176a, Cyp2f2, Chad, Hp, Ldhb, Cp 
##     Lrg1, Gsta3, 5330417C22Rik, Selenbp1, Ifitm3, Rassf9, Car8, Gas6, Gabrp, Wnt4 
## Negative:  Dpcr1, Gldc, Tff1, Cldn18, Il33, Ndnf, Cftr, B4galt6, Fst, Avil 
##     Areg, Kit, Anxa10, Bok, Emp3, Ereg, Bmyc, Krt20, Gcnt1, Epop

Form CDS

# Create an expression matrix
expression_matrix <- my.so@assays$RNA@counts

# Get cell metadata
cell_metadata <- my.so@meta.data
if (all.equal(colnames(expression_matrix), rownames(cell_metadata))) {
  print(sprintf("Cell identifiers match"))
} else {
  print(sprintf("Cell identifier mismatch - %i cells in expression matrix, %i cells in metadata",
                ncol(expression_matrix), nrow(cell_metadata)))
  print("If the counts are equal, sort differences will throw this error")
}
## [1] "Cell identifiers match"
# get gene annotations
gene_annotation <- data.frame(gene_short_name = rownames(my.so@assays$RNA), row.names = rownames(my.so@assays$RNA))
if (all.equal(rownames(expression_matrix), rownames(gene_annotation))) {
  print(sprintf("Gene identifiers all match"))
} else {
  print(sprintf("Gene identifier mismatch - %i genes in expression matrix, %i gene in gene annotation",
                nrow(expression_matrix), nrow(gene_annotation)))
  print("If the counts are equal, sort differences will throw this error")
}
## [1] "Gene identifiers all match"
# Seurat-derived CDS
my.cds <- new_cell_data_set(expression_matrix,
                            cell_metadata = cell_metadata,
                            gene_metadata = gene_annotation)

Transfer Seurat embeddings and cluster infoto get same UMAP

reducedDim(my.cds, type = "PCA") <- my.so@reductions$pca@cell.embeddings 
my.cds@preprocess_aux$prop_var_expl <- my.so@reductions$pca@stdev
plot_pc_variance_explained(my.cds)

my.cds@int_colData@listData$reducedDims$UMAP <- my.so@reductions$umap@cell.embeddings
plot_cells(my.cds)
## No trajectory to plot. Has learn_graph() been called yet?
## cluster not found in colData(cds), cells will not be colored
## cluster_cells() has not been called yet, can't color cells by cluster

# Copy cluster info from Seurat
my.cds@clusters$UMAP_so$clusters <- my.so@meta.data$gt_tp_cell_type_integrated_.0.9
my.cds <- cluster_cells(my.cds, reduction_method = "UMAP", resolution=1e-3)

Plot

x<-DimPlot(my.so, reduction = "umap")+ggtitle("Tumor cells by Seurat Cluster")
y<-plot_cells(my.cds, color_cells_by = "cluster", label_cell_groups=FALSE)+ggtitle("Tumor cells by Monocle Cluster")
## No trajectory to plot. Has learn_graph() been called yet?
multiplot(x,y, cols=2)

#Assign partition

plot_cells(my.cds, color_cells_by = "partition", group_label_size = 3.5)+ggtitle("Monocle Partition")
## No trajectory to plot. Has learn_graph() been called yet?

Learn graph (predict differentiation/progression pathways)

my.cds <- learn_graph(my.cds,use_partition=TRUE)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
plot_cells(my.cds,
           group_cells_by="cluster", color_cells_by="tx",
           label_groups_by_cluster=FALSE, label_cell_groups=FALSE,
           label_leaves=FALSE,
           label_branch_points=TRUE,cell_size = 1, label_roots=TRUE,
           group_label_size = 4,alpha=0.3)+scale_discrete_manual(aesthetics = c("colour", "fill"),values=c("darkorchid4","orange","turquoise"))
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

plot_cells(my.cds,
           group_cells_by="cluster", color_cells_by="tx",
           label_groups_by_cluster=FALSE, label_cell_groups=FALSE,
           label_principal_points = TRUE,
           group_label_size = 4,alpha=0.3)+scale_discrete_manual(aesthetics = c("colour", "fill"),values=c("darkorchid4","orange","turquoise"))

Order in pseudotime

Can use biological knowledge that start in basal cells… or experimental knowledge that we are studying how treatment changes control tissue.. so we start at control

Starting at control nodes.

my.cds <- order_cells(my.cds,root_pr_nodes=c("Y_43","Y_158"))

library(viridis)
plot_cells(my.cds,
           color_cells_by = "pseudotime",
           label_cell_groups=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE, label_roots = FALSE,
           graph_label_size=1.5, cell_size = 1,alpha=0.4)+scale_color_viridis(option="turbo") + ggtitle("Pseudotime")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

We know that CCSP tumors start in club cells.

Let’s look at markers of club cells.

plot_cells(my.cds, genes=c("Scgb1a1","Cyp2f2","Scgb3a2","Scgb3a1","Lypd2","H3","Upk3a","Nkx2-1","Sftpb"),
           label_cell_groups=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE,
           graph_label_size=1.5, cell_size = .8,alpha=0.5)+scale_color_viridis(option="viridis")+ggtitle("Club cell markers")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Looks like connecting branch, cluster 7 is most club like.

Look at hillock state vs mucinous adeno state in pseudotime…

x<-plot_cells(my.cds,
           color_cells_by = "hillock_combined1",
           label_cell_groups=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE, label_roots = FALSE,
           graph_label_size=1.5, cell_size = 1,alpha=0.7)+scale_color_viridis(option="magma") + ggtitle("Hillock signature")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
y<-plot_cells(my.cds,
           color_cells_by = "Mucinous_Adeno_Signature1",
           label_cell_groups=FALSE,
           label_leaves=FALSE,
           label_branch_points=FALSE, label_roots = FALSE,
           graph_label_size=1.5, cell_size = 1,alpha=0.7)+scale_color_viridis(option="magma") + ggtitle("Mucinous adeno signature")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
multiplot(x,y,cols=2)

Conclusions…

At time of treatment, we have one state that is high in hillock, one state that is high in mucinous adeno Both converge to a state reminiscent of club cell of origin

Let’s take a more unbiased approach …. (still a work in progress)