#Loading the Data

library(Seurat)

## Attaching SeuratObject

library(sctransform)
library(MAST)

## Loading required package: SingleCellExperiment

## Loading required package: SummarizedExperiment

## Loading required package: MatrixGenerics

## Loading required package: matrixStats

## 
## Attaching package: 'MatrixGenerics'

## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars

## Loading required package: GenomicRanges

## Loading required package: stats4

## Loading required package: BiocGenerics

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## Loading required package: IRanges

## Loading required package: GenomeInfoDb

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## 
## Attaching package: 'Biobase'

## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians

## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians

## 
## Attaching package: 'SummarizedExperiment'

## The following object is masked from 'package:SeuratObject':
## 
##     Assays

## The following object is masked from 'package:Seurat':
## 
##     Assays

library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ ggplot2 3.4.2     ✔ purrr   1.0.1
## ✔ tibble  3.2.1     ✔ dplyr   1.1.2
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse()   masks IRanges::collapse()
## ✖ dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()      masks matrixStats::count()
## ✖ dplyr::desc()       masks IRanges::desc()
## ✖ tidyr::expand()     masks S4Vectors::expand()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::first()      masks S4Vectors::first()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()     masks S4Vectors::rename()
## ✖ dplyr::slice()      masks IRanges::slice()

library(rcna)
library(openxlsx)
library(HGNChelper)
library(harmony)

## Loading required package: Rcpp

library(glmGamPoi)

## 
## Attaching package: 'glmGamPoi'
## 
## The following object is masked from 'package:dplyr':
## 
##     vars
## 
## The following object is masked from 'package:ggplot2':
## 
##     vars

library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(dplyr)
library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats)

setwd('/hpc/group/patellab/aam131/UW44SliceCultureNew/UW44scx')

slice = readRDS("annotatedUW44Data.RDS")
slice$Time <- factor(slice$Time, levels = c("D0", "D2", "D4","D5","D7","D10","D12","D14","D16"))
new.cluster.ids <- c("Tumor","Tumor","Tumor", "Radial Glial Cells","Tumor", "Microglia","Oligodendrocytes","Microglia", "Neuroepithelial Cells","Astrocytes","Oligodendrocytes","Pericytes","Microglia","Endothelial Cells","Neurons","Astrocytes","Neurons")
names(new.cluster.ids) <- levels(slice)
slice <- RenameIdents(slice, new.cluster.ids)
slice$AnnotedClustedShallow <- Idents(slice)
Idents(slice)<-'Time'
slice7and10<-subset(slice, idents=c('D7','D10'))

#Subsetting the Tumor

Idents(slice7and10)<-'AnnotedClustedShallow'
slice7and10.tumor <- subset(x = slice7and10, idents = c("Tumor"))


slice7and10.tumor <- FindVariableFeatures(object = slice7and10.tumor)
slice7and10.tumor <- RunPCA(object = slice7and10.tumor, verbose = TRUE, npcs = 100)

## PC_ 1 
## Positive:  LRMDA, FRMD4A, MSR1, SLC11A1, KCNMA1, FMN1, RBM47, NKAIN2, LNCAROD, MERTK 
##     LYN, PLCG2, EEPD1, DOCK8, CTSB, CNTNAP4, DSCAM, CPQ, BMP2K, TNS3 
##     FRMD4B, SAT1, SLC16A10, DOCK4, APBB1IP, EPB41L3, MAP7, KYNU, PPP2R2B, ELMO1 
## Negative:  RASGEF1B, CNTNAP2, GPC6, LRRC4C, TSHZ2, ALK, NRG3, NPAS3, FGF14, KCNIP4 
##     DCBLD2, LRFN5, GPR39, IL33, RGS6, PARM1, FAM155A, ADAMTSL1, PCDH7, FLRT2 
##     MMP16, DACH1, EHBP1, ADAMTS16, CACNB2, ROBO1, CADPS, HMGA2, NRCAM, SLIT2 
## PC_ 2 
## Positive:  RASGEF1B, GPC6, ADAMTSL1, TSHZ2, CHI3L1, NEAT1, IL33, NAMPT, HMGA2, DPYD 
##     LRFN5, SLIT2, TENM3, BRINP2, LRMDA, PSD3, PLXDC2, NFKBIZ, KIF26B, AC083837.1 
##     PTX3, DCBLD2, HDAC9, BICC1, SLC39A14, LINC01411, PDZD2, BNC2, AL078604.4, ST6GALNAC5 
## Negative:  NXPH1, CA10, LHFPL3, DMD, LRRTM3, SNTG1, KCND2, DSCAM, SGCZ, CHST9 
##     LRRTM4, DCLK2, DGKB, NRXN3, AQP4-AS1, PCDH15, ERBB4, GRID2, CADM2, DLGAP1 
##     CTNNA3, SOX6, LRP1B, OPHN1, ROBO2, PTPRZ1, AL139383.1, MYO16, LSAMP, CSMD3 
## PC_ 3 
## Positive:  CNTNAP2, ANKS1B, MMP16, LRRC4C, KCNQ5, DCBLD2, ALK, FLRT2, NRG3, CHSY3 
##     NEBL, SLC24A3, AL139383.1, OXR1, GRM7, DACH1, ROBO1, PCDH9, AC069437.1, PDGFRA 
##     MEIS1, TRPM3, PTPRZ1, PLCXD3, IGF2BP2, LDLRAD4, LINC00907, LRMDA, AL008633.1, P3H2 
## Negative:  CHI3L1, RBFOX1, SDK1, DIAPH3, SLIT2, BRINP2, DSCAM, DPYD, SGCZ, RASGEF1B 
##     NAMPT, GALNT17, AC083837.1, CHST9, NRG1, PSD3, APOLD1, GPC6, PDZD2, AQP4-AS1 
##     NTM, GLIS3, ZFPM2, ADAMTSL1, NRXN1, CDH13, LHFPL3, PTX3, BICC1, CACNA2D3 
## PC_ 4 
## Positive:  DIAPH3, HS3ST5, LRRC4C, RBFOX1, SNTG2, HTR4, FGF13, HMCN2, HDAC2-AS2, PDE4D 
##     NEK10, PLCB1, PLCXD3, CACNA2D3, GUCY1A2, HMGA2, APOLD1, SLC4A7, CDH13, BRIP1 
##     KIF18B, DSCAM, POLQ, DGKG, KCNQ5, SLC44A5, ST6GALNAC5, AC007319.1, ASPM, DCBLD2 
## Negative:  RASGEF1B, NAMPT, DCLK2, CHI3L1, SCN1A, SULF1, CNTNAP2, CA10, DPYD, AL139383.1 
##     AC083837.1, CHST9, DMD, AQP4-AS1, LRRTM3, NEAT1, TMEM108, RBMS3, NPAS3, PSD3 
##     FGF14, PDZD2, TSHZ2, IL33, NXPH1, GPC6, LHFPL3, TCF7L2, CACNB2, PRKD1 
## PC_ 5 
## Positive:  RASGEF1B, RGS6, PARM1, NEK10, PLXDC2, GPR39, ADAMTS16, LRRC4C, ALK, EPHA5 
##     GPM6A, SNTG2, TNR, LRFN5, TAFA2, KCNIP4, ABLIM1, SYBU, FGF1, ST8SIA4 
##     AL139231.1, BRINP2, HTR1E, AC124254.2, GRIK2, EFEMP1, COL14A1, PARD3B, EPHA3, SLC4A7 
## Negative:  HMGA2, DIAPH3, DCBLD2, MMP16, SLC44A5, HMGA2-AS1, CSMD1, LHFPL3, PDZRN3, FLRT2 
##     PBX3, NRXN3, SGCZ, HS2ST1, LINC01411, FAM155A, GRIA4, APOLD1, CACNB2, GPC6 
##     ZFPM2, RRM2, BRIP1, GALNT17, KIF18B, KCND2, ASPM, KCNQ5, BTBD11, MDGA2

slice7and10.tumor <- ProjectDim(object = slice7and10.tumor)

## PC_ 1 
## Positive:  LRMDA, FRMD4A, MSR1, SLC11A1, KCNMA1, FMN1, RBM47, NKAIN2, LNCAROD, MERTK 
##     LYN, PLCG2, EEPD1, DOCK8, CTSB, CNTNAP4, DSCAM, CPQ, BMP2K, TNS3 
## Negative:  RASGEF1B, CNTNAP2, GPC6, LRRC4C, TSHZ2, ALK, NRG3, NPAS3, FGF14, KCNIP4 
##     DCBLD2, LRFN5, GPR39, IL33, RGS6, PARM1, FAM155A, ADAMTSL1, PCDH7, FLRT2 
## PC_ 2 
## Positive:  RASGEF1B, GPC6, ADAMTSL1, TSHZ2, CHI3L1, NEAT1, IL33, NAMPT, HMGA2, DPYD 
##     LRFN5, SLIT2, TENM3, BRINP2, LRMDA, PSD3, PLXDC2, NFKBIZ, KIF26B, AC083837.1 
## Negative:  NXPH1, CA10, LHFPL3, DMD, LRRTM3, SNTG1, KCND2, DSCAM, SGCZ, CHST9 
##     LRRTM4, DCLK2, DGKB, NRXN3, AQP4-AS1, PCDH15, ERBB4, GRID2, CADM2, DLGAP1 
## PC_ 3 
## Positive:  CNTNAP2, ANKS1B, MMP16, LRRC4C, KCNQ5, DCBLD2, ALK, FLRT2, NRG3, CHSY3 
##     NEBL, SLC24A3, AL139383.1, OXR1, GRM7, DACH1, ROBO1, PCDH9, AC069437.1, PDGFRA 
## Negative:  CHI3L1, RBFOX1, SDK1, DIAPH3, SLIT2, BRINP2, DSCAM, DPYD, SGCZ, RASGEF1B 
##     NAMPT, GALNT17, AC083837.1, CHST9, NRG1, PSD3, APOLD1, GPC6, PDZD2, AQP4-AS1 
## PC_ 4 
## Positive:  DIAPH3, HS3ST5, LRRC4C, RBFOX1, SNTG2, HTR4, FGF13, HMCN2, HDAC2-AS2, PDE4D 
##     NEK10, PLCB1, PLCXD3, CACNA2D3, GUCY1A2, HMGA2, APOLD1, SLC4A7, CDH13, BRIP1 
## Negative:  RASGEF1B, NAMPT, DCLK2, CHI3L1, SCN1A, SULF1, CNTNAP2, CA10, DPYD, AL139383.1 
##     AC083837.1, CHST9, DMD, AQP4-AS1, LRRTM3, NEAT1, TMEM108, RBMS3, NPAS3, PSD3 
## PC_ 5 
## Positive:  RASGEF1B, RGS6, PARM1, NEK10, PLXDC2, GPR39, ADAMTS16, LRRC4C, ALK, EPHA5 
##     GPM6A, SNTG2, TNR, LRFN5, TAFA2, KCNIP4, ABLIM1, SYBU, FGF1, ST8SIA4 
## Negative:  HMGA2, DIAPH3, DCBLD2, MMP16, SLC44A5, HMGA2-AS1, CSMD1, LHFPL3, PDZRN3, FLRT2 
##     PBX3, NRXN3, SGCZ, HS2ST1, LINC01411, FAM155A, GRIA4, APOLD1, CACNB2, GPC6

slice7and10.tumor <- FindNeighbors(slice7and10.tumor, reduction = "pca", dims = 1:40)

## Computing nearest neighbor graph

## Computing SNN

slice7and10.tumor <- FindClusters(slice7and10.tumor, resolution = 0.5)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3075
## Number of edges: 144181
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7405
## Number of communities: 8
## Elapsed time: 0 seconds

slice7and10.tumor <- RunUMAP(slice7and10.tumor, reduction = "pca", dims = 1:40)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 22:57:54 UMAP embedding parameters a = 0.9922 b = 1.112

## 22:57:54 Read 3075 rows and found 40 numeric columns

## 22:57:54 Using Annoy for neighbor search, n_neighbors = 30

## 22:57:54 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 22:57:54 Writing NN index file to temp file /tmp/RtmpfGXSUW/file3e5ced442940cd
## 22:57:54 Searching Annoy index using 1 thread, search_k = 3000
## 22:57:55 Annoy recall = 100%
## 22:57:55 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 22:57:57 Initializing from normalized Laplacian + noise (using irlba)
## 22:57:57 Commencing optimization for 500 epochs, with 124212 positive edges
## 22:58:00 Optimization finished

DimPlot(slice7and10.tumor,pt.size = 1.5, label=TRUE)+ NoLegend()

DimPlot(slice7and10.tumor,pt.size = 1.5, split.by="Condition")+ NoLegend()

#CNA Analysis of the Tumor Clusters

library(harmony)
library(Seurat)
library(sctransform)
library(MAST)
library(tidyverse)
library(rcna)
library(glmGamPoi)
library(reshape2)
library(dplyr)
library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot) 
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))

## List of 1
##  $ plot.title:List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "plain"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

library(forcats)
library(glue)

## 
## Attaching package: 'glue'

## The following object is masked from 'package:SummarizedExperiment':
## 
##     trim

## The following object is masked from 'package:GenomicRanges':
## 
##     trim

## The following object is masked from 'package:IRanges':
## 
##     trim

library(ggthemes)

## 
## Attaching package: 'ggthemes'

## The following object is masked from 'package:cowplot':
## 
##     theme_map

slice7and10.tumor@meta.data$ConditionNum <- as.numeric(factor(slice7and10.tumor@meta.data$Condition, c('Ctrl', 'Ivosidenib','XRT/TMZ')))
Idents(slice7and10.tumor)<-'Condition'

ivo<-subset(slice7and10.tumor, idents=c('Ctrl','Ivosidenib'))
ivo<-FindNeighbors(ivo,reduction='harmony')

## Computing nearest neighbor graph

## Computing SNN

ivo<-association.Seurat(
  seurat_object = ivo, 
  test_var = 'ConditionNum', 
  samplem_key = 'Time', 
  graph_use = 'SCT_nn', 
  verbose = TRUE,
  batches = NULL, ## no batch variables to include
  covs = NULL, ## no covariates to include 
)

## Build NAM PCs

## stopping after 4 steps

## only one unique batch supplied to qc

## only one unique batch supplied to prep

## Perform association testing

## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.

## computing neighborhood-level FDRs

FeaturePlot(ivo, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', ivo@reductions$cna@misc$p))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(ivo, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')

## Warning in FeaturePlot(ivo, features = c("cna_ncorrs_fdr10")): All cells have
## the same value (0) of cna_ncorrs_fdr10.

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

DimPlot(ivo, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')

xrt<-subset(slice7and10.tumor, idents=c('Ctrl','XRT/TMZ'))
xrt<-FindNeighbors(xrt,reduction='harmony')

## Computing nearest neighbor graph

## Computing SNN

xrt<-association.Seurat(
  seurat_object = xrt, 
  test_var = 'ConditionNum', 
  samplem_key = 'Time', 
  graph_use = 'SCT_nn', 
  verbose = TRUE,
  batches = NULL, ## no batch variables to include
  covs = NULL, ## no covariates to include 
)

## Build NAM PCs

## stopping after 4 steps

## only one unique batch supplied to qc

## only one unique batch supplied to prep

## Perform association testing

## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.

## computing neighborhood-level FDRs

FeaturePlot(xrt, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', xrt@reductions$cna@misc$p))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(xrt, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')

## Warning in FeaturePlot(xrt, features = c("cna_ncorrs_fdr10")): All cells have
## the same value (0) of cna_ncorrs_fdr10.

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

DimPlot(xrt, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')

ivovsxrt<-subset(slice7and10.tumor, idents=c('Ivosidenib','XRT/TMZ'))
ivovsxrt<-FindNeighbors(ivovsxrt,reduction='harmony')

## Computing nearest neighbor graph

## Computing SNN

ivovsxrt<-association.Seurat(
  seurat_object = ivovsxrt, 
  test_var = 'ConditionNum', 
  samplem_key = 'Time', 
  graph_use = 'SCT_nn', 
  verbose = TRUE,
  batches = NULL, ## no batch variables to include
  covs = NULL, ## no covariates to include 
)

## Build NAM PCs

## stopping after 4 steps

## only one unique batch supplied to qc

## only one unique batch supplied to prep

## Perform association testing

## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.

## computing neighborhood-level FDRs

FeaturePlot(ivovsxrt, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', ivovsxrt@reductions$cna@misc$p))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(ivovsxrt, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')

## Warning in FeaturePlot(ivovsxrt, features = c("cna_ncorrs_fdr10")): All cells
## have the same value (0) of cna_ncorrs_fdr10.

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

DimPlot(ivovsxrt, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')

##Slingshot

cell_pal <- function(cell_vars, pal_fun,...) {
  if (is.numeric(cell_vars)) {
    pal <- pal_fun(100, ...)
    return(pal[cut(cell_vars, breaks = 100)])
  } else {
    categories <- sort(unique(cell_vars))
    pal <- setNames(pal_fun(length(categories), ...), categories)
    return(pal[cell_vars])
  }
}


library(TSCAN)

## Loading required package: TrajectoryUtils

library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

library(scater)

## Loading required package: scuttle

library(viridis)

## Loading required package: viridisLite

## 
## Attaching package: 'viridis'

## The following object is masked from 'package:scales':
## 
##     viridis_pal

suppressPackageStartupMessages({
  library(slingshot)
})

set.seed(1)
sds <- slingshot(Embeddings(slice7and10.tumor, "umap"), clusterLabels = slice7and10.tumor$seurat_clusters)
sds <-SlingshotDataSet(sds)
cell_colors <- cell_pal(slice7and10.tumor$seurat_clusters, brewer_pal("qual", "Set2"))
cell_colors_clust <- cell_pal(slice7and10.tumor$seurat_clusters, hue_pal())

plot(reducedDim(sds), col = cell_colors_clust, pch = 16, cex = 0.5)
lines(sds, lwd = 2, type = 'lineages', col = 'black')

nc <- 3
pt <- slingPseudotime(sds)
nms <- colnames(pt)
nr <- ceiling(length(nms)/nc)
pal <- viridis(100, end = 0.95)
plot(reducedDim(sds), col = cell_colors_clust, pch = 16, cex = 0.5)

par(mfrow = c(nr, nc))
for (i in nms) {
  colors <- pal[cut(pt[,i], breaks = 100)]
  plot(reducedDim(sds), col = colors, pch = 16, cex = 0.5, main = i)
  lines(sds, lwd = 2, col = 'black', type = 'lineages')
}

sds@lineages

## $Lineage1
## [1] "7" "0" "4" "5"
## 
## $Lineage2
## [1] "7" "0" "4" "2"
## 
## $Lineage3
## [1] "7" "0" "4" "1"
## 
## $Lineage4
## [1] "7" "0" "4" "3"
## 
## $Lineage5
## [1] "7" "0" "4" "6"

# top_hvg
top_hvg <- HVFInfo(slice7and10.tumor) %>% 
  mutate(., bc = rownames(.)) %>% 
  arrange(desc(variance)) %>% 
  top_n(300, variance) %>% 
  pull(bc)

#Slingshot Genes Lineage 1

dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,1], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)

## 
## Attaching package: 'rsample'

## The following object is masked from 'package:scater':
## 
##     bootstraps

## The following object is masked from 'package:Rcpp':
## 
##     populate

library(tidymodels)

## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.0 ──

## ✔ broom        1.0.4     ✔ recipes      1.0.6
## ✔ dials        1.2.0     ✔ tune         1.1.1
## ✔ infer        1.0.4     ✔ workflows    1.1.3
## ✔ modeldata    1.1.0     ✔ workflowsets 1.0.1
## ✔ parsnip      1.1.0     ✔ yardstick    1.2.0

## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ rsample::bootstraps() masks scater::bootstraps()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ scales::discard()     masks purrr::discard()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ recipes::fixed()      masks stringr::fixed()
## ✖ infer::generate()     masks sctransform::generate()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ rsample::populate()   masks Rcpp::populate()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ .GlobalEnv::slice()   masks dplyr::slice(), IRanges::slice()
## ✖ yardstick::spec()     masks readr::spec()
## ✖ recipes::step()       masks stats::step()
## ✖ recipes::update()     masks MAST::update(), GenomicRanges::update(), stats4::update(), stats::update()
## ✖ glmGamPoi::vars()     masks dplyr::vars(), ggplot2::vars()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org

library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)

model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
  set_engine("ranger", importance = "impurity", num.threads = 3) %>%
  fit(pseudotime ~ ., data = dat_train)

val_results <- dat_val %>% 
  mutate(estimate = predict(model, .[,-1]) %>% pull()) %>% 
  select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)

summary(dat_use_df$pseudotime)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.591   5.788   5.925   8.055  11.726

var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])


top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))

par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
  colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
  plot(reducedDim(sds), col = colors, 
       pch = 16, cex = 0.5, main = top_genes[i])
  lines(sds, lwd = 2, col = 'black', type = 'lineages')
}

#Slingshot Genes Lineage 2

dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,2], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
library(tidymodels)
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)

model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
  set_engine("ranger", importance = "impurity", num.threads = 3) %>%
  fit(pseudotime ~ ., data = dat_train)

val_results <- dat_val %>% 
  mutate(estimate = predict(model, .[,-1]) %>% pull()) %>% 
  select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)

summary(dat_use_df$pseudotime)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.573   5.713   5.978   8.325  11.955

var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])


top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))

par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
  colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
  plot(reducedDim(sds), col = colors, 
       pch = 16, cex = 0.5, main = top_genes[i])
  lines(sds, lwd = 2, col = 'black', type = 'lineages')
}

#Slingshot Genes Lineage 3

dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,3], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
library(tidymodels)
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)

model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
  set_engine("ranger", importance = "impurity", num.threads = 3) %>%
  fit(pseudotime ~ ., data = dat_train)

val_results <- dat_val %>% 
  mutate(estimate = predict(model, .[,-1]) %>% pull()) %>% 
  select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)

summary(dat_use_df$pseudotime)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.705   6.286   6.482   9.213  13.898

var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])


top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))

par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
  colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
  plot(reducedDim(sds), col = colors, 
       pch = 16, cex = 0.5, main = top_genes[i])
  lines(sds, lwd = 2, col = 'black', type = 'lineages')
}

#Slingshot Genes Lineage 4

dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,4], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
library(tidymodels)
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)

model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
  set_engine("ranger", importance = "impurity", num.threads = 3) %>%
  fit(pseudotime ~ ., data = dat_train)

val_results <- dat_val %>% 
  mutate(estimate = predict(model, .[,-1]) %>% pull()) %>% 
  select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)

summary(dat_use_df$pseudotime)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.534   5.723   5.983   8.478  11.557

var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])


top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))

par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
  colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
  plot(reducedDim(sds), col = colors, 
       pch = 16, cex = 0.5, main = top_genes[i])
  lines(sds, lwd = 2, col = 'black', type = 'lineages')
}

#Slingshot Genes Lineage 5

dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,5], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
library(tidymodels)
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)

model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
  set_engine("ranger", importance = "impurity", num.threads = 3) %>%
  fit(pseudotime ~ ., data = dat_train)

val_results <- dat_val %>% 
  mutate(estimate = predict(model, .[,-1]) %>% pull()) %>% 
  select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)

summary(dat_use_df$pseudotime)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.691   5.947   6.370   8.715  12.980

var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])


top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))

par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
  colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
  plot(reducedDim(sds), col = colors, 
       pch = 16, cex = 0.5, main = top_genes[i])
  lines(sds, lwd = 2, col = 'black', type = 'lineages')
}

Code to map genes

#par(mfrow = c(1, 1))
#par(mar = c(5.1, 4.1, 4.1, 2.1))
#gene <- "Ifng"
#colors <- pal[cut(dat_use[,gene], breaks = 100)]
#plot(reducedDim(sds), col = colors, 
#     pch = 16, cex = 0.5, main = gene)
#lines(sds, lwd = 2, col = 'black', type = 'lineages')

UW44 Patient Slice Culture Analysis pt.3

Code to map genes