#Loading the Data
library(Seurat)
## Attaching SeuratObject
library(sctransform)
library(MAST)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.2 ✔ purrr 1.0.1
## ✔ tibble 3.2.1 ✔ dplyr 1.1.2
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
library(rcna)
library(openxlsx)
library(HGNChelper)
library(harmony)
## Loading required package: Rcpp
library(glmGamPoi)
##
## Attaching package: 'glmGamPoi'
##
## The following object is masked from 'package:dplyr':
##
## vars
##
## The following object is masked from 'package:ggplot2':
##
## vars
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(dplyr)
library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot)
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))
## List of 1
## $ plot.title:List of 11
## ..$ family : NULL
## ..$ face : chr "plain"
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0.5
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
library(forcats)
setwd('/hpc/group/patellab/aam131/UW44SliceCultureNew/UW44scx')
slice = readRDS("annotatedUW44Data.RDS")
slice$Time <- factor(slice$Time, levels = c("D0", "D2", "D4","D5","D7","D10","D12","D14","D16"))
new.cluster.ids <- c("Tumor","Tumor","Tumor", "Radial Glial Cells","Tumor", "Microglia","Oligodendrocytes","Microglia", "Neuroepithelial Cells","Astrocytes","Oligodendrocytes","Pericytes","Microglia","Endothelial Cells","Neurons","Astrocytes","Neurons")
names(new.cluster.ids) <- levels(slice)
slice <- RenameIdents(slice, new.cluster.ids)
slice$AnnotedClustedShallow <- Idents(slice)
Idents(slice)<-'Time'
slice7and10<-subset(slice, idents=c('D7','D10'))
#Subsetting the Tumor
Idents(slice7and10)<-'AnnotedClustedShallow'
slice7and10.tumor <- subset(x = slice7and10, idents = c("Tumor"))
slice7and10.tumor <- FindVariableFeatures(object = slice7and10.tumor)
slice7and10.tumor <- RunPCA(object = slice7and10.tumor, verbose = TRUE, npcs = 100)
## PC_ 1
## Positive: LRMDA, FRMD4A, MSR1, SLC11A1, KCNMA1, FMN1, RBM47, NKAIN2, LNCAROD, MERTK
## LYN, PLCG2, EEPD1, DOCK8, CTSB, CNTNAP4, DSCAM, CPQ, BMP2K, TNS3
## FRMD4B, SAT1, SLC16A10, DOCK4, APBB1IP, EPB41L3, MAP7, KYNU, PPP2R2B, ELMO1
## Negative: RASGEF1B, CNTNAP2, GPC6, LRRC4C, TSHZ2, ALK, NRG3, NPAS3, FGF14, KCNIP4
## DCBLD2, LRFN5, GPR39, IL33, RGS6, PARM1, FAM155A, ADAMTSL1, PCDH7, FLRT2
## MMP16, DACH1, EHBP1, ADAMTS16, CACNB2, ROBO1, CADPS, HMGA2, NRCAM, SLIT2
## PC_ 2
## Positive: RASGEF1B, GPC6, ADAMTSL1, TSHZ2, CHI3L1, NEAT1, IL33, NAMPT, HMGA2, DPYD
## LRFN5, SLIT2, TENM3, BRINP2, LRMDA, PSD3, PLXDC2, NFKBIZ, KIF26B, AC083837.1
## PTX3, DCBLD2, HDAC9, BICC1, SLC39A14, LINC01411, PDZD2, BNC2, AL078604.4, ST6GALNAC5
## Negative: NXPH1, CA10, LHFPL3, DMD, LRRTM3, SNTG1, KCND2, DSCAM, SGCZ, CHST9
## LRRTM4, DCLK2, DGKB, NRXN3, AQP4-AS1, PCDH15, ERBB4, GRID2, CADM2, DLGAP1
## CTNNA3, SOX6, LRP1B, OPHN1, ROBO2, PTPRZ1, AL139383.1, MYO16, LSAMP, CSMD3
## PC_ 3
## Positive: CNTNAP2, ANKS1B, MMP16, LRRC4C, KCNQ5, DCBLD2, ALK, FLRT2, NRG3, CHSY3
## NEBL, SLC24A3, AL139383.1, OXR1, GRM7, DACH1, ROBO1, PCDH9, AC069437.1, PDGFRA
## MEIS1, TRPM3, PTPRZ1, PLCXD3, IGF2BP2, LDLRAD4, LINC00907, LRMDA, AL008633.1, P3H2
## Negative: CHI3L1, RBFOX1, SDK1, DIAPH3, SLIT2, BRINP2, DSCAM, DPYD, SGCZ, RASGEF1B
## NAMPT, GALNT17, AC083837.1, CHST9, NRG1, PSD3, APOLD1, GPC6, PDZD2, AQP4-AS1
## NTM, GLIS3, ZFPM2, ADAMTSL1, NRXN1, CDH13, LHFPL3, PTX3, BICC1, CACNA2D3
## PC_ 4
## Positive: DIAPH3, HS3ST5, LRRC4C, RBFOX1, SNTG2, HTR4, FGF13, HMCN2, HDAC2-AS2, PDE4D
## NEK10, PLCB1, PLCXD3, CACNA2D3, GUCY1A2, HMGA2, APOLD1, SLC4A7, CDH13, BRIP1
## KIF18B, DSCAM, POLQ, DGKG, KCNQ5, SLC44A5, ST6GALNAC5, AC007319.1, ASPM, DCBLD2
## Negative: RASGEF1B, NAMPT, DCLK2, CHI3L1, SCN1A, SULF1, CNTNAP2, CA10, DPYD, AL139383.1
## AC083837.1, CHST9, DMD, AQP4-AS1, LRRTM3, NEAT1, TMEM108, RBMS3, NPAS3, PSD3
## FGF14, PDZD2, TSHZ2, IL33, NXPH1, GPC6, LHFPL3, TCF7L2, CACNB2, PRKD1
## PC_ 5
## Positive: RASGEF1B, RGS6, PARM1, NEK10, PLXDC2, GPR39, ADAMTS16, LRRC4C, ALK, EPHA5
## GPM6A, SNTG2, TNR, LRFN5, TAFA2, KCNIP4, ABLIM1, SYBU, FGF1, ST8SIA4
## AL139231.1, BRINP2, HTR1E, AC124254.2, GRIK2, EFEMP1, COL14A1, PARD3B, EPHA3, SLC4A7
## Negative: HMGA2, DIAPH3, DCBLD2, MMP16, SLC44A5, HMGA2-AS1, CSMD1, LHFPL3, PDZRN3, FLRT2
## PBX3, NRXN3, SGCZ, HS2ST1, LINC01411, FAM155A, GRIA4, APOLD1, CACNB2, GPC6
## ZFPM2, RRM2, BRIP1, GALNT17, KIF18B, KCND2, ASPM, KCNQ5, BTBD11, MDGA2
slice7and10.tumor <- ProjectDim(object = slice7and10.tumor)
## PC_ 1
## Positive: LRMDA, FRMD4A, MSR1, SLC11A1, KCNMA1, FMN1, RBM47, NKAIN2, LNCAROD, MERTK
## LYN, PLCG2, EEPD1, DOCK8, CTSB, CNTNAP4, DSCAM, CPQ, BMP2K, TNS3
## Negative: RASGEF1B, CNTNAP2, GPC6, LRRC4C, TSHZ2, ALK, NRG3, NPAS3, FGF14, KCNIP4
## DCBLD2, LRFN5, GPR39, IL33, RGS6, PARM1, FAM155A, ADAMTSL1, PCDH7, FLRT2
## PC_ 2
## Positive: RASGEF1B, GPC6, ADAMTSL1, TSHZ2, CHI3L1, NEAT1, IL33, NAMPT, HMGA2, DPYD
## LRFN5, SLIT2, TENM3, BRINP2, LRMDA, PSD3, PLXDC2, NFKBIZ, KIF26B, AC083837.1
## Negative: NXPH1, CA10, LHFPL3, DMD, LRRTM3, SNTG1, KCND2, DSCAM, SGCZ, CHST9
## LRRTM4, DCLK2, DGKB, NRXN3, AQP4-AS1, PCDH15, ERBB4, GRID2, CADM2, DLGAP1
## PC_ 3
## Positive: CNTNAP2, ANKS1B, MMP16, LRRC4C, KCNQ5, DCBLD2, ALK, FLRT2, NRG3, CHSY3
## NEBL, SLC24A3, AL139383.1, OXR1, GRM7, DACH1, ROBO1, PCDH9, AC069437.1, PDGFRA
## Negative: CHI3L1, RBFOX1, SDK1, DIAPH3, SLIT2, BRINP2, DSCAM, DPYD, SGCZ, RASGEF1B
## NAMPT, GALNT17, AC083837.1, CHST9, NRG1, PSD3, APOLD1, GPC6, PDZD2, AQP4-AS1
## PC_ 4
## Positive: DIAPH3, HS3ST5, LRRC4C, RBFOX1, SNTG2, HTR4, FGF13, HMCN2, HDAC2-AS2, PDE4D
## NEK10, PLCB1, PLCXD3, CACNA2D3, GUCY1A2, HMGA2, APOLD1, SLC4A7, CDH13, BRIP1
## Negative: RASGEF1B, NAMPT, DCLK2, CHI3L1, SCN1A, SULF1, CNTNAP2, CA10, DPYD, AL139383.1
## AC083837.1, CHST9, DMD, AQP4-AS1, LRRTM3, NEAT1, TMEM108, RBMS3, NPAS3, PSD3
## PC_ 5
## Positive: RASGEF1B, RGS6, PARM1, NEK10, PLXDC2, GPR39, ADAMTS16, LRRC4C, ALK, EPHA5
## GPM6A, SNTG2, TNR, LRFN5, TAFA2, KCNIP4, ABLIM1, SYBU, FGF1, ST8SIA4
## Negative: HMGA2, DIAPH3, DCBLD2, MMP16, SLC44A5, HMGA2-AS1, CSMD1, LHFPL3, PDZRN3, FLRT2
## PBX3, NRXN3, SGCZ, HS2ST1, LINC01411, FAM155A, GRIA4, APOLD1, CACNB2, GPC6
slice7and10.tumor <- FindNeighbors(slice7and10.tumor, reduction = "pca", dims = 1:40)
## Computing nearest neighbor graph
## Computing SNN
slice7and10.tumor <- FindClusters(slice7and10.tumor, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3075
## Number of edges: 144181
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7405
## Number of communities: 8
## Elapsed time: 0 seconds
slice7and10.tumor <- RunUMAP(slice7and10.tumor, reduction = "pca", dims = 1:40)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 22:57:54 UMAP embedding parameters a = 0.9922 b = 1.112
## 22:57:54 Read 3075 rows and found 40 numeric columns
## 22:57:54 Using Annoy for neighbor search, n_neighbors = 30
## 22:57:54 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 22:57:54 Writing NN index file to temp file /tmp/RtmpfGXSUW/file3e5ced442940cd
## 22:57:54 Searching Annoy index using 1 thread, search_k = 3000
## 22:57:55 Annoy recall = 100%
## 22:57:55 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 22:57:57 Initializing from normalized Laplacian + noise (using irlba)
## 22:57:57 Commencing optimization for 500 epochs, with 124212 positive edges
## 22:58:00 Optimization finished
DimPlot(slice7and10.tumor,pt.size = 1.5, label=TRUE)+ NoLegend()
DimPlot(slice7and10.tumor,pt.size = 1.5, split.by="Condition")+ NoLegend()
#CNA Analysis of the Tumor Clusters
library(harmony)
library(Seurat)
library(sctransform)
library(MAST)
library(tidyverse)
library(rcna)
library(glmGamPoi)
library(reshape2)
library(dplyr)
library(ggrepel)
options(ggrepel.max.overlaps = Inf)
library(cowplot)
theme_set(theme_cowplot())
theme(plot.title = element_text(hjust = 0.5,face = "plain"))
## List of 1
## $ plot.title:List of 11
## ..$ family : NULL
## ..$ face : chr "plain"
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0.5
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
library(forcats)
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:SummarizedExperiment':
##
## trim
## The following object is masked from 'package:GenomicRanges':
##
## trim
## The following object is masked from 'package:IRanges':
##
## trim
library(ggthemes)
##
## Attaching package: 'ggthemes'
## The following object is masked from 'package:cowplot':
##
## theme_map
slice7and10.tumor@meta.data$ConditionNum <- as.numeric(factor(slice7and10.tumor@meta.data$Condition, c('Ctrl', 'Ivosidenib','XRT/TMZ')))
Idents(slice7and10.tumor)<-'Condition'
ivo<-subset(slice7and10.tumor, idents=c('Ctrl','Ivosidenib'))
ivo<-FindNeighbors(ivo,reduction='harmony')
## Computing nearest neighbor graph
## Computing SNN
ivo<-association.Seurat(
seurat_object = ivo,
test_var = 'ConditionNum',
samplem_key = 'Time',
graph_use = 'SCT_nn',
verbose = TRUE,
batches = NULL, ## no batch variables to include
covs = NULL, ## no covariates to include
)
## Build NAM PCs
## stopping after 4 steps
## only one unique batch supplied to qc
## only one unique batch supplied to prep
## Perform association testing
## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.
## computing neighborhood-level FDRs
FeaturePlot(ivo, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', ivo@reductions$cna@misc$p))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
FeaturePlot(ivo, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')
## Warning in FeaturePlot(ivo, features = c("cna_ncorrs_fdr10")): All cells have
## the same value (0) of cna_ncorrs_fdr10.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
DimPlot(ivo, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')
xrt<-subset(slice7and10.tumor, idents=c('Ctrl','XRT/TMZ'))
xrt<-FindNeighbors(xrt,reduction='harmony')
## Computing nearest neighbor graph
## Computing SNN
xrt<-association.Seurat(
seurat_object = xrt,
test_var = 'ConditionNum',
samplem_key = 'Time',
graph_use = 'SCT_nn',
verbose = TRUE,
batches = NULL, ## no batch variables to include
covs = NULL, ## no covariates to include
)
## Build NAM PCs
## stopping after 4 steps
## only one unique batch supplied to qc
## only one unique batch supplied to prep
## Perform association testing
## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.
## computing neighborhood-level FDRs
FeaturePlot(xrt, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', xrt@reductions$cna@misc$p))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
FeaturePlot(xrt, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')
## Warning in FeaturePlot(xrt, features = c("cna_ncorrs_fdr10")): All cells have
## the same value (0) of cna_ncorrs_fdr10.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
DimPlot(xrt, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')
ivovsxrt<-subset(slice7and10.tumor, idents=c('Ivosidenib','XRT/TMZ'))
ivovsxrt<-FindNeighbors(ivovsxrt,reduction='harmony')
## Computing nearest neighbor graph
## Computing SNN
ivovsxrt<-association.Seurat(
seurat_object = ivovsxrt,
test_var = 'ConditionNum',
samplem_key = 'Time',
graph_use = 'SCT_nn',
verbose = TRUE,
batches = NULL, ## no batch variables to include
covs = NULL, ## no covariates to include
)
## Build NAM PCs
## stopping after 4 steps
## only one unique batch supplied to qc
## only one unique batch supplied to prep
## Perform association testing
## Warning in .association(NAMsvd = list(nam_res$NAM_sampleXpc, nam_res$NAM_svs, :
## data supported use of 2 NAM PCs, which is the maximum considered. Consider
## allowing more PCs by using the "ks" argument.
## computing neighborhood-level FDRs
FeaturePlot(ivovsxrt, features = c('cna_ncorrs'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', color = 'Correlation',subtitle = sprintf('global p=%0.3f', ivovsxrt@reductions$cna@misc$p))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
FeaturePlot(ivovsxrt, features = c('cna_ncorrs_fdr10'))[[1]] + scale_color_gradient2_tableau() + labs(title = 'CNA by Condition', subtitle = 'Filtered for FDR<0.10', color = 'Correlation')
## Warning in FeaturePlot(ivovsxrt, features = c("cna_ncorrs_fdr10")): All cells
## have the same value (0) of cna_ncorrs_fdr10.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
DimPlot(ivovsxrt, group.by = 'Condition')[[1]] + scale_color_tableau() + labs(title = 'Condition')
##Slingshot
cell_pal <- function(cell_vars, pal_fun,...) {
if (is.numeric(cell_vars)) {
pal <- pal_fun(100, ...)
return(pal[cut(cell_vars, breaks = 100)])
} else {
categories <- sort(unique(cell_vars))
pal <- setNames(pal_fun(length(categories), ...), categories)
return(pal[cell_vars])
}
}
library(TSCAN)
## Loading required package: TrajectoryUtils
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(scater)
## Loading required package: scuttle
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
suppressPackageStartupMessages({
library(slingshot)
})
set.seed(1)
sds <- slingshot(Embeddings(slice7and10.tumor, "umap"), clusterLabels = slice7and10.tumor$seurat_clusters)
sds <-SlingshotDataSet(sds)
cell_colors <- cell_pal(slice7and10.tumor$seurat_clusters, brewer_pal("qual", "Set2"))
cell_colors_clust <- cell_pal(slice7and10.tumor$seurat_clusters, hue_pal())
plot(reducedDim(sds), col = cell_colors_clust, pch = 16, cex = 0.5)
lines(sds, lwd = 2, type = 'lineages', col = 'black')
nc <- 3
pt <- slingPseudotime(sds)
nms <- colnames(pt)
nr <- ceiling(length(nms)/nc)
pal <- viridis(100, end = 0.95)
plot(reducedDim(sds), col = cell_colors_clust, pch = 16, cex = 0.5)
par(mfrow = c(nr, nc))
for (i in nms) {
colors <- pal[cut(pt[,i], breaks = 100)]
plot(reducedDim(sds), col = colors, pch = 16, cex = 0.5, main = i)
lines(sds, lwd = 2, col = 'black', type = 'lineages')
}
sds@lineages
## $Lineage1
## [1] "7" "0" "4" "5"
##
## $Lineage2
## [1] "7" "0" "4" "2"
##
## $Lineage3
## [1] "7" "0" "4" "1"
##
## $Lineage4
## [1] "7" "0" "4" "3"
##
## $Lineage5
## [1] "7" "0" "4" "6"
# top_hvg
top_hvg <- HVFInfo(slice7and10.tumor) %>%
mutate(., bc = rownames(.)) %>%
arrange(desc(variance)) %>%
top_n(300, variance) %>%
pull(bc)
#Slingshot Genes Lineage 1
dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,1], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
##
## Attaching package: 'rsample'
## The following object is masked from 'package:scater':
##
## bootstraps
## The following object is masked from 'package:Rcpp':
##
## populate
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.0 ──
## ✔ broom 1.0.4 ✔ recipes 1.0.6
## ✔ dials 1.2.0 ✔ tune 1.1.1
## ✔ infer 1.0.4 ✔ workflows 1.1.3
## ✔ modeldata 1.1.0 ✔ workflowsets 1.0.1
## ✔ parsnip 1.1.0 ✔ yardstick 1.2.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ rsample::bootstraps() masks scater::bootstraps()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ scales::discard() masks purrr::discard()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ infer::generate() masks sctransform::generate()
## ✖ dplyr::lag() masks stats::lag()
## ✖ rsample::populate() masks Rcpp::populate()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ .GlobalEnv::slice() masks dplyr::slice(), IRanges::slice()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## ✖ recipes::update() masks MAST::update(), GenomicRanges::update(), stats4::update(), stats::update()
## ✖ glmGamPoi::vars() masks dplyr::vars(), ggplot2::vars()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)
model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
set_engine("ranger", importance = "impurity", num.threads = 3) %>%
fit(pseudotime ~ ., data = dat_train)
val_results <- dat_val %>%
mutate(estimate = predict(model, .[,-1]) %>% pull()) %>%
select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)
summary(dat_use_df$pseudotime)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.591 5.788 5.925 8.055 11.726
var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])
top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))
par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
plot(reducedDim(sds), col = colors,
pch = 16, cex = 0.5, main = top_genes[i])
lines(sds, lwd = 2, col = 'black', type = 'lineages')
}
#Slingshot Genes Lineage 2
dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,2], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
library(tidymodels)
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)
model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
set_engine("ranger", importance = "impurity", num.threads = 3) %>%
fit(pseudotime ~ ., data = dat_train)
val_results <- dat_val %>%
mutate(estimate = predict(model, .[,-1]) %>% pull()) %>%
select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)
summary(dat_use_df$pseudotime)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.573 5.713 5.978 8.325 11.955
var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])
top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))
par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
plot(reducedDim(sds), col = colors,
pch = 16, cex = 0.5, main = top_genes[i])
lines(sds, lwd = 2, col = 'black', type = 'lineages')
}
#Slingshot Genes Lineage 3
dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,3], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
library(tidymodels)
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)
model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
set_engine("ranger", importance = "impurity", num.threads = 3) %>%
fit(pseudotime ~ ., data = dat_train)
val_results <- dat_val %>%
mutate(estimate = predict(model, .[,-1]) %>% pull()) %>%
select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)
summary(dat_use_df$pseudotime)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.705 6.286 6.482 9.213 13.898
var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])
top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))
par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
plot(reducedDim(sds), col = colors,
pch = 16, cex = 0.5, main = top_genes[i])
lines(sds, lwd = 2, col = 'black', type = 'lineages')
}
#Slingshot Genes Lineage 4
dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,4], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
library(tidymodels)
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)
model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
set_engine("ranger", importance = "impurity", num.threads = 3) %>%
fit(pseudotime ~ ., data = dat_train)
val_results <- dat_val %>%
mutate(estimate = predict(model, .[,-1]) %>% pull()) %>%
select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)
summary(dat_use_df$pseudotime)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.534 5.723 5.983 8.478 11.557
var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])
top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))
par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
plot(reducedDim(sds), col = colors,
pch = 16, cex = 0.5, main = top_genes[i])
lines(sds, lwd = 2, col = 'black', type = 'lineages')
}
#Slingshot Genes Lineage 5
dat_use <- t(GetAssayData(slice7and10.tumor, slot = "data")[top_hvg,])
dat_use_df <- cbind(slingPseudotime(sds)[,5], dat_use) # Do curve 1, so 1nd columnn
colnames(dat_use_df)[1] <- "pseudotime"
dat_use_df <- as.data.frame(dat_use_df[!is.na(dat_use_df[,1]),])
library(rsample)
library(tidymodels)
library(ranger)
dat_split <- initial_split(dat_use_df)
dat_train <- training(dat_split)
dat_val <- testing(dat_split)
model <- rand_forest(mtry = 200, trees = 1400, min_n = 15, mode = "regression") %>%
set_engine("ranger", importance = "impurity", num.threads = 3) %>%
fit(pseudotime ~ ., data = dat_train)
val_results <- dat_val %>%
mutate(estimate = predict(model, .[,-1]) %>% pull()) %>%
select(truth = pseudotime, estimate)
metrics(data = val_results, truth, estimate)
summary(dat_use_df$pseudotime)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.691 5.947 6.370 8.715 12.980
var_imp <- sort(model$fit$variable.importance, decreasing = TRUE)
top_genes_dec <- as.data.frame(names(var_imp)[1:100])
top_genes <- names(var_imp)[1:30]
par(mar = c(1, 1, 1, 1))
par(mfrow = c(5, 6))
for (i in seq_along(top_genes)) {
colors <- pal[cut(dat_use[,top_genes[i]], breaks = 100)]
plot(reducedDim(sds), col = colors,
pch = 16, cex = 0.5, main = top_genes[i])
lines(sds, lwd = 2, col = 'black', type = 'lineages')
}
#par(mfrow = c(1, 1))
#par(mar = c(5.1, 4.1, 4.1, 2.1))
#gene <- "Ifng"
#colors <- pal[cut(dat_use[,gene], breaks = 100)]
#plot(reducedDim(sds), col = colors,
# pch = 16, cex = 0.5, main = gene)
#lines(sds, lwd = 2, col = 'black', type = 'lineages')